aboutsummaryrefslogtreecommitdiffstats
path: root/arch/ppc64
diff options
context:
space:
mode:
Diffstat (limited to 'arch/ppc64')
-rw-r--r--arch/ppc64/Kconfig4
-rw-r--r--arch/ppc64/boot/addRamDisk.c207
-rw-r--r--arch/ppc64/kernel/Makefile16
-rw-r--r--arch/ppc64/kernel/asm-offsets.c1
-rw-r--r--arch/ppc64/kernel/cpu_setup_power4.S233
-rw-r--r--arch/ppc64/kernel/eeh.c943
-rw-r--r--arch/ppc64/kernel/firmware.c47
-rw-r--r--arch/ppc64/kernel/head.S84
-rw-r--r--arch/ppc64/kernel/idle.c1
-rw-r--r--arch/ppc64/kernel/ioctl32.c49
-rw-r--r--arch/ppc64/kernel/irq.c519
-rw-r--r--arch/ppc64/kernel/lparcfg.c611
-rw-r--r--arch/ppc64/kernel/misc.S8
-rw-r--r--arch/ppc64/kernel/nvram.c5
-rw-r--r--arch/ppc64/kernel/pacaData.c143
-rw-r--r--arch/ppc64/kernel/pci.c10
-rw-r--r--arch/ppc64/kernel/pci_dn.c21
-rw-r--r--arch/ppc64/kernel/proc_ppc64.c128
-rw-r--r--arch/ppc64/kernel/prom.c9
-rw-r--r--arch/ppc64/kernel/prom_init.c3
-rw-r--r--arch/ppc64/kernel/rtas_pci.c512
-rw-r--r--arch/ppc64/kernel/scanlog.c235
-rw-r--r--arch/ppc64/kernel/sysfs.c384
-rw-r--r--arch/ppc64/kernel/vdso.c5
24 files changed, 157 insertions, 4021 deletions
diff --git a/arch/ppc64/Kconfig b/arch/ppc64/Kconfig
index 29552348e581..c9d32db9d76a 100644
--- a/arch/ppc64/Kconfig
+++ b/arch/ppc64/Kconfig
@@ -297,6 +297,10 @@ config HAVE_ARCH_EARLY_PFN_TO_NID
297 def_bool y 297 def_bool y
298 depends on NEED_MULTIPLE_NODES 298 depends on NEED_MULTIPLE_NODES
299 299
300config ARCH_MEMORY_PROBE
301 def_bool y
302 depends on MEMORY_HOTPLUG
303
300# Some NUMA nodes have memory ranges that span 304# Some NUMA nodes have memory ranges that span
301# other nodes. Even though a pfn is valid and 305# other nodes. Even though a pfn is valid and
302# between a node's start and end pfns, it may not 306# between a node's start and end pfns, it may not
diff --git a/arch/ppc64/boot/addRamDisk.c b/arch/ppc64/boot/addRamDisk.c
index 7f2c09473394..c02a99952be7 100644
--- a/arch/ppc64/boot/addRamDisk.c
+++ b/arch/ppc64/boot/addRamDisk.c
@@ -5,11 +5,59 @@
5#include <sys/types.h> 5#include <sys/types.h>
6#include <sys/stat.h> 6#include <sys/stat.h>
7#include <string.h> 7#include <string.h>
8#include <elf.h>
8 9
9#define ElfHeaderSize (64 * 1024) 10#define ElfHeaderSize (64 * 1024)
10#define ElfPages (ElfHeaderSize / 4096) 11#define ElfPages (ElfHeaderSize / 4096)
11#define KERNELBASE (0xc000000000000000) 12#define KERNELBASE (0xc000000000000000)
13#define _ALIGN_UP(addr,size) (((addr)+((size)-1))&(~((size)-1)))
12 14
15struct addr_range {
16 unsigned long long addr;
17 unsigned long memsize;
18 unsigned long offset;
19};
20
21static int check_elf64(void *p, int size, struct addr_range *r)
22{
23 Elf64_Ehdr *elf64 = p;
24 Elf64_Phdr *elf64ph;
25
26 if (elf64->e_ident[EI_MAG0] != ELFMAG0 ||
27 elf64->e_ident[EI_MAG1] != ELFMAG1 ||
28 elf64->e_ident[EI_MAG2] != ELFMAG2 ||
29 elf64->e_ident[EI_MAG3] != ELFMAG3 ||
30 elf64->e_ident[EI_CLASS] != ELFCLASS64 ||
31 elf64->e_ident[EI_DATA] != ELFDATA2MSB ||
32 elf64->e_type != ET_EXEC || elf64->e_machine != EM_PPC64)
33 return 0;
34
35 if ((elf64->e_phoff + sizeof(Elf64_Phdr)) > size)
36 return 0;
37
38 elf64ph = (Elf64_Phdr *) ((unsigned long)elf64 +
39 (unsigned long)elf64->e_phoff);
40
41 r->memsize = (unsigned long)elf64ph->p_memsz;
42 r->offset = (unsigned long)elf64ph->p_offset;
43 r->addr = (unsigned long long)elf64ph->p_vaddr;
44
45#ifdef DEBUG
46 printf("PPC64 ELF file, ph:\n");
47 printf("p_type 0x%08x\n", elf64ph->p_type);
48 printf("p_flags 0x%08x\n", elf64ph->p_flags);
49 printf("p_offset 0x%016llx\n", elf64ph->p_offset);
50 printf("p_vaddr 0x%016llx\n", elf64ph->p_vaddr);
51 printf("p_paddr 0x%016llx\n", elf64ph->p_paddr);
52 printf("p_filesz 0x%016llx\n", elf64ph->p_filesz);
53 printf("p_memsz 0x%016llx\n", elf64ph->p_memsz);
54 printf("p_align 0x%016llx\n", elf64ph->p_align);
55 printf("... skipping 0x%08lx bytes of ELF header\n",
56 (unsigned long)elf64ph->p_offset);
57#endif
58
59 return 64;
60}
13void get4k(FILE *file, char *buf ) 61void get4k(FILE *file, char *buf )
14{ 62{
15 unsigned j; 63 unsigned j;
@@ -34,97 +82,92 @@ void death(const char *msg, FILE *fdesc, const char *fname)
34int main(int argc, char **argv) 82int main(int argc, char **argv)
35{ 83{
36 char inbuf[4096]; 84 char inbuf[4096];
37 FILE *ramDisk = NULL; 85 struct addr_range vmlinux;
38 FILE *sysmap = NULL; 86 FILE *ramDisk;
39 FILE *inputVmlinux = NULL; 87 FILE *inputVmlinux;
40 FILE *outputVmlinux = NULL; 88 FILE *outputVmlinux;
41 89
42 unsigned i = 0; 90 char *rd_name, *lx_name, *out_name;
43 unsigned long ramFileLen = 0; 91
44 unsigned long ramLen = 0; 92 size_t i;
45 unsigned long roundR = 0; 93 unsigned long ramFileLen;
46 94 unsigned long ramLen;
47 unsigned long sysmapFileLen = 0; 95 unsigned long roundR;
48 unsigned long sysmapLen = 0; 96 unsigned long offset_end;
49 unsigned long sysmapPages = 0; 97
50 char* ptr_end = NULL; 98 unsigned long kernelLen;
51 unsigned long offset_end = 0; 99 unsigned long actualKernelLen;
52 100 unsigned long round;
53 unsigned long kernelLen = 0; 101 unsigned long roundedKernelLen;
54 unsigned long actualKernelLen = 0; 102 unsigned long ramStartOffs;
55 unsigned long round = 0; 103 unsigned long ramPages;
56 unsigned long roundedKernelLen = 0; 104 unsigned long roundedKernelPages;
57 unsigned long ramStartOffs = 0; 105 unsigned long hvReleaseData;
58 unsigned long ramPages = 0;
59 unsigned long roundedKernelPages = 0;
60 unsigned long hvReleaseData = 0;
61 u_int32_t eyeCatcher = 0xc8a5d9c4; 106 u_int32_t eyeCatcher = 0xc8a5d9c4;
62 unsigned long naca = 0; 107 unsigned long naca;
63 unsigned long xRamDisk = 0; 108 unsigned long xRamDisk;
64 unsigned long xRamDiskSize = 0; 109 unsigned long xRamDiskSize;
65 long padPages = 0; 110 long padPages;
66 111
67 112
68 if (argc < 2) { 113 if (argc < 2) {
69 fprintf(stderr, "Name of RAM disk file missing.\n"); 114 fprintf(stderr, "Name of RAM disk file missing.\n");
70 exit(1); 115 exit(1);
71 } 116 }
117 rd_name = argv[1];
72 118
73 if (argc < 3) { 119 if (argc < 3) {
74 fprintf(stderr, "Name of System Map input file is missing.\n");
75 exit(1);
76 }
77
78 if (argc < 4) {
79 fprintf(stderr, "Name of vmlinux file missing.\n"); 120 fprintf(stderr, "Name of vmlinux file missing.\n");
80 exit(1); 121 exit(1);
81 } 122 }
123 lx_name = argv[2];
82 124
83 if (argc < 5) { 125 if (argc < 4) {
84 fprintf(stderr, "Name of vmlinux output file missing.\n"); 126 fprintf(stderr, "Name of vmlinux output file missing.\n");
85 exit(1); 127 exit(1);
86 } 128 }
129 out_name = argv[3];
87 130
88 131
89 ramDisk = fopen(argv[1], "r"); 132 ramDisk = fopen(rd_name, "r");
90 if ( ! ramDisk ) { 133 if ( ! ramDisk ) {
91 fprintf(stderr, "RAM disk file \"%s\" failed to open.\n", argv[1]); 134 fprintf(stderr, "RAM disk file \"%s\" failed to open.\n", rd_name);
92 exit(1); 135 exit(1);
93 } 136 }
94 137
95 sysmap = fopen(argv[2], "r"); 138 inputVmlinux = fopen(lx_name, "r");
96 if ( ! sysmap ) {
97 fprintf(stderr, "System Map file \"%s\" failed to open.\n", argv[2]);
98 exit(1);
99 }
100
101 inputVmlinux = fopen(argv[3], "r");
102 if ( ! inputVmlinux ) { 139 if ( ! inputVmlinux ) {
103 fprintf(stderr, "vmlinux file \"%s\" failed to open.\n", argv[3]); 140 fprintf(stderr, "vmlinux file \"%s\" failed to open.\n", lx_name);
104 exit(1); 141 exit(1);
105 } 142 }
106 143
107 outputVmlinux = fopen(argv[4], "w+"); 144 outputVmlinux = fopen(out_name, "w+");
108 if ( ! outputVmlinux ) { 145 if ( ! outputVmlinux ) {
109 fprintf(stderr, "output vmlinux file \"%s\" failed to open.\n", argv[4]); 146 fprintf(stderr, "output vmlinux file \"%s\" failed to open.\n", out_name);
110 exit(1); 147 exit(1);
111 } 148 }
112 149
113 150 i = fread(inbuf, 1, sizeof(inbuf), inputVmlinux);
114 151 if (i != sizeof(inbuf)) {
152 fprintf(stderr, "can not read vmlinux file %s: %u\n", lx_name, i);
153 exit(1);
154 }
155
156 i = check_elf64(inbuf, sizeof(inbuf), &vmlinux);
157 if (i == 0) {
158 fprintf(stderr, "You must have a linux kernel specified as argv[2]\n");
159 exit(1);
160 }
161
115 /* Input Vmlinux file */ 162 /* Input Vmlinux file */
116 fseek(inputVmlinux, 0, SEEK_END); 163 fseek(inputVmlinux, 0, SEEK_END);
117 kernelLen = ftell(inputVmlinux); 164 kernelLen = ftell(inputVmlinux);
118 fseek(inputVmlinux, 0, SEEK_SET); 165 fseek(inputVmlinux, 0, SEEK_SET);
119 printf("kernel file size = %d\n", kernelLen); 166 printf("kernel file size = %lu\n", kernelLen);
120 if ( kernelLen == 0 ) {
121 fprintf(stderr, "You must have a linux kernel specified as argv[3]\n");
122 exit(1);
123 }
124 167
125 actualKernelLen = kernelLen - ElfHeaderSize; 168 actualKernelLen = kernelLen - ElfHeaderSize;
126 169
127 printf("actual kernel length (minus ELF header) = %d\n", actualKernelLen); 170 printf("actual kernel length (minus ELF header) = %lu\n", actualKernelLen);
128 171
129 round = actualKernelLen % 4096; 172 round = actualKernelLen % 4096;
130 roundedKernelLen = actualKernelLen; 173 roundedKernelLen = actualKernelLen;
@@ -134,39 +177,7 @@ int main(int argc, char **argv)
134 roundedKernelPages = roundedKernelLen / 4096; 177 roundedKernelPages = roundedKernelLen / 4096;
135 printf("Vmlinux pages to copy = %ld/0x%lx \n", roundedKernelPages, roundedKernelPages); 178 printf("Vmlinux pages to copy = %ld/0x%lx \n", roundedKernelPages, roundedKernelPages);
136 179
137 180 offset_end = _ALIGN_UP(vmlinux.memsize, 4096);
138
139 /* Input System Map file */
140 /* (needs to be processed simply to determine if we need to add pad pages due to the static variables not being included in the vmlinux) */
141 fseek(sysmap, 0, SEEK_END);
142 sysmapFileLen = ftell(sysmap);
143 fseek(sysmap, 0, SEEK_SET);
144 printf("%s file size = %ld/0x%lx \n", argv[2], sysmapFileLen, sysmapFileLen);
145
146 sysmapLen = sysmapFileLen;
147
148 roundR = 4096 - (sysmapLen % 4096);
149 if (roundR) {
150 printf("Rounding System Map file up to a multiple of 4096, adding %ld/0x%lx \n", roundR, roundR);
151 sysmapLen += roundR;
152 }
153 printf("Rounded System Map size is %ld/0x%lx \n", sysmapLen, sysmapLen);
154
155 /* Process the Sysmap file to determine where _end is */
156 sysmapPages = sysmapLen / 4096;
157 /* read the whole file line by line, expect that it doesn't fail */
158 while ( fgets(inbuf, 4096, sysmap) ) ;
159 /* search for _end in the last page of the system map */
160 ptr_end = strstr(inbuf, " _end");
161 if (!ptr_end) {
162 fprintf(stderr, "Unable to find _end in the sysmap file \n");
163 fprintf(stderr, "inbuf: \n");
164 fprintf(stderr, "%s \n", inbuf);
165 exit(1);
166 }
167 printf("Found _end in the last page of the sysmap - backing up 10 characters it looks like %s", ptr_end-10);
168 /* convert address of _end in system map to hex offset. */
169 offset_end = (unsigned int)strtol(ptr_end-10, NULL, 16);
170 /* calc how many pages we need to insert between the vmlinux and the start of the ram disk */ 181 /* calc how many pages we need to insert between the vmlinux and the start of the ram disk */
171 padPages = offset_end/4096 - roundedKernelPages; 182 padPages = offset_end/4096 - roundedKernelPages;
172 183
@@ -194,7 +205,7 @@ int main(int argc, char **argv)
194 fseek(ramDisk, 0, SEEK_END); 205 fseek(ramDisk, 0, SEEK_END);
195 ramFileLen = ftell(ramDisk); 206 ramFileLen = ftell(ramDisk);
196 fseek(ramDisk, 0, SEEK_SET); 207 fseek(ramDisk, 0, SEEK_SET);
197 printf("%s file size = %ld/0x%lx \n", argv[1], ramFileLen, ramFileLen); 208 printf("%s file size = %ld/0x%lx \n", rd_name, ramFileLen, ramFileLen);
198 209
199 ramLen = ramFileLen; 210 ramLen = ramFileLen;
200 211
@@ -248,19 +259,19 @@ int main(int argc, char **argv)
248 /* fseek to the hvReleaseData pointer */ 259 /* fseek to the hvReleaseData pointer */
249 fseek(outputVmlinux, ElfHeaderSize + 0x24, SEEK_SET); 260 fseek(outputVmlinux, ElfHeaderSize + 0x24, SEEK_SET);
250 if (fread(&hvReleaseData, 4, 1, outputVmlinux) != 1) { 261 if (fread(&hvReleaseData, 4, 1, outputVmlinux) != 1) {
251 death("Could not read hvReleaseData pointer\n", outputVmlinux, argv[4]); 262 death("Could not read hvReleaseData pointer\n", outputVmlinux, out_name);
252 } 263 }
253 hvReleaseData = ntohl(hvReleaseData); /* Convert to native int */ 264 hvReleaseData = ntohl(hvReleaseData); /* Convert to native int */
254 printf("hvReleaseData is at %08x\n", hvReleaseData); 265 printf("hvReleaseData is at %08lx\n", hvReleaseData);
255 266
256 /* fseek to the hvReleaseData */ 267 /* fseek to the hvReleaseData */
257 fseek(outputVmlinux, ElfHeaderSize + hvReleaseData, SEEK_SET); 268 fseek(outputVmlinux, ElfHeaderSize + hvReleaseData, SEEK_SET);
258 if (fread(inbuf, 0x40, 1, outputVmlinux) != 1) { 269 if (fread(inbuf, 0x40, 1, outputVmlinux) != 1) {
259 death("Could not read hvReleaseData\n", outputVmlinux, argv[4]); 270 death("Could not read hvReleaseData\n", outputVmlinux, out_name);
260 } 271 }
261 /* Check hvReleaseData sanity */ 272 /* Check hvReleaseData sanity */
262 if (memcmp(inbuf, &eyeCatcher, 4) != 0) { 273 if (memcmp(inbuf, &eyeCatcher, 4) != 0) {
263 death("hvReleaseData is invalid\n", outputVmlinux, argv[4]); 274 death("hvReleaseData is invalid\n", outputVmlinux, out_name);
264 } 275 }
265 /* Get the naca pointer */ 276 /* Get the naca pointer */
266 naca = ntohl(*((u_int32_t*) &inbuf[0x0C])) - KERNELBASE; 277 naca = ntohl(*((u_int32_t*) &inbuf[0x0C])) - KERNELBASE;
@@ -269,13 +280,13 @@ int main(int argc, char **argv)
269 /* fseek to the naca */ 280 /* fseek to the naca */
270 fseek(outputVmlinux, ElfHeaderSize + naca, SEEK_SET); 281 fseek(outputVmlinux, ElfHeaderSize + naca, SEEK_SET);
271 if (fread(inbuf, 0x18, 1, outputVmlinux) != 1) { 282 if (fread(inbuf, 0x18, 1, outputVmlinux) != 1) {
272 death("Could not read naca\n", outputVmlinux, argv[4]); 283 death("Could not read naca\n", outputVmlinux, out_name);
273 } 284 }
274 xRamDisk = ntohl(*((u_int32_t *) &inbuf[0x0c])); 285 xRamDisk = ntohl(*((u_int32_t *) &inbuf[0x0c]));
275 xRamDiskSize = ntohl(*((u_int32_t *) &inbuf[0x14])); 286 xRamDiskSize = ntohl(*((u_int32_t *) &inbuf[0x14]));
276 /* Make sure a RAM disk isn't already present */ 287 /* Make sure a RAM disk isn't already present */
277 if ((xRamDisk != 0) || (xRamDiskSize != 0)) { 288 if ((xRamDisk != 0) || (xRamDiskSize != 0)) {
278 death("RAM disk is already attached to this kernel\n", outputVmlinux, argv[4]); 289 death("RAM disk is already attached to this kernel\n", outputVmlinux, out_name);
279 } 290 }
280 /* Fill in the values */ 291 /* Fill in the values */
281 *((u_int32_t *) &inbuf[0x0c]) = htonl(ramStartOffs); 292 *((u_int32_t *) &inbuf[0x0c]) = htonl(ramStartOffs);
@@ -285,15 +296,15 @@ int main(int argc, char **argv)
285 fflush(outputVmlinux); 296 fflush(outputVmlinux);
286 fseek(outputVmlinux, ElfHeaderSize + naca, SEEK_SET); 297 fseek(outputVmlinux, ElfHeaderSize + naca, SEEK_SET);
287 if (fwrite(inbuf, 0x18, 1, outputVmlinux) != 1) { 298 if (fwrite(inbuf, 0x18, 1, outputVmlinux) != 1) {
288 death("Could not write naca\n", outputVmlinux, argv[4]); 299 death("Could not write naca\n", outputVmlinux, out_name);
289 } 300 }
290 printf("Ram Disk of 0x%lx pages is attached to the kernel at offset 0x%08x\n", 301 printf("Ram Disk of 0x%lx pages is attached to the kernel at offset 0x%08lx\n",
291 ramPages, ramStartOffs); 302 ramPages, ramStartOffs);
292 303
293 /* Done */ 304 /* Done */
294 fclose(outputVmlinux); 305 fclose(outputVmlinux);
295 /* Set permission to executable */ 306 /* Set permission to executable */
296 chmod(argv[4], S_IRUSR|S_IWUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH); 307 chmod(out_name, S_IRUSR|S_IWUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH);
297 308
298 return 0; 309 return 0;
299} 310}
diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile
index c441aebe7648..58b19f107656 100644
--- a/arch/ppc64/kernel/Makefile
+++ b/arch/ppc64/kernel/Makefile
@@ -11,12 +11,11 @@ obj-y := misc.o prom.o
11 11
12endif 12endif
13 13
14obj-y += irq.o idle.o dma.o \ 14obj-y += idle.o dma.o \
15 align.o pacaData.o \ 15 align.o \
16 udbg.o ioctl32.o \ 16 udbg.o \
17 rtc.o \ 17 rtc.o \
18 cpu_setup_power4.o \ 18 iommu.o vdso.o
19 iommu.o sysfs.o vdso.o firmware.o
20obj-y += vdso32/ vdso64/ 19obj-y += vdso32/ vdso64/
21 20
22pci-obj-$(CONFIG_PPC_MULTIPLATFORM) += pci_dn.o pci_direct_iommu.o 21pci-obj-$(CONFIG_PPC_MULTIPLATFORM) += pci_dn.o pci_direct_iommu.o
@@ -31,15 +30,10 @@ endif
31obj-$(CONFIG_PPC_PSERIES) += udbg_16550.o 30obj-$(CONFIG_PPC_PSERIES) += udbg_16550.o
32 31
33obj-$(CONFIG_KEXEC) += machine_kexec.o 32obj-$(CONFIG_KEXEC) += machine_kexec.o
34obj-$(CONFIG_EEH) += eeh.o
35obj-$(CONFIG_PROC_FS) += proc_ppc64.o
36obj-$(CONFIG_MODULES) += module.o 33obj-$(CONFIG_MODULES) += module.o
37ifneq ($(CONFIG_PPC_MERGE),y) 34ifneq ($(CONFIG_PPC_MERGE),y)
38obj-$(CONFIG_MODULES) += ppc_ksyms.o 35obj-$(CONFIG_MODULES) += ppc_ksyms.o
39endif 36endif
40obj-$(CONFIG_PPC_RTAS) += rtas_pci.o
41obj-$(CONFIG_SCANLOG) += scanlog.o
42obj-$(CONFIG_LPARCFG) += lparcfg.o
43obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o 37obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o
44ifneq ($(CONFIG_PPC_MERGE),y) 38ifneq ($(CONFIG_PPC_MERGE),y)
45obj-$(CONFIG_BOOTX_TEXT) += btext.o 39obj-$(CONFIG_BOOTX_TEXT) += btext.o
@@ -52,8 +46,6 @@ obj-$(CONFIG_PPC_MAPLE) += udbg_16550.o
52 46
53obj-$(CONFIG_KPROBES) += kprobes.o 47obj-$(CONFIG_KPROBES) += kprobes.o
54 48
55CFLAGS_ioctl32.o += -Ifs/
56
57ifneq ($(CONFIG_PPC_MERGE),y) 49ifneq ($(CONFIG_PPC_MERGE),y)
58ifeq ($(CONFIG_PPC_ISERIES),y) 50ifeq ($(CONFIG_PPC_ISERIES),y)
59arch/ppc64/kernel/head.o: arch/powerpc/kernel/lparmap.s 51arch/ppc64/kernel/head.o: arch/powerpc/kernel/lparmap.s
diff --git a/arch/ppc64/kernel/asm-offsets.c b/arch/ppc64/kernel/asm-offsets.c
index bce9065da6cb..84ab5c18ef52 100644
--- a/arch/ppc64/kernel/asm-offsets.c
+++ b/arch/ppc64/kernel/asm-offsets.c
@@ -74,7 +74,6 @@ int main(void)
74 DEFINE(ICACHEL1LINESIZE, offsetof(struct ppc64_caches, iline_size)); 74 DEFINE(ICACHEL1LINESIZE, offsetof(struct ppc64_caches, iline_size));
75 DEFINE(ICACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_iline_size)); 75 DEFINE(ICACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_iline_size));
76 DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, ilines_per_page)); 76 DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, ilines_per_page));
77 DEFINE(PLATFORM, offsetof(struct systemcfg, platform));
78 DEFINE(PLATFORM_LPAR, PLATFORM_LPAR); 77 DEFINE(PLATFORM_LPAR, PLATFORM_LPAR);
79 78
80 /* paca */ 79 /* paca */
diff --git a/arch/ppc64/kernel/cpu_setup_power4.S b/arch/ppc64/kernel/cpu_setup_power4.S
deleted file mode 100644
index 1fb673c511ff..000000000000
--- a/arch/ppc64/kernel/cpu_setup_power4.S
+++ /dev/null
@@ -1,233 +0,0 @@
1/*
2 * This file contains low level CPU setup functions.
3 * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version
8 * 2 of the License, or (at your option) any later version.
9 *
10 */
11
12#include <linux/config.h>
13#include <asm/processor.h>
14#include <asm/page.h>
15#include <asm/cputable.h>
16#include <asm/ppc_asm.h>
17#include <asm/asm-offsets.h>
18#include <asm/cache.h>
19
20_GLOBAL(__970_cpu_preinit)
21 /*
22 * Do nothing if not running in HV mode
23 */
24 mfmsr r0
25 rldicl. r0,r0,4,63
26 beqlr
27
28 /*
29 * Deal only with PPC970 and PPC970FX.
30 */
31 mfspr r0,SPRN_PVR
32 srwi r0,r0,16
33 cmpwi r0,0x39
34 beq 1f
35 cmpwi r0,0x3c
36 beq 1f
37 cmpwi r0,0x44
38 bnelr
391:
40
41 /* Make sure HID4:rm_ci is off before MMU is turned off, that large
42 * pages are enabled with HID4:61 and clear HID5:DCBZ_size and
43 * HID5:DCBZ32_ill
44 */
45 li r0,0
46 mfspr r3,SPRN_HID4
47 rldimi r3,r0,40,23 /* clear bit 23 (rm_ci) */
48 rldimi r3,r0,2,61 /* clear bit 61 (lg_pg_en) */
49 sync
50 mtspr SPRN_HID4,r3
51 isync
52 sync
53 mfspr r3,SPRN_HID5
54 rldimi r3,r0,6,56 /* clear bits 56 & 57 (DCBZ*) */
55 sync
56 mtspr SPRN_HID5,r3
57 isync
58 sync
59
60 /* Setup some basic HID1 features */
61 mfspr r0,SPRN_HID1
62 li r3,0x1200 /* enable i-fetch cacheability */
63 sldi r3,r3,44 /* and prefetch */
64 or r0,r0,r3
65 mtspr SPRN_HID1,r0
66 mtspr SPRN_HID1,r0
67 isync
68
69 /* Clear HIOR */
70 li r0,0
71 sync
72 mtspr SPRN_HIOR,0 /* Clear interrupt prefix */
73 isync
74 blr
75
76_GLOBAL(__setup_cpu_power4)
77 blr
78
79_GLOBAL(__setup_cpu_be)
80 /* Set large page sizes LP=0: 16MB, LP=1: 64KB */
81 addi r3, 0, 0
82 ori r3, r3, HID6_LB
83 sldi r3, r3, 32
84 nor r3, r3, r3
85 mfspr r4, SPRN_HID6
86 and r4, r4, r3
87 addi r3, 0, 0x02000
88 sldi r3, r3, 32
89 or r4, r4, r3
90 mtspr SPRN_HID6, r4
91 blr
92
93_GLOBAL(__setup_cpu_ppc970)
94 mfspr r0,SPRN_HID0
95 li r11,5 /* clear DOZE and SLEEP */
96 rldimi r0,r11,52,8 /* set NAP and DPM */
97 mtspr SPRN_HID0,r0
98 mfspr r0,SPRN_HID0
99 mfspr r0,SPRN_HID0
100 mfspr r0,SPRN_HID0
101 mfspr r0,SPRN_HID0
102 mfspr r0,SPRN_HID0
103 mfspr r0,SPRN_HID0
104 sync
105 isync
106 blr
107
108/* Definitions for the table used to save CPU states */
109#define CS_HID0 0
110#define CS_HID1 8
111#define CS_HID4 16
112#define CS_HID5 24
113#define CS_SIZE 32
114
115 .data
116 .balign L1_CACHE_BYTES,0
117cpu_state_storage:
118 .space CS_SIZE
119 .balign L1_CACHE_BYTES,0
120 .text
121
122/* Called in normal context to backup CPU 0 state. This
123 * does not include cache settings. This function is also
124 * called for machine sleep. This does not include the MMU
125 * setup, BATs, etc... but rather the "special" registers
126 * like HID0, HID1, HID4, etc...
127 */
128_GLOBAL(__save_cpu_setup)
129 /* Some CR fields are volatile, so we back up the whole CR */
130 mfcr r7
131
132 /* Get storage ptr */
133 LOADADDR(r5,cpu_state_storage)
134
135 /* We only deal with 970 for now */
136 mfspr r0,SPRN_PVR
137 srwi r0,r0,16
138 cmpwi r0,0x39
139 beq 1f
140 cmpwi r0,0x3c
141 beq 1f
142 cmpwi r0,0x44
143 bne 2f
144
1451: /* Save HID0,1,4 and 5 */
146 mfspr r3,SPRN_HID0
147 std r3,CS_HID0(r5)
148 mfspr r3,SPRN_HID1
149 std r3,CS_HID1(r5)
150 mfspr r3,SPRN_HID4
151 std r3,CS_HID4(r5)
152 mfspr r3,SPRN_HID5
153 std r3,CS_HID5(r5)
154
1552:
156 mtcr r7
157 blr
158
159/* Called with no MMU context (typically MSR:IR/DR off) to
160 * restore CPU state as backed up by the previous
161 * function. This does not include cache setting
162 */
163_GLOBAL(__restore_cpu_setup)
164 /* Get storage ptr (FIXME when using anton reloc as we
165 * are running with translation disabled here)
166 */
167 LOADADDR(r5,cpu_state_storage)
168
169 /* We only deal with 970 for now */
170 mfspr r0,SPRN_PVR
171 srwi r0,r0,16
172 cmpwi r0,0x39
173 beq 1f
174 cmpwi r0,0x3c
175 beq 1f
176 cmpwi r0,0x44
177 bnelr
178
1791: /* Before accessing memory, we make sure rm_ci is clear */
180 li r0,0
181 mfspr r3,SPRN_HID4
182 rldimi r3,r0,40,23 /* clear bit 23 (rm_ci) */
183 sync
184 mtspr SPRN_HID4,r3
185 isync
186 sync
187
188 /* Clear interrupt prefix */
189 li r0,0
190 sync
191 mtspr SPRN_HIOR,0
192 isync
193
194 /* Restore HID0 */
195 ld r3,CS_HID0(r5)
196 sync
197 isync
198 mtspr SPRN_HID0,r3
199 mfspr r3,SPRN_HID0
200 mfspr r3,SPRN_HID0
201 mfspr r3,SPRN_HID0
202 mfspr r3,SPRN_HID0
203 mfspr r3,SPRN_HID0
204 mfspr r3,SPRN_HID0
205 sync
206 isync
207
208 /* Restore HID1 */
209 ld r3,CS_HID1(r5)
210 sync
211 isync
212 mtspr SPRN_HID1,r3
213 mtspr SPRN_HID1,r3
214 sync
215 isync
216
217 /* Restore HID4 */
218 ld r3,CS_HID4(r5)
219 sync
220 isync
221 mtspr SPRN_HID4,r3
222 sync
223 isync
224
225 /* Restore HID5 */
226 ld r3,CS_HID5(r5)
227 sync
228 isync
229 mtspr SPRN_HID5,r3
230 sync
231 isync
232 blr
233
diff --git a/arch/ppc64/kernel/eeh.c b/arch/ppc64/kernel/eeh.c
deleted file mode 100644
index 035d1b14a207..000000000000
--- a/arch/ppc64/kernel/eeh.c
+++ /dev/null
@@ -1,943 +0,0 @@
1/*
2 * eeh.c
3 * Copyright (C) 2001 Dave Engebretsen & Todd Inglett IBM Corporation
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#include <linux/bootmem.h>
21#include <linux/init.h>
22#include <linux/list.h>
23#include <linux/mm.h>
24#include <linux/notifier.h>
25#include <linux/pci.h>
26#include <linux/proc_fs.h>
27#include <linux/rbtree.h>
28#include <linux/seq_file.h>
29#include <linux/spinlock.h>
30#include <asm/eeh.h>
31#include <asm/io.h>
32#include <asm/machdep.h>
33#include <asm/rtas.h>
34#include <asm/atomic.h>
35#include <asm/systemcfg.h>
36#include <asm/ppc-pci.h>
37
38#undef DEBUG
39
40/** Overview:
41 * EEH, or "Extended Error Handling" is a PCI bridge technology for
42 * dealing with PCI bus errors that can't be dealt with within the
43 * usual PCI framework, except by check-stopping the CPU. Systems
44 * that are designed for high-availability/reliability cannot afford
45 * to crash due to a "mere" PCI error, thus the need for EEH.
46 * An EEH-capable bridge operates by converting a detected error
47 * into a "slot freeze", taking the PCI adapter off-line, making
48 * the slot behave, from the OS'es point of view, as if the slot
49 * were "empty": all reads return 0xff's and all writes are silently
50 * ignored. EEH slot isolation events can be triggered by parity
51 * errors on the address or data busses (e.g. during posted writes),
52 * which in turn might be caused by dust, vibration, humidity,
53 * radioactivity or plain-old failed hardware.
54 *
55 * Note, however, that one of the leading causes of EEH slot
56 * freeze events are buggy device drivers, buggy device microcode,
57 * or buggy device hardware. This is because any attempt by the
58 * device to bus-master data to a memory address that is not
59 * assigned to the device will trigger a slot freeze. (The idea
60 * is to prevent devices-gone-wild from corrupting system memory).
61 * Buggy hardware/drivers will have a miserable time co-existing
62 * with EEH.
63 *
64 * Ideally, a PCI device driver, when suspecting that an isolation
65 * event has occurred (e.g. by reading 0xff's), will then ask EEH
66 * whether this is the case, and then take appropriate steps to
67 * reset the PCI slot, the PCI device, and then resume operations.
68 * However, until that day, the checking is done here, with the
69 * eeh_check_failure() routine embedded in the MMIO macros. If
70 * the slot is found to be isolated, an "EEH Event" is synthesized
71 * and sent out for processing.
72 */
73
74/** Bus Unit ID macros; get low and hi 32-bits of the 64-bit BUID */
75#define BUID_HI(buid) ((buid) >> 32)
76#define BUID_LO(buid) ((buid) & 0xffffffff)
77
78/* EEH event workqueue setup. */
79static DEFINE_SPINLOCK(eeh_eventlist_lock);
80LIST_HEAD(eeh_eventlist);
81static void eeh_event_handler(void *);
82DECLARE_WORK(eeh_event_wq, eeh_event_handler, NULL);
83
84static struct notifier_block *eeh_notifier_chain;
85
86/*
87 * If a device driver keeps reading an MMIO register in an interrupt
88 * handler after a slot isolation event has occurred, we assume it
89 * is broken and panic. This sets the threshold for how many read
90 * attempts we allow before panicking.
91 */
92#define EEH_MAX_FAILS 1000
93static atomic_t eeh_fail_count;
94
95/* RTAS tokens */
96static int ibm_set_eeh_option;
97static int ibm_set_slot_reset;
98static int ibm_read_slot_reset_state;
99static int ibm_read_slot_reset_state2;
100static int ibm_slot_error_detail;
101
102static int eeh_subsystem_enabled;
103
104/* Buffer for reporting slot-error-detail rtas calls */
105static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX];
106static DEFINE_SPINLOCK(slot_errbuf_lock);
107static int eeh_error_buf_size;
108
109/* System monitoring statistics */
110static DEFINE_PER_CPU(unsigned long, total_mmio_ffs);
111static DEFINE_PER_CPU(unsigned long, false_positives);
112static DEFINE_PER_CPU(unsigned long, ignored_failures);
113static DEFINE_PER_CPU(unsigned long, slot_resets);
114
115/**
116 * The pci address cache subsystem. This subsystem places
117 * PCI device address resources into a red-black tree, sorted
118 * according to the address range, so that given only an i/o
119 * address, the corresponding PCI device can be **quickly**
120 * found. It is safe to perform an address lookup in an interrupt
121 * context; this ability is an important feature.
122 *
123 * Currently, the only customer of this code is the EEH subsystem;
124 * thus, this code has been somewhat tailored to suit EEH better.
125 * In particular, the cache does *not* hold the addresses of devices
126 * for which EEH is not enabled.
127 *
128 * (Implementation Note: The RB tree seems to be better/faster
129 * than any hash algo I could think of for this problem, even
130 * with the penalty of slow pointer chases for d-cache misses).
131 */
132struct pci_io_addr_range
133{
134 struct rb_node rb_node;
135 unsigned long addr_lo;
136 unsigned long addr_hi;
137 struct pci_dev *pcidev;
138 unsigned int flags;
139};
140
141static struct pci_io_addr_cache
142{
143 struct rb_root rb_root;
144 spinlock_t piar_lock;
145} pci_io_addr_cache_root;
146
147static inline struct pci_dev *__pci_get_device_by_addr(unsigned long addr)
148{
149 struct rb_node *n = pci_io_addr_cache_root.rb_root.rb_node;
150
151 while (n) {
152 struct pci_io_addr_range *piar;
153 piar = rb_entry(n, struct pci_io_addr_range, rb_node);
154
155 if (addr < piar->addr_lo) {
156 n = n->rb_left;
157 } else {
158 if (addr > piar->addr_hi) {
159 n = n->rb_right;
160 } else {
161 pci_dev_get(piar->pcidev);
162 return piar->pcidev;
163 }
164 }
165 }
166
167 return NULL;
168}
169
170/**
171 * pci_get_device_by_addr - Get device, given only address
172 * @addr: mmio (PIO) phys address or i/o port number
173 *
174 * Given an mmio phys address, or a port number, find a pci device
175 * that implements this address. Be sure to pci_dev_put the device
176 * when finished. I/O port numbers are assumed to be offset
177 * from zero (that is, they do *not* have pci_io_addr added in).
178 * It is safe to call this function within an interrupt.
179 */
180static struct pci_dev *pci_get_device_by_addr(unsigned long addr)
181{
182 struct pci_dev *dev;
183 unsigned long flags;
184
185 spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
186 dev = __pci_get_device_by_addr(addr);
187 spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
188 return dev;
189}
190
191#ifdef DEBUG
192/*
193 * Handy-dandy debug print routine, does nothing more
194 * than print out the contents of our addr cache.
195 */
196static void pci_addr_cache_print(struct pci_io_addr_cache *cache)
197{
198 struct rb_node *n;
199 int cnt = 0;
200
201 n = rb_first(&cache->rb_root);
202 while (n) {
203 struct pci_io_addr_range *piar;
204 piar = rb_entry(n, struct pci_io_addr_range, rb_node);
205 printk(KERN_DEBUG "PCI: %s addr range %d [%lx-%lx]: %s\n",
206 (piar->flags & IORESOURCE_IO) ? "i/o" : "mem", cnt,
207 piar->addr_lo, piar->addr_hi, pci_name(piar->pcidev));
208 cnt++;
209 n = rb_next(n);
210 }
211}
212#endif
213
214/* Insert address range into the rb tree. */
215static struct pci_io_addr_range *
216pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
217 unsigned long ahi, unsigned int flags)
218{
219 struct rb_node **p = &pci_io_addr_cache_root.rb_root.rb_node;
220 struct rb_node *parent = NULL;
221 struct pci_io_addr_range *piar;
222
223 /* Walk tree, find a place to insert into tree */
224 while (*p) {
225 parent = *p;
226 piar = rb_entry(parent, struct pci_io_addr_range, rb_node);
227 if (alo < piar->addr_lo) {
228 p = &parent->rb_left;
229 } else if (ahi > piar->addr_hi) {
230 p = &parent->rb_right;
231 } else {
232 if (dev != piar->pcidev ||
233 alo != piar->addr_lo || ahi != piar->addr_hi) {
234 printk(KERN_WARNING "PIAR: overlapping address range\n");
235 }
236 return piar;
237 }
238 }
239 piar = (struct pci_io_addr_range *)kmalloc(sizeof(struct pci_io_addr_range), GFP_ATOMIC);
240 if (!piar)
241 return NULL;
242
243 piar->addr_lo = alo;
244 piar->addr_hi = ahi;
245 piar->pcidev = dev;
246 piar->flags = flags;
247
248 rb_link_node(&piar->rb_node, parent, p);
249 rb_insert_color(&piar->rb_node, &pci_io_addr_cache_root.rb_root);
250
251 return piar;
252}
253
254static void __pci_addr_cache_insert_device(struct pci_dev *dev)
255{
256 struct device_node *dn;
257 struct pci_dn *pdn;
258 int i;
259 int inserted = 0;
260
261 dn = pci_device_to_OF_node(dev);
262 if (!dn) {
263 printk(KERN_WARNING "PCI: no pci dn found for dev=%s\n",
264 pci_name(dev));
265 return;
266 }
267
268 /* Skip any devices for which EEH is not enabled. */
269 pdn = dn->data;
270 if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) ||
271 pdn->eeh_mode & EEH_MODE_NOCHECK) {
272#ifdef DEBUG
273 printk(KERN_INFO "PCI: skip building address cache for=%s\n",
274 pci_name(dev));
275#endif
276 return;
277 }
278
279 /* The cache holds a reference to the device... */
280 pci_dev_get(dev);
281
282 /* Walk resources on this device, poke them into the tree */
283 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
284 unsigned long start = pci_resource_start(dev,i);
285 unsigned long end = pci_resource_end(dev,i);
286 unsigned int flags = pci_resource_flags(dev,i);
287
288 /* We are interested only bus addresses, not dma or other stuff */
289 if (0 == (flags & (IORESOURCE_IO | IORESOURCE_MEM)))
290 continue;
291 if (start == 0 || ~start == 0 || end == 0 || ~end == 0)
292 continue;
293 pci_addr_cache_insert(dev, start, end, flags);
294 inserted = 1;
295 }
296
297 /* If there was nothing to add, the cache has no reference... */
298 if (!inserted)
299 pci_dev_put(dev);
300}
301
302/**
303 * pci_addr_cache_insert_device - Add a device to the address cache
304 * @dev: PCI device whose I/O addresses we are interested in.
305 *
306 * In order to support the fast lookup of devices based on addresses,
307 * we maintain a cache of devices that can be quickly searched.
308 * This routine adds a device to that cache.
309 */
310void pci_addr_cache_insert_device(struct pci_dev *dev)
311{
312 unsigned long flags;
313
314 spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
315 __pci_addr_cache_insert_device(dev);
316 spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
317}
318
319static inline void __pci_addr_cache_remove_device(struct pci_dev *dev)
320{
321 struct rb_node *n;
322 int removed = 0;
323
324restart:
325 n = rb_first(&pci_io_addr_cache_root.rb_root);
326 while (n) {
327 struct pci_io_addr_range *piar;
328 piar = rb_entry(n, struct pci_io_addr_range, rb_node);
329
330 if (piar->pcidev == dev) {
331 rb_erase(n, &pci_io_addr_cache_root.rb_root);
332 removed = 1;
333 kfree(piar);
334 goto restart;
335 }
336 n = rb_next(n);
337 }
338
339 /* The cache no longer holds its reference to this device... */
340 if (removed)
341 pci_dev_put(dev);
342}
343
344/**
345 * pci_addr_cache_remove_device - remove pci device from addr cache
346 * @dev: device to remove
347 *
348 * Remove a device from the addr-cache tree.
349 * This is potentially expensive, since it will walk
350 * the tree multiple times (once per resource).
351 * But so what; device removal doesn't need to be that fast.
352 */
353void pci_addr_cache_remove_device(struct pci_dev *dev)
354{
355 unsigned long flags;
356
357 spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
358 __pci_addr_cache_remove_device(dev);
359 spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
360}
361
362/**
363 * pci_addr_cache_build - Build a cache of I/O addresses
364 *
365 * Build a cache of pci i/o addresses. This cache will be used to
366 * find the pci device that corresponds to a given address.
367 * This routine scans all pci busses to build the cache.
368 * Must be run late in boot process, after the pci controllers
369 * have been scaned for devices (after all device resources are known).
370 */
371void __init pci_addr_cache_build(void)
372{
373 struct pci_dev *dev = NULL;
374
375 spin_lock_init(&pci_io_addr_cache_root.piar_lock);
376
377 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
378 /* Ignore PCI bridges ( XXX why ??) */
379 if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE) {
380 continue;
381 }
382 pci_addr_cache_insert_device(dev);
383 }
384
385#ifdef DEBUG
386 /* Verify tree built up above, echo back the list of addrs. */
387 pci_addr_cache_print(&pci_io_addr_cache_root);
388#endif
389}
390
391/* --------------------------------------------------------------- */
392/* Above lies the PCI Address Cache. Below lies the EEH event infrastructure */
393
394/**
395 * eeh_register_notifier - Register to find out about EEH events.
396 * @nb: notifier block to callback on events
397 */
398int eeh_register_notifier(struct notifier_block *nb)
399{
400 return notifier_chain_register(&eeh_notifier_chain, nb);
401}
402
403/**
404 * eeh_unregister_notifier - Unregister to an EEH event notifier.
405 * @nb: notifier block to callback on events
406 */
407int eeh_unregister_notifier(struct notifier_block *nb)
408{
409 return notifier_chain_unregister(&eeh_notifier_chain, nb);
410}
411
412/**
413 * read_slot_reset_state - Read the reset state of a device node's slot
414 * @dn: device node to read
415 * @rets: array to return results in
416 */
417static int read_slot_reset_state(struct device_node *dn, int rets[])
418{
419 int token, outputs;
420 struct pci_dn *pdn = dn->data;
421
422 if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) {
423 token = ibm_read_slot_reset_state2;
424 outputs = 4;
425 } else {
426 token = ibm_read_slot_reset_state;
427 outputs = 3;
428 }
429
430 return rtas_call(token, 3, outputs, rets, pdn->eeh_config_addr,
431 BUID_HI(pdn->phb->buid), BUID_LO(pdn->phb->buid));
432}
433
434/**
435 * eeh_panic - call panic() for an eeh event that cannot be handled.
436 * The philosophy of this routine is that it is better to panic and
437 * halt the OS than it is to risk possible data corruption by
438 * oblivious device drivers that don't know better.
439 *
440 * @dev pci device that had an eeh event
441 * @reset_state current reset state of the device slot
442 */
443static void eeh_panic(struct pci_dev *dev, int reset_state)
444{
445 /*
446 * XXX We should create a separate sysctl for this.
447 *
448 * Since the panic_on_oops sysctl is used to halt the system
449 * in light of potential corruption, we can use it here.
450 */
451 if (panic_on_oops)
452 panic("EEH: MMIO failure (%d) on device:%s\n", reset_state,
453 pci_name(dev));
454 else {
455 __get_cpu_var(ignored_failures)++;
456 printk(KERN_INFO "EEH: Ignored MMIO failure (%d) on device:%s\n",
457 reset_state, pci_name(dev));
458 }
459}
460
461/**
462 * eeh_event_handler - dispatch EEH events. The detection of a frozen
463 * slot can occur inside an interrupt, where it can be hard to do
464 * anything about it. The goal of this routine is to pull these
465 * detection events out of the context of the interrupt handler, and
466 * re-dispatch them for processing at a later time in a normal context.
467 *
468 * @dummy - unused
469 */
470static void eeh_event_handler(void *dummy)
471{
472 unsigned long flags;
473 struct eeh_event *event;
474
475 while (1) {
476 spin_lock_irqsave(&eeh_eventlist_lock, flags);
477 event = NULL;
478 if (!list_empty(&eeh_eventlist)) {
479 event = list_entry(eeh_eventlist.next, struct eeh_event, list);
480 list_del(&event->list);
481 }
482 spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
483 if (event == NULL)
484 break;
485
486 printk(KERN_INFO "EEH: MMIO failure (%d), notifiying device "
487 "%s\n", event->reset_state,
488 pci_name(event->dev));
489
490 atomic_set(&eeh_fail_count, 0);
491 notifier_call_chain (&eeh_notifier_chain,
492 EEH_NOTIFY_FREEZE, event);
493
494 __get_cpu_var(slot_resets)++;
495
496 pci_dev_put(event->dev);
497 kfree(event);
498 }
499}
500
501/**
502 * eeh_token_to_phys - convert EEH address token to phys address
503 * @token i/o token, should be address in the form 0xE....
504 */
505static inline unsigned long eeh_token_to_phys(unsigned long token)
506{
507 pte_t *ptep;
508 unsigned long pa;
509
510 ptep = find_linux_pte(init_mm.pgd, token);
511 if (!ptep)
512 return token;
513 pa = pte_pfn(*ptep) << PAGE_SHIFT;
514
515 return pa | (token & (PAGE_SIZE-1));
516}
517
518/**
519 * eeh_dn_check_failure - check if all 1's data is due to EEH slot freeze
520 * @dn device node
521 * @dev pci device, if known
522 *
523 * Check for an EEH failure for the given device node. Call this
524 * routine if the result of a read was all 0xff's and you want to
525 * find out if this is due to an EEH slot freeze. This routine
526 * will query firmware for the EEH status.
527 *
528 * Returns 0 if there has not been an EEH error; otherwise returns
529 * a non-zero value and queues up a solt isolation event notification.
530 *
531 * It is safe to call this routine in an interrupt context.
532 */
533int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
534{
535 int ret;
536 int rets[3];
537 unsigned long flags;
538 int rc, reset_state;
539 struct eeh_event *event;
540 struct pci_dn *pdn;
541
542 __get_cpu_var(total_mmio_ffs)++;
543
544 if (!eeh_subsystem_enabled)
545 return 0;
546
547 if (!dn)
548 return 0;
549 pdn = dn->data;
550
551 /* Access to IO BARs might get this far and still not want checking. */
552 if (!pdn->eeh_capable || !(pdn->eeh_mode & EEH_MODE_SUPPORTED) ||
553 pdn->eeh_mode & EEH_MODE_NOCHECK) {
554 return 0;
555 }
556
557 if (!pdn->eeh_config_addr) {
558 return 0;
559 }
560
561 /*
562 * If we already have a pending isolation event for this
563 * slot, we know it's bad already, we don't need to check...
564 */
565 if (pdn->eeh_mode & EEH_MODE_ISOLATED) {
566 atomic_inc(&eeh_fail_count);
567 if (atomic_read(&eeh_fail_count) >= EEH_MAX_FAILS) {
568 /* re-read the slot reset state */
569 if (read_slot_reset_state(dn, rets) != 0)
570 rets[0] = -1; /* reset state unknown */
571 eeh_panic(dev, rets[0]);
572 }
573 return 0;
574 }
575
576 /*
577 * Now test for an EEH failure. This is VERY expensive.
578 * Note that the eeh_config_addr may be a parent device
579 * in the case of a device behind a bridge, or it may be
580 * function zero of a multi-function device.
581 * In any case they must share a common PHB.
582 */
583 ret = read_slot_reset_state(dn, rets);
584 if (!(ret == 0 && rets[1] == 1 && (rets[0] == 2 || rets[0] == 4))) {
585 __get_cpu_var(false_positives)++;
586 return 0;
587 }
588
589 /* prevent repeated reports of this failure */
590 pdn->eeh_mode |= EEH_MODE_ISOLATED;
591
592 reset_state = rets[0];
593
594 spin_lock_irqsave(&slot_errbuf_lock, flags);
595 memset(slot_errbuf, 0, eeh_error_buf_size);
596
597 rc = rtas_call(ibm_slot_error_detail,
598 8, 1, NULL, pdn->eeh_config_addr,
599 BUID_HI(pdn->phb->buid),
600 BUID_LO(pdn->phb->buid), NULL, 0,
601 virt_to_phys(slot_errbuf),
602 eeh_error_buf_size,
603 1 /* Temporary Error */);
604
605 if (rc == 0)
606 log_error(slot_errbuf, ERR_TYPE_RTAS_LOG, 0);
607 spin_unlock_irqrestore(&slot_errbuf_lock, flags);
608
609 printk(KERN_INFO "EEH: MMIO failure (%d) on device: %s %s\n",
610 rets[0], dn->name, dn->full_name);
611 event = kmalloc(sizeof(*event), GFP_ATOMIC);
612 if (event == NULL) {
613 eeh_panic(dev, reset_state);
614 return 1;
615 }
616
617 event->dev = dev;
618 event->dn = dn;
619 event->reset_state = reset_state;
620
621 /* We may or may not be called in an interrupt context */
622 spin_lock_irqsave(&eeh_eventlist_lock, flags);
623 list_add(&event->list, &eeh_eventlist);
624 spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
625
626 /* Most EEH events are due to device driver bugs. Having
627 * a stack trace will help the device-driver authors figure
628 * out what happened. So print that out. */
629 dump_stack();
630 schedule_work(&eeh_event_wq);
631
632 return 0;
633}
634
635EXPORT_SYMBOL(eeh_dn_check_failure);
636
637/**
638 * eeh_check_failure - check if all 1's data is due to EEH slot freeze
639 * @token i/o token, should be address in the form 0xA....
640 * @val value, should be all 1's (XXX why do we need this arg??)
641 *
642 * Check for an eeh failure at the given token address.
643 * Check for an EEH failure at the given token address. Call this
644 * routine if the result of a read was all 0xff's and you want to
645 * find out if this is due to an EEH slot freeze event. This routine
646 * will query firmware for the EEH status.
647 *
648 * Note this routine is safe to call in an interrupt context.
649 */
650unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val)
651{
652 unsigned long addr;
653 struct pci_dev *dev;
654 struct device_node *dn;
655
656 /* Finding the phys addr + pci device; this is pretty quick. */
657 addr = eeh_token_to_phys((unsigned long __force) token);
658 dev = pci_get_device_by_addr(addr);
659 if (!dev)
660 return val;
661
662 dn = pci_device_to_OF_node(dev);
663 eeh_dn_check_failure (dn, dev);
664
665 pci_dev_put(dev);
666 return val;
667}
668
669EXPORT_SYMBOL(eeh_check_failure);
670
/* The two halves of a PHB's buid, handed to early_enable_eeh(). */
struct eeh_early_enable_info {
	unsigned int	buid_hi;	/* set from BUID_HI(buid) */
	unsigned int	buid_lo;	/* set from BUID_LO(buid) */
};
675
676/* Enable eeh for the given device node. */
677static void *early_enable_eeh(struct device_node *dn, void *data)
678{
679 struct eeh_early_enable_info *info = data;
680 int ret;
681 char *status = get_property(dn, "status", NULL);
682 u32 *class_code = (u32 *)get_property(dn, "class-code", NULL);
683 u32 *vendor_id = (u32 *)get_property(dn, "vendor-id", NULL);
684 u32 *device_id = (u32 *)get_property(dn, "device-id", NULL);
685 u32 *regs;
686 int enable;
687 struct pci_dn *pdn = dn->data;
688
689 pdn->eeh_mode = 0;
690
691 if (status && strcmp(status, "ok") != 0)
692 return NULL; /* ignore devices with bad status */
693
694 /* Ignore bad nodes. */
695 if (!class_code || !vendor_id || !device_id)
696 return NULL;
697
698 /* There is nothing to check on PCI to ISA bridges */
699 if (dn->type && !strcmp(dn->type, "isa")) {
700 pdn->eeh_mode |= EEH_MODE_NOCHECK;
701 return NULL;
702 }
703
704 /*
705 * Now decide if we are going to "Disable" EEH checking
706 * for this device. We still run with the EEH hardware active,
707 * but we won't be checking for ff's. This means a driver
708 * could return bad data (very bad!), an interrupt handler could
709 * hang waiting on status bits that won't change, etc.
710 * But there are a few cases like display devices that make sense.
711 */
712 enable = 1; /* i.e. we will do checking */
713 if ((*class_code >> 16) == PCI_BASE_CLASS_DISPLAY)
714 enable = 0;
715
716 if (!enable)
717 pdn->eeh_mode |= EEH_MODE_NOCHECK;
718
719 /* Ok... see if this device supports EEH. Some do, some don't,
720 * and the only way to find out is to check each and every one. */
721 regs = (u32 *)get_property(dn, "reg", NULL);
722 if (regs) {
723 /* First register entry is addr (00BBSS00) */
724 /* Try to enable eeh */
725 ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL,
726 regs[0], info->buid_hi, info->buid_lo,
727 EEH_ENABLE);
728 if (ret == 0) {
729 eeh_subsystem_enabled = 1;
730 pdn->eeh_mode |= EEH_MODE_SUPPORTED;
731 pdn->eeh_config_addr = regs[0];
732#ifdef DEBUG
733 printk(KERN_DEBUG "EEH: %s: eeh enabled\n", dn->full_name);
734#endif
735 } else {
736
737 /* This device doesn't support EEH, but it may have an
738 * EEH parent, in which case we mark it as supported. */
739 if (dn->parent && dn->parent->data
740 && (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) {
741 /* Parent supports EEH. */
742 pdn->eeh_mode |= EEH_MODE_SUPPORTED;
743 pdn->eeh_config_addr = PCI_DN(dn->parent)->eeh_config_addr;
744 return NULL;
745 }
746 }
747 } else {
748 printk(KERN_WARNING "EEH: %s: unable to get reg property.\n",
749 dn->full_name);
750 }
751
752 return NULL;
753}
754
755/*
756 * Initialize EEH by trying to enable it for all of the adapters in the system.
757 * As a side effect we can determine here if eeh is supported at all.
758 * Note that we leave EEH on so failed config cycles won't cause a machine
759 * check. If a user turns off EEH for a particular adapter they are really
760 * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't
761 * grant access to a slot if EEH isn't enabled, and so we always enable
762 * EEH for all slots/all devices.
763 *
764 * The eeh-force-off option disables EEH checking globally, for all slots.
765 * Even if force-off is set, the EEH hardware is still enabled, so that
766 * newer systems can boot.
767 */
768void __init eeh_init(void)
769{
770 struct device_node *phb, *np;
771 struct eeh_early_enable_info info;
772
773 np = of_find_node_by_path("/rtas");
774 if (np == NULL)
775 return;
776
777 ibm_set_eeh_option = rtas_token("ibm,set-eeh-option");
778 ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
779 ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2");
780 ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state");
781 ibm_slot_error_detail = rtas_token("ibm,slot-error-detail");
782
783 if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE)
784 return;
785
786 eeh_error_buf_size = rtas_token("rtas-error-log-max");
787 if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) {
788 eeh_error_buf_size = 1024;
789 }
790 if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) {
791 printk(KERN_WARNING "EEH: rtas-error-log-max is bigger than allocated "
792 "buffer ! (%d vs %d)", eeh_error_buf_size, RTAS_ERROR_LOG_MAX);
793 eeh_error_buf_size = RTAS_ERROR_LOG_MAX;
794 }
795
796 /* Enable EEH for all adapters. Note that eeh requires buid's */
797 for (phb = of_find_node_by_name(NULL, "pci"); phb;
798 phb = of_find_node_by_name(phb, "pci")) {
799 unsigned long buid;
800 struct pci_dn *pci;
801
802 buid = get_phb_buid(phb);
803 if (buid == 0 || phb->data == NULL)
804 continue;
805
806 pci = phb->data;
807 info.buid_lo = BUID_LO(buid);
808 info.buid_hi = BUID_HI(buid);
809 traverse_pci_devices(phb, early_enable_eeh, &info);
810 }
811
812 if (eeh_subsystem_enabled)
813 printk(KERN_INFO "EEH: PCI Enhanced I/O Error Handling Enabled\n");
814 else
815 printk(KERN_WARNING "EEH: No capable adapters found\n");
816}
817
818/**
819 * eeh_add_device_early - enable EEH for the indicated device_node
820 * @dn: device node for which to set up EEH
821 *
822 * This routine must be used to perform EEH initialization for PCI
823 * devices that were added after system boot (e.g. hotplug, dlpar).
824 * This routine must be called before any i/o is performed to the
825 * adapter (inluding any config-space i/o).
826 * Whether this actually enables EEH or not for this device depends
827 * on the CEC architecture, type of the device, on earlier boot
828 * command-line arguments & etc.
829 */
830void eeh_add_device_early(struct device_node *dn)
831{
832 struct pci_controller *phb;
833 struct eeh_early_enable_info info;
834
835 if (!dn || !dn->data)
836 return;
837 phb = PCI_DN(dn)->phb;
838 if (NULL == phb || 0 == phb->buid) {
839 printk(KERN_WARNING "EEH: Expected buid but found none\n");
840 return;
841 }
842
843 info.buid_hi = BUID_HI(phb->buid);
844 info.buid_lo = BUID_LO(phb->buid);
845 early_enable_eeh(dn, &info);
846}
847EXPORT_SYMBOL(eeh_add_device_early);
848
849/**
850 * eeh_add_device_late - perform EEH initialization for the indicated pci device
851 * @dev: pci device for which to set up EEH
852 *
853 * This routine must be used to complete EEH initialization for PCI
854 * devices that were added after system boot (e.g. hotplug, dlpar).
855 */
856void eeh_add_device_late(struct pci_dev *dev)
857{
858 if (!dev || !eeh_subsystem_enabled)
859 return;
860
861#ifdef DEBUG
862 printk(KERN_DEBUG "EEH: adding device %s\n", pci_name(dev));
863#endif
864
865 pci_addr_cache_insert_device (dev);
866}
867EXPORT_SYMBOL(eeh_add_device_late);
868
869/**
870 * eeh_remove_device - undo EEH setup for the indicated pci device
871 * @dev: pci device to be removed
872 *
873 * This routine should be when a device is removed from a running
874 * system (e.g. by hotplug or dlpar).
875 */
876void eeh_remove_device(struct pci_dev *dev)
877{
878 if (!dev || !eeh_subsystem_enabled)
879 return;
880
881 /* Unregister the device with the EEH/PCI address search system */
882#ifdef DEBUG
883 printk(KERN_DEBUG "EEH: remove device %s\n", pci_name(dev));
884#endif
885 pci_addr_cache_remove_device(dev);
886}
887EXPORT_SYMBOL(eeh_remove_device);
888
889static int proc_eeh_show(struct seq_file *m, void *v)
890{
891 unsigned int cpu;
892 unsigned long ffs = 0, positives = 0, failures = 0;
893 unsigned long resets = 0;
894
895 for_each_cpu(cpu) {
896 ffs += per_cpu(total_mmio_ffs, cpu);
897 positives += per_cpu(false_positives, cpu);
898 failures += per_cpu(ignored_failures, cpu);
899 resets += per_cpu(slot_resets, cpu);
900 }
901
902 if (0 == eeh_subsystem_enabled) {
903 seq_printf(m, "EEH Subsystem is globally disabled\n");
904 seq_printf(m, "eeh_total_mmio_ffs=%ld\n", ffs);
905 } else {
906 seq_printf(m, "EEH Subsystem is enabled\n");
907 seq_printf(m, "eeh_total_mmio_ffs=%ld\n"
908 "eeh_false_positives=%ld\n"
909 "eeh_ignored_failures=%ld\n"
910 "eeh_slot_resets=%ld\n"
911 "eeh_fail_count=%d\n",
912 ffs, positives, failures, resets,
913 eeh_fail_count.counter);
914 }
915
916 return 0;
917}
918
919static int proc_eeh_open(struct inode *inode, struct file *file)
920{
921 return single_open(file, proc_eeh_show, NULL);
922}
923
924static struct file_operations proc_eeh_operations = {
925 .open = proc_eeh_open,
926 .read = seq_read,
927 .llseek = seq_lseek,
928 .release = single_release,
929};
930
931static int __init eeh_init_proc(void)
932{
933 struct proc_dir_entry *e;
934
935 if (systemcfg->platform & PLATFORM_PSERIES) {
936 e = create_proc_entry("ppc64/eeh", 0, NULL);
937 if (e)
938 e->proc_fops = &proc_eeh_operations;
939 }
940
941 return 0;
942}
943__initcall(eeh_init_proc);
diff --git a/arch/ppc64/kernel/firmware.c b/arch/ppc64/kernel/firmware.c
deleted file mode 100644
index d8432c0fb27d..000000000000
--- a/arch/ppc64/kernel/firmware.c
+++ /dev/null
@@ -1,47 +0,0 @@
1/*
2 * arch/ppc64/kernel/firmware.c
3 *
4 * Extracted from cputable.c
5 *
6 * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
7 *
8 * Modifications for ppc64:
9 * Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com>
10 * Copyright (C) 2005 Stephen Rothwell, IBM Corporation
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
18#include <linux/config.h>
19
20#include <asm/firmware.h>
21
/* Firmware feature bits for the running system. */
unsigned long ppc64_firmware_features;

#ifdef CONFIG_PPC_PSERIES
/* Pairs each FW_FEATURE_* bit with its "hcall-*" feature name. */
firmware_feature_t firmware_features_table[FIRMWARE_MAX_FEATURES] = {
	{ FW_FEATURE_PFT,	"hcall-pft" },
	{ FW_FEATURE_TCE,	"hcall-tce" },
	{ FW_FEATURE_SPRG0,	"hcall-sprg0" },
	{ FW_FEATURE_DABR,	"hcall-dabr" },
	{ FW_FEATURE_COPY,	"hcall-copy" },
	{ FW_FEATURE_ASR,	"hcall-asr" },
	{ FW_FEATURE_DEBUG,	"hcall-debug" },
	{ FW_FEATURE_PERF,	"hcall-perf" },
	{ FW_FEATURE_DUMP,	"hcall-dump" },
	{ FW_FEATURE_INTERRUPT,	"hcall-interrupt" },
	{ FW_FEATURE_MIGRATE,	"hcall-migrate" },
	{ FW_FEATURE_PERFMON,	"hcall-perfmon" },
	{ FW_FEATURE_CRQ,	"hcall-crq" },
	{ FW_FEATURE_VIO,	"hcall-vio" },
	{ FW_FEATURE_RDMA,	"hcall-rdma" },
	{ FW_FEATURE_LLAN,	"hcall-lLAN" },
	{ FW_FEATURE_BULK,	"hcall-bulk" },
	{ FW_FEATURE_XDABR,	"hcall-xdabr" },
	{ FW_FEATURE_MULTITCE,	"hcall-multi-tce" },
	{ FW_FEATURE_SPLPAR,	"hcall-splpar" },
};
#endif
diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S
index 9e8050ea1225..1c869ea72d28 100644
--- a/arch/ppc64/kernel/head.S
+++ b/arch/ppc64/kernel/head.S
@@ -28,7 +28,6 @@
28#include <asm/processor.h> 28#include <asm/processor.h>
29#include <asm/page.h> 29#include <asm/page.h>
30#include <asm/mmu.h> 30#include <asm/mmu.h>
31#include <asm/systemcfg.h>
32#include <asm/ppc_asm.h> 31#include <asm/ppc_asm.h>
33#include <asm/asm-offsets.h> 32#include <asm/asm-offsets.h>
34#include <asm/bug.h> 33#include <asm/bug.h>
@@ -1701,21 +1700,9 @@ _GLOBAL(__secondary_start)
1701 HMT_MEDIUM /* Set thread priority to MEDIUM */ 1700 HMT_MEDIUM /* Set thread priority to MEDIUM */
1702 1701
1703 ld r2,PACATOC(r13) 1702 ld r2,PACATOC(r13)
1704 li r6,0 1703
1705 stb r6,PACAPROCENABLED(r13) 1704 /* Do early setup for that CPU */
1706 1705 bl .early_setup_secondary
1707#ifndef CONFIG_PPC_ISERIES
1708 /* Initialize the page table pointer register. */
1709 LOADADDR(r6,_SDR1)
1710 ld r6,0(r6) /* get the value of _SDR1 */
1711 mtspr SPRN_SDR1,r6 /* set the htab location */
1712#endif
1713 /* Initialize the first segment table (or SLB) entry */
1714 ld r3,PACASTABVIRT(r13) /* get addr of segment table */
1715BEGIN_FTR_SECTION
1716 bl .stab_initialize
1717END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
1718 bl .slb_initialize
1719 1706
1720 /* Initialize the kernel stack. Just a repeat for iSeries. */ 1707 /* Initialize the kernel stack. Just a repeat for iSeries. */
1721 LOADADDR(r3,current_set) 1708 LOADADDR(r3,current_set)
@@ -1724,37 +1711,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
1724 addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD 1711 addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
1725 std r1,PACAKSAVE(r13) 1712 std r1,PACAKSAVE(r13)
1726 1713
1727 ld r3,PACASTABREAL(r13) /* get raddr of segment table */
1728 ori r4,r3,1 /* turn on valid bit */
1729
1730#ifdef CONFIG_PPC_ISERIES
1731 li r0,-1 /* hypervisor call */
1732 li r3,1
1733 sldi r3,r3,63 /* 0x8000000000000000 */
1734 ori r3,r3,4 /* 0x8000000000000004 */
1735 sc /* HvCall_setASR */
1736#else
1737 /* set the ASR */
1738 ld r3,systemcfg@got(r2) /* r3 = ptr to systemcfg */
1739 ld r3,0(r3)
1740 lwz r3,PLATFORM(r3) /* r3 = platform flags */
1741 andi. r3,r3,PLATFORM_LPAR /* Test if bit 0 is set (LPAR bit) */
1742 beq 98f /* branch if result is 0 */
1743 mfspr r3,SPRN_PVR
1744 srwi r3,r3,16
1745 cmpwi r3,0x37 /* SStar */
1746 beq 97f
1747 cmpwi r3,0x36 /* IStar */
1748 beq 97f
1749 cmpwi r3,0x34 /* Pulsar */
1750 bne 98f
175197: li r3,H_SET_ASR /* hcall = H_SET_ASR */
1752 HVSC /* Invoking hcall */
1753 b 99f
175498: /* !(rpa hypervisor) || !(star) */
1755 mtasr r4 /* set the stab location */
175699:
1757#endif
1758 li r7,0 1714 li r7,0
1759 mtlr r7 1715 mtlr r7
1760 1716
@@ -1896,40 +1852,6 @@ _STATIC(start_here_multiplatform)
1896 mr r3,r31 1852 mr r3,r31
1897 bl .early_setup 1853 bl .early_setup
1898 1854
1899 /* set the ASR */
1900 ld r3,PACASTABREAL(r13)
1901 ori r4,r3,1 /* turn on valid bit */
1902 ld r3,systemcfg@got(r2) /* r3 = ptr to systemcfg */
1903 ld r3,0(r3)
1904 lwz r3,PLATFORM(r3) /* r3 = platform flags */
1905 andi. r3,r3,PLATFORM_LPAR /* Test if bit 0 is set (LPAR bit) */
1906 beq 98f /* branch if result is 0 */
1907 mfspr r3,SPRN_PVR
1908 srwi r3,r3,16
1909 cmpwi r3,0x37 /* SStar */
1910 beq 97f
1911 cmpwi r3,0x36 /* IStar */
1912 beq 97f
1913 cmpwi r3,0x34 /* Pulsar */
1914 bne 98f
191597: li r3,H_SET_ASR /* hcall = H_SET_ASR */
1916 HVSC /* Invoking hcall */
1917 b 99f
191898: /* !(rpa hypervisor) || !(star) */
1919 mtasr r4 /* set the stab location */
192099:
1921 /* Set SDR1 (hash table pointer) */
1922 ld r3,systemcfg@got(r2) /* r3 = ptr to systemcfg */
1923 ld r3,0(r3)
1924 lwz r3,PLATFORM(r3) /* r3 = platform flags */
1925 /* Test if bit 0 is set (LPAR bit) */
1926 andi. r3,r3,PLATFORM_LPAR
1927 bne 98f /* branch if result is !0 */
1928 LOADADDR(r6,_SDR1) /* Only if NOT LPAR */
1929 sub r6,r6,r26
1930 ld r6,0(r6) /* get the value of _SDR1 */
1931 mtspr SPRN_SDR1,r6 /* set the htab location */
193298:
1933 LOADADDR(r3,.start_here_common) 1855 LOADADDR(r3,.start_here_common)
1934 SET_REG_TO_CONST(r4, MSR_KERNEL) 1856 SET_REG_TO_CONST(r4, MSR_KERNEL)
1935 mtspr SPRN_SRR0,r3 1857 mtspr SPRN_SRR0,r3
diff --git a/arch/ppc64/kernel/idle.c b/arch/ppc64/kernel/idle.c
index 715bc0e71e0f..b879d3057ef8 100644
--- a/arch/ppc64/kernel/idle.c
+++ b/arch/ppc64/kernel/idle.c
@@ -26,7 +26,6 @@
26#include <asm/processor.h> 26#include <asm/processor.h>
27#include <asm/cputable.h> 27#include <asm/cputable.h>
28#include <asm/time.h> 28#include <asm/time.h>
29#include <asm/systemcfg.h>
30#include <asm/machdep.h> 29#include <asm/machdep.h>
31#include <asm/smp.h> 30#include <asm/smp.h>
32 31
diff --git a/arch/ppc64/kernel/ioctl32.c b/arch/ppc64/kernel/ioctl32.c
deleted file mode 100644
index ba4a899045c2..000000000000
--- a/arch/ppc64/kernel/ioctl32.c
+++ /dev/null
@@ -1,49 +0,0 @@
1/*
2 * ioctl32.c: Conversion between 32bit and 64bit native ioctls.
3 *
4 * Based on sparc64 ioctl32.c by:
5 *
6 * Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com)
7 * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be)
8 *
9 * ppc64 changes:
10 *
11 * Copyright (C) 2000 Ken Aaker (kdaaker@rchland.vnet.ibm.com)
12 * Copyright (C) 2001 Anton Blanchard (antonb@au.ibm.com)
13 *
14 * These routines maintain argument size conversion between 32bit and 64bit
15 * ioctls.
16 *
17 * This program is free software; you can redistribute it and/or
18 * modify it under the terms of the GNU General Public License
19 * as published by the Free Software Foundation; either version
20 * 2 of the License, or (at your option) any later version.
21 */
22
/*
 * compat_ioctl.c is included three times below, each time after
 * defining a different macro (INCLUDES, CODE, DECLARES).
 * NOTE(review): presumably each macro selects one section of that
 * file - confirm against compat_ioctl.c.  The order of the defines and
 * includes matters; do not rearrange.
 */
#define INCLUDES
#include "compat_ioctl.c"
#include <linux/syscalls.h>

#define CODE
#include "compat_ioctl.c"

/* One table entry: the ioctl command, its handler, and a NULL third field. */
#define HANDLE_IOCTL(cmd,handler) { cmd, (ioctl_trans_handler_t)handler, NULL },
/* An entry whose handler is plain sys_ioctl. */
#define COMPATIBLE_IOCTL(cmd) HANDLE_IOCTL(cmd,sys_ioctl)

/* Open and close the initializer of the ioctl_start[] table. */
#define IOCTL_TABLE_START \
 struct ioctl_trans ioctl_start[] = {
#define IOCTL_TABLE_END \
 };

/* Everything between START and END expands inside the table initializer. */
IOCTL_TABLE_START
#include <linux/compat_ioctl.h>
#define DECLARES
#include "compat_ioctl.c"

/* Little p (/dev/rtc, /dev/envctrl, etc.) */
COMPATIBLE_IOCTL(_IOR('p', 20, int[7])) /* RTCGET */
COMPATIBLE_IOCTL(_IOW('p', 21, int[7])) /* RTCSET */

IOCTL_TABLE_END

/* Number of entries in ioctl_start[]. */
int ioctl_table_size = ARRAY_SIZE(ioctl_start);
diff --git a/arch/ppc64/kernel/irq.c b/arch/ppc64/kernel/irq.c
deleted file mode 100644
index 87474584033f..000000000000
--- a/arch/ppc64/kernel/irq.c
+++ /dev/null
@@ -1,519 +0,0 @@
1/*
2 * arch/ppc/kernel/irq.c
3 *
4 * Derived from arch/i386/kernel/irq.c
5 * Copyright (C) 1992 Linus Torvalds
6 * Adapted from arch/i386 by Gary Thomas
7 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
8 * Updated and modified by Cort Dougan (cort@cs.nmt.edu)
9 * Copyright (C) 1996 Cort Dougan
10 * Adapted for Power Macintosh by Paul Mackerras
11 * Copyright (C) 1996 Paul Mackerras (paulus@cs.anu.edu.au)
12 * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
13 *
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version
17 * 2 of the License, or (at your option) any later version.
18 *
19 * This file contains the code used by various IRQ handling routines:
20 * asking for different IRQ's should be done through these routines
21 * instead of just grabbing them. Thus setups with different IRQ numbers
22 * shouldn't result in any weird surprises, and installing new handlers
23 * should be easier.
24 */
25
26#include <linux/errno.h>
27#include <linux/module.h>
28#include <linux/threads.h>
29#include <linux/kernel_stat.h>
30#include <linux/signal.h>
31#include <linux/sched.h>
32#include <linux/ioport.h>
33#include <linux/interrupt.h>
34#include <linux/timex.h>
35#include <linux/config.h>
36#include <linux/init.h>
37#include <linux/slab.h>
38#include <linux/pci.h>
39#include <linux/delay.h>
40#include <linux/irq.h>
41#include <linux/proc_fs.h>
42#include <linux/random.h>
43#include <linux/kallsyms.h>
44#include <linux/profile.h>
45#include <linux/bitops.h>
46
47#include <asm/uaccess.h>
48#include <asm/system.h>
49#include <asm/io.h>
50#include <asm/pgtable.h>
51#include <asm/irq.h>
52#include <asm/cache.h>
53#include <asm/prom.h>
54#include <asm/ptrace.h>
55#include <asm/iseries/it_lp_queue.h>
56#include <asm/machdep.h>
57#include <asm/paca.h>
58
59#ifdef CONFIG_SMP
60extern void iSeries_smp_message_recv( struct pt_regs * );
61#endif
62
63extern irq_desc_t irq_desc[NR_IRQS];
64EXPORT_SYMBOL(irq_desc);
65
66int distribute_irqs = 1;
67int __irq_offset_value;
68int ppc_spurious_interrupts;
69u64 ppc64_interrupt_controller;
70
71int show_interrupts(struct seq_file *p, void *v)
72{
73 int i = *(loff_t *) v, j;
74 struct irqaction * action;
75 irq_desc_t *desc;
76 unsigned long flags;
77
78 if (i == 0) {
79 seq_printf(p, " ");
80 for (j=0; j<NR_CPUS; j++) {
81 if (cpu_online(j))
82 seq_printf(p, "CPU%d ",j);
83 }
84 seq_putc(p, '\n');
85 }
86
87 if (i < NR_IRQS) {
88 desc = get_irq_desc(i);
89 spin_lock_irqsave(&desc->lock, flags);
90 action = desc->action;
91 if (!action || !action->handler)
92 goto skip;
93 seq_printf(p, "%3d: ", i);
94#ifdef CONFIG_SMP
95 for (j = 0; j < NR_CPUS; j++) {
96 if (cpu_online(j))
97 seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
98 }
99#else
100 seq_printf(p, "%10u ", kstat_irqs(i));
101#endif /* CONFIG_SMP */
102 if (desc->handler)
103 seq_printf(p, " %s ", desc->handler->typename );
104 else
105 seq_printf(p, " None ");
106 seq_printf(p, "%s", (desc->status & IRQ_LEVEL) ? "Level " : "Edge ");
107 seq_printf(p, " %s",action->name);
108 for (action=action->next; action; action = action->next)
109 seq_printf(p, ", %s", action->name);
110 seq_putc(p, '\n');
111skip:
112 spin_unlock_irqrestore(&desc->lock, flags);
113 } else if (i == NR_IRQS)
114 seq_printf(p, "BAD: %10u\n", ppc_spurious_interrupts);
115 return 0;
116}
117
118#ifdef CONFIG_HOTPLUG_CPU
119void fixup_irqs(cpumask_t map)
120{
121 unsigned int irq;
122 static int warned;
123
124 for_each_irq(irq) {
125 cpumask_t mask;
126
127 if (irq_desc[irq].status & IRQ_PER_CPU)
128 continue;
129
130 cpus_and(mask, irq_affinity[irq], map);
131 if (any_online_cpu(mask) == NR_CPUS) {
132 printk("Breaking affinity for irq %i\n", irq);
133 mask = map;
134 }
135 if (irq_desc[irq].handler->set_affinity)
136 irq_desc[irq].handler->set_affinity(irq, mask);
137 else if (irq_desc[irq].action && !(warned++))
138 printk("Cannot set affinity for irq %i\n", irq);
139 }
140
141 local_irq_enable();
142 mdelay(1);
143 local_irq_disable();
144}
145#endif
146
147extern int noirqdebug;
148
149/*
150 * Eventually, this should take an array of interrupts and an array size
151 * so it can dispatch multiple interrupts.
152 */
153void ppc_irq_dispatch_handler(struct pt_regs *regs, int irq)
154{
155 int status;
156 struct irqaction *action;
157 int cpu = smp_processor_id();
158 irq_desc_t *desc = get_irq_desc(irq);
159 irqreturn_t action_ret;
160#ifdef CONFIG_IRQSTACKS
161 struct thread_info *curtp, *irqtp;
162#endif
163
164 kstat_cpu(cpu).irqs[irq]++;
165
166 if (desc->status & IRQ_PER_CPU) {
167 /* no locking required for CPU-local interrupts: */
168 ack_irq(irq);
169 action_ret = handle_IRQ_event(irq, regs, desc->action);
170 desc->handler->end(irq);
171 return;
172 }
173
174 spin_lock(&desc->lock);
175 ack_irq(irq);
176 /*
177 REPLAY is when Linux resends an IRQ that was dropped earlier
178 WAITING is used by probe to mark irqs that are being tested
179 */
180 status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING);
181 status |= IRQ_PENDING; /* we _want_ to handle it */
182
183 /*
184 * If the IRQ is disabled for whatever reason, we cannot
185 * use the action we have.
186 */
187 action = NULL;
188 if (likely(!(status & (IRQ_DISABLED | IRQ_INPROGRESS)))) {
189 action = desc->action;
190 if (!action || !action->handler) {
191 ppc_spurious_interrupts++;
192 printk(KERN_DEBUG "Unhandled interrupt %x, disabled\n", irq);
193 /* We can't call disable_irq here, it would deadlock */
194 if (!desc->depth)
195 desc->depth = 1;
196 desc->status |= IRQ_DISABLED;
197 /* This is not a real spurrious interrupt, we
198 * have to eoi it, so we jump to out
199 */
200 mask_irq(irq);
201 goto out;
202 }
203 status &= ~IRQ_PENDING; /* we commit to handling */
204 status |= IRQ_INPROGRESS; /* we are handling it */
205 }
206 desc->status = status;
207
208 /*
209 * If there is no IRQ handler or it was disabled, exit early.
210 Since we set PENDING, if another processor is handling
211 a different instance of this same irq, the other processor
212 will take care of it.
213 */
214 if (unlikely(!action))
215 goto out;
216
217 /*
218 * Edge triggered interrupts need to remember
219 * pending events.
220 * This applies to any hw interrupts that allow a second
221 * instance of the same irq to arrive while we are in do_IRQ
222 * or in the handler. But the code here only handles the _second_
223 * instance of the irq, not the third or fourth. So it is mostly
224 * useful for irq hardware that does not mask cleanly in an
225 * SMP environment.
226 */
227 for (;;) {
228 spin_unlock(&desc->lock);
229
230#ifdef CONFIG_IRQSTACKS
231 /* Switch to the irq stack to handle this */
232 curtp = current_thread_info();
233 irqtp = hardirq_ctx[smp_processor_id()];
234 if (curtp != irqtp) {
235 irqtp->task = curtp->task;
236 irqtp->flags = 0;
237 action_ret = call_handle_IRQ_event(irq, regs, action, irqtp);
238 irqtp->task = NULL;
239 if (irqtp->flags)
240 set_bits(irqtp->flags, &curtp->flags);
241 } else
242#endif
243 action_ret = handle_IRQ_event(irq, regs, action);
244
245 spin_lock(&desc->lock);
246 if (!noirqdebug)
247 note_interrupt(irq, desc, action_ret, regs);
248 if (likely(!(desc->status & IRQ_PENDING)))
249 break;
250 desc->status &= ~IRQ_PENDING;
251 }
252out:
253 desc->status &= ~IRQ_INPROGRESS;
254 /*
255 * The ->end() handler has to deal with interrupts which got
256 * disabled while the handler was running.
257 */
258 if (desc->handler) {
259 if (desc->handler->end)
260 desc->handler->end(irq);
261 else if (desc->handler->enable)
262 desc->handler->enable(irq);
263 }
264 spin_unlock(&desc->lock);
265}
266
267#ifdef CONFIG_PPC_ISERIES
268void do_IRQ(struct pt_regs *regs)
269{
270 struct paca_struct *lpaca;
271
272 irq_enter();
273
274#ifdef CONFIG_DEBUG_STACKOVERFLOW
275 /* Debugging check for stack overflow: is there less than 2KB free? */
276 {
277 long sp;
278
279 sp = __get_SP() & (THREAD_SIZE-1);
280
281 if (unlikely(sp < (sizeof(struct thread_info) + 2048))) {
282 printk("do_IRQ: stack overflow: %ld\n",
283 sp - sizeof(struct thread_info));
284 dump_stack();
285 }
286 }
287#endif
288
289 lpaca = get_paca();
290#ifdef CONFIG_SMP
291 if (lpaca->lppaca.int_dword.fields.ipi_cnt) {
292 lpaca->lppaca.int_dword.fields.ipi_cnt = 0;
293 iSeries_smp_message_recv(regs);
294 }
295#endif /* CONFIG_SMP */
296 if (hvlpevent_is_pending())
297 process_hvlpevents(regs);
298
299 irq_exit();
300
301 if (lpaca->lppaca.int_dword.fields.decr_int) {
302 lpaca->lppaca.int_dword.fields.decr_int = 0;
303 /* Signal a fake decrementer interrupt */
304 timer_interrupt(regs);
305 }
306}
307
308#else /* CONFIG_PPC_ISERIES */
309
310void do_IRQ(struct pt_regs *regs)
311{
312 int irq;
313
314 irq_enter();
315
316#ifdef CONFIG_DEBUG_STACKOVERFLOW
317 /* Debugging check for stack overflow: is there less than 2KB free? */
318 {
319 long sp;
320
321 sp = __get_SP() & (THREAD_SIZE-1);
322
323 if (unlikely(sp < (sizeof(struct thread_info) + 2048))) {
324 printk("do_IRQ: stack overflow: %ld\n",
325 sp - sizeof(struct thread_info));
326 dump_stack();
327 }
328 }
329#endif
330
331 irq = ppc_md.get_irq(regs);
332
333 if (irq >= 0)
334 ppc_irq_dispatch_handler(regs, irq);
335 else
336 /* That's not SMP safe ... but who cares ? */
337 ppc_spurious_interrupts++;
338
339 irq_exit();
340}
341#endif /* CONFIG_PPC_ISERIES */
342
343void __init init_IRQ(void)
344{
345 static int once = 0;
346
347 if (once)
348 return;
349
350 once++;
351
352 ppc_md.init_IRQ();
353 irq_ctx_init();
354}
355
356#ifndef CONFIG_PPC_ISERIES
357/*
358 * Virtual IRQ mapping code, used on systems with XICS interrupt controllers.
359 */
360
361#define UNDEFINED_IRQ 0xffffffff
362unsigned int virt_irq_to_real_map[NR_IRQS];
363
364/*
365 * Don't use virtual irqs 0, 1, 2 for devices.
366 * The pcnet32 driver considers interrupt numbers < 2 to be invalid,
367 * and 2 is the XICS IPI interrupt.
368 * We limit virtual irqs to 17 less than NR_IRQS so that when we
369 * offset them by 16 (to reserve the first 16 for ISA interrupts)
370 * we don't end up with an interrupt number >= NR_IRQS.
371 */
372#define MIN_VIRT_IRQ 3
373#define MAX_VIRT_IRQ (NR_IRQS - NUM_ISA_INTERRUPTS - 1)
374#define NR_VIRT_IRQS (MAX_VIRT_IRQ - MIN_VIRT_IRQ + 1)
375
376void
377virt_irq_init(void)
378{
379 int i;
380 for (i = 0; i < NR_IRQS; i++)
381 virt_irq_to_real_map[i] = UNDEFINED_IRQ;
382}
383
384/* Create a mapping for a real_irq if it doesn't already exist.
385 * Return the virtual irq as a convenience.
386 */
387int virt_irq_create_mapping(unsigned int real_irq)
388{
389 unsigned int virq, first_virq;
390 static int warned;
391
392 if (ppc64_interrupt_controller == IC_OPEN_PIC)
393 return real_irq; /* no mapping for openpic (for now) */
394
395 if (ppc64_interrupt_controller == IC_CELL_PIC)
396 return real_irq; /* no mapping for iic either */
397
398 /* don't map interrupts < MIN_VIRT_IRQ */
399 if (real_irq < MIN_VIRT_IRQ) {
400 virt_irq_to_real_map[real_irq] = real_irq;
401 return real_irq;
402 }
403
404 /* map to a number between MIN_VIRT_IRQ and MAX_VIRT_IRQ */
405 virq = real_irq;
406 if (virq > MAX_VIRT_IRQ)
407 virq = (virq % NR_VIRT_IRQS) + MIN_VIRT_IRQ;
408
409 /* search for this number or a free slot */
410 first_virq = virq;
411 while (virt_irq_to_real_map[virq] != UNDEFINED_IRQ) {
412 if (virt_irq_to_real_map[virq] == real_irq)
413 return virq;
414 if (++virq > MAX_VIRT_IRQ)
415 virq = MIN_VIRT_IRQ;
416 if (virq == first_virq)
417 goto nospace; /* oops, no free slots */
418 }
419
420 virt_irq_to_real_map[virq] = real_irq;
421 return virq;
422
423 nospace:
424 if (!warned) {
425 printk(KERN_CRIT "Interrupt table is full\n");
426 printk(KERN_CRIT "Increase NR_IRQS (currently %d) "
427 "in your kernel sources and rebuild.\n", NR_IRQS);
428 warned = 1;
429 }
430 return NO_IRQ;
431}
432
433/*
434 * In most cases will get a hit on the very first slot checked in the
435 * virt_irq_to_real_map. Only when there are a large number of
436 * IRQs will this be expensive.
437 */
438unsigned int real_irq_to_virt_slowpath(unsigned int real_irq)
439{
440 unsigned int virq;
441 unsigned int first_virq;
442
443 virq = real_irq;
444
445 if (virq > MAX_VIRT_IRQ)
446 virq = (virq % NR_VIRT_IRQS) + MIN_VIRT_IRQ;
447
448 first_virq = virq;
449
450 do {
451 if (virt_irq_to_real_map[virq] == real_irq)
452 return virq;
453
454 virq++;
455
456 if (virq >= MAX_VIRT_IRQ)
457 virq = 0;
458
459 } while (first_virq != virq);
460
461 return NO_IRQ;
462
463}
464
465#endif /* CONFIG_PPC_ISERIES */
466
467#ifdef CONFIG_IRQSTACKS
468struct thread_info *softirq_ctx[NR_CPUS];
469struct thread_info *hardirq_ctx[NR_CPUS];
470
471void irq_ctx_init(void)
472{
473 struct thread_info *tp;
474 int i;
475
476 for_each_cpu(i) {
477 memset((void *)softirq_ctx[i], 0, THREAD_SIZE);
478 tp = softirq_ctx[i];
479 tp->cpu = i;
480 tp->preempt_count = SOFTIRQ_OFFSET;
481
482 memset((void *)hardirq_ctx[i], 0, THREAD_SIZE);
483 tp = hardirq_ctx[i];
484 tp->cpu = i;
485 tp->preempt_count = HARDIRQ_OFFSET;
486 }
487}
488
489void do_softirq(void)
490{
491 unsigned long flags;
492 struct thread_info *curtp, *irqtp;
493
494 if (in_interrupt())
495 return;
496
497 local_irq_save(flags);
498
499 if (local_softirq_pending()) {
500 curtp = current_thread_info();
501 irqtp = softirq_ctx[smp_processor_id()];
502 irqtp->task = curtp->task;
503 call_do_softirq(irqtp);
504 irqtp->task = NULL;
505 }
506
507 local_irq_restore(flags);
508}
509EXPORT_SYMBOL(do_softirq);
510
511#endif /* CONFIG_IRQSTACKS */
512
513static int __init setup_noirqdistrib(char *str)
514{
515 distribute_irqs = 0;
516 return 1;
517}
518
519__setup("noirqdistrib", setup_noirqdistrib);
diff --git a/arch/ppc64/kernel/lparcfg.c b/arch/ppc64/kernel/lparcfg.c
deleted file mode 100644
index 3e7b2f28ec83..000000000000
--- a/arch/ppc64/kernel/lparcfg.c
+++ /dev/null
@@ -1,611 +0,0 @@
1/*
2 * PowerPC64 LPAR Configuration Information Driver
3 *
4 * Dave Engebretsen engebret@us.ibm.com
5 * Copyright (c) 2003 Dave Engebretsen
6 * Will Schmidt willschm@us.ibm.com
7 * SPLPAR updates, Copyright (c) 2003 Will Schmidt IBM Corporation.
8 * seq_file updates, Copyright (c) 2004 Will Schmidt IBM Corporation.
9 * Nathan Lynch nathanl@austin.ibm.com
10 * Added lparcfg_write, Copyright (C) 2004 Nathan Lynch IBM Corporation.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * This driver creates a proc file at /proc/ppc64/lparcfg which contains
18 * keyword - value pairs that specify the configuration of the partition.
19 */
20
21#include <linux/config.h>
22#include <linux/module.h>
23#include <linux/types.h>
24#include <linux/errno.h>
25#include <linux/proc_fs.h>
26#include <linux/init.h>
27#include <linux/seq_file.h>
28#include <asm/uaccess.h>
29#include <asm/iseries/hv_lp_config.h>
30#include <asm/lppaca.h>
31#include <asm/hvcall.h>
32#include <asm/firmware.h>
33#include <asm/rtas.h>
34#include <asm/system.h>
35#include <asm/time.h>
36#include <asm/iseries/it_exp_vpd_panel.h>
37#include <asm/prom.h>
38
39#define MODULE_VERS "1.6"
40#define MODULE_NAME "lparcfg"
41
42/* #define LPARCFG_DEBUG */
43
44/* find a better place for this function... */
45void log_plpar_hcall_return(unsigned long rc, char *tag)
46{
47 if (rc == 0) /* success, return */
48 return;
49/* check for null tag ? */
50 if (rc == H_Hardware)
51 printk(KERN_INFO
52 "plpar-hcall (%s) failed with hardware fault\n", tag);
53 else if (rc == H_Function)
54 printk(KERN_INFO
55 "plpar-hcall (%s) failed; function not allowed\n", tag);
56 else if (rc == H_Authority)
57 printk(KERN_INFO
58 "plpar-hcall (%s) failed; not authorized to this function\n",
59 tag);
60 else if (rc == H_Parameter)
61 printk(KERN_INFO "plpar-hcall (%s) failed; Bad parameter(s)\n",
62 tag);
63 else
64 printk(KERN_INFO
65 "plpar-hcall (%s) failed with unexpected rc(0x%lx)\n",
66 tag, rc);
67
68}
69
70static struct proc_dir_entry *proc_ppc64_lparcfg;
71#define LPARCFG_BUFF_SIZE 4096
72
73#ifdef CONFIG_PPC_ISERIES
74
75/*
76 * For iSeries legacy systems, the PPA purr function is available from the
77 * emulated_time_base field in the paca.
78 */
79static unsigned long get_purr(void)
80{
81 unsigned long sum_purr = 0;
82 int cpu;
83 struct paca_struct *lpaca;
84
85 for_each_cpu(cpu) {
86 lpaca = paca + cpu;
87 sum_purr += lpaca->lppaca.emulated_time_base;
88
89#ifdef PURR_DEBUG
90 printk(KERN_INFO "get_purr for cpu (%d) has value (%ld) \n",
91 cpu, lpaca->lppaca.emulated_time_base);
92#endif
93 }
94 return sum_purr;
95}
96
97#define lparcfg_write NULL
98
99/*
100 * Methods used to fetch LPAR data when running on an iSeries platform.
101 */
102static int lparcfg_data(struct seq_file *m, void *v)
103{
104 unsigned long pool_id, lp_index;
105 int shared, entitled_capacity, max_entitled_capacity;
106 int processors, max_processors;
107 struct paca_struct *lpaca = get_paca();
108 unsigned long purr = get_purr();
109
110 seq_printf(m, "%s %s \n", MODULE_NAME, MODULE_VERS);
111
112 shared = (int)(lpaca->lppaca_ptr->shared_proc);
113 seq_printf(m, "serial_number=%c%c%c%c%c%c%c\n",
114 e2a(xItExtVpdPanel.mfgID[2]),
115 e2a(xItExtVpdPanel.mfgID[3]),
116 e2a(xItExtVpdPanel.systemSerial[1]),
117 e2a(xItExtVpdPanel.systemSerial[2]),
118 e2a(xItExtVpdPanel.systemSerial[3]),
119 e2a(xItExtVpdPanel.systemSerial[4]),
120 e2a(xItExtVpdPanel.systemSerial[5]));
121
122 seq_printf(m, "system_type=%c%c%c%c\n",
123 e2a(xItExtVpdPanel.machineType[0]),
124 e2a(xItExtVpdPanel.machineType[1]),
125 e2a(xItExtVpdPanel.machineType[2]),
126 e2a(xItExtVpdPanel.machineType[3]));
127
128 lp_index = HvLpConfig_getLpIndex();
129 seq_printf(m, "partition_id=%d\n", (int)lp_index);
130
131 seq_printf(m, "system_active_processors=%d\n",
132 (int)HvLpConfig_getSystemPhysicalProcessors());
133
134 seq_printf(m, "system_potential_processors=%d\n",
135 (int)HvLpConfig_getSystemPhysicalProcessors());
136
137 processors = (int)HvLpConfig_getPhysicalProcessors();
138 seq_printf(m, "partition_active_processors=%d\n", processors);
139
140 max_processors = (int)HvLpConfig_getMaxPhysicalProcessors();
141 seq_printf(m, "partition_potential_processors=%d\n", max_processors);
142
143 if (shared) {
144 entitled_capacity = HvLpConfig_getSharedProcUnits();
145 max_entitled_capacity = HvLpConfig_getMaxSharedProcUnits();
146 } else {
147 entitled_capacity = processors * 100;
148 max_entitled_capacity = max_processors * 100;
149 }
150 seq_printf(m, "partition_entitled_capacity=%d\n", entitled_capacity);
151
152 seq_printf(m, "partition_max_entitled_capacity=%d\n",
153 max_entitled_capacity);
154
155 if (shared) {
156 pool_id = HvLpConfig_getSharedPoolIndex();
157 seq_printf(m, "pool=%d\n", (int)pool_id);
158 seq_printf(m, "pool_capacity=%d\n",
159 (int)(HvLpConfig_getNumProcsInSharedPool(pool_id) *
160 100));
161 seq_printf(m, "purr=%ld\n", purr);
162 }
163
164 seq_printf(m, "shared_processor_mode=%d\n", shared);
165
166 return 0;
167}
168#endif /* CONFIG_PPC_ISERIES */
169
170#ifdef CONFIG_PPC_PSERIES
171/*
172 * Methods used to fetch LPAR data when running on a pSeries platform.
173 */
174
175/*
176 * H_GET_PPP hcall returns info in 4 parms.
177 * entitled_capacity,unallocated_capacity,
178 * aggregation, resource_capability).
179 *
180 * R4 = Entitled Processor Capacity Percentage.
181 * R5 = Unallocated Processor Capacity Percentage.
182 * R6 (AABBCCDDEEFFGGHH).
183 * XXXX - reserved (0)
184 * XXXX - reserved (0)
185 * XXXX - Group Number
186 * XXXX - Pool Number.
187 * R7 (IIJJKKLLMMNNOOPP).
188 * XX - reserved. (0)
189 * XX - bit 0-6 reserved (0). bit 7 is Capped indicator.
190 * XX - variable processor Capacity Weight
191 * XX - Unallocated Variable Processor Capacity Weight.
192 * XXXX - Active processors in Physical Processor Pool.
193 * XXXX - Processors active on platform.
194 */
195static unsigned int h_get_ppp(unsigned long *entitled,
196 unsigned long *unallocated,
197 unsigned long *aggregation,
198 unsigned long *resource)
199{
200 unsigned long rc;
201 rc = plpar_hcall_4out(H_GET_PPP, 0, 0, 0, 0, entitled, unallocated,
202 aggregation, resource);
203
204 log_plpar_hcall_return(rc, "H_GET_PPP");
205
206 return rc;
207}
208
209static void h_pic(unsigned long *pool_idle_time, unsigned long *num_procs)
210{
211 unsigned long rc;
212 unsigned long dummy;
213 rc = plpar_hcall(H_PIC, 0, 0, 0, 0, pool_idle_time, num_procs, &dummy);
214
215 log_plpar_hcall_return(rc, "H_PIC");
216}
217
218static unsigned long get_purr(void);
219
220/* Track sum of all purrs across all processors. This is used to further */
221/* calculate usage values by different applications */
222
223static unsigned long get_purr(void)
224{
225 unsigned long sum_purr = 0;
226 int cpu;
227 struct cpu_usage *cu;
228
229 for_each_cpu(cpu) {
230 cu = &per_cpu(cpu_usage_array, cpu);
231 sum_purr += cu->current_tb;
232 }
233 return sum_purr;
234}
235
236#define SPLPAR_CHARACTERISTICS_TOKEN 20
237#define SPLPAR_MAXLENGTH 1026*(sizeof(char))
238
239/*
240 * parse_system_parameter_string()
241 * Retrieve the potential_processors, max_entitled_capacity and friends
242 * through the get-system-parameter rtas call. Replace keyword strings as
243 * necessary.
244 */
245static void parse_system_parameter_string(struct seq_file *m)
246{
247 int call_status;
248
249 char *local_buffer = kmalloc(SPLPAR_MAXLENGTH, GFP_KERNEL);
250 if (!local_buffer) {
251 printk(KERN_ERR "%s %s kmalloc failure at line %d \n",
252 __FILE__, __FUNCTION__, __LINE__);
253 return;
254 }
255
256 spin_lock(&rtas_data_buf_lock);
257 memset(rtas_data_buf, 0, SPLPAR_MAXLENGTH);
258 call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
259 NULL,
260 SPLPAR_CHARACTERISTICS_TOKEN,
261 __pa(rtas_data_buf));
262 memcpy(local_buffer, rtas_data_buf, SPLPAR_MAXLENGTH);
263 spin_unlock(&rtas_data_buf_lock);
264
265 if (call_status != 0) {
266 printk(KERN_INFO
267 "%s %s Error calling get-system-parameter (0x%x)\n",
268 __FILE__, __FUNCTION__, call_status);
269 } else {
270 int splpar_strlen;
271 int idx, w_idx;
272 char *workbuffer = kmalloc(SPLPAR_MAXLENGTH, GFP_KERNEL);
273 if (!workbuffer) {
274 printk(KERN_ERR "%s %s kmalloc failure at line %d \n",
275 __FILE__, __FUNCTION__, __LINE__);
276 kfree(local_buffer);
277 return;
278 }
279#ifdef LPARCFG_DEBUG
280 printk(KERN_INFO "success calling get-system-parameter \n");
281#endif
282 splpar_strlen = local_buffer[0] * 16 + local_buffer[1];
283 local_buffer += 2; /* step over strlen value */
284
285 memset(workbuffer, 0, SPLPAR_MAXLENGTH);
286 w_idx = 0;
287 idx = 0;
288 while ((*local_buffer) && (idx < splpar_strlen)) {
289 workbuffer[w_idx++] = local_buffer[idx++];
290 if ((local_buffer[idx] == ',')
291 || (local_buffer[idx] == '\0')) {
292 workbuffer[w_idx] = '\0';
293 if (w_idx) {
294 /* avoid the empty string */
295 seq_printf(m, "%s\n", workbuffer);
296 }
297 memset(workbuffer, 0, SPLPAR_MAXLENGTH);
298 idx++; /* skip the comma */
299 w_idx = 0;
300 } else if (local_buffer[idx] == '=') {
301 /* code here to replace workbuffer contents
302 with different keyword strings */
303 if (0 == strcmp(workbuffer, "MaxEntCap")) {
304 strcpy(workbuffer,
305 "partition_max_entitled_capacity");
306 w_idx = strlen(workbuffer);
307 }
308 if (0 == strcmp(workbuffer, "MaxPlatProcs")) {
309 strcpy(workbuffer,
310 "system_potential_processors");
311 w_idx = strlen(workbuffer);
312 }
313 }
314 }
315 kfree(workbuffer);
316 local_buffer -= 2; /* back up over strlen value */
317 }
318 kfree(local_buffer);
319}
320
321static int lparcfg_count_active_processors(void);
322
323/* Return the number of processors in the system.
324 * This function reads through the device tree and counts
325 * the virtual processors, this does not include threads.
326 */
327static int lparcfg_count_active_processors(void)
328{
329 struct device_node *cpus_dn = NULL;
330 int count = 0;
331
332 while ((cpus_dn = of_find_node_by_type(cpus_dn, "cpu"))) {
333#ifdef LPARCFG_DEBUG
334 printk(KERN_ERR "cpus_dn %p \n", cpus_dn);
335#endif
336 count++;
337 }
338 return count;
339}
340
341static int lparcfg_data(struct seq_file *m, void *v)
342{
343 int partition_potential_processors;
344 int partition_active_processors;
345 struct device_node *rootdn;
346 const char *model = "";
347 const char *system_id = "";
348 unsigned int *lp_index_ptr, lp_index = 0;
349 struct device_node *rtas_node;
350 int *lrdrp;
351
352 rootdn = find_path_device("/");
353 if (rootdn) {
354 model = get_property(rootdn, "model", NULL);
355 system_id = get_property(rootdn, "system-id", NULL);
356 lp_index_ptr = (unsigned int *)
357 get_property(rootdn, "ibm,partition-no", NULL);
358 if (lp_index_ptr)
359 lp_index = *lp_index_ptr;
360 }
361
362 seq_printf(m, "%s %s \n", MODULE_NAME, MODULE_VERS);
363
364 seq_printf(m, "serial_number=%s\n", system_id);
365
366 seq_printf(m, "system_type=%s\n", model);
367
368 seq_printf(m, "partition_id=%d\n", (int)lp_index);
369
370 rtas_node = find_path_device("/rtas");
371 lrdrp = (int *)get_property(rtas_node, "ibm,lrdr-capacity", NULL);
372
373 if (lrdrp == NULL) {
374 partition_potential_processors = systemcfg->processorCount;
375 } else {
376 partition_potential_processors = *(lrdrp + 4);
377 }
378
379 partition_active_processors = lparcfg_count_active_processors();
380
381 if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
382 unsigned long h_entitled, h_unallocated;
383 unsigned long h_aggregation, h_resource;
384 unsigned long pool_idle_time, pool_procs;
385 unsigned long purr;
386
387 h_get_ppp(&h_entitled, &h_unallocated, &h_aggregation,
388 &h_resource);
389
390 seq_printf(m, "R4=0x%lx\n", h_entitled);
391 seq_printf(m, "R5=0x%lx\n", h_unallocated);
392 seq_printf(m, "R6=0x%lx\n", h_aggregation);
393 seq_printf(m, "R7=0x%lx\n", h_resource);
394
395 purr = get_purr();
396
397 /* this call handles the ibm,get-system-parameter contents */
398 parse_system_parameter_string(m);
399
400 seq_printf(m, "partition_entitled_capacity=%ld\n", h_entitled);
401
402 seq_printf(m, "group=%ld\n", (h_aggregation >> 2 * 8) & 0xffff);
403
404 seq_printf(m, "system_active_processors=%ld\n",
405 (h_resource >> 0 * 8) & 0xffff);
406
407 /* pool related entries are apropriate for shared configs */
408 if (paca[0].lppaca.shared_proc) {
409
410 h_pic(&pool_idle_time, &pool_procs);
411
412 seq_printf(m, "pool=%ld\n",
413 (h_aggregation >> 0 * 8) & 0xffff);
414
415 /* report pool_capacity in percentage */
416 seq_printf(m, "pool_capacity=%ld\n",
417 ((h_resource >> 2 * 8) & 0xffff) * 100);
418
419 seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time);
420
421 seq_printf(m, "pool_num_procs=%ld\n", pool_procs);
422 }
423
424 seq_printf(m, "unallocated_capacity_weight=%ld\n",
425 (h_resource >> 4 * 8) & 0xFF);
426
427 seq_printf(m, "capacity_weight=%ld\n",
428 (h_resource >> 5 * 8) & 0xFF);
429
430 seq_printf(m, "capped=%ld\n", (h_resource >> 6 * 8) & 0x01);
431
432 seq_printf(m, "unallocated_capacity=%ld\n", h_unallocated);
433
434 seq_printf(m, "purr=%ld\n", purr);
435
436 } else { /* non SPLPAR case */
437
438 seq_printf(m, "system_active_processors=%d\n",
439 partition_potential_processors);
440
441 seq_printf(m, "system_potential_processors=%d\n",
442 partition_potential_processors);
443
444 seq_printf(m, "partition_max_entitled_capacity=%d\n",
445 partition_potential_processors * 100);
446
447 seq_printf(m, "partition_entitled_capacity=%d\n",
448 partition_active_processors * 100);
449 }
450
451 seq_printf(m, "partition_active_processors=%d\n",
452 partition_active_processors);
453
454 seq_printf(m, "partition_potential_processors=%d\n",
455 partition_potential_processors);
456
457 seq_printf(m, "shared_processor_mode=%d\n", paca[0].lppaca.shared_proc);
458
459 return 0;
460}
461
462/*
463 * Interface for changing system parameters (variable capacity weight
464 * and entitled capacity). Format of input is "param_name=value";
465 * anything after value is ignored. Valid parameters at this time are
466 * "partition_entitled_capacity" and "capacity_weight". We use
467 * H_SET_PPP to alter parameters.
468 *
469 * This function should be invoked only on systems with
470 * FW_FEATURE_SPLPAR.
471 */
472static ssize_t lparcfg_write(struct file *file, const char __user * buf,
473 size_t count, loff_t * off)
474{
475 char *kbuf;
476 char *tmp;
477 u64 new_entitled, *new_entitled_ptr = &new_entitled;
478 u8 new_weight, *new_weight_ptr = &new_weight;
479
480 unsigned long current_entitled; /* parameters for h_get_ppp */
481 unsigned long dummy;
482 unsigned long resource;
483 u8 current_weight;
484
485 ssize_t retval = -ENOMEM;
486
487 kbuf = kmalloc(count, GFP_KERNEL);
488 if (!kbuf)
489 goto out;
490
491 retval = -EFAULT;
492 if (copy_from_user(kbuf, buf, count))
493 goto out;
494
495 retval = -EINVAL;
496 kbuf[count - 1] = '\0';
497 tmp = strchr(kbuf, '=');
498 if (!tmp)
499 goto out;
500
501 *tmp++ = '\0';
502
503 if (!strcmp(kbuf, "partition_entitled_capacity")) {
504 char *endp;
505 *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10);
506 if (endp == tmp)
507 goto out;
508 new_weight_ptr = &current_weight;
509 } else if (!strcmp(kbuf, "capacity_weight")) {
510 char *endp;
511 *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10);
512 if (endp == tmp)
513 goto out;
514 new_entitled_ptr = &current_entitled;
515 } else
516 goto out;
517
518 /* Get our current parameters */
519 retval = h_get_ppp(&current_entitled, &dummy, &dummy, &resource);
520 if (retval) {
521 retval = -EIO;
522 goto out;
523 }
524
525 current_weight = (resource >> 5 * 8) & 0xFF;
526
527 pr_debug("%s: current_entitled = %lu, current_weight = %lu\n",
528 __FUNCTION__, current_entitled, current_weight);
529
530 pr_debug("%s: new_entitled = %lu, new_weight = %lu\n",
531 __FUNCTION__, *new_entitled_ptr, *new_weight_ptr);
532
533 retval = plpar_hcall_norets(H_SET_PPP, *new_entitled_ptr,
534 *new_weight_ptr);
535
536 if (retval == H_Success || retval == H_Constrained) {
537 retval = count;
538 } else if (retval == H_Busy) {
539 retval = -EBUSY;
540 } else if (retval == H_Hardware) {
541 retval = -EIO;
542 } else if (retval == H_Parameter) {
543 retval = -EINVAL;
544 } else {
545 printk(KERN_WARNING "%s: received unknown hv return code %ld",
546 __FUNCTION__, retval);
547 retval = -EIO;
548 }
549
550 out:
551 kfree(kbuf);
552 return retval;
553}
554
555#endif /* CONFIG_PPC_PSERIES */
556
557static int lparcfg_open(struct inode *inode, struct file *file)
558{
559 return single_open(file, lparcfg_data, NULL);
560}
561
562struct file_operations lparcfg_fops = {
563 .owner = THIS_MODULE,
564 .read = seq_read,
565 .open = lparcfg_open,
566 .release = single_release,
567};
568
569int __init lparcfg_init(void)
570{
571 struct proc_dir_entry *ent;
572 mode_t mode = S_IRUSR | S_IRGRP | S_IROTH;
573
574 /* Allow writing if we have FW_FEATURE_SPLPAR */
575 if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
576 lparcfg_fops.write = lparcfg_write;
577 mode |= S_IWUSR;
578 }
579
580 ent = create_proc_entry("ppc64/lparcfg", mode, NULL);
581 if (ent) {
582 ent->proc_fops = &lparcfg_fops;
583 ent->data = kmalloc(LPARCFG_BUFF_SIZE, GFP_KERNEL);
584 if (!ent->data) {
585 printk(KERN_ERR
586 "Failed to allocate buffer for lparcfg\n");
587 remove_proc_entry("lparcfg", ent->parent);
588 return -ENOMEM;
589 }
590 } else {
591 printk(KERN_ERR "Failed to create ppc64/lparcfg\n");
592 return -EIO;
593 }
594
595 proc_ppc64_lparcfg = ent;
596 return 0;
597}
598
599void __exit lparcfg_cleanup(void)
600{
601 if (proc_ppc64_lparcfg) {
602 kfree(proc_ppc64_lparcfg->data);
603 remove_proc_entry("lparcfg", proc_ppc64_lparcfg->parent);
604 }
605}
606
607module_init(lparcfg_init);
608module_exit(lparcfg_cleanup);
609MODULE_DESCRIPTION("Interface for LPAR configuration data");
610MODULE_AUTHOR("Dave Engebretsen");
611MODULE_LICENSE("GPL");
diff --git a/arch/ppc64/kernel/misc.S b/arch/ppc64/kernel/misc.S
index 914632ec587d..492bca6137eb 100644
--- a/arch/ppc64/kernel/misc.S
+++ b/arch/ppc64/kernel/misc.S
@@ -78,12 +78,12 @@ _GLOBAL(call_do_softirq)
78 mtlr r0 78 mtlr r0
79 blr 79 blr
80 80
81_GLOBAL(call_handle_IRQ_event) 81_GLOBAL(call___do_IRQ)
82 mflr r0 82 mflr r0
83 std r0,16(r1) 83 std r0,16(r1)
84 stdu r1,THREAD_SIZE-112(r6) 84 stdu r1,THREAD_SIZE-112(r5)
85 mr r1,r6 85 mr r1,r5
86 bl .handle_IRQ_event 86 bl .__do_IRQ
87 ld r1,0(r1) 87 ld r1,0(r1)
88 ld r0,16(r1) 88 ld r0,16(r1)
89 mtlr r0 89 mtlr r0
diff --git a/arch/ppc64/kernel/nvram.c b/arch/ppc64/kernel/nvram.c
index 4fb1a9f5060d..c0fcd29918ce 100644
--- a/arch/ppc64/kernel/nvram.c
+++ b/arch/ppc64/kernel/nvram.c
@@ -31,7 +31,6 @@
31#include <asm/rtas.h> 31#include <asm/rtas.h>
32#include <asm/prom.h> 32#include <asm/prom.h>
33#include <asm/machdep.h> 33#include <asm/machdep.h>
34#include <asm/systemcfg.h>
35 34
36#undef DEBUG_NVRAM 35#undef DEBUG_NVRAM
37 36
@@ -167,7 +166,7 @@ static int dev_nvram_ioctl(struct inode *inode, struct file *file,
167 case IOC_NVRAM_GET_OFFSET: { 166 case IOC_NVRAM_GET_OFFSET: {
168 int part, offset; 167 int part, offset;
169 168
170 if (systemcfg->platform != PLATFORM_POWERMAC) 169 if (_machine != PLATFORM_POWERMAC)
171 return -EINVAL; 170 return -EINVAL;
172 if (copy_from_user(&part, (void __user*)arg, sizeof(part)) != 0) 171 if (copy_from_user(&part, (void __user*)arg, sizeof(part)) != 0)
173 return -EFAULT; 172 return -EFAULT;
@@ -450,7 +449,7 @@ static int nvram_setup_partition(void)
450 * in our nvram, as Apple defined partitions use pretty much 449 * in our nvram, as Apple defined partitions use pretty much
451 * all of the space 450 * all of the space
452 */ 451 */
453 if (systemcfg->platform == PLATFORM_POWERMAC) 452 if (_machine == PLATFORM_POWERMAC)
454 return -ENOSPC; 453 return -ENOSPC;
455 454
456 /* see if we have an OS partition that meets our needs. 455 /* see if we have an OS partition that meets our needs.
diff --git a/arch/ppc64/kernel/pacaData.c b/arch/ppc64/kernel/pacaData.c
deleted file mode 100644
index 3133c72b28ec..000000000000
--- a/arch/ppc64/kernel/pacaData.c
+++ /dev/null
@@ -1,143 +0,0 @@
1/*
2 * c 2001 PPC 64 Team, IBM Corp
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
9
10#include <linux/config.h>
11#include <linux/types.h>
12#include <linux/threads.h>
13#include <linux/module.h>
14
15#include <asm/processor.h>
16#include <asm/ptrace.h>
17#include <asm/page.h>
18
19#include <asm/lppaca.h>
20#include <asm/iseries/it_lp_queue.h>
21#include <asm/paca.h>
22
23static union {
24 struct systemcfg data;
25 u8 page[PAGE_SIZE];
26} systemcfg_store __attribute__((__section__(".data.page.aligned")));
27struct systemcfg *systemcfg = &systemcfg_store.data;
28EXPORT_SYMBOL(systemcfg);
29
30
31/* This symbol is provided by the linker - let it fill in the paca
32 * field correctly */
33extern unsigned long __toc_start;
34
35/* The Paca is an array with one entry per processor. Each contains an
36 * lppaca, which contains the information shared between the
37 * hypervisor and Linux. Each also contains an ItLpRegSave area which
38 * is used by the hypervisor to save registers.
39 * On systems with hardware multi-threading, there are two threads
40 * per processor. The Paca array must contain an entry for each thread.
41 * The VPD Areas will give a max logical processors = 2 * max physical
42 * processors. The processor VPD array needs one entry per physical
43 * processor (not thread).
44 */
45#define PACA_INIT_COMMON(number, start, asrr, asrv) \
46 .lock_token = 0x8000, \
47 .paca_index = (number), /* Paca Index */ \
48 .default_decr = 0x00ff0000, /* Initial Decr */ \
49 .kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL, \
50 .stab_real = (asrr), /* Real pointer to segment table */ \
51 .stab_addr = (asrv), /* Virt pointer to segment table */ \
52 .cpu_start = (start), /* Processor start */ \
53 .hw_cpu_id = 0xffff, \
54 .lppaca = { \
55 .desc = 0xd397d781, /* "LpPa" */ \
56 .size = sizeof(struct lppaca), \
57 .dyn_proc_status = 2, \
58 .decr_val = 0x00ff0000, \
59 .fpregs_in_use = 1, \
60 .end_of_quantum = 0xfffffffffffffffful, \
61 .slb_count = 64, \
62 .vmxregs_in_use = 0, \
63 }, \
64
65#ifdef CONFIG_PPC_ISERIES
66#define PACA_INIT_ISERIES(number) \
67 .lppaca_ptr = &paca[number].lppaca, \
68 .reg_save_ptr = &paca[number].reg_save, \
69 .reg_save = { \
70 .xDesc = 0xd397d9e2, /* "LpRS" */ \
71 .xSize = sizeof(struct ItLpRegSave) \
72 }
73
74#define PACA_INIT(number) \
75{ \
76 PACA_INIT_COMMON(number, 0, 0, 0) \
77 PACA_INIT_ISERIES(number) \
78}
79
80#define BOOTCPU_PACA_INIT(number) \
81{ \
82 PACA_INIT_COMMON(number, 1, 0, (u64)&initial_stab) \
83 PACA_INIT_ISERIES(number) \
84}
85
86#else
87#define PACA_INIT(number) \
88{ \
89 PACA_INIT_COMMON(number, 0, 0, 0) \
90}
91
92#define BOOTCPU_PACA_INIT(number) \
93{ \
94 PACA_INIT_COMMON(number, 1, STAB0_PHYS_ADDR, (u64)&initial_stab) \
95}
96#endif
97
98struct paca_struct paca[] = {
99 BOOTCPU_PACA_INIT(0),
100#if NR_CPUS > 1
101 PACA_INIT( 1), PACA_INIT( 2), PACA_INIT( 3),
102#if NR_CPUS > 4
103 PACA_INIT( 4), PACA_INIT( 5), PACA_INIT( 6), PACA_INIT( 7),
104#if NR_CPUS > 8
105 PACA_INIT( 8), PACA_INIT( 9), PACA_INIT( 10), PACA_INIT( 11),
106 PACA_INIT( 12), PACA_INIT( 13), PACA_INIT( 14), PACA_INIT( 15),
107 PACA_INIT( 16), PACA_INIT( 17), PACA_INIT( 18), PACA_INIT( 19),
108 PACA_INIT( 20), PACA_INIT( 21), PACA_INIT( 22), PACA_INIT( 23),
109 PACA_INIT( 24), PACA_INIT( 25), PACA_INIT( 26), PACA_INIT( 27),
110 PACA_INIT( 28), PACA_INIT( 29), PACA_INIT( 30), PACA_INIT( 31),
111#if NR_CPUS > 32
112 PACA_INIT( 32), PACA_INIT( 33), PACA_INIT( 34), PACA_INIT( 35),
113 PACA_INIT( 36), PACA_INIT( 37), PACA_INIT( 38), PACA_INIT( 39),
114 PACA_INIT( 40), PACA_INIT( 41), PACA_INIT( 42), PACA_INIT( 43),
115 PACA_INIT( 44), PACA_INIT( 45), PACA_INIT( 46), PACA_INIT( 47),
116 PACA_INIT( 48), PACA_INIT( 49), PACA_INIT( 50), PACA_INIT( 51),
117 PACA_INIT( 52), PACA_INIT( 53), PACA_INIT( 54), PACA_INIT( 55),
118 PACA_INIT( 56), PACA_INIT( 57), PACA_INIT( 58), PACA_INIT( 59),
119 PACA_INIT( 60), PACA_INIT( 61), PACA_INIT( 62), PACA_INIT( 63),
120#if NR_CPUS > 64
121 PACA_INIT( 64), PACA_INIT( 65), PACA_INIT( 66), PACA_INIT( 67),
122 PACA_INIT( 68), PACA_INIT( 69), PACA_INIT( 70), PACA_INIT( 71),
123 PACA_INIT( 72), PACA_INIT( 73), PACA_INIT( 74), PACA_INIT( 75),
124 PACA_INIT( 76), PACA_INIT( 77), PACA_INIT( 78), PACA_INIT( 79),
125 PACA_INIT( 80), PACA_INIT( 81), PACA_INIT( 82), PACA_INIT( 83),
126 PACA_INIT( 84), PACA_INIT( 85), PACA_INIT( 86), PACA_INIT( 87),
127 PACA_INIT( 88), PACA_INIT( 89), PACA_INIT( 90), PACA_INIT( 91),
128 PACA_INIT( 92), PACA_INIT( 93), PACA_INIT( 94), PACA_INIT( 95),
129 PACA_INIT( 96), PACA_INIT( 97), PACA_INIT( 98), PACA_INIT( 99),
130 PACA_INIT(100), PACA_INIT(101), PACA_INIT(102), PACA_INIT(103),
131 PACA_INIT(104), PACA_INIT(105), PACA_INIT(106), PACA_INIT(107),
132 PACA_INIT(108), PACA_INIT(109), PACA_INIT(110), PACA_INIT(111),
133 PACA_INIT(112), PACA_INIT(113), PACA_INIT(114), PACA_INIT(115),
134 PACA_INIT(116), PACA_INIT(117), PACA_INIT(118), PACA_INIT(119),
135 PACA_INIT(120), PACA_INIT(121), PACA_INIT(122), PACA_INIT(123),
136 PACA_INIT(124), PACA_INIT(125), PACA_INIT(126), PACA_INIT(127),
137#endif
138#endif
139#endif
140#endif
141#endif
142};
143EXPORT_SYMBOL(paca);
diff --git a/arch/ppc64/kernel/pci.c b/arch/ppc64/kernel/pci.c
index 30247ff74972..3cef1b8f57f0 100644
--- a/arch/ppc64/kernel/pci.c
+++ b/arch/ppc64/kernel/pci.c
@@ -548,6 +548,11 @@ static int __init pcibios_init(void)
548 if (ppc64_isabridge_dev != NULL) 548 if (ppc64_isabridge_dev != NULL)
549 printk("ISA bridge at %s\n", pci_name(ppc64_isabridge_dev)); 549 printk("ISA bridge at %s\n", pci_name(ppc64_isabridge_dev));
550 550
551#ifdef CONFIG_PPC_MULTIPLATFORM
552 /* map in PCI I/O space */
553 phbs_remap_io();
554#endif
555
551 printk("PCI: Probing PCI hardware done\n"); 556 printk("PCI: Probing PCI hardware done\n");
552 557
553 return 0; 558 return 0;
@@ -1277,12 +1282,9 @@ long sys_pciconfig_iobase(long which, unsigned long in_bus,
1277 * G5 machines... So when something asks for bus 0 io base 1282 * G5 machines... So when something asks for bus 0 io base
1278 * (bus 0 is HT root), we return the AGP one instead. 1283 * (bus 0 is HT root), we return the AGP one instead.
1279 */ 1284 */
1280#ifdef CONFIG_PPC_PMAC 1285 if (machine_is_compatible("MacRISC4"))
1281 if (systemcfg->platform == PLATFORM_POWERMAC &&
1282 machine_is_compatible("MacRISC4"))
1283 if (in_bus == 0) 1286 if (in_bus == 0)
1284 in_bus = 0xf0; 1287 in_bus = 0xf0;
1285#endif /* CONFIG_PPC_PMAC */
1286 1288
1287 /* That syscall isn't quite compatible with PCI domains, but it's 1289 /* That syscall isn't quite compatible with PCI domains, but it's
1288 * used on pre-domains setup. We return the first match 1290 * used on pre-domains setup. We return the first match
diff --git a/arch/ppc64/kernel/pci_dn.c b/arch/ppc64/kernel/pci_dn.c
index 1a443a7ada4c..12c4c9e9bbc7 100644
--- a/arch/ppc64/kernel/pci_dn.c
+++ b/arch/ppc64/kernel/pci_dn.c
@@ -43,7 +43,7 @@ static void * __devinit update_dn_pci_info(struct device_node *dn, void *data)
43 u32 *regs; 43 u32 *regs;
44 struct pci_dn *pdn; 44 struct pci_dn *pdn;
45 45
46 if (phb->is_dynamic) 46 if (mem_init_done)
47 pdn = kmalloc(sizeof(*pdn), GFP_KERNEL); 47 pdn = kmalloc(sizeof(*pdn), GFP_KERNEL);
48 else 48 else
49 pdn = alloc_bootmem(sizeof(*pdn)); 49 pdn = alloc_bootmem(sizeof(*pdn));
@@ -120,6 +120,14 @@ void *traverse_pci_devices(struct device_node *start, traverse_func pre,
120 return NULL; 120 return NULL;
121} 121}
122 122
123/**
124 * pci_devs_phb_init_dynamic - setup pci devices under this PHB
125 * phb: pci-to-host bridge (top-level bridge connecting to cpu)
126 *
127 * This routine is called both during boot, (before the memory
128 * subsystem is set up, before kmalloc is valid) and during the
129 * dynamic lpar operation of adding a PHB to a running system.
130 */
123void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb) 131void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb)
124{ 132{
125 struct device_node * dn = (struct device_node *) phb->arch_data; 133 struct device_node * dn = (struct device_node *) phb->arch_data;
@@ -201,9 +209,14 @@ static struct notifier_block pci_dn_reconfig_nb = {
201 .notifier_call = pci_dn_reconfig_notifier, 209 .notifier_call = pci_dn_reconfig_notifier,
202}; 210};
203 211
204/* 212/**
205 * Actually initialize the phbs. 213 * pci_devs_phb_init - Initialize phbs and pci devs under them.
206 * The buswalk on this phb has not happened yet. 214 *
215 * This routine walks over all phb's (pci-host bridges) on the
216 * system, and sets up assorted pci-related structures
217 * (including pci info in the device node structs) for each
218 * pci device found underneath. This routine runs once,
219 * early in the boot sequence.
207 */ 220 */
208void __init pci_devs_phb_init(void) 221void __init pci_devs_phb_init(void)
209{ 222{
diff --git a/arch/ppc64/kernel/proc_ppc64.c b/arch/ppc64/kernel/proc_ppc64.c
deleted file mode 100644
index 24e955ee9487..000000000000
--- a/arch/ppc64/kernel/proc_ppc64.c
+++ /dev/null
@@ -1,128 +0,0 @@
1/*
2 * arch/ppc64/kernel/proc_ppc64.c
3 *
4 * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen IBM Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
20
21#include <linux/config.h>
22#include <linux/init.h>
23#include <linux/mm.h>
24#include <linux/proc_fs.h>
25#include <linux/slab.h>
26#include <linux/kernel.h>
27
28#include <asm/systemcfg.h>
29#include <asm/rtas.h>
30#include <asm/uaccess.h>
31#include <asm/prom.h>
32
33static loff_t page_map_seek( struct file *file, loff_t off, int whence);
34static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes,
35 loff_t *ppos);
36static int page_map_mmap( struct file *file, struct vm_area_struct *vma );
37
38static struct file_operations page_map_fops = {
39 .llseek = page_map_seek,
40 .read = page_map_read,
41 .mmap = page_map_mmap
42};
43
44/*
45 * Create the ppc64 and ppc64/rtas directories early. This allows us to
46 * assume that they have been previously created in drivers.
47 */
48static int __init proc_ppc64_create(void)
49{
50 struct proc_dir_entry *root;
51
52 root = proc_mkdir("ppc64", NULL);
53 if (!root)
54 return 1;
55
56 if (!(systemcfg->platform & (PLATFORM_PSERIES | PLATFORM_CELL)))
57 return 0;
58
59 if (!proc_mkdir("rtas", root))
60 return 1;
61
62 if (!proc_symlink("rtas", NULL, "ppc64/rtas"))
63 return 1;
64
65 return 0;
66}
67core_initcall(proc_ppc64_create);
68
69static int __init proc_ppc64_init(void)
70{
71 struct proc_dir_entry *pde;
72
73 pde = create_proc_entry("ppc64/systemcfg", S_IFREG|S_IRUGO, NULL);
74 if (!pde)
75 return 1;
76 pde->nlink = 1;
77 pde->data = systemcfg;
78 pde->size = PAGE_SIZE;
79 pde->proc_fops = &page_map_fops;
80
81 return 0;
82}
83__initcall(proc_ppc64_init);
84
85static loff_t page_map_seek( struct file *file, loff_t off, int whence)
86{
87 loff_t new;
88 struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode);
89
90 switch(whence) {
91 case 0:
92 new = off;
93 break;
94 case 1:
95 new = file->f_pos + off;
96 break;
97 case 2:
98 new = dp->size + off;
99 break;
100 default:
101 return -EINVAL;
102 }
103 if ( new < 0 || new > dp->size )
104 return -EINVAL;
105 return (file->f_pos = new);
106}
107
108static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes,
109 loff_t *ppos)
110{
111 struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode);
112 return simple_read_from_buffer(buf, nbytes, ppos, dp->data, dp->size);
113}
114
115static int page_map_mmap( struct file *file, struct vm_area_struct *vma )
116{
117 struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode);
118
119 vma->vm_flags |= VM_SHM | VM_LOCKED;
120
121 if ((vma->vm_end - vma->vm_start) > dp->size)
122 return -EINVAL;
123
124 remap_pfn_range(vma, vma->vm_start, __pa(dp->data) >> PAGE_SHIFT,
125 dp->size, vma->vm_page_prot);
126 return 0;
127}
128
diff --git a/arch/ppc64/kernel/prom.c b/arch/ppc64/kernel/prom.c
index 3402fbee62c7..fbad2c360784 100644
--- a/arch/ppc64/kernel/prom.c
+++ b/arch/ppc64/kernel/prom.c
@@ -318,7 +318,7 @@ static int __devinit finish_node_interrupts(struct device_node *np,
318 } 318 }
319 319
320 /* We offset irq numbers for the u3 MPIC by 128 in PowerMac */ 320 /* We offset irq numbers for the u3 MPIC by 128 in PowerMac */
321 if (systemcfg->platform == PLATFORM_POWERMAC && ic && ic->parent) { 321 if (_machine == PLATFORM_POWERMAC && ic && ic->parent) {
322 char *name = get_property(ic->parent, "name", NULL); 322 char *name = get_property(ic->parent, "name", NULL);
323 if (name && !strcmp(name, "u3")) 323 if (name && !strcmp(name, "u3"))
324 np->intrs[intrcount].line += 128; 324 np->intrs[intrcount].line += 128;
@@ -1065,7 +1065,7 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
1065 prop = (u32 *)of_get_flat_dt_prop(node, "linux,platform", NULL); 1065 prop = (u32 *)of_get_flat_dt_prop(node, "linux,platform", NULL);
1066 if (prop == NULL) 1066 if (prop == NULL)
1067 return 0; 1067 return 0;
1068 systemcfg->platform = *prop; 1068 _machine = *prop;
1069 1069
1070 /* check if iommu is forced on or off */ 1070 /* check if iommu is forced on or off */
1071 if (of_get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL) 1071 if (of_get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL)
@@ -1230,11 +1230,8 @@ void __init early_init_devtree(void *params)
1230 of_scan_flat_dt(early_init_dt_scan_memory, NULL); 1230 of_scan_flat_dt(early_init_dt_scan_memory, NULL);
1231 lmb_enforce_memory_limit(memory_limit); 1231 lmb_enforce_memory_limit(memory_limit);
1232 lmb_analyze(); 1232 lmb_analyze();
1233 systemcfg->physicalMemorySize = lmb_phys_mem_size();
1234 lmb_reserve(0, __pa(klimit)); 1233 lmb_reserve(0, __pa(klimit));
1235 1234
1236 DBG("Phys. mem: %lx\n", systemcfg->physicalMemorySize);
1237
1238 /* Reserve LMB regions used by kernel, initrd, dt, etc... */ 1235 /* Reserve LMB regions used by kernel, initrd, dt, etc... */
1239 early_reserve_mem(); 1236 early_reserve_mem();
1240 1237
@@ -1753,7 +1750,7 @@ static int of_finish_dynamic_node(struct device_node *node,
1753 /* We don't support that function on PowerMac, at least 1750 /* We don't support that function on PowerMac, at least
1754 * not yet 1751 * not yet
1755 */ 1752 */
1756 if (systemcfg->platform == PLATFORM_POWERMAC) 1753 if (_machine == PLATFORM_POWERMAC)
1757 return -ENODEV; 1754 return -ENODEV;
1758 1755
1759 /* fix up new node's linux_phandle field */ 1756 /* fix up new node's linux_phandle field */
diff --git a/arch/ppc64/kernel/prom_init.c b/arch/ppc64/kernel/prom_init.c
index e4c880dab997..6375f40b23db 100644
--- a/arch/ppc64/kernel/prom_init.c
+++ b/arch/ppc64/kernel/prom_init.c
@@ -1934,7 +1934,8 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, unsigned long
1934 /* 1934 /*
1935 * On pSeries, inform the firmware about our capabilities 1935 * On pSeries, inform the firmware about our capabilities
1936 */ 1936 */
1937 if (RELOC(of_platform) & PLATFORM_PSERIES) 1937 if (RELOC(of_platform) == PLATFORM_PSERIES ||
1938 RELOC(of_platform) == PLATFORM_PSERIES_LPAR)
1938 prom_send_capabilities(); 1939 prom_send_capabilities();
1939 1940
1940 /* 1941 /*
diff --git a/arch/ppc64/kernel/rtas_pci.c b/arch/ppc64/kernel/rtas_pci.c
deleted file mode 100644
index 3c3f19192fcc..000000000000
--- a/arch/ppc64/kernel/rtas_pci.c
+++ /dev/null
@@ -1,512 +0,0 @@
1/*
2 * arch/ppc64/kernel/rtas_pci.c
3 *
4 * Copyright (C) 2001 Dave Engebretsen, IBM Corporation
5 * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
6 *
7 * RTAS specific routines for PCI.
8 *
9 * Based on code from pci.c, chrp_pci.c and pSeries_pci.c
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */
25
26#include <linux/kernel.h>
27#include <linux/threads.h>
28#include <linux/pci.h>
29#include <linux/string.h>
30#include <linux/init.h>
31#include <linux/bootmem.h>
32
33#include <asm/io.h>
34#include <asm/pgtable.h>
35#include <asm/irq.h>
36#include <asm/prom.h>
37#include <asm/machdep.h>
38#include <asm/pci-bridge.h>
39#include <asm/iommu.h>
40#include <asm/rtas.h>
41#include <asm/mpic.h>
42#include <asm/ppc-pci.h>
43
44/* RTAS tokens */
45static int read_pci_config;
46static int write_pci_config;
47static int ibm_read_pci_config;
48static int ibm_write_pci_config;
49
50static int config_access_valid(struct pci_dn *dn, int where)
51{
52 if (where < 256)
53 return 1;
54 if (where < 4096 && dn->pci_ext_config_space)
55 return 1;
56
57 return 0;
58}
59
60static int of_device_available(struct device_node * dn)
61{
62 char * status;
63
64 status = get_property(dn, "status", NULL);
65
66 if (!status)
67 return 1;
68
69 if (!strcmp(status, "okay"))
70 return 1;
71
72 return 0;
73}
74
75static int rtas_read_config(struct device_node *dn, int where, int size, u32 *val)
76{
77 int returnval = -1;
78 unsigned long buid, addr;
79 int ret;
80 struct pci_dn *pdn;
81
82 if (!dn || !dn->data)
83 return PCIBIOS_DEVICE_NOT_FOUND;
84 pdn = dn->data;
85 if (!config_access_valid(pdn, where))
86 return PCIBIOS_BAD_REGISTER_NUMBER;
87
88 addr = ((where & 0xf00) << 20) | (pdn->busno << 16) |
89 (pdn->devfn << 8) | (where & 0xff);
90 buid = pdn->phb->buid;
91 if (buid) {
92 ret = rtas_call(ibm_read_pci_config, 4, 2, &returnval,
93 addr, buid >> 32, buid & 0xffffffff, size);
94 } else {
95 ret = rtas_call(read_pci_config, 2, 2, &returnval, addr, size);
96 }
97 *val = returnval;
98
99 if (ret)
100 return PCIBIOS_DEVICE_NOT_FOUND;
101
102 if (returnval == EEH_IO_ERROR_VALUE(size) &&
103 eeh_dn_check_failure (dn, NULL))
104 return PCIBIOS_DEVICE_NOT_FOUND;
105
106 return PCIBIOS_SUCCESSFUL;
107}
108
109static int rtas_pci_read_config(struct pci_bus *bus,
110 unsigned int devfn,
111 int where, int size, u32 *val)
112{
113 struct device_node *busdn, *dn;
114
115 if (bus->self)
116 busdn = pci_device_to_OF_node(bus->self);
117 else
118 busdn = bus->sysdata; /* must be a phb */
119
120 /* Search only direct children of the bus */
121 for (dn = busdn->child; dn; dn = dn->sibling)
122 if (dn->data && PCI_DN(dn)->devfn == devfn
123 && of_device_available(dn))
124 return rtas_read_config(dn, where, size, val);
125
126 return PCIBIOS_DEVICE_NOT_FOUND;
127}
128
129int rtas_write_config(struct device_node *dn, int where, int size, u32 val)
130{
131 unsigned long buid, addr;
132 int ret;
133 struct pci_dn *pdn;
134
135 if (!dn || !dn->data)
136 return PCIBIOS_DEVICE_NOT_FOUND;
137 pdn = dn->data;
138 if (!config_access_valid(pdn, where))
139 return PCIBIOS_BAD_REGISTER_NUMBER;
140
141 addr = ((where & 0xf00) << 20) | (pdn->busno << 16) |
142 (pdn->devfn << 8) | (where & 0xff);
143 buid = pdn->phb->buid;
144 if (buid) {
145 ret = rtas_call(ibm_write_pci_config, 5, 1, NULL, addr, buid >> 32, buid & 0xffffffff, size, (ulong) val);
146 } else {
147 ret = rtas_call(write_pci_config, 3, 1, NULL, addr, size, (ulong)val);
148 }
149
150 if (ret)
151 return PCIBIOS_DEVICE_NOT_FOUND;
152
153 return PCIBIOS_SUCCESSFUL;
154}
155
156static int rtas_pci_write_config(struct pci_bus *bus,
157 unsigned int devfn,
158 int where, int size, u32 val)
159{
160 struct device_node *busdn, *dn;
161
162 if (bus->self)
163 busdn = pci_device_to_OF_node(bus->self);
164 else
165 busdn = bus->sysdata; /* must be a phb */
166
167 /* Search only direct children of the bus */
168 for (dn = busdn->child; dn; dn = dn->sibling)
169 if (dn->data && PCI_DN(dn)->devfn == devfn
170 && of_device_available(dn))
171 return rtas_write_config(dn, where, size, val);
172 return PCIBIOS_DEVICE_NOT_FOUND;
173}
174
175struct pci_ops rtas_pci_ops = {
176 rtas_pci_read_config,
177 rtas_pci_write_config
178};
179
180int is_python(struct device_node *dev)
181{
182 char *model = (char *)get_property(dev, "model", NULL);
183
184 if (model && strstr(model, "Python"))
185 return 1;
186
187 return 0;
188}
189
190static int get_phb_reg_prop(struct device_node *dev,
191 unsigned int addr_size_words,
192 struct reg_property64 *reg)
193{
194 unsigned int *ui_ptr = NULL, len;
195
196 /* Found a PHB, now figure out where his registers are mapped. */
197 ui_ptr = (unsigned int *)get_property(dev, "reg", &len);
198 if (ui_ptr == NULL)
199 return 1;
200
201 if (addr_size_words == 1) {
202 reg->address = ((struct reg_property32 *)ui_ptr)->address;
203 reg->size = ((struct reg_property32 *)ui_ptr)->size;
204 } else {
205 *reg = *((struct reg_property64 *)ui_ptr);
206 }
207
208 return 0;
209}
210
211static void python_countermeasures(struct device_node *dev,
212 unsigned int addr_size_words)
213{
214 struct reg_property64 reg_struct;
215 void __iomem *chip_regs;
216 volatile u32 val;
217
218 if (get_phb_reg_prop(dev, addr_size_words, &reg_struct))
219 return;
220
221 /* Python's register file is 1 MB in size. */
222 chip_regs = ioremap(reg_struct.address & ~(0xfffffUL), 0x100000);
223
224 /*
225 * Firmware doesn't always clear this bit which is critical
226 * for good performance - Anton
227 */
228
229#define PRG_CL_RESET_VALID 0x00010000
230
231 val = in_be32(chip_regs + 0xf6030);
232 if (val & PRG_CL_RESET_VALID) {
233 printk(KERN_INFO "Python workaround: ");
234 val &= ~PRG_CL_RESET_VALID;
235 out_be32(chip_regs + 0xf6030, val);
236 /*
237 * We must read it back for changes to
238 * take effect
239 */
240 val = in_be32(chip_regs + 0xf6030);
241 printk("reg0: %x\n", val);
242 }
243
244 iounmap(chip_regs);
245}
246
247void __init init_pci_config_tokens (void)
248{
249 read_pci_config = rtas_token("read-pci-config");
250 write_pci_config = rtas_token("write-pci-config");
251 ibm_read_pci_config = rtas_token("ibm,read-pci-config");
252 ibm_write_pci_config = rtas_token("ibm,write-pci-config");
253}
254
255unsigned long __devinit get_phb_buid (struct device_node *phb)
256{
257 int addr_cells;
258 unsigned int *buid_vals;
259 unsigned int len;
260 unsigned long buid;
261
262 if (ibm_read_pci_config == -1) return 0;
263
264 /* PHB's will always be children of the root node,
265 * or so it is promised by the current firmware. */
266 if (phb->parent == NULL)
267 return 0;
268 if (phb->parent->parent)
269 return 0;
270
271 buid_vals = (unsigned int *) get_property(phb, "reg", &len);
272 if (buid_vals == NULL)
273 return 0;
274
275 addr_cells = prom_n_addr_cells(phb);
276 if (addr_cells == 1) {
277 buid = (unsigned long) buid_vals[0];
278 } else {
279 buid = (((unsigned long)buid_vals[0]) << 32UL) |
280 (((unsigned long)buid_vals[1]) & 0xffffffff);
281 }
282 return buid;
283}
284
285static int phb_set_bus_ranges(struct device_node *dev,
286 struct pci_controller *phb)
287{
288 int *bus_range;
289 unsigned int len;
290
291 bus_range = (int *) get_property(dev, "bus-range", &len);
292 if (bus_range == NULL || len < 2 * sizeof(int)) {
293 return 1;
294 }
295
296 phb->first_busno = bus_range[0];
297 phb->last_busno = bus_range[1];
298
299 return 0;
300}
301
302static int __devinit setup_phb(struct device_node *dev,
303 struct pci_controller *phb,
304 unsigned int addr_size_words)
305{
306 pci_setup_pci_controller(phb);
307
308 if (is_python(dev))
309 python_countermeasures(dev, addr_size_words);
310
311 if (phb_set_bus_ranges(dev, phb))
312 return 1;
313
314 phb->arch_data = dev;
315 phb->ops = &rtas_pci_ops;
316 phb->buid = get_phb_buid(dev);
317
318 return 0;
319}
320
321static void __devinit add_linux_pci_domain(struct device_node *dev,
322 struct pci_controller *phb,
323 struct property *of_prop)
324{
325 memset(of_prop, 0, sizeof(struct property));
326 of_prop->name = "linux,pci-domain";
327 of_prop->length = sizeof(phb->global_number);
328 of_prop->value = (unsigned char *)&of_prop[1];
329 memcpy(of_prop->value, &phb->global_number, sizeof(phb->global_number));
330 prom_add_property(dev, of_prop);
331}
332
333static struct pci_controller * __init alloc_phb(struct device_node *dev,
334 unsigned int addr_size_words)
335{
336 struct pci_controller *phb;
337 struct property *of_prop;
338
339 phb = alloc_bootmem(sizeof(struct pci_controller));
340 if (phb == NULL)
341 return NULL;
342
343 of_prop = alloc_bootmem(sizeof(struct property) +
344 sizeof(phb->global_number));
345 if (!of_prop)
346 return NULL;
347
348 if (setup_phb(dev, phb, addr_size_words))
349 return NULL;
350
351 add_linux_pci_domain(dev, phb, of_prop);
352
353 return phb;
354}
355
356static struct pci_controller * __devinit alloc_phb_dynamic(struct device_node *dev, unsigned int addr_size_words)
357{
358 struct pci_controller *phb;
359
360 phb = (struct pci_controller *)kmalloc(sizeof(struct pci_controller),
361 GFP_KERNEL);
362 if (phb == NULL)
363 return NULL;
364
365 if (setup_phb(dev, phb, addr_size_words))
366 return NULL;
367
368 phb->is_dynamic = 1;
369
370 /* TODO: linux,pci-domain? */
371
372 return phb;
373}
374
375unsigned long __init find_and_init_phbs(void)
376{
377 struct device_node *node;
378 struct pci_controller *phb;
379 unsigned int root_size_cells = 0;
380 unsigned int index;
381 unsigned int *opprop = NULL;
382 struct device_node *root = of_find_node_by_path("/");
383
384 if (ppc64_interrupt_controller == IC_OPEN_PIC) {
385 opprop = (unsigned int *)get_property(root,
386 "platform-open-pic", NULL);
387 }
388
389 root_size_cells = prom_n_size_cells(root);
390
391 index = 0;
392
393 for (node = of_get_next_child(root, NULL);
394 node != NULL;
395 node = of_get_next_child(root, node)) {
396 if (node->type == NULL || strcmp(node->type, "pci") != 0)
397 continue;
398
399 phb = alloc_phb(node, root_size_cells);
400 if (!phb)
401 continue;
402
403 pci_process_bridge_OF_ranges(phb, node, 0);
404 pci_setup_phb_io(phb, index == 0);
405#ifdef CONFIG_PPC_PSERIES
406 if (ppc64_interrupt_controller == IC_OPEN_PIC && pSeries_mpic) {
407 int addr = root_size_cells * (index + 2) - 1;
408 mpic_assign_isu(pSeries_mpic, index, opprop[addr]);
409 }
410#endif
411 index++;
412 }
413
414 of_node_put(root);
415 pci_devs_phb_init();
416
417 /*
418 * pci_probe_only and pci_assign_all_buses can be set via properties
419 * in chosen.
420 */
421 if (of_chosen) {
422 int *prop;
423
424 prop = (int *)get_property(of_chosen, "linux,pci-probe-only",
425 NULL);
426 if (prop)
427 pci_probe_only = *prop;
428
429 prop = (int *)get_property(of_chosen,
430 "linux,pci-assign-all-buses", NULL);
431 if (prop)
432 pci_assign_all_buses = *prop;
433 }
434
435 return 0;
436}
437
438struct pci_controller * __devinit init_phb_dynamic(struct device_node *dn)
439{
440 struct device_node *root = of_find_node_by_path("/");
441 unsigned int root_size_cells = 0;
442 struct pci_controller *phb;
443 int primary;
444
445 root_size_cells = prom_n_size_cells(root);
446
447 primary = list_empty(&hose_list);
448 phb = alloc_phb_dynamic(dn, root_size_cells);
449 if (!phb)
450 return NULL;
451
452 pci_process_bridge_OF_ranges(phb, dn, primary);
453
454 pci_setup_phb_io_dynamic(phb, primary);
455 of_node_put(root);
456
457 pci_devs_phb_init_dynamic(phb);
458 scan_phb(phb);
459
460 return phb;
461}
462EXPORT_SYMBOL(init_phb_dynamic);
463
464/* RPA-specific bits for removing PHBs */
465int pcibios_remove_root_bus(struct pci_controller *phb)
466{
467 struct pci_bus *b = phb->bus;
468 struct resource *res;
469 int rc, i;
470
471 res = b->resource[0];
472 if (!res->flags) {
473 printk(KERN_ERR "%s: no IO resource for PHB %s\n", __FUNCTION__,
474 b->name);
475 return 1;
476 }
477
478 rc = unmap_bus_range(b);
479 if (rc) {
480 printk(KERN_ERR "%s: failed to unmap IO on bus %s\n",
481 __FUNCTION__, b->name);
482 return 1;
483 }
484
485 if (release_resource(res)) {
486 printk(KERN_ERR "%s: failed to release IO on bus %s\n",
487 __FUNCTION__, b->name);
488 return 1;
489 }
490
491 for (i = 1; i < 3; ++i) {
492 res = b->resource[i];
493 if (!res->flags && i == 0) {
494 printk(KERN_ERR "%s: no MEM resource for PHB %s\n",
495 __FUNCTION__, b->name);
496 return 1;
497 }
498 if (res->flags && release_resource(res)) {
499 printk(KERN_ERR
500 "%s: failed to release IO %d on bus %s\n",
501 __FUNCTION__, i, b->name);
502 return 1;
503 }
504 }
505
506 list_del(&phb->list_node);
507 if (phb->is_dynamic)
508 kfree(phb);
509
510 return 0;
511}
512EXPORT_SYMBOL(pcibios_remove_root_bus);
diff --git a/arch/ppc64/kernel/scanlog.c b/arch/ppc64/kernel/scanlog.c
deleted file mode 100644
index 2edc947f7c44..000000000000
--- a/arch/ppc64/kernel/scanlog.c
+++ /dev/null
@@ -1,235 +0,0 @@
1/*
2 * c 2001 PPC 64 Team, IBM Corp
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * scan-log-data driver for PPC64 Todd Inglett <tinglett@vnet.ibm.com>
10 *
11 * When ppc64 hardware fails the service processor dumps internal state
12 * of the system. After a reboot the operating system can access a dump
13 * of this data using this driver. A dump exists if the device-tree
14 * /chosen/ibm,scan-log-data property exists.
15 *
16 * This driver exports /proc/ppc64/rtas/scan-log-dump which can be read.
17 * The driver supports only sequential reads.
18 *
19 * The driver looks at a write to the driver for the single word "reset".
20 * If given, the driver will reset the scanlog so the platform can free it.
21 */
22
23#include <linux/module.h>
24#include <linux/types.h>
25#include <linux/errno.h>
26#include <linux/proc_fs.h>
27#include <linux/init.h>
28#include <linux/delay.h>
29#include <asm/uaccess.h>
30#include <asm/rtas.h>
31#include <asm/prom.h>
32
#define MODULE_VERS "1.0"
#define MODULE_NAME "scanlog"

/* Status returns from ibm,scan-log-dump */
#define SCANLOG_COMPLETE 0
#define SCANLOG_HWERROR -1
#define SCANLOG_CONTINUE 1

/* Non-zero enables DEBUG() output; toggled by writing "debugon"/"debugoff". */
static int scanlog_debug;
static unsigned int ibm_scan_log_dump;			/* RTAS token */
static struct proc_dir_entry *proc_ppc64_scan_log_dump;	/* The proc file */

/* printk at KERN_ERR with a "scanlog: " prefix, but only in debug mode. */
#define DEBUG(A...)						\
	do {							\
		if (scanlog_debug)				\
			printk(KERN_ERR "scanlog: " A);		\
	} while (0)
46
47static ssize_t scanlog_read(struct file *file, char __user *buf,
48 size_t count, loff_t *ppos)
49{
50 struct inode * inode = file->f_dentry->d_inode;
51 struct proc_dir_entry *dp;
52 unsigned int *data;
53 int status;
54 unsigned long len, off;
55 unsigned int wait_time;
56
57 dp = PDE(inode);
58 data = (unsigned int *)dp->data;
59
60 if (!data) {
61 printk(KERN_ERR "scanlog: read failed no data\n");
62 return -EIO;
63 }
64
65 if (count > RTAS_DATA_BUF_SIZE)
66 count = RTAS_DATA_BUF_SIZE;
67
68 if (count < 1024) {
69 /* This is the min supported by this RTAS call. Rather
70 * than do all the buffering we insist the user code handle
71 * larger reads. As long as cp works... :)
72 */
73 printk(KERN_ERR "scanlog: cannot perform a small read (%ld)\n", count);
74 return -EINVAL;
75 }
76
77 if (!access_ok(VERIFY_WRITE, buf, count))
78 return -EFAULT;
79
80 for (;;) {
81 wait_time = 500; /* default wait if no data */
82 spin_lock(&rtas_data_buf_lock);
83 memcpy(rtas_data_buf, data, RTAS_DATA_BUF_SIZE);
84 status = rtas_call(ibm_scan_log_dump, 2, 1, NULL,
85 (u32) __pa(rtas_data_buf), (u32) count);
86 memcpy(data, rtas_data_buf, RTAS_DATA_BUF_SIZE);
87 spin_unlock(&rtas_data_buf_lock);
88
89 DEBUG("status=%d, data[0]=%x, data[1]=%x, data[2]=%x\n",
90 status, data[0], data[1], data[2]);
91 switch (status) {
92 case SCANLOG_COMPLETE:
93 DEBUG("hit eof\n");
94 return 0;
95 case SCANLOG_HWERROR:
96 DEBUG("hardware error reading scan log data\n");
97 return -EIO;
98 case SCANLOG_CONTINUE:
99 /* We may or may not have data yet */
100 len = data[1];
101 off = data[2];
102 if (len > 0) {
103 if (copy_to_user(buf, ((char *)data)+off, len))
104 return -EFAULT;
105 return len;
106 }
107 /* Break to sleep default time */
108 break;
109 default:
110 if (status > 9900 && status <= 9905) {
111 wait_time = rtas_extended_busy_delay_time(status);
112 } else {
113 printk(KERN_ERR "scanlog: unknown error from rtas: %d\n", status);
114 return -EIO;
115 }
116 }
117 /* Apparently no data yet. Wait and try again. */
118 msleep_interruptible(wait_time);
119 }
120 /*NOTREACHED*/
121}
122
123static ssize_t scanlog_write(struct file * file, const char __user * buf,
124 size_t count, loff_t *ppos)
125{
126 char stkbuf[20];
127 int status;
128
129 if (count > 19) count = 19;
130 if (copy_from_user (stkbuf, buf, count)) {
131 return -EFAULT;
132 }
133 stkbuf[count] = 0;
134
135 if (buf) {
136 if (strncmp(stkbuf, "reset", 5) == 0) {
137 DEBUG("reset scanlog\n");
138 status = rtas_call(ibm_scan_log_dump, 2, 1, NULL, 0, 0);
139 DEBUG("rtas returns %d\n", status);
140 } else if (strncmp(stkbuf, "debugon", 7) == 0) {
141 printk(KERN_ERR "scanlog: debug on\n");
142 scanlog_debug = 1;
143 } else if (strncmp(stkbuf, "debugoff", 8) == 0) {
144 printk(KERN_ERR "scanlog: debug off\n");
145 scanlog_debug = 0;
146 }
147 }
148 return count;
149}
150
151static int scanlog_open(struct inode * inode, struct file * file)
152{
153 struct proc_dir_entry *dp = PDE(inode);
154 unsigned int *data = (unsigned int *)dp->data;
155
156 if (!data) {
157 printk(KERN_ERR "scanlog: open failed no data\n");
158 return -EIO;
159 }
160
161 if (data[0] != 0) {
162 /* This imperfect test stops a second copy of the
163 * data (or a reset while data is being copied)
164 */
165 return -EBUSY;
166 }
167
168 data[0] = 0; /* re-init so we restart the scan */
169
170 return 0;
171}
172
173static int scanlog_release(struct inode * inode, struct file * file)
174{
175 struct proc_dir_entry *dp = PDE(inode);
176 unsigned int *data = (unsigned int *)dp->data;
177
178 if (!data) {
179 printk(KERN_ERR "scanlog: release failed no data\n");
180 return -EIO;
181 }
182 data[0] = 0;
183
184 return 0;
185}
186
187struct file_operations scanlog_fops = {
188 .owner = THIS_MODULE,
189 .read = scanlog_read,
190 .write = scanlog_write,
191 .open = scanlog_open,
192 .release = scanlog_release,
193};
194
195int __init scanlog_init(void)
196{
197 struct proc_dir_entry *ent;
198
199 ibm_scan_log_dump = rtas_token("ibm,scan-log-dump");
200 if (ibm_scan_log_dump == RTAS_UNKNOWN_SERVICE) {
201 printk(KERN_ERR "scan-log-dump not implemented on this system\n");
202 return -EIO;
203 }
204
205 ent = create_proc_entry("ppc64/rtas/scan-log-dump", S_IRUSR, NULL);
206 if (ent) {
207 ent->proc_fops = &scanlog_fops;
208 /* Ideally we could allocate a buffer < 4G */
209 ent->data = kmalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL);
210 if (!ent->data) {
211 printk(KERN_ERR "Failed to allocate a buffer\n");
212 remove_proc_entry("scan-log-dump", ent->parent);
213 return -ENOMEM;
214 }
215 ((unsigned int *)ent->data)[0] = 0;
216 } else {
217 printk(KERN_ERR "Failed to create ppc64/scan-log-dump proc entry\n");
218 return -EIO;
219 }
220 proc_ppc64_scan_log_dump = ent;
221
222 return 0;
223}
224
225void __exit scanlog_cleanup(void)
226{
227 if (proc_ppc64_scan_log_dump) {
228 kfree(proc_ppc64_scan_log_dump->data);
229 remove_proc_entry("scan-log-dump", proc_ppc64_scan_log_dump->parent);
230 }
231}
232
233module_init(scanlog_init);
234module_exit(scanlog_cleanup);
235MODULE_LICENSE("GPL");
diff --git a/arch/ppc64/kernel/sysfs.c b/arch/ppc64/kernel/sysfs.c
deleted file mode 100644
index e99ec62c2c52..000000000000
--- a/arch/ppc64/kernel/sysfs.c
+++ /dev/null
@@ -1,384 +0,0 @@
1#include <linux/config.h>
2#include <linux/sysdev.h>
3#include <linux/cpu.h>
4#include <linux/smp.h>
5#include <linux/percpu.h>
6#include <linux/init.h>
7#include <linux/sched.h>
8#include <linux/module.h>
9#include <linux/nodemask.h>
10#include <linux/cpumask.h>
11#include <linux/notifier.h>
12
13#include <asm/current.h>
14#include <asm/processor.h>
15#include <asm/cputable.h>
16#include <asm/firmware.h>
17#include <asm/hvcall.h>
18#include <asm/prom.h>
19#include <asm/systemcfg.h>
20#include <asm/paca.h>
21#include <asm/lppaca.h>
22#include <asm/machdep.h>
23#include <asm/smp.h>
24
/* One struct cpu per CPU; its embedded sysdev carries the sysfs attributes created in this file. */
25static DEFINE_PER_CPU(struct cpu, cpu_devices);
26
27/* SMT stuff */
28
29#ifdef CONFIG_PPC_MULTIPLATFORM
30/* default to snooze disabled */
31DEFINE_PER_CPU(unsigned long, smt_snooze_delay);
32
33static ssize_t store_smt_snooze_delay(struct sys_device *dev, const char *buf,
34 size_t count)
35{
36 struct cpu *cpu = container_of(dev, struct cpu, sysdev);
37 ssize_t ret;
38 unsigned long snooze;
39
40 ret = sscanf(buf, "%lu", &snooze);
41 if (ret != 1)
42 return -EINVAL;
43
44 per_cpu(smt_snooze_delay, cpu->sysdev.id) = snooze;
45
46 return count;
47}
48
49static ssize_t show_smt_snooze_delay(struct sys_device *dev, char *buf)
50{
51 struct cpu *cpu = container_of(dev, struct cpu, sysdev);
52
53 return sprintf(buf, "%lu\n", per_cpu(smt_snooze_delay, cpu->sysdev.id));
54}
55
56static SYSDEV_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay,
57 store_smt_snooze_delay);
58
59/* Only parse OF options if the matching cmdline option was not specified */
60static int smt_snooze_cmdline;
61
62static int __init smt_setup(void)
63{
64 struct device_node *options;
65 unsigned int *val;
66 unsigned int cpu;
67
68 if (!cpu_has_feature(CPU_FTR_SMT))
69 return 1;
70
71 options = find_path_device("/options");
72 if (!options)
73 return 1;
74
75 val = (unsigned int *)get_property(options, "ibm,smt-snooze-delay",
76 NULL);
77 if (!smt_snooze_cmdline && val) {
78 for_each_cpu(cpu)
79 per_cpu(smt_snooze_delay, cpu) = *val;
80 }
81
82 return 1;
83}
84__initcall(smt_setup);
85
86static int __init setup_smt_snooze_delay(char *str)
87{
88 unsigned int cpu;
89 int snooze;
90
91 if (!cpu_has_feature(CPU_FTR_SMT))
92 return 1;
93
94 smt_snooze_cmdline = 1;
95
96 if (get_option(&str, &snooze)) {
97 for_each_cpu(cpu)
98 per_cpu(smt_snooze_delay, cpu) = snooze;
99 }
100
101 return 1;
102}
103__setup("smt-snooze-delay=", setup_smt_snooze_delay);
104
105#endif /* CONFIG_PPC_MULTIPLATFORM */
106
107/*
108 * Enabling PMCs will slow partition context switch times so we only do
109 * it the first time we write to the PMCs.
110 */
111
112static DEFINE_PER_CPU(char, pmcs_enabled);
113
114void ppc64_enable_pmcs(void)
115{
116 /* Only need to enable them once */
117 if (__get_cpu_var(pmcs_enabled))
118 return;
119
120 __get_cpu_var(pmcs_enabled) = 1;
121
122 if (ppc_md.enable_pmcs)
123 ppc_md.enable_pmcs();
124}
125EXPORT_SYMBOL(ppc64_enable_pmcs);
126
127/* XXX convert to rusty's on_one_cpu */
128static unsigned long run_on_cpu(unsigned long cpu,
129 unsigned long (*func)(unsigned long),
130 unsigned long arg)
131{
132 cpumask_t old_affinity = current->cpus_allowed;
133 unsigned long ret;
134
135 /* should return -EINVAL to userspace */
136 if (set_cpus_allowed(current, cpumask_of_cpu(cpu)))
137 return 0;
138
139 ret = func(arg);
140
141 set_cpus_allowed(current, old_affinity);
142
143 return ret;
144}
145
/*
 * SYSFS_PMCSETUP(NAME, ADDRESS) generates four helpers for one SPR:
 *   read_NAME()/write_NAME()  - mfspr/mtspr on ADDRESS; the write first
 *                               calls ppc64_enable_pmcs()
 *   show_NAME()/store_NAME()  - sysfs handlers that run the above on the
 *                               CPU owning the device via run_on_cpu() and
 *                               format/parse the value as hex
 */
#define SYSFS_PMCSETUP(NAME, ADDRESS)					\
static unsigned long read_##NAME(unsigned long unused)			\
{									\
	return mfspr(ADDRESS);						\
}									\
static unsigned long write_##NAME(unsigned long newval)		\
{									\
	ppc64_enable_pmcs();						\
	mtspr(ADDRESS, newval);						\
	return 0;							\
}									\
static ssize_t show_##NAME(struct sys_device *dev, char *buf)		\
{									\
	struct cpu *cpu = container_of(dev, struct cpu, sysdev);	\
	return sprintf(buf, "%lx\n",					\
		       run_on_cpu(cpu->sysdev.id, read_##NAME, 0));	\
}									\
static ssize_t __attribute_used__					\
	store_##NAME(struct sys_device *dev, const char *buf, size_t count) \
{									\
	struct cpu *cpu = container_of(dev, struct cpu, sysdev);	\
	unsigned long newval;						\
	if (sscanf(buf, "%lx", &newval) != 1)				\
		return -EINVAL;						\
	run_on_cpu(cpu->sysdev.id, write_##NAME, newval);		\
	return count;							\
}
174
175SYSFS_PMCSETUP(mmcr0, SPRN_MMCR0);
176SYSFS_PMCSETUP(mmcr1, SPRN_MMCR1);
177SYSFS_PMCSETUP(mmcra, SPRN_MMCRA);
178SYSFS_PMCSETUP(pmc1, SPRN_PMC1);
179SYSFS_PMCSETUP(pmc2, SPRN_PMC2);
180SYSFS_PMCSETUP(pmc3, SPRN_PMC3);
181SYSFS_PMCSETUP(pmc4, SPRN_PMC4);
182SYSFS_PMCSETUP(pmc5, SPRN_PMC5);
183SYSFS_PMCSETUP(pmc6, SPRN_PMC6);
184SYSFS_PMCSETUP(pmc7, SPRN_PMC7);
185SYSFS_PMCSETUP(pmc8, SPRN_PMC8);
186SYSFS_PMCSETUP(purr, SPRN_PURR);
187
188static SYSDEV_ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0);
189static SYSDEV_ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1);
190static SYSDEV_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
191static SYSDEV_ATTR(pmc1, 0600, show_pmc1, store_pmc1);
192static SYSDEV_ATTR(pmc2, 0600, show_pmc2, store_pmc2);
193static SYSDEV_ATTR(pmc3, 0600, show_pmc3, store_pmc3);
194static SYSDEV_ATTR(pmc4, 0600, show_pmc4, store_pmc4);
195static SYSDEV_ATTR(pmc5, 0600, show_pmc5, store_pmc5);
196static SYSDEV_ATTR(pmc6, 0600, show_pmc6, store_pmc6);
197static SYSDEV_ATTR(pmc7, 0600, show_pmc7, store_pmc7);
198static SYSDEV_ATTR(pmc8, 0600, show_pmc8, store_pmc8);
199static SYSDEV_ATTR(purr, 0600, show_purr, NULL);
200
201static void register_cpu_online(unsigned int cpu)
202{
203 struct cpu *c = &per_cpu(cpu_devices, cpu);
204 struct sys_device *s = &c->sysdev;
205
206#ifndef CONFIG_PPC_ISERIES
207 if (cpu_has_feature(CPU_FTR_SMT))
208 sysdev_create_file(s, &attr_smt_snooze_delay);
209#endif
210
211 /* PMC stuff */
212
213 sysdev_create_file(s, &attr_mmcr0);
214 sysdev_create_file(s, &attr_mmcr1);
215
216 if (cpu_has_feature(CPU_FTR_MMCRA))
217 sysdev_create_file(s, &attr_mmcra);
218
219 if (cur_cpu_spec->num_pmcs >= 1)
220 sysdev_create_file(s, &attr_pmc1);
221 if (cur_cpu_spec->num_pmcs >= 2)
222 sysdev_create_file(s, &attr_pmc2);
223 if (cur_cpu_spec->num_pmcs >= 3)
224 sysdev_create_file(s, &attr_pmc3);
225 if (cur_cpu_spec->num_pmcs >= 4)
226 sysdev_create_file(s, &attr_pmc4);
227 if (cur_cpu_spec->num_pmcs >= 5)
228 sysdev_create_file(s, &attr_pmc5);
229 if (cur_cpu_spec->num_pmcs >= 6)
230 sysdev_create_file(s, &attr_pmc6);
231 if (cur_cpu_spec->num_pmcs >= 7)
232 sysdev_create_file(s, &attr_pmc7);
233 if (cur_cpu_spec->num_pmcs >= 8)
234 sysdev_create_file(s, &attr_pmc8);
235
236 if (cpu_has_feature(CPU_FTR_SMT))
237 sysdev_create_file(s, &attr_purr);
238}
239
#ifdef CONFIG_HOTPLUG_CPU
/*
 * Remove the per-CPU sysfs files that register_cpu_online() creates for
 * @cpu, using the same feature and num_pmcs tests.  It is a bug to call
 * this for a CPU marked no_control.
 */
static void unregister_cpu_online(unsigned int cpu)
{
	/* pmc_files[k - 1] exists when the CPU has at least k counters. */
	static struct sysdev_attribute * const pmc_files[] = {
		&attr_pmc1, &attr_pmc2, &attr_pmc3, &attr_pmc4,
		&attr_pmc5, &attr_pmc6, &attr_pmc7, &attr_pmc8,
	};
	struct cpu *dev = &per_cpu(cpu_devices, cpu);
	struct sys_device *sd = &dev->sysdev;
	int k;

	BUG_ON(dev->no_control);

#ifndef CONFIG_PPC_ISERIES
	if (cpu_has_feature(CPU_FTR_SMT))
		sysdev_remove_file(sd, &attr_smt_snooze_delay);
#endif

	/* PMC stuff */

	sysdev_remove_file(sd, &attr_mmcr0);
	sysdev_remove_file(sd, &attr_mmcr1);

	if (cpu_has_feature(CPU_FTR_MMCRA))
		sysdev_remove_file(sd, &attr_mmcra);

	for (k = 1; k <= (int)(sizeof(pmc_files) / sizeof(pmc_files[0])); k++) {
		if (cur_cpu_spec->num_pmcs >= k)
			sysdev_remove_file(sd, pmc_files[k - 1]);
	}

	if (cpu_has_feature(CPU_FTR_SMT))
		sysdev_remove_file(sd, &attr_purr);
}
#endif /* CONFIG_HOTPLUG_CPU */
282
283static int __devinit sysfs_cpu_notify(struct notifier_block *self,
284 unsigned long action, void *hcpu)
285{
286 unsigned int cpu = (unsigned int)(long)hcpu;
287
288 switch (action) {
289 case CPU_ONLINE:
290 register_cpu_online(cpu);
291 break;
292#ifdef CONFIG_HOTPLUG_CPU
293 case CPU_DEAD:
294 unregister_cpu_online(cpu);
295 break;
296#endif
297 }
298 return NOTIFY_OK;
299}
300
301static struct notifier_block __devinitdata sysfs_cpu_nb = {
302 .notifier_call = sysfs_cpu_notify,
303};
304
/* NUMA stuff */

#ifdef CONFIG_NUMA
static struct node node_devices[MAX_NUMNODES];

/*
 * Register a sysfs node device for every online node.  A node whose
 * parent_node() differs from itself is registered under that parent.
 */
static void register_nodes(void)
{
	int nid;

	for (nid = 0; nid < MAX_NUMNODES; nid++) {
		struct node *parent;
		int parent_nid;

		if (!node_online(nid))
			continue;

		parent_nid = parent_node(nid);
		parent = (parent_nid != nid) ? &node_devices[parent_nid] : NULL;

		register_node(&node_devices[nid], nid, parent);
	}
}
#else
/* Without NUMA there are no node devices to register. */
static void register_nodes(void)
{
}
#endif
332
333/* Only valid if CPU is present. */
334static ssize_t show_physical_id(struct sys_device *dev, char *buf)
335{
336 struct cpu *cpu = container_of(dev, struct cpu, sysdev);
337
338 return sprintf(buf, "%d\n", get_hard_smp_processor_id(cpu->sysdev.id));
339}
340static SYSDEV_ATTR(physical_id, 0444, show_physical_id, NULL);
341
342static int __init topology_init(void)
343{
344 int cpu;
345 struct node *parent = NULL;
346
347 register_nodes();
348
349 register_cpu_notifier(&sysfs_cpu_nb);
350
351 for_each_cpu(cpu) {
352 struct cpu *c = &per_cpu(cpu_devices, cpu);
353
354#ifdef CONFIG_NUMA
355 /* The node to which a cpu belongs can't be known
356 * until the cpu is made present.
357 */
358 parent = NULL;
359 if (cpu_present(cpu))
360 parent = &node_devices[cpu_to_node(cpu)];
361#endif
362 /*
363 * For now, we just see if the system supports making
364 * the RTAS calls for CPU hotplug. But, there may be a
365 * more comprehensive way to do this for an individual
366 * CPU. For instance, the boot cpu might never be valid
367 * for hotplugging.
368 */
369 if (!ppc_md.cpu_die)
370 c->no_control = 1;
371
372 if (cpu_online(cpu) || (c->no_control == 0)) {
373 register_cpu(c, cpu, parent);
374
375 sysdev_create_file(&c->sysdev, &attr_physical_id);
376 }
377
378 if (cpu_online(cpu))
379 register_cpu_online(cpu);
380 }
381
382 return 0;
383}
384__initcall(topology_init);
diff --git a/arch/ppc64/kernel/vdso.c b/arch/ppc64/kernel/vdso.c
index 4aacf521e3e4..1bbacac44988 100644
--- a/arch/ppc64/kernel/vdso.c
+++ b/arch/ppc64/kernel/vdso.c
@@ -34,6 +34,7 @@
34#include <asm/machdep.h> 34#include <asm/machdep.h>
35#include <asm/cputable.h> 35#include <asm/cputable.h>
36#include <asm/sections.h> 36#include <asm/sections.h>
37#include <asm/systemcfg.h>
37#include <asm/vdso.h> 38#include <asm/vdso.h>
38 39
39#undef DEBUG 40#undef DEBUG
@@ -179,7 +180,7 @@ static struct page * vdso_vma_nopage(struct vm_area_struct * vma,
179 * Last page is systemcfg. 180 * Last page is systemcfg.
180 */ 181 */
181 if ((vma->vm_end - address) <= PAGE_SIZE) 182 if ((vma->vm_end - address) <= PAGE_SIZE)
182 pg = virt_to_page(systemcfg); 183 pg = virt_to_page(_systemcfg);
183 else 184 else
184 pg = virt_to_page(vbase + offset); 185 pg = virt_to_page(vbase + offset);
185 186
@@ -604,7 +605,7 @@ void __init vdso_init(void)
604 get_page(pg); 605 get_page(pg);
605 } 606 }
606 607
607 get_page(virt_to_page(systemcfg)); 608 get_page(virt_to_page(_systemcfg));
608} 609}
609 610
610int in_gate_area_no_task(unsigned long addr) 611int in_gate_area_no_task(unsigned long addr)