author     Paul Mackerras <paulus@samba.org>   2005-10-10 08:03:41 -0400
committer  Paul Mackerras <paulus@samba.org>   2005-10-10 08:03:41 -0400
commit     69a80d3f69d0b2d7fae5a73c6e034d402d434d8a (patch)
tree       dd66f34510eae791a04c8dd77ce77a223b2f46d6 /arch/powerpc
parent     ab1f9dac6eea25ee59e4c8e1cf0b7476afbbfe07 (diff)
powerpc: move pSeries files to arch/powerpc/platforms/pseries
Signed-off-by: Paul Mackerras <paulus@samba.org>
Diffstat (limited to 'arch/powerpc')
-rw-r--r--  arch/powerpc/platforms/Makefile            |   1
-rw-r--r--  arch/powerpc/platforms/pseries/Makefile    |   4
-rw-r--r--  arch/powerpc/platforms/pseries/hvCall.S    | 131
-rw-r--r--  arch/powerpc/platforms/pseries/iommu.c     | 606
-rw-r--r--  arch/powerpc/platforms/pseries/lpar.c      | 517
-rw-r--r--  arch/powerpc/platforms/pseries/nvram.c     | 148
-rw-r--r--  arch/powerpc/platforms/pseries/pci.c       | 142
-rw-r--r--  arch/powerpc/platforms/pseries/reconfig.c  | 426
-rw-r--r--  arch/powerpc/platforms/pseries/setup.c     | 622
-rw-r--r--  arch/powerpc/platforms/pseries/smp.c       | 471
-rw-r--r--  arch/powerpc/platforms/pseries/vio.c       | 274
11 files changed, 3342 insertions, 0 deletions
diff --git a/arch/powerpc/platforms/Makefile b/arch/powerpc/platforms/Makefile
index 7637ff3642c3..181ae612b2d3 100644
--- a/arch/powerpc/platforms/Makefile
+++ b/arch/powerpc/platforms/Makefile
@@ -4,4 +4,5 @@ endif
 obj-$(CONFIG_4xx)          += 4xx/
 obj-$(CONFIG_83xx)         += 83xx/
 obj-$(CONFIG_85xx)         += 85xx/
+obj-$(CONFIG_PPC_PSERIES)  += pseries/
 obj-$(CONFIG_PPC_ISERIES)  += iseries/
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
new file mode 100644
index 000000000000..9ebb34180a10
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -0,0 +1,4 @@
1obj-y := pci.o lpar.o hvCall.o nvram.o reconfig.o \
2 setup.o iommu.o
3obj-$(CONFIG_SMP) += smp.o
4obj-$(CONFIG_IBMVIO) += vio.o
diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S
new file mode 100644
index 000000000000..176e8da76466
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/hvCall.S
@@ -0,0 +1,131 @@
1/*
2 * arch/ppc64/kernel/pSeries_hvCall.S
3 *
4 * This file contains the generic code to perform a call to the
5 * pSeries LPAR hypervisor.
6 * NOTE: this file will go away when we move to inline this work.
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13#include <asm/hvcall.h>
14#include <asm/processor.h>
15#include <asm/ppc_asm.h>
16
17#define STK_PARM(i) (48 + ((i)-3)*8)
18
19 .text
20
21/* long plpar_hcall(unsigned long opcode, R3
22 unsigned long arg1, R4
23 unsigned long arg2, R5
24 unsigned long arg3, R6
25 unsigned long arg4, R7
26 unsigned long *out1, R8
27 unsigned long *out2, R9
28 unsigned long *out3); R10
29 */
30_GLOBAL(plpar_hcall)
31 HMT_MEDIUM
32
33 mfcr r0
34
35 std r8,STK_PARM(r8)(r1) /* Save out ptrs */
36 std r9,STK_PARM(r9)(r1)
37 std r10,STK_PARM(r10)(r1)
38
39 stw r0,8(r1)
40
41 HVSC /* invoke the hypervisor */
42
43 lwz r0,8(r1)
44
45 ld r8,STK_PARM(r8)(r1) /* Fetch r4-r6 ret args */
46 ld r9,STK_PARM(r9)(r1)
47 ld r10,STK_PARM(r10)(r1)
48 std r4,0(r8)
49 std r5,0(r9)
50 std r6,0(r10)
51
52 mtcrf 0xff,r0
53 blr /* return r3 = status */
54
55
56/* Simple interface with no output values (other than status) */
57_GLOBAL(plpar_hcall_norets)
58 HMT_MEDIUM
59
60 mfcr r0
61 stw r0,8(r1)
62
63 HVSC /* invoke the hypervisor */
64
65 lwz r0,8(r1)
66 mtcrf 0xff,r0
67 blr /* return r3 = status */
68
69
70/* long plpar_hcall_8arg_2ret(unsigned long opcode, R3
71 unsigned long arg1, R4
72 unsigned long arg2, R5
73 unsigned long arg3, R6
74 unsigned long arg4, R7
75 unsigned long arg5, R8
76 unsigned long arg6, R9
77 unsigned long arg7, R10
78 unsigned long arg8, 112(R1)
79 unsigned long *out1); 120(R1)
80 */
81_GLOBAL(plpar_hcall_8arg_2ret)
82 HMT_MEDIUM
83
84 mfcr r0
85 ld r11,STK_PARM(r11)(r1) /* put arg8 in R11 */
86 stw r0,8(r1)
87
88 HVSC /* invoke the hypervisor */
89
90 lwz r0,8(r1)
91 ld r10,STK_PARM(r12)(r1) /* Fetch r4 ret arg */
92 std r4,0(r10)
93 mtcrf 0xff,r0
94 blr /* return r3 = status */
95
96
97/* long plpar_hcall_4out(unsigned long opcode, R3
98 unsigned long arg1, R4
99 unsigned long arg2, R5
100 unsigned long arg3, R6
101 unsigned long arg4, R7
102 unsigned long *out1, R8
103 unsigned long *out2, R9
104 unsigned long *out3, R10
105 unsigned long *out4); 112(R1)
106 */
107_GLOBAL(plpar_hcall_4out)
108 HMT_MEDIUM
109
110 mfcr r0
111 stw r0,8(r1)
112
113 std r8,STK_PARM(r8)(r1) /* Save out ptrs */
114 std r9,STK_PARM(r9)(r1)
115 std r10,STK_PARM(r10)(r1)
116
117 HVSC /* invoke the hypervisor */
118
119 lwz r0,8(r1)
120
121 ld r8,STK_PARM(r8)(r1) /* Fetch r4-r7 ret args */
122 ld r9,STK_PARM(r9)(r1)
123 ld r10,STK_PARM(r10)(r1)
124 ld r11,STK_PARM(r11)(r1)
125 std r4,0(r8)
126 std r5,0(r9)
127 std r6,0(r10)
128 std r7,0(r11)
129
130 mtcrf 0xff,r0
131 blr /* return r3 = status */
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
new file mode 100644
index 000000000000..9e90d41131d8
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -0,0 +1,606 @@
1/*
2 * arch/ppc64/kernel/pSeries_iommu.c
3 *
4 * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
5 *
6 * Rewrite, cleanup:
7 *
8 * Copyright (C) 2004 Olof Johansson <olof@austin.ibm.com>, IBM Corporation
9 *
10 * Dynamic DMA mapping support, pSeries-specific parts, both SMP and LPAR.
11 *
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 */
27
28#include <linux/config.h>
29#include <linux/init.h>
30#include <linux/types.h>
31#include <linux/slab.h>
32#include <linux/mm.h>
33#include <linux/spinlock.h>
34#include <linux/string.h>
35#include <linux/pci.h>
36#include <linux/dma-mapping.h>
37#include <asm/io.h>
38#include <asm/prom.h>
39#include <asm/rtas.h>
40#include <asm/ppcdebug.h>
41#include <asm/iommu.h>
42#include <asm/pci-bridge.h>
43#include <asm/machdep.h>
44#include <asm/abs_addr.h>
45#include <asm/plpar_wrappers.h>
46#include <asm/pSeries_reconfig.h>
47#include <asm/systemcfg.h>
48#include <asm/firmware.h>
49#include <asm/tce.h>
50#include <asm/ppc-pci.h>
51
52#define DBG(fmt...)
53
54extern int is_python(struct device_node *);
55
56static void tce_build_pSeries(struct iommu_table *tbl, long index,
57 long npages, unsigned long uaddr,
58 enum dma_data_direction direction)
59{
60 union tce_entry t;
61 union tce_entry *tp;
62
63 index <<= TCE_PAGE_FACTOR;
64 npages <<= TCE_PAGE_FACTOR;
65
66 t.te_word = 0;
67 t.te_rdwr = 1; // Read allowed
68
69 if (direction != DMA_TO_DEVICE)
70 t.te_pciwr = 1;
71
72 tp = ((union tce_entry *)tbl->it_base) + index;
73
74 while (npages--) {
75 /* can't move this out since we might cross LMB boundary */
76 t.te_rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT;
77
78 tp->te_word = t.te_word;
79
80 uaddr += TCE_PAGE_SIZE;
81 tp++;
82 }
83}
84
85
86static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages)
87{
88 union tce_entry t;
89 union tce_entry *tp;
90
91 npages <<= TCE_PAGE_FACTOR;
92 index <<= TCE_PAGE_FACTOR;
93
94 t.te_word = 0;
95 tp = ((union tce_entry *)tbl->it_base) + index;
96
97 while (npages--) {
98 tp->te_word = t.te_word;
99
100 tp++;
101 }
102}
103
104
105static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
106 long npages, unsigned long uaddr,
107 enum dma_data_direction direction)
108{
109 u64 rc;
110 union tce_entry tce;
111
112 tce.te_word = 0;
113 tce.te_rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT;
114 tce.te_rdwr = 1;
115 if (direction != DMA_TO_DEVICE)
116 tce.te_pciwr = 1;
117
118 while (npages--) {
119 rc = plpar_tce_put((u64)tbl->it_index,
120 (u64)tcenum << 12,
121 tce.te_word );
122
123 if (rc && printk_ratelimit()) {
124 printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc);
125 printk("\tindex = 0x%lx\n", (u64)tbl->it_index);
126 printk("\ttcenum = 0x%lx\n", (u64)tcenum);
127 printk("\ttce val = 0x%lx\n", tce.te_word );
128 show_stack(current, (unsigned long *)__get_SP());
129 }
130
131 tcenum++;
132 tce.te_rpn++;
133 }
134}
135
136static DEFINE_PER_CPU(void *, tce_page) = NULL;
137
138static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
139 long npages, unsigned long uaddr,
140 enum dma_data_direction direction)
141{
142 u64 rc;
143 union tce_entry tce, *tcep;
144 long l, limit;
145
146 tcenum <<= TCE_PAGE_FACTOR;
147 npages <<= TCE_PAGE_FACTOR;
148
149 if (npages == 1)
150 return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
151 direction);
152
153 tcep = __get_cpu_var(tce_page);
154
155 /* This is safe to do since interrupts are off when we're called
156 * from iommu_alloc{,_sg}()
157 */
158 if (!tcep) {
159 tcep = (void *)__get_free_page(GFP_ATOMIC);
160 /* If allocation fails, fall back to the loop implementation */
161 if (!tcep)
162 return tce_build_pSeriesLP(tbl, tcenum, npages,
163 uaddr, direction);
164 __get_cpu_var(tce_page) = tcep;
165 }
166
167 tce.te_word = 0;
168 tce.te_rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT;
169 tce.te_rdwr = 1;
170 if (direction != DMA_TO_DEVICE)
171 tce.te_pciwr = 1;
172
173 /* We can map max one pageful of TCEs at a time */
174 do {
175 /*
176 * Set up the page with TCE data, looping through and setting
177 * the values.
178 */
179 limit = min_t(long, npages, 4096/sizeof(union tce_entry));
180
181 for (l = 0; l < limit; l++) {
182 tcep[l] = tce;
183 tce.te_rpn++;
184 }
185
186 rc = plpar_tce_put_indirect((u64)tbl->it_index,
187 (u64)tcenum << 12,
188 (u64)virt_to_abs(tcep),
189 limit);
190
191 npages -= limit;
192 tcenum += limit;
193 } while (npages > 0 && !rc);
194
195 if (rc && printk_ratelimit()) {
196 printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc);
197 printk("\tindex = 0x%lx\n", (u64)tbl->it_index);
198 printk("\tnpages = 0x%lx\n", (u64)npages);
199 printk("\ttce[0] val = 0x%lx\n", tcep[0].te_word);
200 show_stack(current, (unsigned long *)__get_SP());
201 }
202}
203
204static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)
205{
206 u64 rc;
207 union tce_entry tce;
208
209 tcenum <<= TCE_PAGE_FACTOR;
210 npages <<= TCE_PAGE_FACTOR;
211
212 tce.te_word = 0;
213
214 while (npages--) {
215 rc = plpar_tce_put((u64)tbl->it_index,
216 (u64)tcenum << 12,
217 tce.te_word);
218
219 if (rc && printk_ratelimit()) {
220 printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc);
221 printk("\tindex = 0x%lx\n", (u64)tbl->it_index);
222 printk("\ttcenum = 0x%lx\n", (u64)tcenum);
223 printk("\ttce val = 0x%lx\n", tce.te_word );
224 show_stack(current, (unsigned long *)__get_SP());
225 }
226
227 tcenum++;
228 }
229}
230
231
232static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)
233{
234 u64 rc;
235 union tce_entry tce;
236
237 tcenum <<= TCE_PAGE_FACTOR;
238 npages <<= TCE_PAGE_FACTOR;
239
240 tce.te_word = 0;
241
242 rc = plpar_tce_stuff((u64)tbl->it_index,
243 (u64)tcenum << 12,
244 tce.te_word,
245 npages);
246
247 if (rc && printk_ratelimit()) {
248 printk("tce_freemulti_pSeriesLP: plpar_tce_stuff failed\n");
249 printk("\trc = %ld\n", rc);
250 printk("\tindex = 0x%lx\n", (u64)tbl->it_index);
251 printk("\tnpages = 0x%lx\n", (u64)npages);
252 printk("\ttce val = 0x%lx\n", tce.te_word );
253 show_stack(current, (unsigned long *)__get_SP());
254 }
255}
256
257static void iommu_table_setparms(struct pci_controller *phb,
258 struct device_node *dn,
259 struct iommu_table *tbl)
260{
261 struct device_node *node;
262 unsigned long *basep;
263 unsigned int *sizep;
264
265 node = (struct device_node *)phb->arch_data;
266
267 basep = (unsigned long *)get_property(node, "linux,tce-base", NULL);
268 sizep = (unsigned int *)get_property(node, "linux,tce-size", NULL);
269 if (basep == NULL || sizep == NULL) {
270 printk(KERN_ERR "PCI_DMA: iommu_table_setparms: %s has "
271 "missing tce entries !\n", dn->full_name);
272 return;
273 }
274
275 tbl->it_base = (unsigned long)__va(*basep);
276 memset((void *)tbl->it_base, 0, *sizep);
277
278 tbl->it_busno = phb->bus->number;
279
280 /* Units of tce entries */
281 tbl->it_offset = phb->dma_window_base_cur >> PAGE_SHIFT;
282
283 /* Test if we are going over 2GB of DMA space */
284 if (phb->dma_window_base_cur + phb->dma_window_size > 0x80000000ul) {
285 udbg_printf("PCI_DMA: Unexpected number of IOAs under this PHB.\n");
286 panic("PCI_DMA: Unexpected number of IOAs under this PHB.\n");
287 }
288
289 phb->dma_window_base_cur += phb->dma_window_size;
290
291 /* Set the tce table size - measured in entries */
292 tbl->it_size = phb->dma_window_size >> PAGE_SHIFT;
293
294 tbl->it_index = 0;
295 tbl->it_blocksize = 16;
296 tbl->it_type = TCE_PCI;
297}
298
299/*
300 * iommu_table_setparms_lpar
301 *
302 * Function: On pSeries LPAR systems, return TCE table info, given a pci bus.
303 *
304 * ToDo: properly interpret the ibm,dma-window property. The definition is:
305 * logical-bus-number (1 word)
306 * phys-address (#address-cells words)
307 * size (#cell-size words)
308 *
309 * Currently we hard code these sizes (more or less).
310 */
311static void iommu_table_setparms_lpar(struct pci_controller *phb,
312 struct device_node *dn,
313 struct iommu_table *tbl,
314 unsigned int *dma_window)
315{
316 tbl->it_busno = PCI_DN(dn)->bussubno;
317
318 /* TODO: Parse field size properties properly. */
319 tbl->it_size = (((unsigned long)dma_window[4] << 32) |
320 (unsigned long)dma_window[5]) >> PAGE_SHIFT;
321 tbl->it_offset = (((unsigned long)dma_window[2] << 32) |
322 (unsigned long)dma_window[3]) >> PAGE_SHIFT;
323 tbl->it_base = 0;
324 tbl->it_index = dma_window[0];
325 tbl->it_blocksize = 16;
326 tbl->it_type = TCE_PCI;
327}
328
329static void iommu_bus_setup_pSeries(struct pci_bus *bus)
330{
331 struct device_node *dn;
332 struct iommu_table *tbl;
333 struct device_node *isa_dn, *isa_dn_orig;
334 struct device_node *tmp;
335 struct pci_dn *pci;
336 int children;
337
338 DBG("iommu_bus_setup_pSeries, bus %p, bus->self %p\n", bus, bus->self);
339
340 dn = pci_bus_to_OF_node(bus);
341 pci = PCI_DN(dn);
342
343 if (bus->self) {
344 /* This is not a root bus, any setup will be done for the
345 * device-side of the bridge in iommu_dev_setup_pSeries().
346 */
347 return;
348 }
349
350 /* Check if the ISA bus on the system is under
351 * this PHB.
352 */
353 isa_dn = isa_dn_orig = of_find_node_by_type(NULL, "isa");
354
355 while (isa_dn && isa_dn != dn)
356 isa_dn = isa_dn->parent;
357
358 if (isa_dn_orig)
359 of_node_put(isa_dn_orig);
360
361 /* Count number of direct PCI children of the PHB.
362 * All PCI device nodes have class-code property, so it's
363 * an easy way to find them.
364 */
365 for (children = 0, tmp = dn->child; tmp; tmp = tmp->sibling)
366 if (get_property(tmp, "class-code", NULL))
367 children++;
368
369 DBG("Children: %d\n", children);
370
371 /* Calculate amount of DMA window per slot. Each window must be
372 * a power of two (due to pci_alloc_consistent requirements).
373 *
374 * Keep 256MB aside for PHBs with ISA.
375 */
376
377 if (!isa_dn) {
378 /* No ISA/IDE - just set window size and return */
379 pci->phb->dma_window_size = 0x80000000ul; /* To be divided */
380
381 while (pci->phb->dma_window_size * children > 0x80000000ul)
382 pci->phb->dma_window_size >>= 1;
383 DBG("No ISA/IDE, window size is 0x%lx\n",
384 pci->phb->dma_window_size);
385 pci->phb->dma_window_base_cur = 0;
386
387 return;
388 }
389
390 /* If we have ISA, then we probably have an IDE
391 * controller too. Allocate a 128MB table but
392 * skip the first 128MB to avoid stepping on ISA
393 * space.
394 */
395 pci->phb->dma_window_size = 0x8000000ul;
396 pci->phb->dma_window_base_cur = 0x8000000ul;
397
398 tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL);
399
400 iommu_table_setparms(pci->phb, dn, tbl);
401 pci->iommu_table = iommu_init_table(tbl);
402
403 /* Divide the rest (1.75GB) among the children */
404 pci->phb->dma_window_size = 0x80000000ul;
405 while (pci->phb->dma_window_size * children > 0x70000000ul)
406 pci->phb->dma_window_size >>= 1;
407
408 DBG("ISA/IDE, window size is 0x%lx\n", pci->phb->dma_window_size);
409
410}
411
412
413static void iommu_bus_setup_pSeriesLP(struct pci_bus *bus)
414{
415 struct iommu_table *tbl;
416 struct device_node *dn, *pdn;
417 struct pci_dn *ppci;
418 unsigned int *dma_window = NULL;
419
420 DBG("iommu_bus_setup_pSeriesLP, bus %p, bus->self %p\n", bus, bus->self);
421
422 dn = pci_bus_to_OF_node(bus);
423
424 /* Find nearest ibm,dma-window, walking up the device tree */
425 for (pdn = dn; pdn != NULL; pdn = pdn->parent) {
426 dma_window = (unsigned int *)get_property(pdn, "ibm,dma-window", NULL);
427 if (dma_window != NULL)
428 break;
429 }
430
431 if (dma_window == NULL) {
432 DBG("iommu_bus_setup_pSeriesLP: bus %s seems to have no ibm,dma-window property\n", dn->full_name);
433 return;
434 }
435
436 ppci = pdn->data;
437 if (!ppci->iommu_table) {
438 /* Bussubno hasn't been copied yet.
439 * Do it now because iommu_table_setparms_lpar needs it.
440 */
441
442 ppci->bussubno = bus->number;
443
444 tbl = (struct iommu_table *)kmalloc(sizeof(struct iommu_table),
445 GFP_KERNEL);
446
447 iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window);
448
449 ppci->iommu_table = iommu_init_table(tbl);
450 }
451
452 if (pdn != dn)
453 PCI_DN(dn)->iommu_table = ppci->iommu_table;
454}
455
456
457static void iommu_dev_setup_pSeries(struct pci_dev *dev)
458{
459 struct device_node *dn, *mydn;
460 struct iommu_table *tbl;
461
462 DBG("iommu_dev_setup_pSeries, dev %p (%s)\n", dev, pci_name(dev));
463
464 mydn = dn = pci_device_to_OF_node(dev);
465
466 /* If we're the direct child of a root bus, then we need to allocate
467 * an iommu table ourselves. The bus setup code should have setup
468 * the window sizes already.
469 */
470 if (!dev->bus->self) {
471 DBG(" --> first child, no bridge. Allocating iommu table.\n");
472 tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL);
473 iommu_table_setparms(PCI_DN(dn)->phb, dn, tbl);
474 PCI_DN(mydn)->iommu_table = iommu_init_table(tbl);
475
476 return;
477 }
478
479 /* If this device is further down the bus tree, search upwards until
480 * an already allocated iommu table is found and use that.
481 */
482
483 while (dn && dn->data && PCI_DN(dn)->iommu_table == NULL)
484 dn = dn->parent;
485
486 if (dn && dn->data) {
487 PCI_DN(mydn)->iommu_table = PCI_DN(dn)->iommu_table;
488 } else {
489 DBG("iommu_dev_setup_pSeries, dev %p (%s) has no iommu table\n", dev, pci_name(dev));
490 }
491}
492
493static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *node)
494{
495 int err = NOTIFY_OK;
496 struct device_node *np = node;
497 struct pci_dn *pci = np->data;
498
499 switch (action) {
500 case PSERIES_RECONFIG_REMOVE:
501 if (pci->iommu_table &&
502 get_property(np, "ibm,dma-window", NULL))
503 iommu_free_table(np);
504 break;
505 default:
506 err = NOTIFY_DONE;
507 break;
508 }
509 return err;
510}
511
512static struct notifier_block iommu_reconfig_nb = {
513 .notifier_call = iommu_reconfig_notifier,
514};
515
516static void iommu_dev_setup_pSeriesLP(struct pci_dev *dev)
517{
518 struct device_node *pdn, *dn;
519 struct iommu_table *tbl;
520 int *dma_window = NULL;
521 struct pci_dn *pci;
522
523 DBG("iommu_dev_setup_pSeriesLP, dev %p (%s)\n", dev, pci_name(dev));
524
525 /* dev setup for LPAR is a little tricky, since the device tree might
526 * contain the dma-window properties per-device and not neccesarily
527 * for the bus. So we need to search upwards in the tree until we
528 * either hit a dma-window property, OR find a parent with a table
529 * already allocated.
530 */
531 dn = pci_device_to_OF_node(dev);
532
533 for (pdn = dn; pdn && pdn->data && !PCI_DN(pdn)->iommu_table;
534 pdn = pdn->parent) {
535 dma_window = (unsigned int *)
536 get_property(pdn, "ibm,dma-window", NULL);
537 if (dma_window)
538 break;
539 }
540
541 /* Check for parent == NULL so we don't try to setup the empty EADS
542 * slots on POWER4 machines.
543 */
544 if (dma_window == NULL || pdn->parent == NULL) {
545 DBG("No dma window for device, linking to parent\n");
546 PCI_DN(dn)->iommu_table = PCI_DN(pdn)->iommu_table;
547 return;
548 } else {
549 DBG("Found DMA window, allocating table\n");
550 }
551
552 pci = pdn->data;
553 if (!pci->iommu_table) {
554 /* iommu_table_setparms_lpar needs bussubno. */
555 pci->bussubno = pci->phb->bus->number;
556
557 tbl = (struct iommu_table *)kmalloc(sizeof(struct iommu_table),
558 GFP_KERNEL);
559
560 iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window);
561
562 pci->iommu_table = iommu_init_table(tbl);
563 }
564
565 if (pdn != dn)
566 PCI_DN(dn)->iommu_table = pci->iommu_table;
567}
568
569static void iommu_bus_setup_null(struct pci_bus *b) { }
570static void iommu_dev_setup_null(struct pci_dev *d) { }
571
572/* These are called very early. */
573void iommu_init_early_pSeries(void)
574{
575 if (of_chosen && get_property(of_chosen, "linux,iommu-off", NULL)) {
576 /* Direct I/O, IOMMU off */
577 ppc_md.iommu_dev_setup = iommu_dev_setup_null;
578 ppc_md.iommu_bus_setup = iommu_bus_setup_null;
579 pci_direct_iommu_init();
580
581 return;
582 }
583
584 if (systemcfg->platform & PLATFORM_LPAR) {
585 if (firmware_has_feature(FW_FEATURE_MULTITCE)) {
586 ppc_md.tce_build = tce_buildmulti_pSeriesLP;
587 ppc_md.tce_free = tce_freemulti_pSeriesLP;
588 } else {
589 ppc_md.tce_build = tce_build_pSeriesLP;
590 ppc_md.tce_free = tce_free_pSeriesLP;
591 }
592 ppc_md.iommu_bus_setup = iommu_bus_setup_pSeriesLP;
593 ppc_md.iommu_dev_setup = iommu_dev_setup_pSeriesLP;
594 } else {
595 ppc_md.tce_build = tce_build_pSeries;
596 ppc_md.tce_free = tce_free_pSeries;
597 ppc_md.iommu_bus_setup = iommu_bus_setup_pSeries;
598 ppc_md.iommu_dev_setup = iommu_dev_setup_pSeries;
599 }
600
601
602 pSeries_reconfig_notifier_register(&iommu_reconfig_nb);
603
604 pci_iommu_init();
605}
606
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
new file mode 100644
index 000000000000..268d8362dde7
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -0,0 +1,517 @@
1/*
2 * pSeries_lpar.c
3 * Copyright (C) 2001 Todd Inglett, IBM Corporation
4 *
5 * pSeries LPAR support.
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21
22#define DEBUG
23
24#include <linux/config.h>
25#include <linux/kernel.h>
26#include <linux/dma-mapping.h>
27#include <asm/processor.h>
28#include <asm/mmu.h>
29#include <asm/page.h>
30#include <asm/pgtable.h>
31#include <asm/machdep.h>
32#include <asm/abs_addr.h>
33#include <asm/mmu_context.h>
34#include <asm/ppcdebug.h>
35#include <asm/iommu.h>
36#include <asm/tlbflush.h>
37#include <asm/tlb.h>
38#include <asm/prom.h>
39#include <asm/abs_addr.h>
40#include <asm/cputable.h>
41#include <asm/plpar_wrappers.h>
42
43#ifdef DEBUG
44#define DBG(fmt...) udbg_printf(fmt)
45#else
46#define DBG(fmt...)
47#endif
48
49/* in pSeries_hvCall.S */
50EXPORT_SYMBOL(plpar_hcall);
51EXPORT_SYMBOL(plpar_hcall_4out);
52EXPORT_SYMBOL(plpar_hcall_norets);
53EXPORT_SYMBOL(plpar_hcall_8arg_2ret);
54
55extern void pSeries_find_serial_port(void);
56
57
58int vtermno; /* virtual terminal# for udbg */
59
60#define __ALIGNED__ __attribute__((__aligned__(sizeof(long))))
61static void udbg_hvsi_putc(unsigned char c)
62{
63 /* packet's seqno isn't used anyways */
64 uint8_t packet[] __ALIGNED__ = { 0xff, 5, 0, 0, c };
65 int rc;
66
67 if (c == '\n')
68 udbg_hvsi_putc('\r');
69
70 do {
71 rc = plpar_put_term_char(vtermno, sizeof(packet), packet);
72 } while (rc == H_Busy);
73}
74
75static long hvsi_udbg_buf_len;
76static uint8_t hvsi_udbg_buf[256];
77
78static int udbg_hvsi_getc_poll(void)
79{
80 unsigned char ch;
81 int rc, i;
82
83 if (hvsi_udbg_buf_len == 0) {
84 rc = plpar_get_term_char(vtermno, &hvsi_udbg_buf_len, hvsi_udbg_buf);
85 if (rc != H_Success || hvsi_udbg_buf[0] != 0xff) {
86 /* bad read or non-data packet */
87 hvsi_udbg_buf_len = 0;
88 } else {
89 /* remove the packet header */
90 for (i = 4; i < hvsi_udbg_buf_len; i++)
91 hvsi_udbg_buf[i-4] = hvsi_udbg_buf[i];
92 hvsi_udbg_buf_len -= 4;
93 }
94 }
95
96 if (hvsi_udbg_buf_len <= 0 || hvsi_udbg_buf_len > 256) {
97 /* no data ready */
98 hvsi_udbg_buf_len = 0;
99 return -1;
100 }
101
102 ch = hvsi_udbg_buf[0];
103 /* shift remaining data down */
104 for (i = 1; i < hvsi_udbg_buf_len; i++) {
105 hvsi_udbg_buf[i-1] = hvsi_udbg_buf[i];
106 }
107 hvsi_udbg_buf_len--;
108
109 return ch;
110}
111
112static unsigned char udbg_hvsi_getc(void)
113{
114 int ch;
115 for (;;) {
116 ch = udbg_hvsi_getc_poll();
117 if (ch == -1) {
118 /* This shouldn't be needed...but... */
119 volatile unsigned long delay;
120 for (delay=0; delay < 2000000; delay++)
121 ;
122 } else {
123 return ch;
124 }
125 }
126}
127
128static void udbg_putcLP(unsigned char c)
129{
130 char buf[16];
131 unsigned long rc;
132
133 if (c == '\n')
134 udbg_putcLP('\r');
135
136 buf[0] = c;
137 do {
138 rc = plpar_put_term_char(vtermno, 1, buf);
139 } while(rc == H_Busy);
140}
141
142/* Buffered chars getc */
143static long inbuflen;
144static long inbuf[2]; /* must be 2 longs */
145
146static int udbg_getc_pollLP(void)
147{
148 /* The interface is tricky because it may return up to 16 chars.
149 * We save them statically for future calls to udbg_getc().
150 */
151 char ch, *buf = (char *)inbuf;
152 int i;
153 long rc;
154 if (inbuflen == 0) {
155 /* get some more chars. */
156 inbuflen = 0;
157 rc = plpar_get_term_char(vtermno, &inbuflen, buf);
158 if (rc != H_Success)
159 inbuflen = 0; /* otherwise inbuflen is garbage */
160 }
161 if (inbuflen <= 0 || inbuflen > 16) {
162 /* Catch error case as well as other oddities (corruption) */
163 inbuflen = 0;
164 return -1;
165 }
166 ch = buf[0];
167 for (i = 1; i < inbuflen; i++) /* shuffle them down. */
168 buf[i-1] = buf[i];
169 inbuflen--;
170 return ch;
171}
172
173static unsigned char udbg_getcLP(void)
174{
175 int ch;
176 for (;;) {
177 ch = udbg_getc_pollLP();
178 if (ch == -1) {
179 /* This shouldn't be needed...but... */
180 volatile unsigned long delay;
181 for (delay=0; delay < 2000000; delay++)
182 ;
183 } else {
184 return ch;
185 }
186 }
187}
188
189/* call this from early_init() for a working debug console on
190 * vterm capable LPAR machines
191 */
192void udbg_init_debug_lpar(void)
193{
194 vtermno = 0;
195 udbg_putc = udbg_putcLP;
196 udbg_getc = udbg_getcLP;
197 udbg_getc_poll = udbg_getc_pollLP;
198}
199
200/* returns 0 if couldn't find or use /chosen/stdout as console */
201int find_udbg_vterm(void)
202{
203 struct device_node *stdout_node;
204 u32 *termno;
205 char *name;
206 int found = 0;
207
208 /* find the boot console from /chosen/stdout */
209 if (!of_chosen)
210 return 0;
211 name = (char *)get_property(of_chosen, "linux,stdout-path", NULL);
212 if (name == NULL)
213 return 0;
214 stdout_node = of_find_node_by_path(name);
215 if (!stdout_node)
216 return 0;
217
218 /* now we have the stdout node; figure out what type of device it is. */
219 name = (char *)get_property(stdout_node, "name", NULL);
220 if (!name) {
221 printk(KERN_WARNING "stdout node missing 'name' property!\n");
222 goto out;
223 }
224
225 if (strncmp(name, "vty", 3) == 0) {
226 if (device_is_compatible(stdout_node, "hvterm1")) {
227 termno = (u32 *)get_property(stdout_node, "reg", NULL);
228 if (termno) {
229 vtermno = termno[0];
230 udbg_putc = udbg_putcLP;
231 udbg_getc = udbg_getcLP;
232 udbg_getc_poll = udbg_getc_pollLP;
233 found = 1;
234 }
235 } else if (device_is_compatible(stdout_node, "hvterm-protocol")) {
236 termno = (u32 *)get_property(stdout_node, "reg", NULL);
237 if (termno) {
238 vtermno = termno[0];
239 udbg_putc = udbg_hvsi_putc;
240 udbg_getc = udbg_hvsi_getc;
241 udbg_getc_poll = udbg_hvsi_getc_poll;
242 found = 1;
243 }
244 }
245 } else if (strncmp(name, "serial", 6)) {
246 /* XXX fix ISA serial console */
247 printk(KERN_WARNING "serial stdout on LPAR ('%s')! "
248 "can't print udbg messages\n",
249 stdout_node->full_name);
250 } else {
251 printk(KERN_WARNING "don't know how to print to stdout '%s'\n",
252 stdout_node->full_name);
253 }
254
255out:
256 of_node_put(stdout_node);
257 return found;
258}
259
260void vpa_init(int cpu)
261{
262 int hwcpu = get_hard_smp_processor_id(cpu);
263 unsigned long vpa = (unsigned long)&(paca[cpu].lppaca);
264 long ret;
265 unsigned long flags;
266
267 /* Register the Virtual Processor Area (VPA) */
268 flags = 1UL << (63 - 18);
269
270 if (cpu_has_feature(CPU_FTR_ALTIVEC))
271 paca[cpu].lppaca.vmxregs_in_use = 1;
272
273 ret = register_vpa(flags, hwcpu, __pa(vpa));
274
275 if (ret)
276 printk(KERN_ERR "WARNING: vpa_init: VPA registration for "
277 "cpu %d (hw %d) of area %lx returns %ld\n",
278 cpu, hwcpu, __pa(vpa), ret);
279}
280
281long pSeries_lpar_hpte_insert(unsigned long hpte_group,
282 unsigned long va, unsigned long prpn,
283 unsigned long vflags, unsigned long rflags)
284{
285 unsigned long lpar_rc;
286 unsigned long flags;
287 unsigned long slot;
288 unsigned long hpte_v, hpte_r;
289 unsigned long dummy0, dummy1;
290
291 hpte_v = ((va >> 23) << HPTE_V_AVPN_SHIFT) | vflags | HPTE_V_VALID;
292 if (vflags & HPTE_V_LARGE)
293 hpte_v &= ~(1UL << HPTE_V_AVPN_SHIFT);
294
295 hpte_r = (prpn << HPTE_R_RPN_SHIFT) | rflags;
296
297 /* Now fill in the actual HPTE */
298 /* Set CEC cookie to 0 */
299 /* Zero page = 0 */
300 /* I-cache Invalidate = 0 */
301 /* I-cache synchronize = 0 */
302 /* Exact = 0 */
303 flags = 0;
304
305 /* XXX why is this here? - Anton */
306 if (rflags & (_PAGE_GUARDED|_PAGE_NO_CACHE))
307 hpte_r &= ~_PAGE_COHERENT;
308
309 lpar_rc = plpar_hcall(H_ENTER, flags, hpte_group, hpte_v,
310 hpte_r, &slot, &dummy0, &dummy1);
311
312 if (unlikely(lpar_rc == H_PTEG_Full))
313 return -1;
314
315 /*
316 * Since we try and ioremap PHBs we don't own, the pte insert
317 * will fail. However we must catch the failure in hash_page
318 * or we will loop forever, so return -2 in this case.
319 */
320 if (unlikely(lpar_rc != H_Success))
321 return -2;
322
323 /* Because of iSeries, we have to pass down the secondary
324 * bucket bit here as well
325 */
326 return (slot & 7) | (!!(vflags & HPTE_V_SECONDARY) << 3);
327}
328
329static DEFINE_SPINLOCK(pSeries_lpar_tlbie_lock);
330
331static long pSeries_lpar_hpte_remove(unsigned long hpte_group)
332{
333 unsigned long slot_offset;
334 unsigned long lpar_rc;
335 int i;
336 unsigned long dummy1, dummy2;
337
338 /* pick a random slot to start at */
339 slot_offset = mftb() & 0x7;
340
341 for (i = 0; i < HPTES_PER_GROUP; i++) {
342
343 /* don't remove a bolted entry */
344 lpar_rc = plpar_pte_remove(H_ANDCOND, hpte_group + slot_offset,
345 (0x1UL << 4), &dummy1, &dummy2);
346
347 if (lpar_rc == H_Success)
348 return i;
349
350 BUG_ON(lpar_rc != H_Not_Found);
351
352 slot_offset++;
353 slot_offset &= 0x7;
354 }
355
356 return -1;
357}
358
359static void pSeries_lpar_hptab_clear(void)
360{
361 unsigned long size_bytes = 1UL << ppc64_pft_size;
362 unsigned long hpte_count = size_bytes >> 4;
363 unsigned long dummy1, dummy2;
364 int i;
365
366 /* TODO: Use bulk call */
367 for (i = 0; i < hpte_count; i++)
368 plpar_pte_remove(0, i, 0, &dummy1, &dummy2);
369}
370
371/*
372 * NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and
373 * the low 3 bits of flags happen to line up. So no transform is needed.
374 * We can probably optimize here and assume the high bits of newpp are
375 * already zero. For now I am paranoid.
376 */
377static long pSeries_lpar_hpte_updatepp(unsigned long slot, unsigned long newpp,
378 unsigned long va, int large, int local)
379{
380 unsigned long lpar_rc;
381 unsigned long flags = (newpp & 7) | H_AVPN;
382 unsigned long avpn = va >> 23;
383
384 if (large)
385 avpn &= ~0x1UL;
386
387 lpar_rc = plpar_pte_protect(flags, slot, (avpn << 7));
388
389 if (lpar_rc == H_Not_Found)
390 return -1;
391
392 BUG_ON(lpar_rc != H_Success);
393
394 return 0;
395}
396
397static unsigned long pSeries_lpar_hpte_getword0(unsigned long slot)
398{
399 unsigned long dword0;
400 unsigned long lpar_rc;
401 unsigned long dummy_word1;
402 unsigned long flags;
403
404 /* Read 1 pte at a time */
405 /* Do not need RPN to logical page translation */
406 /* No cross CEC PFT access */
407 flags = 0;
408
409 lpar_rc = plpar_pte_read(flags, slot, &dword0, &dummy_word1);
410
411 BUG_ON(lpar_rc != H_Success);
412
413 return dword0;
414}
415
416static long pSeries_lpar_hpte_find(unsigned long vpn)
417{
418 unsigned long hash;
419 unsigned long i, j;
420 long slot;
421 unsigned long hpte_v;
422
423 hash = hpt_hash(vpn, 0);
424
425 for (j = 0; j < 2; j++) {
426 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
427 for (i = 0; i < HPTES_PER_GROUP; i++) {
428 hpte_v = pSeries_lpar_hpte_getword0(slot);
429
430 if ((HPTE_V_AVPN_VAL(hpte_v) == (vpn >> 11))
431 && (hpte_v & HPTE_V_VALID)
432 && (!!(hpte_v & HPTE_V_SECONDARY) == j)) {
433 /* HPTE matches */
434 if (j)
435 slot = -slot;
436 return slot;
437 }
438 ++slot;
439 }
440 hash = ~hash;
441 }
442
443 return -1;
444}
445
446static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
447 unsigned long ea)
448{
449 unsigned long lpar_rc;
450 unsigned long vsid, va, vpn, flags;
451 long slot;
452
453 vsid = get_kernel_vsid(ea);
454 va = (vsid << 28) | (ea & 0x0fffffff);
455 vpn = va >> PAGE_SHIFT;
456
457 slot = pSeries_lpar_hpte_find(vpn);
458 BUG_ON(slot == -1);
459
460 flags = newpp & 7;
461 lpar_rc = plpar_pte_protect(flags, slot, 0);
462
463 BUG_ON(lpar_rc != H_Success);
464}
465
466static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long va,
467 int large, int local)
468{
469 unsigned long avpn = va >> 23;
470 unsigned long lpar_rc;
471 unsigned long dummy1, dummy2;
472
473 if (large)
474 avpn &= ~0x1UL;
475
476 lpar_rc = plpar_pte_remove(H_AVPN, slot, (avpn << 7), &dummy1,
477 &dummy2);
478
479 if (lpar_rc == H_Not_Found)
480 return;
481
482 BUG_ON(lpar_rc != H_Success);
483}
484
485/*
486 * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
487 * lock.
488 */
489void pSeries_lpar_flush_hash_range(unsigned long number, int local)
490{
491 int i;
492 unsigned long flags = 0;
493 struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
494 int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
495
496 if (lock_tlbie)
497 spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
498
499 for (i = 0; i < number; i++)
500 flush_hash_page(batch->vaddr[i], batch->pte[i], local);
501
502 if (lock_tlbie)
503 spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
504}
505
506void hpte_init_lpar(void)
507{
508 ppc_md.hpte_invalidate = pSeries_lpar_hpte_invalidate;
509 ppc_md.hpte_updatepp = pSeries_lpar_hpte_updatepp;
510 ppc_md.hpte_updateboltedpp = pSeries_lpar_hpte_updateboltedpp;
511 ppc_md.hpte_insert = pSeries_lpar_hpte_insert;
512 ppc_md.hpte_remove = pSeries_lpar_hpte_remove;
513 ppc_md.flush_hash_range = pSeries_lpar_flush_hash_range;
514 ppc_md.hpte_clear_all = pSeries_lpar_hptab_clear;
515
516 htab_finish_init();
517}
diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c
new file mode 100644
index 000000000000..18abfb1f4e24
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/nvram.c
@@ -0,0 +1,148 @@
1/*
2 * c 2001 PPC 64 Team, IBM Corp
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * /dev/nvram driver for PPC64
10 *
11 * This perhaps should live in drivers/char
12 */
13
14
15#include <linux/types.h>
16#include <linux/errno.h>
17#include <linux/init.h>
18#include <linux/slab.h>
19#include <linux/spinlock.h>
20#include <asm/uaccess.h>
21#include <asm/nvram.h>
22#include <asm/rtas.h>
23#include <asm/prom.h>
24#include <asm/machdep.h>
25
26static unsigned int nvram_size;
27static int nvram_fetch, nvram_store;
28static char nvram_buf[NVRW_CNT]; /* assume this is in the first 4GB */
29static DEFINE_SPINLOCK(nvram_lock);
30
31
32static ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index)
33{
34 unsigned int i;
35 unsigned long len;
36 int done;
37 unsigned long flags;
38 char *p = buf;
39
40
41 if (nvram_size == 0 || nvram_fetch == RTAS_UNKNOWN_SERVICE)
42 return -ENODEV;
43
44 if (*index >= nvram_size)
45 return 0;
46
47 i = *index;
48 if (i + count > nvram_size)
49 count = nvram_size - i;
50
51 spin_lock_irqsave(&nvram_lock, flags);
52
53 for (; count != 0; count -= len) {
54 len = count;
55 if (len > NVRW_CNT)
56 len = NVRW_CNT;
57
58 if ((rtas_call(nvram_fetch, 3, 2, &done, i, __pa(nvram_buf),
59 len) != 0) || len != done) {
60 spin_unlock_irqrestore(&nvram_lock, flags);
61 return -EIO;
62 }
63
64 memcpy(p, nvram_buf, len);
65
66 p += len;
67 i += len;
68 }
69
70 spin_unlock_irqrestore(&nvram_lock, flags);
71
72 *index = i;
73 return p - buf;
74}
75
76static ssize_t pSeries_nvram_write(char *buf, size_t count, loff_t *index)
77{
78 unsigned int i;
79 unsigned long len;
80 int done;
81 unsigned long flags;
82 const char *p = buf;
83
84 if (nvram_size == 0 || nvram_store == RTAS_UNKNOWN_SERVICE)
85 return -ENODEV;
86
87 if (*index >= nvram_size)
88 return 0;
89
90 i = *index;
91 if (i + count > nvram_size)
92 count = nvram_size - i;
93
94 spin_lock_irqsave(&nvram_lock, flags);
95
96 for (; count != 0; count -= len) {
97 len = count;
98 if (len > NVRW_CNT)
99 len = NVRW_CNT;
100
101 memcpy(nvram_buf, p, len);
102
103 if ((rtas_call(nvram_store, 3, 2, &done, i, __pa(nvram_buf),
104 len) != 0) || len != done) {
105 spin_unlock_irqrestore(&nvram_lock, flags);
106 return -EIO;
107 }
108
109 p += len;
110 i += len;
111 }
112 spin_unlock_irqrestore(&nvram_lock, flags);
113
114 *index = i;
115 return p - buf;
116}
117
118static ssize_t pSeries_nvram_get_size(void)
119{
120 return nvram_size ? nvram_size : -ENODEV;
121}
122
123int __init pSeries_nvram_init(void)
124{
125 struct device_node *nvram;
126 unsigned int *nbytes_p, proplen;
127
128 nvram = of_find_node_by_type(NULL, "nvram");
129 if (nvram == NULL)
130 return -ENODEV;
131
132 nbytes_p = (unsigned int *)get_property(nvram, "#bytes", &proplen);
133 if (nbytes_p == NULL || proplen != sizeof(unsigned int))
134 return -EIO;
135
136 nvram_size = *nbytes_p;
137
138 nvram_fetch = rtas_token("nvram-fetch");
139 nvram_store = rtas_token("nvram-store");
140 printk(KERN_INFO "PPC64 nvram contains %d bytes\n", nvram_size);
141 of_node_put(nvram);
142
143 ppc_md.nvram_read = pSeries_nvram_read;
144 ppc_md.nvram_write = pSeries_nvram_write;
145 ppc_md.nvram_size = pSeries_nvram_get_size;
146
147 return 0;
148}
diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
new file mode 100644
index 000000000000..2dd477eb1c53
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/pci.c
@@ -0,0 +1,142 @@
1/*
2 * arch/ppc64/kernel/pSeries_pci.c
3 *
4 * Copyright (C) 2001 Dave Engebretsen, IBM Corporation
5 * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
6 *
7 * pSeries specific routines for PCI.
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 */
23
24#include <linux/init.h>
25#include <linux/ioport.h>
26#include <linux/kernel.h>
27#include <linux/pci.h>
28#include <linux/string.h>
29
30#include <asm/pci-bridge.h>
31#include <asm/prom.h>
32#include <asm/ppc-pci.h>
33
34static int __initdata s7a_workaround = -1;
35
36#if 0
37void pcibios_name_device(struct pci_dev *dev)
38{
39 struct device_node *dn;
40
41 /*
42 * Add IBM loc code (slot) as a prefix to the device names for service
43 */
44 dn = pci_device_to_OF_node(dev);
45 if (dn) {
46 char *loc_code = get_property(dn, "ibm,loc-code", 0);
47 if (loc_code) {
48 int loc_len = strlen(loc_code);
49 if (loc_len < sizeof(dev->dev.name)) {
50 memmove(dev->dev.name+loc_len+1, dev->dev.name,
51 sizeof(dev->dev.name)-loc_len-1);
52 memcpy(dev->dev.name, loc_code, loc_len);
53 dev->dev.name[loc_len] = ' ';
54 dev->dev.name[sizeof(dev->dev.name)-1] = '\0';
55 }
56 }
57 }
58}
59DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_name_device);
60#endif
61
62static void __init check_s7a(void)
63{
64 struct device_node *root;
65 char *model;
66
67 s7a_workaround = 0;
68 root = of_find_node_by_path("/");
69 if (root) {
70 model = get_property(root, "model", NULL);
71 if (model && !strcmp(model, "IBM,7013-S7A"))
72 s7a_workaround = 1;
73 of_node_put(root);
74 }
75}
76
77void __devinit pSeries_irq_bus_setup(struct pci_bus *bus)
78{
79 struct pci_dev *dev;
80
81 if (s7a_workaround < 0)
82 check_s7a();
83 list_for_each_entry(dev, &bus->devices, bus_list) {
84 pci_read_irq_line(dev);
85 if (s7a_workaround) {
86 if (dev->irq > 16) {
87 dev->irq -= 3;
88 pci_write_config_byte(dev, PCI_INTERRUPT_LINE,
89 dev->irq);
90 }
91 }
92 }
93}
94
95static void __init pSeries_request_regions(void)
96{
97 if (!isa_io_base)
98 return;
99
100 request_region(0x20,0x20,"pic1");
101 request_region(0xa0,0x20,"pic2");
102 request_region(0x00,0x20,"dma1");
103 request_region(0x40,0x20,"timer");
104 request_region(0x80,0x10,"dma page reg");
105 request_region(0xc0,0x20,"dma2");
106}
107
108void __init pSeries_final_fixup(void)
109{
110 phbs_remap_io();
111 pSeries_request_regions();
112
113 pci_addr_cache_build();
114}
115
116/*
117 * Assume the winbond 82c105 is the IDE controller on a
118 * p610. We should probably be more careful in case
119 * someone tries to plug in a similar adapter.
120 */
121static void fixup_winbond_82c105(struct pci_dev* dev)
122{
123 int i;
124 unsigned int reg;
125
126 if (!(systemcfg->platform & PLATFORM_PSERIES))
127 return;
128
129 printk("Using INTC for W82c105 IDE controller.\n");
130 pci_read_config_dword(dev, 0x40, &reg);
131 /* Enable LEGIRQ to use INTC instead of ISA interrupts */
132 pci_write_config_dword(dev, 0x40, reg | (1<<11));
133
134 for (i = 0; i < DEVICE_COUNT_RESOURCE; ++i) {
135 /* zap the 2nd function of the winbond chip */
136 if (dev->resource[i].flags & IORESOURCE_IO
137 && dev->bus->number == 0 && dev->devfn == 0x81)
138 dev->resource[i].flags &= ~IORESOURCE_IO;
139 }
140}
141DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_WINBOND, PCI_DEVICE_ID_WINBOND_82C105,
142 fixup_winbond_82c105);
diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c
new file mode 100644
index 000000000000..58c61219d08e
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/reconfig.c
@@ -0,0 +1,426 @@
1/*
2 * pSeries_reconfig.c - support for dynamic reconfiguration (including PCI
3 * Hotplug and Dynamic Logical Partitioning on RPA platforms).
4 *
5 * Copyright (C) 2005 Nathan Lynch
6 * Copyright (C) 2005 IBM Corporation
7 *
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License version
11 * 2 as published by the Free Software Foundation.
12 */
13
14#include <linux/kernel.h>
15#include <linux/kref.h>
16#include <linux/notifier.h>
17#include <linux/proc_fs.h>
18
19#include <asm/prom.h>
20#include <asm/pSeries_reconfig.h>
21#include <asm/uaccess.h>
22
23
24
25/*
26 * Routines for "runtime" addition and removal of device tree nodes.
27 */
28#ifdef CONFIG_PROC_DEVICETREE
29/*
30 * Add a node to /proc/device-tree.
31 */
32static void add_node_proc_entries(struct device_node *np)
33{
34 struct proc_dir_entry *ent;
35
36 ent = proc_mkdir(strrchr(np->full_name, '/') + 1, np->parent->pde);
37 if (ent)
38 proc_device_tree_add_node(np, ent);
39}
40
41static void remove_node_proc_entries(struct device_node *np)
42{
43 struct property *pp = np->properties;
44 struct device_node *parent = np->parent;
45
46 while (pp) {
47 remove_proc_entry(pp->name, np->pde);
48 pp = pp->next;
49 }
50 if (np->pde)
51 remove_proc_entry(np->pde->name, parent->pde);
52}
53#else /* !CONFIG_PROC_DEVICETREE */
54static void add_node_proc_entries(struct device_node *np)
55{
56 return;
57}
58
59static void remove_node_proc_entries(struct device_node *np)
60{
61 return;
62}
63#endif /* CONFIG_PROC_DEVICETREE */
64
65/**
66 * derive_parent - basically like dirname(1)
67 * @path: the full_name of a node to be added to the tree
68 *
69 * Returns the node which should be the parent of the node
70 * described by path. E.g., for path = "/foo/bar", returns
71 * the node with full_name = "/foo".
72 */
73static struct device_node *derive_parent(const char *path)
74{
75 struct device_node *parent = NULL;
76 char *parent_path = "/";
77 size_t parent_path_len = strrchr(path, '/') - path + 1;
78
79 /* reject if path is "/" */
80 if (!strcmp(path, "/"))
81 return ERR_PTR(-EINVAL);
82
83 if (strrchr(path, '/') != path) {
84 parent_path = kmalloc(parent_path_len, GFP_KERNEL);
85 if (!parent_path)
86 return ERR_PTR(-ENOMEM);
87 strlcpy(parent_path, path, parent_path_len);
88 }
89 parent = of_find_node_by_path(parent_path);
90 if (!parent)
91 return ERR_PTR(-EINVAL);
92 if (strcmp(parent_path, "/"))
93 kfree(parent_path);
94 return parent;
95}
96
97static struct notifier_block *pSeries_reconfig_chain;
98
99int pSeries_reconfig_notifier_register(struct notifier_block *nb)
100{
101 return notifier_chain_register(&pSeries_reconfig_chain, nb);
102}
103
104void pSeries_reconfig_notifier_unregister(struct notifier_block *nb)
105{
106 notifier_chain_unregister(&pSeries_reconfig_chain, nb);
107}
108
109static int pSeries_reconfig_add_node(const char *path, struct property *proplist)
110{
111 struct device_node *np;
112 int err = -ENOMEM;
113
114 np = kzalloc(sizeof(*np), GFP_KERNEL);
115 if (!np)
116 goto out_err;
117
118 np->full_name = kmalloc(strlen(path) + 1, GFP_KERNEL);
119 if (!np->full_name)
120 goto out_err;
121
122 strcpy(np->full_name, path);
123
124 np->properties = proplist;
125 OF_MARK_DYNAMIC(np);
126 kref_init(&np->kref);
127
128 np->parent = derive_parent(path);
129 if (IS_ERR(np->parent)) {
130 err = PTR_ERR(np->parent);
131 goto out_err;
132 }
133
134 err = notifier_call_chain(&pSeries_reconfig_chain,
135 PSERIES_RECONFIG_ADD, np);
136 if (err == NOTIFY_BAD) {
137 printk(KERN_ERR "Failed to add device node %s\n", path);
138 err = -ENOMEM; /* For now, safe to assume kmalloc failure */
139 goto out_err;
140 }
141
142 of_attach_node(np);
143
144 add_node_proc_entries(np);
145
146 of_node_put(np->parent);
147
148 return 0;
149
150out_err:
151 if (np) {
152 of_node_put(np->parent);
153 kfree(np->full_name);
154 kfree(np);
155 }
156 return err;
157}
158
159static int pSeries_reconfig_remove_node(struct device_node *np)
160{
161 struct device_node *parent, *child;
162
163 parent = of_get_parent(np);
164 if (!parent)
165 return -EINVAL;
166
167 if ((child = of_get_next_child(np, NULL))) {
168 of_node_put(child);
169 return -EBUSY;
170 }
171
172 remove_node_proc_entries(np);
173
174 notifier_call_chain(&pSeries_reconfig_chain,
175 PSERIES_RECONFIG_REMOVE, np);
176 of_detach_node(np);
177
178 of_node_put(parent);
179 of_node_put(np); /* Must decrement the refcount */
180 return 0;
181}
182
183/*
184 * /proc/ppc64/ofdt - yucky binary interface for adding and removing
185 * OF device nodes. Should be deprecated as soon as we get an
186 * in-kernel wrapper for the RTAS ibm,configure-connector call.
187 */
188
189static void release_prop_list(const struct property *prop)
190{
191 struct property *next;
192 for (; prop; prop = next) {
193 next = prop->next;
194 kfree(prop->name);
195 kfree(prop->value);
196 kfree(prop);
197 }
198
199}
200
201/**
202 * parse_next_property - process the next property from raw input buffer
203 * @buf: input buffer, must be nul-terminated
204 * @end: end of the input buffer + 1, for validation
205 * @name: return value; set to property name in buf
206 * @length: return value; set to length of value
207 * @value: return value; set to the property value in buf
208 *
209 * Note that the caller must make copies of the name and value returned,
210 * this function does no allocation or copying of the data. Return value
211 * is set to the next name in buf, or NULL on error.
212 */
213static char * parse_next_property(char *buf, char *end, char **name, int *length,
214 unsigned char **value)
215{
216 char *tmp;
217
218 *name = buf;
219
220 tmp = strchr(buf, ' ');
221 if (!tmp) {
222 printk(KERN_ERR "property parse failed in %s at line %d\n",
223 __FUNCTION__, __LINE__);
224 return NULL;
225 }
226 *tmp = '\0';
227
228 if (++tmp >= end) {
229 printk(KERN_ERR "property parse failed in %s at line %d\n",
230 __FUNCTION__, __LINE__);
231 return NULL;
232 }
233
234 /* now we're on the length */
235 *length = -1;
236 *length = simple_strtoul(tmp, &tmp, 10);
237 if (*length == -1) {
238 printk(KERN_ERR "property parse failed in %s at line %d\n",
239 __FUNCTION__, __LINE__);
240 return NULL;
241 }
242 if (*tmp != ' ' || ++tmp >= end) {
243 printk(KERN_ERR "property parse failed in %s at line %d\n",
244 __FUNCTION__, __LINE__);
245 return NULL;
246 }
247
248 /* now we're on the value */
249 *value = tmp;
250 tmp += *length;
251 if (tmp > end) {
252 printk(KERN_ERR "property parse failed in %s at line %d\n",
253 __FUNCTION__, __LINE__);
254 return NULL;
255 }
256 else if (tmp < end && *tmp != ' ' && *tmp != '\0') {
257 printk(KERN_ERR "property parse failed in %s at line %d\n",
258 __FUNCTION__, __LINE__);
259 return NULL;
260 }
261 tmp++;
262
263 /* and now we should be on the next name, or the end */
264 return tmp;
265}
266
267static struct property *new_property(const char *name, const int length,
268 const unsigned char *value, struct property *last)
269{
270 struct property *new = kmalloc(sizeof(*new), GFP_KERNEL);
271
272 if (!new)
273 return NULL;
274 memset(new, 0, sizeof(*new));
275
276 if (!(new->name = kmalloc(strlen(name) + 1, GFP_KERNEL)))
277 goto cleanup;
278 if (!(new->value = kmalloc(length + 1, GFP_KERNEL)))
279 goto cleanup;
280
281 strcpy(new->name, name);
282 memcpy(new->value, value, length);
283 *(((char *)new->value) + length) = 0;
284 new->length = length;
285 new->next = last;
286 return new;
287
288cleanup:
289 if (new->name)
290 kfree(new->name);
291 if (new->value)
292 kfree(new->value);
293 kfree(new);
294 return NULL;
295}
296
297static int do_add_node(char *buf, size_t bufsize)
298{
299 char *path, *end, *name;
300 struct device_node *np;
301 struct property *prop = NULL;
302 unsigned char* value;
303 int length, rv = 0;
304
305 end = buf + bufsize;
306 path = buf;
307 buf = strchr(buf, ' ');
308 if (!buf)
309 return -EINVAL;
310 *buf = '\0';
311 buf++;
312
313 if ((np = of_find_node_by_path(path))) {
314 of_node_put(np);
315 return -EINVAL;
316 }
317
318 /* rv = build_prop_list(tmp, bufsize - (tmp - buf), &proplist); */
319 while (buf < end &&
320 (buf = parse_next_property(buf, end, &name, &length, &value))) {
321 struct property *last = prop;
322
323 prop = new_property(name, length, value, last);
324 if (!prop) {
325 rv = -ENOMEM;
326 prop = last;
327 goto out;
328 }
329 }
330 if (!buf) {
331 rv = -EINVAL;
332 goto out;
333 }
334
335 rv = pSeries_reconfig_add_node(path, prop);
336
337out:
338 if (rv)
339 release_prop_list(prop);
340 return rv;
341}
342
343static int do_remove_node(char *buf)
344{
345 struct device_node *node;
346 int rv = -ENODEV;
347
348 if ((node = of_find_node_by_path(buf)))
349 rv = pSeries_reconfig_remove_node(node);
350
351 of_node_put(node);
352 return rv;
353}
354
355/**
356 * ofdt_write - perform operations on the Open Firmware device tree
357 *
358 * @file: not used
359 * @buf: command and arguments
360 * @count: size of the command buffer
361 * @off: not used
362 *
363 * Operations supported at this time are addition and removal of
364 * whole nodes along with their properties. Operations on individual
365 * properties are not implemented (yet).
366 */
367static ssize_t ofdt_write(struct file *file, const char __user *buf, size_t count,
368 loff_t *off)
369{
370 int rv = 0;
371 char *kbuf;
372 char *tmp;
373
374 if (!(kbuf = kmalloc(count + 1, GFP_KERNEL))) {
375 rv = -ENOMEM;
376 goto out;
377 }
378 if (copy_from_user(kbuf, buf, count)) {
379 rv = -EFAULT;
380 goto out;
381 }
382
383 kbuf[count] = '\0';
384
385 tmp = strchr(kbuf, ' ');
386 if (!tmp) {
387 rv = -EINVAL;
388 goto out;
389 }
390 *tmp = '\0';
391 tmp++;
392
393 if (!strcmp(kbuf, "add_node"))
394 rv = do_add_node(tmp, count - (tmp - kbuf));
395 else if (!strcmp(kbuf, "remove_node"))
396 rv = do_remove_node(tmp);
397 else
398 rv = -EINVAL;
399out:
400 kfree(kbuf);
401 return rv ? rv : count;
402}
403
404static struct file_operations ofdt_fops = {
405 .write = ofdt_write
406};
407
408/* create /proc/ppc64/ofdt write-only by root */
409static int proc_ppc64_create_ofdt(void)
410{
411 struct proc_dir_entry *ent;
412
413 if (!(systemcfg->platform & PLATFORM_PSERIES))
414 return 0;
415
416 ent = create_proc_entry("ppc64/ofdt", S_IWUSR, NULL);
417 if (ent) {
418 ent->nlink = 1;
419 ent->data = NULL;
420 ent->size = 0;
421 ent->proc_fops = &ofdt_fops;
422 }
423
424 return 0;
425}
426__initcall(proc_ppc64_create_ofdt);
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
new file mode 100644
index 000000000000..eb25ee2eead8
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -0,0 +1,622 @@
1/*
2 * linux/arch/ppc/kernel/setup.c
3 *
4 * Copyright (C) 1995 Linus Torvalds
5 * Adapted from 'alpha' version by Gary Thomas
6 * Modified by Cort Dougan (cort@cs.nmt.edu)
7 * Modified by PPC64 Team, IBM Corp
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version
12 * 2 of the License, or (at your option) any later version.
13 */
14
15/*
16 * bootup setup stuff..
17 */
18
19#undef DEBUG
20
21#include <linux/config.h>
22#include <linux/cpu.h>
23#include <linux/errno.h>
24#include <linux/sched.h>
25#include <linux/kernel.h>
26#include <linux/mm.h>
27#include <linux/stddef.h>
28#include <linux/unistd.h>
29#include <linux/slab.h>
30#include <linux/user.h>
31#include <linux/a.out.h>
32#include <linux/tty.h>
33#include <linux/major.h>
34#include <linux/interrupt.h>
35#include <linux/reboot.h>
36#include <linux/init.h>
37#include <linux/ioport.h>
38#include <linux/console.h>
39#include <linux/pci.h>
40#include <linux/utsname.h>
41#include <linux/adb.h>
42#include <linux/module.h>
43#include <linux/delay.h>
44#include <linux/irq.h>
45#include <linux/seq_file.h>
46#include <linux/root_dev.h>
47
48#include <asm/mmu.h>
49#include <asm/processor.h>
50#include <asm/io.h>
51#include <asm/pgtable.h>
52#include <asm/prom.h>
53#include <asm/rtas.h>
54#include <asm/pci-bridge.h>
55#include <asm/iommu.h>
56#include <asm/dma.h>
57#include <asm/machdep.h>
58#include <asm/irq.h>
59#include <asm/time.h>
60#include <asm/nvram.h>
61#include <asm/plpar_wrappers.h>
62#include <asm/xics.h>
63#include <asm/firmware.h>
64#include <asm/pmc.h>
65#include <asm/mpic.h>
66#include <asm/ppc-pci.h>
67#include <asm/i8259.h>
68#include <asm/udbg.h>
69
70#ifdef DEBUG
71#define DBG(fmt...) udbg_printf(fmt)
72#else
73#define DBG(fmt...)
74#endif
75
76extern void find_udbg_vterm(void);
77extern void system_reset_fwnmi(void); /* from head.S */
78extern void machine_check_fwnmi(void); /* from head.S */
79extern void generic_find_legacy_serial_ports(u64 *physport,
80 unsigned int *default_speed);
81
82int fwnmi_active; /* TRUE if an FWNMI handler is present */
83
84extern void pSeries_system_reset_exception(struct pt_regs *regs);
85extern int pSeries_machine_check_exception(struct pt_regs *regs);
86
87static int pseries_shared_idle(void);
88static int pseries_dedicated_idle(void);
89
90static volatile void __iomem * chrp_int_ack_special;
91struct mpic *pSeries_mpic;
92
93void pSeries_get_cpuinfo(struct seq_file *m)
94{
95 struct device_node *root;
96 const char *model = "";
97
98 root = of_find_node_by_path("/");
99 if (root)
100 model = get_property(root, "model", NULL);
101 seq_printf(m, "machine\t\t: CHRP %s\n", model);
102 of_node_put(root);
103}
104
105/* Initialize firmware assisted non-maskable interrupts if
106 * the firmware supports this feature.
107 *
108 */
109static void __init fwnmi_init(void)
110{
111 int ret;
112 int ibm_nmi_register = rtas_token("ibm,nmi-register");
113 if (ibm_nmi_register == RTAS_UNKNOWN_SERVICE)
114 return;
115 ret = rtas_call(ibm_nmi_register, 2, 1, NULL,
116 __pa((unsigned long)system_reset_fwnmi),
117 __pa((unsigned long)machine_check_fwnmi));
118 if (ret == 0)
119 fwnmi_active = 1;
120}
121
122static int pSeries_irq_cascade(struct pt_regs *regs, void *data)
123{
124 if (chrp_int_ack_special)
125 return readb(chrp_int_ack_special);
126 else
127 return i8259_irq(regs);
128}
129
130static void __init pSeries_init_mpic(void)
131{
132 unsigned int *addrp;
133 struct device_node *np;
134 int i;
135
136 /* All ISUs are setup, complete initialization */
137 mpic_init(pSeries_mpic);
138
139 /* Check what kind of cascade ACK we have */
140 if (!(np = of_find_node_by_name(NULL, "pci"))
141 || !(addrp = (unsigned int *)
142 get_property(np, "8259-interrupt-acknowledge", NULL)))
143 printk(KERN_ERR "Cannot find pci to get ack address\n");
144 else
145 chrp_int_ack_special = ioremap(addrp[prom_n_addr_cells(np)-1], 1);
146 of_node_put(np);
147
148 /* Setup the legacy interrupts & controller */
149 for (i = 0; i < NUM_ISA_INTERRUPTS; i++)
150 irq_desc[i].handler = &i8259_pic;
151 i8259_init(0);
152
153 /* Hook cascade to mpic */
154 mpic_setup_cascade(NUM_ISA_INTERRUPTS, pSeries_irq_cascade, NULL);
155}
156
157static void __init pSeries_setup_mpic(void)
158{
159 unsigned int *opprop;
160 unsigned long openpic_addr = 0;
161 unsigned char senses[NR_IRQS - NUM_ISA_INTERRUPTS];
162 struct device_node *root;
163 int irq_count;
164
165 /* Find the Open PIC if present */
166 root = of_find_node_by_path("/");
167 opprop = (unsigned int *) get_property(root, "platform-open-pic", NULL);
168 if (opprop != 0) {
169 int n = prom_n_addr_cells(root);
170
171 for (openpic_addr = 0; n > 0; --n)
172 openpic_addr = (openpic_addr << 32) + *opprop++;
173 printk(KERN_DEBUG "OpenPIC addr: %lx\n", openpic_addr);
174 }
175 of_node_put(root);
176
177 BUG_ON(openpic_addr == 0);
178
179 /* Get the sense values from OF */
180 prom_get_irq_senses(senses, NUM_ISA_INTERRUPTS, NR_IRQS);
181
182 /* Setup the openpic driver */
183 irq_count = NR_IRQS - NUM_ISA_INTERRUPTS - 4; /* leave room for IPIs */
184 pSeries_mpic = mpic_alloc(openpic_addr, MPIC_PRIMARY,
185 16, 16, irq_count, /* isu size, irq offset, irq count */
186 NR_IRQS - 4, /* ipi offset */
187 senses, irq_count, /* sense & sense size */
188 " MPIC ");
189}
190
191static void pseries_lpar_enable_pmcs(void)
192{
193 unsigned long set, reset;
194
195 power4_enable_pmcs();
196
197 set = 1UL << 63;
198 reset = 0;
199 plpar_hcall_norets(H_PERFMON, set, reset);
200
201 /* instruct hypervisor to maintain PMCs */
202 if (firmware_has_feature(FW_FEATURE_SPLPAR))
203 get_paca()->lppaca.pmcregs_in_use = 1;
204}
205
206static void __init pSeries_setup_arch(void)
207{
208 /* Fixup ppc_md depending on the type of interrupt controller */
209 if (ppc64_interrupt_controller == IC_OPEN_PIC) {
210 ppc_md.init_IRQ = pSeries_init_mpic;
211 ppc_md.get_irq = mpic_get_irq;
212 ppc_md.cpu_irq_down = mpic_teardown_this_cpu;
213 /* Allocate the mpic now, so that find_and_init_phbs() can
214 * fill the ISUs */
215 pSeries_setup_mpic();
216 } else {
217 ppc_md.init_IRQ = xics_init_IRQ;
218 ppc_md.get_irq = xics_get_irq;
219 ppc_md.cpu_irq_down = xics_teardown_cpu;
220 }
221
222#ifdef CONFIG_SMP
223 smp_init_pSeries();
224#endif
225 /* openpic global configuration register (64-bit format). */
226 /* openpic Interrupt Source Unit pointer (64-bit format). */
227 /* python0 facility area (mmio) (64-bit format) REAL address. */
228
229 /* init to some ~sane value until calibrate_delay() runs */
230 loops_per_jiffy = 50000000;
231
232 if (ROOT_DEV == 0) {
233 printk("No ramdisk, default root is /dev/sda2\n");
234 ROOT_DEV = Root_SDA2;
235 }
236
237 fwnmi_init();
238
239 /* Find and initialize PCI host bridges */
240 init_pci_config_tokens();
241 find_and_init_phbs();
242 eeh_init();
243
244#ifdef CONFIG_DUMMY_CONSOLE
245 conswitchp = &dummy_con;
246#endif
247
248 pSeries_nvram_init();
249
250 /* Choose an idle loop */
251 if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
252 vpa_init(boot_cpuid);
253 if (get_paca()->lppaca.shared_proc) {
254 printk(KERN_INFO "Using shared processor idle loop\n");
255 ppc_md.idle_loop = pseries_shared_idle;
256 } else {
257 printk(KERN_INFO "Using dedicated idle loop\n");
258 ppc_md.idle_loop = pseries_dedicated_idle;
259 }
260 } else {
261 printk(KERN_INFO "Using default idle loop\n");
262 ppc_md.idle_loop = default_idle;
263 }
264
265 if (systemcfg->platform & PLATFORM_LPAR)
266 ppc_md.enable_pmcs = pseries_lpar_enable_pmcs;
267 else
268 ppc_md.enable_pmcs = power4_enable_pmcs;
269}
270
271static int __init pSeries_init_panel(void)
272{
273 /* Manually leave the kernel version on the panel. */
274 ppc_md.progress("Linux ppc64\n", 0);
275 ppc_md.progress(system_utsname.version, 0);
276
277 return 0;
278}
279arch_initcall(pSeries_init_panel);
280
281
282/* Build up the ppc64_firmware_features bitmask field
283 * using contents of device-tree/ibm,hypertas-functions.
284 * Ultimately this functionality may be moved into prom.c prom_init().
285 */
286static void __init fw_feature_init(void)
287{
288 struct device_node * dn;
289 char * hypertas;
290 unsigned int len;
291
292 DBG(" -> fw_feature_init()\n");
293
294 ppc64_firmware_features = 0;
295 dn = of_find_node_by_path("/rtas");
296 if (dn == NULL) {
297 printk(KERN_ERR "WARNING! Cannot find RTAS in device-tree!\n");
298 goto no_rtas;
299 }
300
301 hypertas = get_property(dn, "ibm,hypertas-functions", &len);
302 if (hypertas) {
303 while (len > 0) {
304 int i, hypertas_len;
305 /* check value against table of strings */
306 for (i = 0; i < FIRMWARE_MAX_FEATURES; i++) {
307 if ((firmware_features_table[i].name) &&
308 (strcmp(firmware_features_table[i].name, hypertas)) == 0) {
309 /* we have a match */
310 ppc64_firmware_features |=
311 (firmware_features_table[i].val);
312 break;
313 }
314 }
315 hypertas_len = strlen(hypertas);
316 len -= hypertas_len + 1;
317 hypertas += hypertas_len + 1;
318 }
319 }
320
321 of_node_put(dn);
322 no_rtas:
323 printk(KERN_INFO "firmware_features = 0x%lx\n",
324 ppc64_firmware_features);
325
326 DBG(" <- fw_feature_init()\n");
327}
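
The hypertas scan above treats the property value as a packed list of NUL-terminated strings and ORs in the bit for each name it recognizes. Below is a stand-alone sketch of that walk, with a made-up feature table (the names and bit values are placeholders, not the kernel's firmware_features_table):

#include <stdio.h>
#include <string.h>

static const struct { const char *name; unsigned long val; } features[] = {
	{ "hcall-pft",   1UL << 0 },	/* placeholder entries */
	{ "hcall-tce",   1UL << 1 },
	{ "hcall-sprg0", 1UL << 2 },
};

int main(void)
{
	/* Property value: strings packed back to back, each NUL-terminated. */
	const char prop[] = "hcall-pft\0hcall-sprg0\0hcall-unknown";
	int len = sizeof(prop);
	const char *p = prop;
	unsigned long mask = 0;
	size_t i;

	while (len > 0) {
		size_t n = strlen(p);

		for (i = 0; i < sizeof(features) / sizeof(features[0]); i++)
			if (strcmp(features[i].name, p) == 0) {
				mask |= features[i].val;
				break;
			}
		len -= n + 1;
		p += n + 1;
	}
	printf("firmware_features = 0x%lx\n", mask);
	return 0;
}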
328
329
330static void __init pSeries_discover_pic(void)
331{
332 struct device_node *np;
333 char *typep;
334
335 /*
336 * Set up interrupt mapping options that are needed for finish_device_tree
337 * to properly parse the OF interrupt tree and do the virtual irq mapping.
338 */
339 __irq_offset_value = NUM_ISA_INTERRUPTS;
340 ppc64_interrupt_controller = IC_INVALID;
341 for (np = NULL; (np = of_find_node_by_name(np, "interrupt-controller"));) {
342 typep = (char *)get_property(np, "compatible", NULL);
343 if (typep && strstr(typep, "open-pic"))
344 ppc64_interrupt_controller = IC_OPEN_PIC;
345 else if (typep && strstr(typep, "ppc-xicp"))
346 ppc64_interrupt_controller = IC_PPC_XIC;
347 else
348 printk("pSeries_discover_pic: failed to recognize"
349 " interrupt-controller\n");
350 break;
351 }
352}
353
354static void pSeries_mach_cpu_die(void)
355{
356 local_irq_disable();
357 idle_task_exit();
358 /* Some hardware requires clearing the CPPR, while other hardware does not;
359 * it is safe either way.
360 */
361 pSeriesLP_cppr_info(0, 0);
362 rtas_stop_self();
363 /* Should never get here... */
364 BUG();
365 for(;;);
366}
367
368
369/*
370 * Early initialization. Relocation is on but do not reference unbolted pages
371 */
372static void __init pSeries_init_early(void)
373{
374 void *comport;
375 int iommu_off = 0;
376 unsigned int default_speed;
377 u64 physport;
378
379 DBG(" -> pSeries_init_early()\n");
380
381 fw_feature_init();
382
383 if (systemcfg->platform & PLATFORM_LPAR)
384 hpte_init_lpar();
385 else {
386 hpte_init_native();
387 iommu_off = (of_chosen &&
388 get_property(of_chosen, "linux,iommu-off", NULL));
389 }
390
391 generic_find_legacy_serial_ports(&physport, &default_speed);
392
393 if (systemcfg->platform & PLATFORM_LPAR)
394 find_udbg_vterm();
395 else if (physport) {
396 /* Map the uart for udbg. */
397 comport = (void *)ioremap(physport, 16);
398 udbg_init_uart(comport, default_speed);
399
400 DBG("Hello World !\n");
401 }
402
403
404 iommu_init_early_pSeries();
405
406 pSeries_discover_pic();
407
408 DBG(" <- pSeries_init_early()\n");
409}
410
411
412static int pSeries_check_legacy_ioport(unsigned int baseport)
413{
414 struct device_node *np;
415
416#define I8042_DATA_REG 0x60
417#define FDC_BASE 0x3f0
418
419
420 switch(baseport) {
421 case I8042_DATA_REG:
422 np = of_find_node_by_type(NULL, "8042");
423 if (np == NULL)
424 return -ENODEV;
425 of_node_put(np);
426 break;
427 case FDC_BASE:
428 np = of_find_node_by_type(NULL, "fdc");
429 if (np == NULL)
430 return -ENODEV;
431 of_node_put(np);
432 break;
433 }
434 return 0;
435}
436
437/*
438 * Called very early, MMU is off, device-tree isn't unflattened
439 */
440extern struct machdep_calls pSeries_md;
441
442static int __init pSeries_probe(int platform)
443{
444 if (platform != PLATFORM_PSERIES &&
445 platform != PLATFORM_PSERIES_LPAR)
446 return 0;
447
448 /* if we have some ppc_md fixups for LPAR to do, do
449 * it here ...
450 */
451
452 return 1;
453}
454
455DECLARE_PER_CPU(unsigned long, smt_snooze_delay);
456
457static inline void dedicated_idle_sleep(unsigned int cpu)
458{
459 struct paca_struct *ppaca = &paca[cpu ^ 1];
460
461 /* Only sleep if the other thread is not idle */
462 if (!(ppaca->lppaca.idle)) {
463 local_irq_disable();
464
465 /*
466 * We are about to sleep the thread and so won't be polling any
467 * more.
468 */
469 clear_thread_flag(TIF_POLLING_NRFLAG);
470
471 /*
472 * SMT dynamic mode. Cede will result in this thread going
473 * dormant, if the partner thread is still doing work. Thread
474 * wakes up if partner goes idle, an interrupt is presented, or
475 * a prod occurs. Returning from the cede enables external
476 * interrupts.
477 */
478 if (!need_resched())
479 cede_processor();
480 else
481 local_irq_enable();
482 } else {
483 /*
484 * Give the HV an opportunity at the processor, since we are
485 * not doing any work.
486 */
487 poll_pending();
488 }
489}
490
491static int pseries_dedicated_idle(void)
492{
493 long oldval;
494 struct paca_struct *lpaca = get_paca();
495 unsigned int cpu = smp_processor_id();
496 unsigned long start_snooze;
497 unsigned long *smt_snooze_delay = &__get_cpu_var(smt_snooze_delay);
498
499 while (1) {
500 /*
501 * Indicate to the HV that we are idle. Now would be
502 * a good time to find other work to dispatch.
503 */
504 lpaca->lppaca.idle = 1;
505
506 oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
507 if (!oldval) {
508 set_thread_flag(TIF_POLLING_NRFLAG);
509
510 start_snooze = __get_tb() +
511 *smt_snooze_delay * tb_ticks_per_usec;
512
513 while (!need_resched() && !cpu_is_offline(cpu)) {
514 ppc64_runlatch_off();
515
516 /*
517 * Go into low thread priority and possibly
518 * low power mode.
519 */
520 HMT_low();
521 HMT_very_low();
522
523 if (*smt_snooze_delay != 0 &&
524 __get_tb() > start_snooze) {
525 HMT_medium();
526 dedicated_idle_sleep(cpu);
527 }
528
529 }
530
531 HMT_medium();
532 clear_thread_flag(TIF_POLLING_NRFLAG);
533 } else {
534 set_need_resched();
535 }
536
537 lpaca->lppaca.idle = 0;
538 ppc64_runlatch_on();
539
540 schedule();
541
542 if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING)
543 cpu_die();
544 }
545}
546
547static int pseries_shared_idle(void)
548{
549 struct paca_struct *lpaca = get_paca();
550 unsigned int cpu = smp_processor_id();
551
552 while (1) {
553 /*
554 * Indicate to the HV that we are idle. Now would be
555 * a good time to find other work to dispatch.
556 */
557 lpaca->lppaca.idle = 1;
558
559 while (!need_resched() && !cpu_is_offline(cpu)) {
560 local_irq_disable();
561 ppc64_runlatch_off();
562
563 /*
564 * Yield the processor to the hypervisor. We return if
565 * an external interrupt occurs (which are driven prior
566 * to returning here) or if a prod occurs from another
567 * processor. When returning here, external interrupts
568 * are enabled.
569 *
570 * Check need_resched() again with interrupts disabled
571 * to avoid a race.
572 */
573 if (!need_resched())
574 cede_processor();
575 else
576 local_irq_enable();
577
578 HMT_medium();
579 }
580
581 lpaca->lppaca.idle = 0;
582 ppc64_runlatch_on();
583
584 schedule();
585
586 if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING)
587 cpu_die();
588 }
589
590 return 0;
591}
592
593static int pSeries_pci_probe_mode(struct pci_bus *bus)
594{
595 if (systemcfg->platform & PLATFORM_LPAR)
596 return PCI_PROBE_DEVTREE;
597 return PCI_PROBE_NORMAL;
598}
599
600struct machdep_calls __initdata pSeries_md = {
601 .probe = pSeries_probe,
602 .setup_arch = pSeries_setup_arch,
603 .init_early = pSeries_init_early,
604 .get_cpuinfo = pSeries_get_cpuinfo,
605 .log_error = pSeries_log_error,
606 .pcibios_fixup = pSeries_final_fixup,
607 .pci_probe_mode = pSeries_pci_probe_mode,
608 .irq_bus_setup = pSeries_irq_bus_setup,
609 .restart = rtas_restart,
610 .power_off = rtas_power_off,
611 .halt = rtas_halt,
612 .panic = rtas_os_term,
613 .cpu_die = pSeries_mach_cpu_die,
614 .get_boot_time = rtas_get_boot_time,
615 .get_rtc_time = rtas_get_rtc_time,
616 .set_rtc_time = rtas_set_rtc_time,
617 .calibrate_decr = generic_calibrate_decr,
618 .progress = rtas_progress,
619 .check_legacy_ioport = pSeries_check_legacy_ioport,
620 .system_reset_exception = pSeries_system_reset_exception,
621 .machine_check_exception = pSeries_machine_check_exception,
622};
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
new file mode 100644
index 000000000000..ae1bd270f308
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -0,0 +1,471 @@
1/*
2 * SMP support for pSeries machines.
3 *
4 * Dave Engebretsen, Peter Bergner, and
5 * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
6 *
7 * Plus various changes from other IBM teams...
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version
12 * 2 of the License, or (at your option) any later version.
13 */
14
15#undef DEBUG
16
17#include <linux/config.h>
18#include <linux/kernel.h>
19#include <linux/module.h>
20#include <linux/sched.h>
21#include <linux/smp.h>
22#include <linux/interrupt.h>
23#include <linux/delay.h>
24#include <linux/init.h>
25#include <linux/spinlock.h>
26#include <linux/cache.h>
27#include <linux/err.h>
28#include <linux/sysdev.h>
29#include <linux/cpu.h>
30
31#include <asm/ptrace.h>
32#include <asm/atomic.h>
33#include <asm/irq.h>
34#include <asm/page.h>
35#include <asm/pgtable.h>
36#include <asm/io.h>
37#include <asm/prom.h>
38#include <asm/smp.h>
39#include <asm/paca.h>
40#include <asm/time.h>
41#include <asm/machdep.h>
42#include <asm/xics.h>
43#include <asm/cputable.h>
44#include <asm/firmware.h>
45#include <asm/system.h>
46#include <asm/rtas.h>
47#include <asm/plpar_wrappers.h>
48#include <asm/pSeries_reconfig.h>
49#include <asm/mpic.h>
50
51#ifdef DEBUG
52#define DBG(fmt...) udbg_printf(fmt)
53#else
54#define DBG(fmt...)
55#endif
56
57/*
58 * The primary thread of each non-boot processor is recorded here before
59 * smp init.
60 */
61static cpumask_t of_spin_map;
62
63extern void pSeries_secondary_smp_init(unsigned long);
64
65#ifdef CONFIG_HOTPLUG_CPU
66
67/* Get state of physical CPU.
68 * Return codes:
69 * 0 - The processor is in the RTAS stopped state
70 * 1 - stop-self is in progress
71 * 2 - The processor is not in the RTAS stopped state
72 * -1 - Hardware error
73 * -2 - Hardware busy, try again later.
74 */
75static int query_cpu_stopped(unsigned int pcpu)
76{
77 int cpu_status;
78 int status, qcss_tok;
79
80 qcss_tok = rtas_token("query-cpu-stopped-state");
81 if (qcss_tok == RTAS_UNKNOWN_SERVICE)
82 return -1;
83 status = rtas_call(qcss_tok, 1, 2, &cpu_status, pcpu);
84 if (status != 0) {
85 printk(KERN_ERR
86 "RTAS query-cpu-stopped-state failed: %i\n", status);
87 return status;
88 }
89
90 return cpu_status;
91}
92
93int pSeries_cpu_disable(void)
94{
95 int cpu = smp_processor_id();
96
97 cpu_clear(cpu, cpu_online_map);
98 systemcfg->processorCount--;
99
100 /* fix boot_cpuid here */
101 if (cpu == boot_cpuid)
102 boot_cpuid = any_online_cpu(cpu_online_map);
103
104 /* FIXME: abstract this to not be platform specific later on */
105 xics_migrate_irqs_away();
106 return 0;
107}
108
109void pSeries_cpu_die(unsigned int cpu)
110{
111 int tries;
112 int cpu_status;
113 unsigned int pcpu = get_hard_smp_processor_id(cpu);
114
115 for (tries = 0; tries < 25; tries++) {
116 cpu_status = query_cpu_stopped(pcpu);
117 if (cpu_status == 0 || cpu_status == -1)
118 break;
119 msleep(200);
120 }
121 if (cpu_status != 0) {
122 printk("Querying DEAD? cpu %i (%i) shows %i\n",
123 cpu, pcpu, cpu_status);
124 }
125
126 /* Isolation and deallocation are definitely done by
127 * drslot_chrp_cpu. If they were not, they would be
128 * done here. Change isolate state to Isolate and
129 * change allocation-state to Unusable.
130 */
131 paca[cpu].cpu_start = 0;
132}
133
134/*
135 * Update cpu_present_map and paca(s) for a new cpu node. The wrinkle
136 * here is that a cpu device node may represent up to two logical cpus
137 * in the SMT case. We must honor the assumption in other code that
138 * the logical ids for sibling SMT threads x and y are adjacent, such
139 * that x^1 == y and y^1 == x.
140 */
141static int pSeries_add_processor(struct device_node *np)
142{
143 unsigned int cpu;
144 cpumask_t candidate_map, tmp = CPU_MASK_NONE;
145 int err = -ENOSPC, len, nthreads, i;
146 u32 *intserv;
147
148 intserv = (u32 *)get_property(np, "ibm,ppc-interrupt-server#s", &len);
149 if (!intserv)
150 return 0;
151
152 nthreads = len / sizeof(u32);
153 for (i = 0; i < nthreads; i++)
154 cpu_set(i, tmp);
155
156 lock_cpu_hotplug();
157
158 BUG_ON(!cpus_subset(cpu_present_map, cpu_possible_map));
159
160 /* Get a bitmap of unoccupied slots. */
161 cpus_xor(candidate_map, cpu_possible_map, cpu_present_map);
162 if (cpus_empty(candidate_map)) {
163 /* If we get here, it most likely means that NR_CPUS is
164 * less than the partition's max processors setting.
165 */
166 printk(KERN_ERR "Cannot add cpu %s; this system configuration"
167 " supports %d logical cpus.\n", np->full_name,
168 cpus_weight(cpu_possible_map));
169 goto out_unlock;
170 }
171
172 while (!cpus_empty(tmp))
173 if (cpus_subset(tmp, candidate_map))
174 /* Found a range where we can insert the new cpu(s) */
175 break;
176 else
177 cpus_shift_left(tmp, tmp, nthreads);
178
179 if (cpus_empty(tmp)) {
180 printk(KERN_ERR "Unable to find space in cpu_present_map for"
181 " processor %s with %d thread(s)\n", np->name,
182 nthreads);
183 goto out_unlock;
184 }
185
186 for_each_cpu_mask(cpu, tmp) {
187 BUG_ON(cpu_isset(cpu, cpu_present_map));
188 cpu_set(cpu, cpu_present_map);
189 set_hard_smp_processor_id(cpu, *intserv++);
190 }
191 err = 0;
192out_unlock:
193 unlock_cpu_hotplug();
194 return err;
195}
196
197/*
198 * Update the present map for a cpu node which is going away, and set
199 * the hard id in the paca(s) to -1 to be consistent with boot time
200 * convention for non-present cpus.
201 */
202static void pSeries_remove_processor(struct device_node *np)
203{
204 unsigned int cpu;
205 int len, nthreads, i;
206 u32 *intserv;
207
208 intserv = (u32 *)get_property(np, "ibm,ppc-interrupt-server#s", &len);
209 if (!intserv)
210 return;
211
212 nthreads = len / sizeof(u32);
213
214 lock_cpu_hotplug();
215 for (i = 0; i < nthreads; i++) {
216 for_each_present_cpu(cpu) {
217 if (get_hard_smp_processor_id(cpu) != intserv[i])
218 continue;
219 BUG_ON(cpu_online(cpu));
220 cpu_clear(cpu, cpu_present_map);
221 set_hard_smp_processor_id(cpu, -1);
222 break;
223 }
224 if (cpu == NR_CPUS)
225 printk(KERN_WARNING "Could not find cpu to remove "
226 "with physical id 0x%x\n", intserv[i]);
227 }
228 unlock_cpu_hotplug();
229}
230
231static int pSeries_smp_notifier(struct notifier_block *nb, unsigned long action, void *node)
232{
233 int err = NOTIFY_OK;
234
235 switch (action) {
236 case PSERIES_RECONFIG_ADD:
237 if (pSeries_add_processor(node))
238 err = NOTIFY_BAD;
239 break;
240 case PSERIES_RECONFIG_REMOVE:
241 pSeries_remove_processor(node);
242 break;
243 default:
244 err = NOTIFY_DONE;
245 break;
246 }
247 return err;
248}
249
250static struct notifier_block pSeries_smp_nb = {
251 .notifier_call = pSeries_smp_notifier,
252};
253
254#endif /* CONFIG_HOTPLUG_CPU */
255
256/**
257 * smp_startup_cpu() - start the given cpu
258 *
259 * At boot time, there is nothing to do for primary threads which were
260 * started from Open Firmware. For anything else, call RTAS with the
261 * appropriate start location.
262 *
263 * Returns:
264 * 0 - failure
265 * 1 - success
266 */
267static inline int __devinit smp_startup_cpu(unsigned int lcpu)
268{
269 int status;
270 unsigned long start_here = __pa((u32)*((unsigned long *)
271 pSeries_secondary_smp_init));
272 unsigned int pcpu;
273 int start_cpu;
274
275 if (cpu_isset(lcpu, of_spin_map))
276 /* Already started by OF and sitting in spin loop */
277 return 1;
278
279 pcpu = get_hard_smp_processor_id(lcpu);
280
281 /* Fixup atomic count: it exited inside IRQ handler. */
282 paca[lcpu].__current->thread_info->preempt_count = 0;
283
284 /*
285 * If the RTAS start-cpu token does not exist then presume the
286 * cpu is already spinning.
287 */
288 start_cpu = rtas_token("start-cpu");
289 if (start_cpu == RTAS_UNKNOWN_SERVICE)
290 return 1;
291
292 status = rtas_call(start_cpu, 3, 1, NULL, pcpu, start_here, lcpu);
293 if (status != 0) {
294 printk(KERN_ERR "start-cpu failed: %i\n", status);
295 return 0;
296 }
297
298 return 1;
299}
300
301#ifdef CONFIG_XICS
302static inline void smp_xics_do_message(int cpu, int msg)
303{
304 set_bit(msg, &xics_ipi_message[cpu].value);
305 mb();
306 xics_cause_IPI(cpu);
307}
308
309static void smp_xics_message_pass(int target, int msg)
310{
311 unsigned int i;
312
313 if (target < NR_CPUS) {
314 smp_xics_do_message(target, msg);
315 } else {
316 for_each_online_cpu(i) {
317 if (target == MSG_ALL_BUT_SELF
318 && i == smp_processor_id())
319 continue;
320 smp_xics_do_message(i, msg);
321 }
322 }
323}
324
325static int __init smp_xics_probe(void)
326{
327 xics_request_IPIs();
328
329 return cpus_weight(cpu_possible_map);
330}
331
332static void __devinit smp_xics_setup_cpu(int cpu)
333{
334 if (cpu != boot_cpuid)
335 xics_setup_cpu();
336
337 if (firmware_has_feature(FW_FEATURE_SPLPAR))
338 vpa_init(cpu);
339
340 cpu_clear(cpu, of_spin_map);
341
342}
343#endif /* CONFIG_XICS */
344
345static DEFINE_SPINLOCK(timebase_lock);
346static unsigned long timebase = 0;
347
348static void __devinit pSeries_give_timebase(void)
349{
350 spin_lock(&timebase_lock);
351 rtas_call(rtas_token("freeze-time-base"), 0, 1, NULL);
352 timebase = get_tb();
353 spin_unlock(&timebase_lock);
354
355 while (timebase)
356 barrier();
357 rtas_call(rtas_token("thaw-time-base"), 0, 1, NULL);
358}
359
360static void __devinit pSeries_take_timebase(void)
361{
362 while (!timebase)
363 barrier();
364 spin_lock(&timebase_lock);
365 set_tb(timebase >> 32, timebase & 0xffffffff);
366 timebase = 0;
367 spin_unlock(&timebase_lock);
368}
369
370static void __devinit smp_pSeries_kick_cpu(int nr)
371{
372 BUG_ON(nr < 0 || nr >= NR_CPUS);
373
374 if (!smp_startup_cpu(nr))
375 return;
376
377 /*
378 * The processor is currently spinning, waiting for the
379 * cpu_start field to become non-zero. After we set cpu_start,
380 * the processor will continue on to secondary_start.
381 */
382 paca[nr].cpu_start = 1;
383}
384
385static int smp_pSeries_cpu_bootable(unsigned int nr)
386{
387 /* Special case - we inhibit secondary thread startup
388 * during boot if the user requests it. Odd-numbered
389 * cpus are assumed to be secondary threads.
390 */
391 if (system_state < SYSTEM_RUNNING &&
392 cpu_has_feature(CPU_FTR_SMT) &&
393 !smt_enabled_at_boot && nr % 2 != 0)
394 return 0;
395
396 return 1;
397}
398#ifdef CONFIG_MPIC
399static struct smp_ops_t pSeries_mpic_smp_ops = {
400 .message_pass = smp_mpic_message_pass,
401 .probe = smp_mpic_probe,
402 .kick_cpu = smp_pSeries_kick_cpu,
403 .setup_cpu = smp_mpic_setup_cpu,
404};
405#endif
406#ifdef CONFIG_XICS
407static struct smp_ops_t pSeries_xics_smp_ops = {
408 .message_pass = smp_xics_message_pass,
409 .probe = smp_xics_probe,
410 .kick_cpu = smp_pSeries_kick_cpu,
411 .setup_cpu = smp_xics_setup_cpu,
412 .cpu_bootable = smp_pSeries_cpu_bootable,
413};
414#endif
415
416/* This is called very early */
417void __init smp_init_pSeries(void)
418{
419 int i;
420
421 DBG(" -> smp_init_pSeries()\n");
422
423 switch (ppc64_interrupt_controller) {
424#ifdef CONFIG_MPIC
425 case IC_OPEN_PIC:
426 smp_ops = &pSeries_mpic_smp_ops;
427 break;
428#endif
429#ifdef CONFIG_XICS
430 case IC_PPC_XIC:
431 smp_ops = &pSeries_xics_smp_ops;
432 break;
433#endif
434 default:
435 panic("Invalid interrupt controller");
436 }
437
438#ifdef CONFIG_HOTPLUG_CPU
439 smp_ops->cpu_disable = pSeries_cpu_disable;
440 smp_ops->cpu_die = pSeries_cpu_die;
441
442 /* Processors can be added/removed only on LPAR */
443 if (systemcfg->platform == PLATFORM_PSERIES_LPAR)
444 pSeries_reconfig_notifier_register(&pSeries_smp_nb);
445#endif
446
447 /* Mark threads which are still spinning in hold loops. */
448 if (cpu_has_feature(CPU_FTR_SMT)) {
449 for_each_present_cpu(i) {
450 if (i % 2 == 0)
451 /*
452 * Even-numbered logical cpus correspond to
453 * primary threads.
454 */
455 cpu_set(i, of_spin_map);
456 }
457 } else {
458 of_spin_map = cpu_present_map;
459 }
460
461 cpu_clear(boot_cpuid, of_spin_map);
462
463 /* Non-lpar has additional take/give timebase */
464 if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) {
465 smp_ops->give_timebase = pSeries_give_timebase;
466 smp_ops->take_timebase = pSeries_take_timebase;
467 }
468
469 DBG(" <- smp_init_pSeries()\n");
470}
471
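Both pSeries_add_processor() above and dedicated_idle_sleep() in setup.c rely on the convention that sibling SMT threads get adjacent logical ids, so a thread's partner is cpu ^ 1 and even ids are primary threads. A trivial stand-alone illustration of that mapping:

#include <stdio.h>

int main(void)
{
	unsigned int cpu;

	for (cpu = 0; cpu < 4; cpu++)
		printf("cpu %u: sibling %u, primary of pair %u\n",
		       cpu, cpu ^ 1, cpu & ~1U);
	return 0;
}
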
diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c
new file mode 100644
index 000000000000..866379b80c09
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/vio.c
@@ -0,0 +1,274 @@
1/*
2 * IBM PowerPC pSeries Virtual I/O Infrastructure Support.
3 *
4 * Copyright (c) 2003-2005 IBM Corp.
5 * Dave Engebretsen engebret@us.ibm.com
6 * Santiago Leon santil@us.ibm.com
7 * Hollis Blanchard <hollisb@us.ibm.com>
8 * Stephen Rothwell
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16#include <linux/init.h>
17#include <linux/module.h>
18#include <linux/mm.h>
19#include <linux/kobject.h>
20#include <asm/iommu.h>
21#include <asm/dma.h>
22#include <asm/prom.h>
23#include <asm/vio.h>
24#include <asm/hvcall.h>
25#include <asm/tce.h>
26
27extern struct subsystem devices_subsys; /* needed for vio_find_name() */
28
29static void probe_bus_pseries(void)
30{
31 struct device_node *node_vroot, *of_node;
32
33 node_vroot = find_devices("vdevice");
34 if ((node_vroot == NULL) || (node_vroot->child == NULL))
35 /* this machine doesn't do virtual IO, and that's ok */
36 return;
37
38 /*
39 * Create struct vio_devices for each virtual device in the device tree.
40 * Drivers will associate with them later.
41 */
42 for (of_node = node_vroot->child; of_node != NULL;
43 of_node = of_node->sibling) {
44 printk(KERN_DEBUG "%s: processing %p\n", __FUNCTION__, of_node);
45 vio_register_device_node(of_node);
46 }
47}
48
49/**
50 * vio_match_device_pseries: - Tell if a pSeries VIO device matches a
51 * vio_device_id
52 */
53static int vio_match_device_pseries(const struct vio_device_id *id,
54 const struct vio_dev *dev)
55{
56 return (strncmp(dev->type, id->type, strlen(id->type)) == 0) &&
57 device_is_compatible(dev->dev.platform_data, id->compat);
58}
59
60static void vio_release_device_pseries(struct device *dev)
61{
62 /* XXX free TCE table */
63 of_node_put(dev->platform_data);
64}
65
66static ssize_t viodev_show_devspec(struct device *dev,
67 struct device_attribute *attr, char *buf)
68{
69 struct device_node *of_node = dev->platform_data;
70
71 return sprintf(buf, "%s\n", of_node->full_name);
72}
73DEVICE_ATTR(devspec, S_IRUSR | S_IRGRP | S_IROTH, viodev_show_devspec, NULL);
74
75static void vio_unregister_device_pseries(struct vio_dev *viodev)
76{
77 device_remove_file(&viodev->dev, &dev_attr_devspec);
78}
79
80static struct vio_bus_ops vio_bus_ops_pseries = {
81 .match = vio_match_device_pseries,
82 .unregister_device = vio_unregister_device_pseries,
83 .release_device = vio_release_device_pseries,
84};
85
86/**
87 * vio_bus_init_pseries: - Initialize the pSeries virtual IO bus
88 */
89static int __init vio_bus_init_pseries(void)
90{
91 int err;
92
93 err = vio_bus_init(&vio_bus_ops_pseries);
94 if (err == 0)
95 probe_bus_pseries();
96 return err;
97}
98
99__initcall(vio_bus_init_pseries);
100
101/**
102 * vio_build_iommu_table: - gets the dma information from OF and
103 * builds the TCE tree.
104 * @dev: the virtual device.
105 *
106 * Returns a pointer to the built tce tree, or NULL if it can't
107 * find the property.
108 */
109static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev)
110{
111 unsigned int *dma_window;
112 struct iommu_table *newTceTable;
113 unsigned long offset;
114 int dma_window_property_size;
115
116 dma_window = (unsigned int *) get_property(dev->dev.platform_data, "ibm,my-dma-window", &dma_window_property_size);
117 if (!dma_window) {
118 return NULL;
119 }
120
121 newTceTable = (struct iommu_table *) kmalloc(sizeof(struct iommu_table), GFP_KERNEL);
122
123 /* There should be some code to extract the phys-encoded offset
124 using prom_n_addr_cells(). However, according to a comment
125 on earlier versions, it's always zero, so we don't bother */
126 offset = dma_window[1] >> PAGE_SHIFT;
127
128 /* TCE table size - measured in tce entries */
129 newTceTable->it_size = dma_window[4] >> PAGE_SHIFT;
130 /* offset for VIO should always be 0 */
131 newTceTable->it_offset = offset;
132 newTceTable->it_busno = 0;
133 newTceTable->it_index = (unsigned long)dma_window[0];
134 newTceTable->it_type = TCE_VB;
135
136 return iommu_init_table(newTceTable);
137}
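
The cell usage above is narrow: only cell 0 (the LIOBN, stored in it_index), cell 1 (the window offset, noted as always zero for VIO) and cell 4 (the window size in bytes) are consumed; the full encoding of "ibm,my-dma-window" depends on the parent node's #address-cells/#size-cells. A small sketch mirroring just that usage, with hypothetical cell values and with the NULL check on the allocation that the code above omits:

#include <stdio.h>
#include <stdlib.h>

#define PAGE_SHIFT 12	/* 4K pages */

struct fake_iommu_table { unsigned long it_index, it_offset, it_size; };

int main(void)
{
	/* Hypothetical property cells. */
	unsigned int dma_window[5] = { 0x10000000, 0, 0, 0, 0x02000000 };
	struct fake_iommu_table *tbl = malloc(sizeof(*tbl));

	if (!tbl)		/* vio_build_iommu_table() skips this check */
		return 1;
	tbl->it_index  = dma_window[0];			/* LIOBN */
	tbl->it_offset = dma_window[1] >> PAGE_SHIFT;	/* always 0 for VIO */
	tbl->it_size   = dma_window[4] >> PAGE_SHIFT;	/* TCE entries */
	printf("liobn 0x%lx, offset %lu, %lu entries\n",
	       tbl->it_index, tbl->it_offset, tbl->it_size);
	free(tbl);
	return 0;
}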
138
139/**
140 * vio_register_device_node: - Register a new vio device.
141 * @of_node: The OF node for this device.
142 *
143 * Creates and initializes a vio_dev structure from the data in
144 * of_node (dev.platform_data) and adds it to the list of virtual devices.
145 * Returns a pointer to the created vio_dev, or NULL if the node has
146 * NULL device_type or compatible fields.
147 */
148struct vio_dev * __devinit vio_register_device_node(struct device_node *of_node)
149{
150 struct vio_dev *viodev;
151 unsigned int *unit_address;
152 unsigned int *irq_p;
153
154 /* we need the 'device_type' property, in order to match with drivers */
155 if (of_node->type == NULL) {
156 printk(KERN_WARNING
157 "%s: node %s missing 'device_type'\n", __FUNCTION__,
158 of_node->name ? of_node->name : "<unknown>");
159 return NULL;
160 }
161
162 unit_address = (unsigned int *)get_property(of_node, "reg", NULL);
163 if (!unit_address) {
164 printk(KERN_WARNING "%s: node %s missing 'reg'\n", __FUNCTION__,
165 of_node->name ? of_node->name : "<unknown>");
166 return NULL;
167 }
168
169 /* allocate a vio_dev for this node */
170 viodev = kmalloc(sizeof(struct vio_dev), GFP_KERNEL);
171 if (!viodev) {
172 return NULL;
173 }
174 memset(viodev, 0, sizeof(struct vio_dev));
175
176 viodev->dev.platform_data = of_node_get(of_node);
177
178 viodev->irq = NO_IRQ;
179 irq_p = (unsigned int *)get_property(of_node, "interrupts", NULL);
180 if (irq_p) {
181 int virq = virt_irq_create_mapping(*irq_p);
182 if (virq == NO_IRQ) {
183 printk(KERN_ERR "Unable to allocate interrupt "
184 "number for %s\n", of_node->full_name);
185 } else
186 viodev->irq = irq_offset_up(virq);
187 }
188
189 snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%x", *unit_address);
190 viodev->name = of_node->name;
191 viodev->type = of_node->type;
192 viodev->unit_address = *unit_address;
193 viodev->iommu_table = vio_build_iommu_table(viodev);
194
195 /* register with generic device framework */
196 if (vio_register_device(viodev) == NULL) {
197 /* XXX free TCE table */
198 kfree(viodev);
199 return NULL;
200 }
201 device_create_file(&viodev->dev, &dev_attr_devspec);
202
203 return viodev;
204}
205EXPORT_SYMBOL(vio_register_device_node);
206
207/**
208 * vio_get_attribute: - get attribute for virtual device
209 * @vdev: The vio device to get the property from.
210 * @which: The property/attribute to be extracted.
211 * @length: Pointer to length of returned data size (unused if NULL).
212 *
213 * Calls prom.c's get_property() to return the value of the
214 * attribute specified by the preprocessor constant @which.
215 */
216const void * vio_get_attribute(struct vio_dev *vdev, void* which, int* length)
217{
218 return get_property(vdev->dev.platform_data, (char*)which, length);
219}
220EXPORT_SYMBOL(vio_get_attribute);
221
222/* vio_find_name() - internal because only vio.c knows how we formatted the
223 * kobject name
224 * XXX once vio_bus_type.devices is actually used as a kset in
225 * drivers/base/bus.c, this function should be removed in favor of
226 * "device_find(kobj_name, &vio_bus_type)"
227 */
228static struct vio_dev *vio_find_name(const char *kobj_name)
229{
230 struct kobject *found;
231
232 found = kset_find_obj(&devices_subsys.kset, kobj_name);
233 if (!found)
234 return NULL;
235
236 return to_vio_dev(container_of(found, struct device, kobj));
237}
238
239/**
240 * vio_find_node - find an already-registered vio_dev
241 * @vnode: device_node of the virtual device we're looking for
242 */
243struct vio_dev *vio_find_node(struct device_node *vnode)
244{
245 uint32_t *unit_address;
246 char kobj_name[BUS_ID_SIZE];
247
248 /* construct the kobject name from the device node */
249 unit_address = (uint32_t *)get_property(vnode, "reg", NULL);
250 if (!unit_address)
251 return NULL;
252 snprintf(kobj_name, BUS_ID_SIZE, "%x", *unit_address);
253
254 return vio_find_name(kobj_name);
255}
256EXPORT_SYMBOL(vio_find_node);
257
258int vio_enable_interrupts(struct vio_dev *dev)
259{
260 int rc = h_vio_signal(dev->unit_address, VIO_IRQ_ENABLE);
261 if (rc != H_Success)
262 printk(KERN_ERR "vio: Error 0x%x enabling interrupts\n", rc);
263 return rc;
264}
265EXPORT_SYMBOL(vio_enable_interrupts);
266
267int vio_disable_interrupts(struct vio_dev *dev)
268{
269 int rc = h_vio_signal(dev->unit_address, VIO_IRQ_DISABLE);
270 if (rc != H_Success)
271 printk(KERN_ERR "vio: Error 0x%x disabling interrupts\n", rc);
272 return rc;
273}
274EXPORT_SYMBOL(vio_disable_interrupts);