Diffstat (limited to 'arch/ppc64')

 arch/ppc64/Kconfig                |  38
 arch/ppc64/kernel/Makefile        |   1
 arch/ppc64/kernel/head.S          |   6
 arch/ppc64/kernel/lparcfg.c       |   1
 arch/ppc64/kernel/machine_kexec.c | 302
 arch/ppc64/kernel/misc.S          | 175
 arch/ppc64/kernel/mpic.c          |  29
 arch/ppc64/kernel/mpic.h          |   3
 arch/ppc64/kernel/pSeries_setup.c |   4
 arch/ppc64/kernel/pSeries_smp.c   |   5
 arch/ppc64/kernel/setup.c         |  22
 arch/ppc64/kernel/xics.c          |  25
 arch/ppc64/mm/hash_native.c       |  47

 13 files changed, 619 insertions(+), 39 deletions(-)
diff --git a/arch/ppc64/Kconfig b/arch/ppc64/Kconfig
index cb27068bfcd4..f804f25232ac 100644
--- a/arch/ppc64/Kconfig
+++ b/arch/ppc64/Kconfig
@@ -142,6 +142,23 @@ config PPC_SPLPAR
 	  processors, that is, which share physical processors between
 	  two or more partitions.
 
+config KEXEC
+	bool "kexec system call (EXPERIMENTAL)"
+	depends on PPC_MULTIPLATFORM && EXPERIMENTAL
+	help
+	  kexec is a system call that implements the ability to shut down
+	  your current kernel and start another one.  It is like a reboot
+	  but is independent of the system firmware.  And like a reboot
+	  you can start any kernel with it, not just Linux.
+
+	  The name comes from the similarity to the exec system call.
+
+	  It is an ongoing process to be certain the hardware in a machine
+	  is properly shut down, so do not be surprised if this code does
+	  not initially work for you.  It may help to enable device
+	  hotplugging support.  As of this writing the exact hardware
+	  interface is strongly in flux, so no good recommendation can be
+	  made.
+
 config IBMVIO
 	depends on PPC_PSERIES || PPC_ISERIES
 	bool
@@ -270,26 +287,7 @@ config SCHED_SMT
 	  when dealing with POWER5 cpus at a cost of slightly increased
 	  overhead in some places. If unsure say N here.
 
-config PREEMPT
-	bool "Preemptible Kernel"
-	help
-	  This option reduces the latency of the kernel when reacting to
-	  real-time or interactive events by allowing a low priority process to
-	  be preempted even if it is in kernel mode executing a system call.
-
-	  Say Y here if you are building a kernel for a desktop, embedded
-	  or real-time system. Say N if you are unsure.
-
-config PREEMPT_BKL
-	bool "Preempt The Big Kernel Lock"
-	depends on PREEMPT
-	default y
-	help
-	  This option reduces the latency of the kernel by making the
-	  big kernel lock preemptible.
-
-	  Say Y here if you are building a kernel for a desktop system.
-	  Say N if you are unsure.
+source "kernel/Kconfig.preempt"
 
 config EEH
 	bool "PCI Extended Error Handling (EEH)" if EMBEDDED
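The system call this option describes is wired up at slot 268 in the misc.S hunk below. For orientation, a minimal user-space sketch of driving it is shown here; the buffer, destination address, and entry point are invented for illustration (real loaders such as kexec-tools derive segments from the ELF image), and only the syscall signature, struct kexec_segment, and KEXEC_ARCH_PPC64 come from the kernel's exported ABI.

/* Hypothetical caller of sys_kexec_load; all values are illustrative.
 * Requires CAP_SYS_BOOT; expect -EPERM when run unprivileged.
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/kexec.h>

int main(void)
{
	static char buf[4096];			/* stand-in for real image bytes */
	struct kexec_segment seg = {
		.buf   = buf,			/* source in user memory */
		.bufsz = sizeof(buf),
		.mem   = (void *)0x800000,	/* assumed physical destination */
		.memsz = sizeof(buf),
	};

	/* entry point equals the destination here, purely for illustration */
	long rc = syscall(SYS_kexec_load, 0x800000UL, 1UL, &seg,
			  KEXEC_ARCH_PPC64);
	if (rc != 0)
		perror("kexec_load");
	return rc ? 1 : 0;
}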
diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile
index dffbfb7ac8d5..d9b2660ef221 100644
--- a/arch/ppc64/kernel/Makefile
+++ b/arch/ppc64/kernel/Makefile
@@ -36,6 +36,7 @@ obj-$(CONFIG_PPC_PSERIES) += pSeries_pci.o pSeries_lpar.o pSeries_hvCall.o \
 obj-$(CONFIG_PPC_BPA) += bpa_setup.o bpa_iommu.o bpa_nvram.o \
 			 bpa_iic.o spider-pic.o
 
+obj-$(CONFIG_KEXEC) += machine_kexec.o
 obj-$(CONFIG_EEH) += eeh.o
 obj-$(CONFIG_PROC_FS) += proc_ppc64.o
 obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o
diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S
index 02c8f4e3e4bc..675c2708588f 100644
--- a/arch/ppc64/kernel/head.S
+++ b/arch/ppc64/kernel/head.S
@@ -1194,7 +1194,7 @@ _GLOBAL(pSeries_secondary_smp_init)
 	bl	.__restore_cpu_setup
 
 	/* Set up a paca value for this processor.  Since we have the
-	 * physical cpu id in r3, we need to search the pacas to find
+	 * physical cpu id in r24, we need to search the pacas to find
 	 * which logical id maps to our physical one.
 	 */
 	LOADADDR(r13, paca)		/* Get base vaddr of paca array */
@@ -1207,8 +1207,8 @@ _GLOBAL(pSeries_secondary_smp_init)
 	cmpwi	r5,NR_CPUS
 	blt	1b
 
-99:	HMT_LOW			/* Couldn't find our CPU id */
-	b	99b
+	mr	r3,r24			/* not found, copy phys to r3 */
+	b	.kexec_wait		/* next kernel might do better */
 
 2:	mtspr	SPRG3,r13	/* Save vaddr of paca in SPRG3 */
 	/* From now on, r24 is expected to be logical cpuid */
diff --git a/arch/ppc64/kernel/lparcfg.c b/arch/ppc64/kernel/lparcfg.c
index 387923fcf9b0..02e96627fa66 100644
--- a/arch/ppc64/kernel/lparcfg.c
+++ b/arch/ppc64/kernel/lparcfg.c
@@ -34,6 +34,7 @@
 #include <asm/system.h>
 #include <asm/time.h>
 #include <asm/iSeries/ItExtVpdPanel.h>
+#include <asm/prom.h>
 
 #define MODULE_VERS "1.6"
 #define MODULE_NAME "lparcfg"
diff --git a/arch/ppc64/kernel/machine_kexec.c b/arch/ppc64/kernel/machine_kexec.c
new file mode 100644
index 000000000000..fdb2fc649d72
--- /dev/null
+++ b/arch/ppc64/kernel/machine_kexec.c
@@ -0,0 +1,302 @@
+/*
+ * machine_kexec.c - handle transition of Linux booting another kernel
+ *
+ * Copyright (C) 2004-2005, IBM Corp.
+ *
+ * Created by: Milton D Miller II
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.  See the file COPYING for more details.
+ */
+
+
+#include <linux/cpumask.h>
+#include <linux/kexec.h>
+#include <linux/smp.h>
+#include <linux/thread_info.h>
+#include <linux/errno.h>
+
+#include <asm/page.h>
+#include <asm/current.h>
+#include <asm/machdep.h>
+#include <asm/cacheflush.h>
+#include <asm/paca.h>
+#include <asm/mmu.h>
+#include <asm/sections.h>	/* _end */
+#include <asm/prom.h>
+
+#define HASH_GROUP_SIZE 0x80	/* size of each hash group, asm/mmu.h */
+
+/* Have this around till we move it into crash specific file */
+note_buf_t crash_notes[NR_CPUS];
+
+/* Dummy for now.  Not sure if we need a crash shutdown in here and
+ * what it would achieve.  Leave it here for now so the code builds
+ * in the generic kexec environment.
+ */
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+	/* do nothing right now */
+	/* smp_release_cpus() if we want smp on panic kernel */
+	/* cpu_irq_down to isolate us until we are ready */
+}
+
+int machine_kexec_prepare(struct kimage *image)
+{
+	int i;
+	unsigned long begin, end;	/* limits of segment */
+	unsigned long low, high;	/* limits of blocked memory range */
+	struct device_node *node;
+	unsigned long *basep;
+	unsigned int *sizep;
+
+	if (!ppc_md.hpte_clear_all)
+		return -ENOENT;
+
+	/*
+	 * Since we use the kernel fault handlers and paging code to
+	 * handle the virtual mode, we must make sure no destination
+	 * overlaps kernel static data or bss.
+	 */
+	for (i = 0; i < image->nr_segments; i++)
+		if (image->segment[i].mem < __pa(_end))
+			return -ETXTBSY;
+
+	/*
+	 * For non-LPAR, we absolutely can not overwrite the mmu hash
+	 * table, since we are still using the bolted entries in it to
+	 * do the copy.  Check that here.
+	 *
+	 * It is safe if the end is below the start of the blocked
+	 * region (end <= low), or if the beginning is after the
+	 * end of the blocked region (begin >= high).  Use the
+	 * boolean identity !(a || b) === (!a && !b).
+	 */
+	if (htab_address) {
+		low = __pa(htab_address);
+		high = low + (htab_hash_mask + 1) * HASH_GROUP_SIZE;
+
+		for (i = 0; i < image->nr_segments; i++) {
+			begin = image->segment[i].mem;
+			end = begin + image->segment[i].memsz;
+
+			if ((begin < high) && (end > low))
+				return -ETXTBSY;
+		}
+	}
+
+	/* We also should not overwrite the tce tables */
+	for (node = of_find_node_by_type(NULL, "pci"); node != NULL;
+			node = of_find_node_by_type(node, "pci")) {
+		basep = (unsigned long *)get_property(node, "linux,tce-base",
+			NULL);
+		sizep = (unsigned int *)get_property(node, "linux,tce-size",
+			NULL);
+		if (basep == NULL || sizep == NULL)
+			continue;
+
+		low = *basep;
+		high = low + (*sizep);
+
+		for (i = 0; i < image->nr_segments; i++) {
+			begin = image->segment[i].mem;
+			end = begin + image->segment[i].memsz;
+
+			if ((begin < high) && (end > low))
+				return -ETXTBSY;
+		}
+	}
+
+	return 0;
+}
+
+void machine_kexec_cleanup(struct kimage *image)
+{
+	/* we do nothing in prepare that needs to be undone */
+}
+
+#define IND_FLAGS (IND_DESTINATION | IND_INDIRECTION | IND_DONE | IND_SOURCE)
+
+static void copy_segments(unsigned long ind)
+{
+	unsigned long entry;
+	unsigned long *ptr;
+	void *dest;
+	void *addr;
+
+	/*
+	 * We rely on kexec_load to create a list that properly
+	 * initializes these pointers before they are used.
+	 * We will still crash if the list is wrong, but at least
+	 * the compiler will be quiet.
+	 */
+	ptr = NULL;
+	dest = NULL;
+
+	for (entry = ind; !(entry & IND_DONE); entry = *ptr++) {
+		addr = __va(entry & PAGE_MASK);
+
+		switch (entry & IND_FLAGS) {
+		case IND_DESTINATION:
+			dest = addr;
+			break;
+		case IND_INDIRECTION:
+			ptr = addr;
+			break;
+		case IND_SOURCE:
+			copy_page(dest, addr);
+			dest += PAGE_SIZE;
+		}
+	}
+}
+
+void kexec_copy_flush(struct kimage *image)
+{
+	long i, nr_segments = image->nr_segments;
+	struct kexec_segment ranges[KEXEC_SEGMENT_MAX];
+
+	/* save the ranges on the stack to efficiently flush the icache */
+	memcpy(ranges, image->segment, sizeof(ranges));
+
+	/*
+	 * After this call we may not use anything allocated in dynamic
+	 * memory, including *image.
+	 *
+	 * Only globals and the stack are allowed.
+	 */
+	copy_segments(image->head);
+
+	/*
+	 * we need to clear the icache for all dest pages sometime,
+	 * including ones that were in place on the original copy
+	 */
+	for (i = 0; i < nr_segments; i++)
+		flush_icache_range(ranges[i].mem + KERNELBASE,
+				ranges[i].mem + KERNELBASE +
+				ranges[i].memsz);
+}
+
+#ifdef CONFIG_SMP
+
+/* FIXME: we should schedule this function to be called on all cpus via
+ * an IPI, but we would like to call it below irq level so that the
+ * interrupt controller is left clean.
+ */
+void kexec_smp_down(void *arg)
+{
+	if (ppc_md.cpu_irq_down)
+		ppc_md.cpu_irq_down();
+
+	local_irq_disable();
+	kexec_smp_wait();
+	/* NOTREACHED */
+}
+
+static void kexec_prepare_cpus(void)
+{
+	int my_cpu, i, notified = -1;
+
+	smp_call_function(kexec_smp_down, NULL, 0, /* wait */0);
+	my_cpu = get_cpu();
+
+	/* check the other cpus are now down (via paca hw cpu id == -1) */
+	for (i = 0; i < NR_CPUS; i++) {
+		if (i == my_cpu)
+			continue;
+
+		while (paca[i].hw_cpu_id != -1) {
+			if (!cpu_possible(i)) {
+				printk("kexec: cpu %d hw_cpu_id %d is not"
+						" possible, ignoring\n",
+						i, paca[i].hw_cpu_id);
+				break;
+			}
+			if (!cpu_online(i)) {
+				/* Fixme: this can be spinning in
+				 * pSeries_secondary_wait with a paca
+				 * waiting for it to go online.
+				 */
+				printk("kexec: cpu %d hw_cpu_id %d is not"
+						" online, ignoring\n",
+						i, paca[i].hw_cpu_id);
+				break;
+			}
+			if (i != notified) {
+				printk("kexec: waiting for cpu %d (physical"
+						" %d) to go down\n",
+						i, paca[i].hw_cpu_id);
+				notified = i;
+			}
+		}
+	}
+
+	/* after we tell the others to go down */
+	if (ppc_md.cpu_irq_down)
+		ppc_md.cpu_irq_down();
+
+	put_cpu();
+
+	local_irq_disable();
+}
+
+#else /* ! SMP */
+
+static void kexec_prepare_cpus(void)
+{
+	/*
+	 * move the secondaries to us so that we can copy
+	 * the new kernel 0-0x100 safely
+	 *
+	 * should this be done in setup.c when kexec is configured?
+	 */
+	smp_release_cpus();
+	if (ppc_md.cpu_irq_down)
+		ppc_md.cpu_irq_down();
+	local_irq_disable();
+}
+
+#endif /* SMP */
+
+/*
+ * kexec thread structure and stack.
+ *
+ * We need to make sure that this is 16384-byte aligned due to the
+ * way process stacks are handled.  It also must be statically allocated
+ * or allocated as part of the kimage, because everything else may be
+ * overwritten when we copy the kexec image.  We piggyback on the
+ * "init_task" linker section here to statically allocate a stack.
+ *
+ * We could use a smaller stack if we don't care about anything using
+ * current, but that audit has not been performed.
+ */
+union thread_union kexec_stack
+	__attribute__((__section__(".data.init_task"))) = { };
+
+/* Our assembly helper, in misc.S */
+extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start,
+					void *image, void *control,
+					void (*clear_all)(void)) ATTRIB_NORET;
+
+/* too late to fail here */
+void machine_kexec(struct kimage *image)
+{
+
+	/* prepare control code if any */
+
+	/* shutdown other cpus into our wait loop and quiesce interrupts */
+	kexec_prepare_cpus();
+
+	/* switch to a statically allocated stack.  Based on irq stack code.
+	 * XXX: the task struct will likely be invalid once we do the copy!
+	 */
+	kexec_stack.thread_info.task = current_thread_info()->task;
+	kexec_stack.thread_info.flags = 0;
+
+	/* Some things are best done in assembly.  Finding globals with
+	 * a toc is easier in C, so pass in what we can.
+	 */
+	kexec_sequence(&kexec_stack, image->start, image,
+			page_address(image->control_code_page),
+			ppc_md.hpte_clear_all);
+	/* NOTREACHED */
+}
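machine_kexec_prepare() above reasons about segment placement with the identity spelled out in its comment: a segment [begin, end) misses a reserved range [low, high) iff end <= low or begin >= high, so the rejection test is the negation, (begin < high) && (end > low). A standalone sketch of just that predicate follows; the range values are made up for the demo.

/* The overlap test from machine_kexec_prepare, isolated.  Ranges are
 * half-open; the hash-table bounds below are invented for illustration.
 */
#include <assert.h>
#include <stdio.h>

static int overlaps(unsigned long begin, unsigned long end,
		    unsigned long low, unsigned long high)
{
	/* !(end <= low || begin >= high)  ===  (begin < high) && (end > low) */
	return (begin < high) && (end > low);
}

int main(void)
{
	unsigned long low = 0x1000000, high = 0x1080000;    /* pretend htab */

	assert(!overlaps(0x0800000, 0x1000000, low, high)); /* ends at low: ok */
	assert(!overlaps(0x1080000, 0x1100000, low, high)); /* starts at high: ok */
	assert(overlaps(0x1070000, 0x1090000, low, high));  /* straddles high */
	assert(overlaps(0x1010000, 0x1020000, low, high));  /* fully inside */
	printf("overlap predicate matches the comment's reasoning\n");
	return 0;
}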
diff --git a/arch/ppc64/kernel/misc.S b/arch/ppc64/kernel/misc.S
index e3c73b3425dc..f3dea0c5a88c 100644
--- a/arch/ppc64/kernel/misc.S
+++ b/arch/ppc64/kernel/misc.S
@@ -680,6 +680,177 @@ _GLOBAL(kernel_thread)
 	ld	r30,-16(r1)
 	blr
 
+/* kexec_wait(phys_cpu)
+ *
+ * wait for the flag to change, indicating this kernel is going away but
+ * the slave code for the next one is at addresses 0 to 0x100.
+ *
+ * This is used by all slaves.
+ *
+ * Physical (hardware) cpu id should be in r3.
+ */
+_GLOBAL(kexec_wait)
+	bl	1f
+1:	mflr	r5
+	addi	r5,r5,kexec_flag-1b
+
+99:	HMT_LOW
+#ifdef CONFIG_KEXEC		/* use no memory without kexec */
+	lwz	r4,0(r5)
+	cmpwi	0,r4,0
+	bnea	0x60
+#endif
+	b	99b
+
+/* this can be in text because we won't change it until we are
+ * running in real mode anyway
+ */
+kexec_flag:
+	.long	0
+
+
+#ifdef CONFIG_KEXEC
+
+/* kexec_smp_wait(void)
+ *
+ * call with interrupts off
+ * note: this is a terminal routine, it does not save lr
+ *
+ * get phys id from paca
+ * set paca id to -1 to say we got here
+ * switch to real mode
+ * join other cpus in kexec_wait(phys_id)
+ */
+_GLOBAL(kexec_smp_wait)
+	lhz	r3,PACAHWCPUID(r13)
+	li	r4,-1
+	sth	r4,PACAHWCPUID(r13)	/* let others know we left */
+	bl	real_mode
+	b	.kexec_wait
+
+/*
+ * switch to real mode (turn mmu off)
+ * we use the early kernel trick that the hardware ignores bits
+ * 0 and 1 (big endian) of the effective address in real mode
+ *
+ * don't overwrite r3 here, it is live for kexec_wait above.
+ */
+real_mode:	/* assume normal blr return */
+1:	li	r9,MSR_RI
+	li	r10,MSR_DR|MSR_IR
+	mflr	r11		/* return address to SRR0 */
+	mfmsr	r12
+	andc	r9,r12,r9
+	andc	r10,r12,r10
+
+	mtmsrd	r9,1
+	mtspr	SPRN_SRR1,r10
+	mtspr	SPRN_SRR0,r11
+	rfid
+
+
+/*
+ * kexec_sequence(newstack, start, image, control, clear_all())
+ *
+ * does the grungy work with stack switching and real mode switches
+ * also does simple calls to other code
+ */
+
+_GLOBAL(kexec_sequence)
+	mflr	r0
+	std	r0,16(r1)
+
+	/* switch stacks to newstack -- &kexec_stack.stack */
+	stdu	r1,THREAD_SIZE-112(r3)
+	mr	r1,r3
+
+	li	r0,0
+	std	r0,16(r1)
+
+	/* save regs for local vars on new stack.
+	 * yes, we won't go back, but ...
+	 */
+	std	r31,-8(r1)
+	std	r30,-16(r1)
+	std	r29,-24(r1)
+	std	r28,-32(r1)
+	std	r27,-40(r1)
+	std	r26,-48(r1)
+	std	r25,-56(r1)
+
+	stdu	r1,-112-64(r1)
+
+	/* save args into preserved regs */
+	mr	r31,r3			/* newstack (both) */
+	mr	r30,r4			/* start (real) */
+	mr	r29,r5			/* image (virt) */
+	mr	r28,r6			/* control, unused */
+	mr	r27,r7			/* clear_all() fn desc */
+	mr	r26,r8			/* spare */
+	lhz	r25,PACAHWCPUID(r13)	/* get our phys cpu from paca */
+
+	/* disable interrupts, we are overwriting kernel data next */
+	mfmsr	r3
+	rlwinm	r3,r3,0,17,15		/* clear MSR_EE */
+	mtmsrd	r3,1
+
+	/* copy dest pages, flush whole dest image */
+	mr	r3,r29
+	bl	.kexec_copy_flush	/* (image) */
+
+	/* turn off mmu */
+	bl	real_mode
+
+	/* clear out hardware hash page table and tlb */
+	ld	r5,0(r27)		/* deref function descriptor */
+	mtctr	r5
+	bctrl				/* ppc_md.hpte_clear_all(void); */
+
+/*
+ *   kexec image calling is:
+ *      the first 0x100 bytes of the entry point are copied to 0
+ *
+ *      all slaves branch to slave = 0x60 (absolute)
+ *              slave(phys_cpu_id);
+ *
+ *      master goes to start = entry point
+ *              start(phys_cpu_id, start, 0);
+ *
+ *
+ *   a wrapper is needed to call existing kernels, here is an approximate
+ *   description of one method:
+ *
+ * v2: (2.6.10)
+ *   start will be near the boot_block (maybe 0x100 bytes before it?)
+ *   it will have a 0x60, which will b to boot_block, where it will wait
+ *   and 0 will store phys into struct boot-block and load r3 from there,
+ *   copy kernel 0-0x100 and tell slaves to back down to 0x60 again
+ *
+ * v1: (2.6.9)
+ *   boot block will have all cpus scanning device tree to see if they
+ *   are the boot cpu ?????
+ *   other device tree differences (prop sizes, va vs pa, etc)...
+ */
+
+	/* copy 0x100 bytes starting at start to 0 */
+	li	r3,0
+	mr	r4,r30
+	li	r5,0x100
+	li	r6,0
+	bl	.copy_and_flush	/* (dest, src, copy limit, start offset) */
+1:	/* assume normal blr return */
+
+	/* release other cpus to the new kernel secondary start at 0x60 */
+	mflr	r5
+	li	r6,1
+	stw	r6,kexec_flag-1b(r5)
+	mr	r3,r25			# my phys cpu
+	mr	r4,r30			# start, aka phys mem offset
+	mtlr	r4
+	li	r5,0
+	blr	/* image->start(physid, image->start, 0); */
+#endif /* CONFIG_KEXEC */
+
 /* Why isn't this a) automatic, b) written in 'C'? */
 	.balign 8
 _GLOBAL(sys_call_table32)
@@ -951,7 +1122,7 @@ _GLOBAL(sys_call_table32)
 	.llong .compat_sys_mq_timedreceive	/* 265 */
 	.llong .compat_sys_mq_notify
 	.llong .compat_sys_mq_getsetattr
-	.llong .sys_ni_syscall		/* 268 reserved for sys_kexec_load */
+	.llong .compat_sys_kexec_load
 	.llong .sys32_add_key
 	.llong .sys32_request_key
 	.llong .compat_sys_keyctl
@@ -1227,7 +1398,7 @@ _GLOBAL(sys_call_table)
 	.llong .sys_mq_timedreceive	/* 265 */
 	.llong .sys_mq_notify
 	.llong .sys_mq_getsetattr
-	.llong .sys_ni_syscall		/* 268 reserved for sys_kexec_load */
+	.llong .sys_kexec_load
 	.llong .sys_add_key
 	.llong .sys_request_key		/* 270 */
 	.llong .sys_keyctl
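The real_mode helper above builds two MSR images with andc: r9 is the current MSR with RI cleared (applied immediately with mtmsrd, marking the transition non-recoverable) and r10 additionally drops IR and DR (applied atomically with the jump by rfid). A host-buildable sketch of that bit arithmetic, assuming the usual ppc64 low-order MSR bit values (MSR_RI = 0x2, MSR_DR = 0x10, MSR_IR = 0x20) and an illustrative starting MSR value:

/* Mirrors the andc steps in real_mode; the values are computed, not read
 * from hardware, so this runs anywhere.
 */
#include <stdio.h>

#define MSR_RI	0x2UL	/* recoverable interrupt */
#define MSR_DR	0x10UL	/* data relocate (data MMU on) */
#define MSR_IR	0x20UL	/* instruction relocate (instruction MMU on) */

int main(void)
{
	unsigned long msr = 0x9000000000001032UL; /* illustrative SF|HV|ME|IR|DR|RI */
	unsigned long r9  = msr & ~MSR_RI;		/* mtmsrd r9,1 */
	unsigned long r10 = msr & ~(MSR_DR | MSR_IR);	/* rfid loads this via SRR1 */

	printf("msr           = %#lx\n", msr);
	printf("r9  (no RI)   = %#lx\n", r9);
	printf("r10 (MMU off) = %#lx\n", r10);
	return 0;
}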
diff --git a/arch/ppc64/kernel/mpic.c b/arch/ppc64/kernel/mpic.c
index 593ea5b82afa..e8fbab1df37f 100644
--- a/arch/ppc64/kernel/mpic.c
+++ b/arch/ppc64/kernel/mpic.c
@@ -792,6 +792,35 @@ void mpic_setup_this_cpu(void)
 #endif /* CONFIG_SMP */
 }
 
+/*
+ * XXX: someone who knows mpic should check this.
+ * do we need to eoi the ipi here (see xics comments)?
+ * or can we reset the mpic in the new kernel?
+ */
+void mpic_teardown_this_cpu(void)
+{
+	struct mpic *mpic = mpic_primary;
+	unsigned long flags;
+	u32 msk = 1 << hard_smp_processor_id();
+	unsigned int i;
+
+	BUG_ON(mpic == NULL);
+
+	DBG("%s: teardown_this_cpu(%d)\n", mpic->name, hard_smp_processor_id());
+	spin_lock_irqsave(&mpic_lock, flags);
+
+	/* let the mpic know we don't want intrs. */
+	for (i = 0; i < mpic->num_sources; i++)
+		mpic_irq_write(i, MPIC_IRQ_DESTINATION,
+			mpic_irq_read(i, MPIC_IRQ_DESTINATION) & ~msk);
+
+	/* Set current processor priority to max */
+	mpic_cpu_write(MPIC_CPU_CURRENT_TASK_PRI, 0xf);
+
+	spin_unlock_irqrestore(&mpic_lock, flags);
+}
+
+
 void mpic_send_ipi(unsigned int ipi_no, unsigned int cpu_mask)
 {
 	struct mpic *mpic = mpic_primary;
diff --git a/arch/ppc64/kernel/mpic.h b/arch/ppc64/kernel/mpic.h
index 63e177143eac..99fbbc9a084c 100644
--- a/arch/ppc64/kernel/mpic.h
+++ b/arch/ppc64/kernel/mpic.h
@@ -255,6 +255,9 @@ extern unsigned int mpic_irq_get_priority(unsigned int irq);
 /* Setup a non-boot CPU */
 extern void mpic_setup_this_cpu(void);
 
+/* Clean up for kexec (or cpu offline or ...) */
+extern void mpic_teardown_this_cpu(void);
+
 /* Request IPIs on primary mpic */
 extern void mpic_request_ipis(void);
 
diff --git a/arch/ppc64/kernel/pSeries_setup.c b/arch/ppc64/kernel/pSeries_setup.c
index f2b41243342c..44d9af72d225 100644
--- a/arch/ppc64/kernel/pSeries_setup.c
+++ b/arch/ppc64/kernel/pSeries_setup.c
@@ -187,14 +187,16 @@ static void __init pSeries_setup_arch(void)
 {
 	/* Fixup ppc_md depending on the type of interrupt controller */
 	if (ppc64_interrupt_controller == IC_OPEN_PIC) {
 		ppc_md.init_IRQ       = pSeries_init_mpic;
 		ppc_md.get_irq        = mpic_get_irq;
+		ppc_md.cpu_irq_down   = mpic_teardown_this_cpu;
 		/* Allocate the mpic now, so that find_and_init_phbs() can
 		 * fill the ISUs */
 		pSeries_setup_mpic();
 	} else {
 		ppc_md.init_IRQ = xics_init_IRQ;
 		ppc_md.get_irq  = xics_get_irq;
+		ppc_md.cpu_irq_down = xics_teardown_cpu;
 	}
 
 #ifdef CONFIG_SMP
diff --git a/arch/ppc64/kernel/pSeries_smp.c b/arch/ppc64/kernel/pSeries_smp.c
index 30154140f7e2..62c55a123560 100644
--- a/arch/ppc64/kernel/pSeries_smp.c
+++ b/arch/ppc64/kernel/pSeries_smp.c
@@ -93,10 +93,13 @@ static int query_cpu_stopped(unsigned int pcpu)
 
 int pSeries_cpu_disable(void)
 {
+	int cpu = smp_processor_id();
+
+	cpu_clear(cpu, cpu_online_map);
 	systemcfg->processorCount--;
 
 	/*fix boot_cpuid here*/
-	if (smp_processor_id() == boot_cpuid)
+	if (cpu == boot_cpuid)
 		boot_cpuid = any_online_cpu(cpu_online_map);
 
 	/* FIXME: abstract this to not be platform specific later on */
diff --git a/arch/ppc64/kernel/setup.c b/arch/ppc64/kernel/setup.c
index 0a47a5ef428d..d5e4866e9ac2 100644
--- a/arch/ppc64/kernel/setup.c
+++ b/arch/ppc64/kernel/setup.c
@@ -677,11 +677,16 @@ void __init setup_system(void)
 	DBG(" <- setup_system()\n");
 }
 
-
-void machine_restart(char *cmd)
+/* also used by kexec */
+void machine_shutdown(void)
 {
 	if (ppc_md.nvram_sync)
 		ppc_md.nvram_sync();
+}
+
+void machine_restart(char *cmd)
+{
+	machine_shutdown();
 	ppc_md.restart(cmd);
 #ifdef CONFIG_SMP
 	smp_send_stop();
@@ -690,13 +695,11 @@ void machine_restart(char *cmd)
 	local_irq_disable();
 	while (1) ;
 }
-
 EXPORT_SYMBOL(machine_restart);
 
 void machine_power_off(void)
 {
-	if (ppc_md.nvram_sync)
-		ppc_md.nvram_sync();
+	machine_shutdown();
 	ppc_md.power_off();
 #ifdef CONFIG_SMP
 	smp_send_stop();
@@ -705,13 +708,11 @@ void machine_power_off(void)
 	local_irq_disable();
 	while (1) ;
 }
-
 EXPORT_SYMBOL(machine_power_off);
 
 void machine_halt(void)
 {
-	if (ppc_md.nvram_sync)
-		ppc_md.nvram_sync();
+	machine_shutdown();
 	ppc_md.halt();
 #ifdef CONFIG_SMP
 	smp_send_stop();
@@ -720,7 +721,6 @@ void machine_halt(void)
 	local_irq_disable();
 	while (1) ;
 }
-
 EXPORT_SYMBOL(machine_halt);
 
 static int ppc64_panic_event(struct notifier_block *this,
diff --git a/arch/ppc64/kernel/xics.c b/arch/ppc64/kernel/xics.c
index 879f39b90a33..677c4450984a 100644
--- a/arch/ppc64/kernel/xics.c
+++ b/arch/ppc64/kernel/xics.c
@@ -647,6 +647,31 @@ static void xics_set_affinity(unsigned int virq, cpumask_t cpumask)
 	}
 }
 
+void xics_teardown_cpu(void)
+{
+	int cpu = smp_processor_id();
+	int status;
+
+	ops->cppr_info(cpu, 0x00);
+	iosync();
+
+	/*
+	 * we need to EOI the IPI if we got here from kexec down IPI
+	 *
+	 * xics doesn't care if we duplicate an EOI as long as we
+	 * don't EOI and raise priority.
+	 *
+	 * probably need to check all the other interrupts too
+	 * should we be flagging idle loop instead?
+	 * or creating some task to be scheduled?
+	 */
+	ops->xirr_info_set(cpu, XICS_IPI);
+
+	status = rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE,
+		(1UL << interrupt_server_size) - 1 - default_distrib_server, 0);
+	WARN_ON(status != 0);
+}
+
 #ifdef CONFIG_HOTPLUG_CPU
 
 /* Interrupts are disabled. */
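xics_teardown_cpu() above leaves the global interrupt queue by clearing an indicator whose index is derived from the interrupt-server numbering: the GIQ is addressed as the top of the server space, offset by the default distribution server. A worked sketch of that expression with assumed inputs (both values come from device-tree properties in the real code; 8 and 0 are illustrative):

/* Evaluates the indicator index passed to rtas_set_indicator above. */
#include <stdio.h>

int main(void)
{
	unsigned long interrupt_server_size = 8;  /* assumed: 8-bit server numbers */
	unsigned long default_distrib_server = 0; /* assumed: distribute via server 0 */

	unsigned long giq = (1UL << interrupt_server_size) - 1
			    - default_distrib_server;

	printf("GIQ indicator index = %lu\n", giq);	/* 2^8 - 1 - 0 = 255 */
	return 0;
}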
diff --git a/arch/ppc64/mm/hash_native.c b/arch/ppc64/mm/hash_native.c
index 52b6b9305341..4fec05817d66 100644
--- a/arch/ppc64/mm/hash_native.c
+++ b/arch/ppc64/mm/hash_native.c
@@ -304,6 +304,50 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long va,
 	local_irq_restore(flags);
 }
 
+/*
+ * clear all mappings on kexec.  All cpus are in real mode (or they will
+ * be when they isi), and we are the only one left.  We rely on our kernel
+ * mapping being 0xC0's and the hardware ignoring those two real bits.
+ *
+ * TODO: add batching support when enabled.  remember, no dynamic memory here,
+ * although there is the control page available...
+ */
+static void native_hpte_clear(void)
+{
+	unsigned long slot, slots, flags;
+	HPTE *hptep = htab_address;
+	Hpte_dword0 dw0;
+	unsigned long pteg_count;
+
+	pteg_count = htab_hash_mask + 1;
+
+	local_irq_save(flags);
+
+	/* we take the tlbie lock and hold it.  Some hardware will
+	 * deadlock if we try to tlbie from two processors at once.
+	 */
+	spin_lock(&native_tlbie_lock);
+
+	slots = pteg_count * HPTES_PER_GROUP;
+
+	for (slot = 0; slot < slots; slot++, hptep++) {
+		/*
+		 * we could lock the pte here, but we are the only cpu
+		 * running, right?  and for crash dump, we probably
+		 * don't want to wait for a maybe bad cpu.
+		 */
+		dw0 = hptep->dw0.dw0;
+
+		if (dw0.v) {
+			hptep->dw0.dword0 = 0;
+			tlbie(slot2va(dw0.avpn, dw0.l, dw0.h, slot), dw0.l);
+		}
+	}
+
+	spin_unlock(&native_tlbie_lock);
+	local_irq_restore(flags);
+}
+
 static void native_flush_hash_range(unsigned long context,
 				    unsigned long number, int local)
 {
@@ -415,7 +459,8 @@ void hpte_init_native(void)
 	ppc_md.hpte_updatepp = native_hpte_updatepp;
 	ppc_md.hpte_updateboltedpp = native_hpte_updateboltedpp;
 	ppc_md.hpte_insert	= native_hpte_insert;
 	ppc_md.hpte_remove	= native_hpte_remove;
+	ppc_md.hpte_clear_all	= native_hpte_clear;
 	if (tlb_batching_enabled())
 		ppc_md.flush_hash_range = native_flush_hash_range;
 	htab_finish_init();
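native_hpte_clear() runs with translation off while the kernel still executes at its 0xC000000000000000-based linear-map addresses; as its comment notes, the hardware ignores the top two effective-address bits in real mode, so those addresses alias physical memory directly. A tiny sketch of that aliasing arithmetic (KERNELBASE is the real ppc64 constant; the offset is made up):

/* Shows why 0xC0-prefixed kernel addresses keep working in real mode. */
#include <stdio.h>

#define KERNELBASE	0xC000000000000000UL
#define EA_REAL_MASK	0x3FFFFFFFFFFFFFFFUL	/* top two EA bits ignored */

int main(void)
{
	unsigned long pa = 0x123456UL;		/* illustrative physical offset */
	unsigned long va = KERNELBASE + pa;	/* linear-map virtual address */

	printf("va                    = %#lx\n", va);
	printf("va as fetched in real = %#lx\n", va & EA_REAL_MASK);
	printf("aliases pa: %s\n", (va & EA_REAL_MASK) == pa ? "yes" : "no");
	return 0;
}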