Diffstat (limited to 'arch/ppc64/kernel/machine_kexec.c')
-rw-r--r-- | arch/ppc64/kernel/machine_kexec.c | 301
1 file changed, 301 insertions, 0 deletions
diff --git a/arch/ppc64/kernel/machine_kexec.c b/arch/ppc64/kernel/machine_kexec.c
new file mode 100644
index 000000000000..217965d60a45
--- /dev/null
+++ b/arch/ppc64/kernel/machine_kexec.c
@@ -0,0 +1,301 @@
/*
 * machine_kexec.c - handle transition of Linux booting another kernel
 *
 * Copyright (C) 2004-2005, IBM Corp.
 *
 * Created by: Milton D Miller II
 *
 * This source code is licensed under the GNU General Public License,
 * Version 2.  See the file COPYING for more details.
 */


#include <linux/cpumask.h>
#include <linux/kexec.h>
#include <linux/smp.h>
#include <linux/thread_info.h>
#include <linux/errno.h>

#include <asm/page.h>
#include <asm/current.h>
#include <asm/machdep.h>
#include <asm/cacheflush.h>
#include <asm/paca.h>
#include <asm/mmu.h>
#include <asm/sections.h>	/* _end */
#include <asm/prom.h>

#define HASH_GROUP_SIZE 0x80	/* size of each hash group, asm/mmu.h */

/* Keep this around until we move it into a crash-specific file */
note_buf_t crash_notes[NR_CPUS];

/* Dummy for now.  Not sure whether we need a crash shutdown here,
 * or what it would achieve.  Keep it so the code compiles in the
 * generic kexec environment.
 */
void machine_crash_shutdown(void)
{
	/* do nothing right now */
	/* smp_release_cpus() if we want SMP on the panic kernel */
	/* cpu_irq_down to isolate us until we are ready */
}

int machine_kexec_prepare(struct kimage *image)
{
	int i;
	unsigned long begin, end;	/* limits of segment */
	unsigned long low, high;	/* limits of blocked memory range */
	struct device_node *node;
	unsigned long *basep;
	unsigned int *sizep;

	if (!ppc_md.hpte_clear_all)
		return -ENOENT;

	/*
	 * Since we use the kernel fault handlers and paging code to
	 * handle the virtual mode, we must make sure no destination
	 * overlaps kernel static data or bss.
	 */
	for (i = 0; i < image->nr_segments; i++)
		if (image->segment[i].mem < __pa(_end))
			return -ETXTBSY;

	/*
	 * For non-LPAR, we absolutely cannot overwrite the mmu hash
	 * table, since we are still using the bolted entries in it to
	 * do the copy.  Check that here.
	 *
	 * It is safe if the end is below the start of the blocked
	 * region (end <= low), or if the beginning is after the
	 * end of the blocked region (begin >= high).  By the boolean
	 * identity !(a || b) === (!a && !b), a segment overlaps the
	 * blocked region exactly when (begin < high) && (end > low).
	 */
	if (htab_address) {
		low = __pa(htab_address);
		high = low + (htab_hash_mask + 1) * HASH_GROUP_SIZE;

		for (i = 0; i < image->nr_segments; i++) {
			begin = image->segment[i].mem;
			end = begin + image->segment[i].memsz;

			if ((begin < high) && (end > low))
				return -ETXTBSY;
		}
	}

	/* We also must not overwrite the TCE tables */
	for (node = of_find_node_by_type(NULL, "pci"); node != NULL;
			node = of_find_node_by_type(node, "pci")) {
		basep = (unsigned long *)get_property(node, "linux,tce-base",
				NULL);
		sizep = (unsigned int *)get_property(node, "linux,tce-size",
				NULL);
		if (basep == NULL || sizep == NULL)
			continue;

		low = *basep;
		high = low + (*sizep);

		for (i = 0; i < image->nr_segments; i++) {
			begin = image->segment[i].mem;
			end = begin + image->segment[i].memsz;

			if ((begin < high) && (end > low))
				return -ETXTBSY;
		}
	}

	return 0;
}
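
The blocked-range arithmetic is worth a worked example: with htab_hash_mask = 0xffff there are 0x10000 hash groups of HASH_GROUP_SIZE (0x80) bytes each, so the hash table spans 8MB. The overlap predicate itself is the standard half-open interval intersection test; below is a minimal standalone sketch of it (names and sample addresses are illustrative, not from the kernel):

#include <stdio.h>
#include <stdbool.h>

static bool ranges_overlap(unsigned long begin, unsigned long end,
		unsigned long low, unsigned long high)
{
	/* negation of "end <= low || begin >= high" via !(a || b) == (!a && !b) */
	return (begin < high) && (end > low);
}

int main(void)
{
	/* illustrative: an 8MB hash table at 16MB blocks [0x1000000, 0x1800000) */
	printf("%d\n", ranges_overlap(0x1400000, 0x1500000,
				      0x1000000, 0x1800000));	/* 1: inside */
	printf("%d\n", ranges_overlap(0x1800000, 0x1900000,
				      0x1000000, 0x1800000));	/* 0: starts at high */
	return 0;
}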

void machine_kexec_cleanup(struct kimage *image)
{
	/* we do nothing in prepare that needs to be undone */
}

#define IND_FLAGS (IND_DESTINATION | IND_INDIRECTION | IND_DONE | IND_SOURCE)

static void copy_segments(unsigned long ind)
{
	unsigned long entry;
	unsigned long *ptr;
	void *dest;
	void *addr;

	/*
	 * We rely on kexec_load to create a list that properly
	 * initializes these pointers before they are used.
	 * We will still crash if the list is wrong, but at least
	 * the compiler will be quiet.
	 */
	ptr = NULL;
	dest = NULL;

	for (entry = ind; !(entry & IND_DONE); entry = *ptr++) {
		addr = __va(entry & PAGE_MASK);

		switch (entry & IND_FLAGS) {
		case IND_DESTINATION:
			dest = addr;
			break;
		case IND_INDIRECTION:
			ptr = addr;
			break;
		case IND_SOURCE:
			copy_page(dest, addr);
			dest += PAGE_SIZE;
		}
	}
}
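
For context, the list copy_segments() walks is a flat sequence of machine words, each a page address tagged with a flag in its low bits. The following standalone userspace sketch performs the same walk; the flag values mirror include/linux/kexec.h, but the names, the plain-pointer addresses, and the omission of IND_INDIRECTION chaining are simplifications for illustration:

#include <stdio.h>
#include <string.h>

#define IND_DESTINATION	0x1
#define IND_DONE	0x4
#define IND_SOURCE	0x8
#define PG		4096UL	/* stand-in for PAGE_SIZE */

static char src_a[PG] __attribute__((aligned(16)));
static char src_b[PG] __attribute__((aligned(16)));
static char dst[2 * PG] __attribute__((aligned(16)));

int main(void)
{
	/* one destination, two source pages, then done */
	unsigned long list[] = {
		(unsigned long)dst   | IND_DESTINATION,
		(unsigned long)src_a | IND_SOURCE,
		(unsigned long)src_b | IND_SOURCE,
		IND_DONE,
	};
	unsigned long *ptr = list;
	unsigned long entry;
	char *dest = NULL;

	memset(src_a, 'A', PG);
	memset(src_b, 'B', PG);

	for (entry = *ptr++; !(entry & IND_DONE); entry = *ptr++) {
		char *addr = (char *)(entry & ~0xfUL);	/* strip flag bits */

		if (entry & IND_DESTINATION)
			dest = addr;
		else if (entry & IND_SOURCE) {
			memcpy(dest, addr, PG);
			dest += PG;
		}
	}
	printf("%c %c\n", dst[0], dst[PG]);	/* prints: A B */
	return 0;
}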

void kexec_copy_flush(struct kimage *image)
{
	long i, nr_segments = image->nr_segments;
	struct kexec_segment ranges[KEXEC_SEGMENT_MAX];

	/* save the ranges on the stack to efficiently flush the icache */
	memcpy(ranges, image->segment, sizeof(ranges));

	/*
	 * After this call we may not use anything allocated in dynamic
	 * memory, including *image.
	 *
	 * Only globals and the stack are allowed.
	 */
	copy_segments(image->head);

	/*
	 * We must flush the icache for all destination pages at some
	 * point, including pages that were already in place before the copy.
	 */
	for (i = 0; i < nr_segments; i++)
		flush_icache_range(ranges[i].mem + KERNELBASE,
				ranges[i].mem + KERNELBASE +
				ranges[i].memsz);
}
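
The + KERNELBASE above is the linear-map translation: image segments record physical addresses, while flush_icache_range() takes kernel virtual addresses. A sketch of that translation (phys_to_kernel_virt is a hypothetical name; on ppc64 this is what __va() computes):

/*
 * Sketch only: on ppc64 the linear mapping places physical address p
 * at virtual address KERNELBASE + p, so the flush walks the
 * kernel-virtual alias of each physical destination range.
 */
static inline unsigned long phys_to_kernel_virt(unsigned long phys)
{
	return phys + KERNELBASE;
}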

#ifdef CONFIG_SMP

/* FIXME: we should schedule this function to be called on all cpus
 * via an interrupt, but we would like to run it off irq level so
 * that the interrupt controller is left in a clean state.
 */
void kexec_smp_down(void *arg)
{
	if (ppc_md.cpu_irq_down)
		ppc_md.cpu_irq_down();

	local_irq_disable();
	kexec_smp_wait();
	/* NOTREACHED */
}

static void kexec_prepare_cpus(void)
{
	int my_cpu, i, notified = -1;

	smp_call_function(kexec_smp_down, NULL, 0, /* wait */0);
	my_cpu = get_cpu();

	/* check that the other cpus are now down (via paca hw cpu id == -1) */
	for (i = 0; i < NR_CPUS; i++) {
		if (i == my_cpu)
			continue;

		while (paca[i].hw_cpu_id != -1) {
			if (!cpu_possible(i)) {
				printk("kexec: cpu %d hw_cpu_id %d is not"
						" possible, ignoring\n",
						i, paca[i].hw_cpu_id);
				break;
			}
			if (!cpu_online(i)) {
				/* Fixme: this can be spinning in
				 * pSeries_secondary_wait with a paca
				 * waiting for it to go online.
				 */
				printk("kexec: cpu %d hw_cpu_id %d is not"
						" online, ignoring\n",
						i, paca[i].hw_cpu_id);
				break;
			}
			if (i != notified) {
				printk("kexec: waiting for cpu %d (physical"
						" %d) to go down\n",
						i, paca[i].hw_cpu_id);
				notified = i;
			}
		}
	}

	/* after we tell the others to go down */
	if (ppc_md.cpu_irq_down)
		ppc_md.cpu_irq_down();

	put_cpu();

	local_irq_disable();
}

#else /* ! SMP */

static void kexec_prepare_cpus(void)
{
	/*
	 * move the secondaries to us so that we can safely copy
	 * the new kernel to 0-0x100
	 *
	 * do this if kexec in setup.c ?
	 */
	smp_release_cpus();
	if (ppc_md.cpu_irq_down)
		ppc_md.cpu_irq_down();
	local_irq_disable();
}

#endif /* SMP */

/*
 * kexec thread structure and stack.
 *
 * We need to make sure that this is 16384-byte aligned due to the
 * way process stacks are handled.  It also must be statically allocated
 * or allocated as part of the kimage, because everything else may be
 * overwritten when we copy the kexec image.  We piggyback on the
 * "init_task" linker section here to statically allocate a stack.
 *
 * We could use a smaller stack if we don't care about anything using
 * current, but that audit has not been performed.
 */
union thread_union kexec_stack
	__attribute__((__section__(".data.init_task"))) = { };
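
The declaration above inherits the needed 16384-byte alignment because the .data.init_task section is laid out for init_task's THREAD_SIZE-aligned stack. A hypothetical equivalent that asks the compiler for the alignment directly (a sketch, not what this file does):

/*
 * Hypothetical alternative: request the alignment explicitly instead
 * of piggybacking on the init_task linker section.
 */
union thread_union kexec_stack_alt __attribute__((aligned(16384))) = { };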

/* Our assembly helper, in kexec_stub.S */
extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start,
		void *image, void *control, void (*clear_all)(void)) ATTRIB_NORET;

/* too late to fail here */
void machine_kexec(struct kimage *image)
{

	/* prepare control code if any */

	/* shutdown other cpus into our wait loop and quiesce interrupts */
	kexec_prepare_cpus();

	/* switch to a statically allocated stack.  Based on irq stack code.
	 * XXX: the task struct will likely be invalid once we do the copy!
	 */
	kexec_stack.thread_info.task = current_thread_info()->task;
	kexec_stack.thread_info.flags = 0;

	/* Some things are best done in assembly.  Finding globals with
	 * the toc is easier in C, so pass in what we can.
	 */
	kexec_sequence(&kexec_stack, image->start, image,
			page_address(image->control_code_page),
			ppc_md.hpte_clear_all);
	/* NOTREACHED */
}