diff options
Diffstat (limited to 'arch/powerpc/kernel/machine_kexec_64.c')
-rw-r--r-- | arch/powerpc/kernel/machine_kexec_64.c | 358 |
1 files changed, 358 insertions, 0 deletions
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c new file mode 100644 index 000000000000..97c51e452be7 --- /dev/null +++ b/arch/powerpc/kernel/machine_kexec_64.c | |||
@@ -0,0 +1,358 @@ | |||
1 | /* | ||
2 | * machine_kexec.c - handle transition of Linux booting another kernel | ||
3 | * | ||
4 | * Copyright (C) 2004-2005, IBM Corp. | ||
5 | * | ||
6 | * Created by: Milton D Miller II | ||
7 | * | ||
8 | * This source code is licensed under the GNU General Public License, | ||
9 | * Version 2. See the file COPYING for more details. | ||
10 | */ | ||
11 | |||
12 | |||
13 | #include <linux/cpumask.h> | ||
14 | #include <linux/kexec.h> | ||
15 | #include <linux/smp.h> | ||
16 | #include <linux/thread_info.h> | ||
17 | #include <linux/errno.h> | ||
18 | |||
19 | #include <asm/page.h> | ||
20 | #include <asm/current.h> | ||
21 | #include <asm/machdep.h> | ||
22 | #include <asm/cacheflush.h> | ||
23 | #include <asm/paca.h> | ||
24 | #include <asm/mmu.h> | ||
25 | #include <asm/sections.h> /* _end */ | ||
26 | #include <asm/prom.h> | ||
27 | #include <asm/smp.h> | ||
28 | |||
29 | #define HASH_GROUP_SIZE 0x80 /* size of each hash group, asm/mmu.h */ | ||
30 | |||
31 | /* Have this around till we move it into crash specific file */ | ||
32 | note_buf_t crash_notes[NR_CPUS]; | ||
33 | |||
34 | /* Dummy for now. Not sure if we need to have a crash shutdown in here | ||
35 | * and if what it will achieve. Letting it be now to compile the code | ||
36 | * in generic kexec environment | ||
37 | */ | ||
38 | void machine_crash_shutdown(struct pt_regs *regs) | ||
39 | { | ||
40 | /* do nothing right now */ | ||
41 | /* smp_relase_cpus() if we want smp on panic kernel */ | ||
42 | /* cpu_irq_down to isolate us until we are ready */ | ||
43 | } | ||
44 | |||
45 | int machine_kexec_prepare(struct kimage *image) | ||
46 | { | ||
47 | int i; | ||
48 | unsigned long begin, end; /* limits of segment */ | ||
49 | unsigned long low, high; /* limits of blocked memory range */ | ||
50 | struct device_node *node; | ||
51 | unsigned long *basep; | ||
52 | unsigned int *sizep; | ||
53 | |||
54 | if (!ppc_md.hpte_clear_all) | ||
55 | return -ENOENT; | ||
56 | |||
57 | /* | ||
58 | * Since we use the kernel fault handlers and paging code to | ||
59 | * handle the virtual mode, we must make sure no destination | ||
60 | * overlaps kernel static data or bss. | ||
61 | */ | ||
62 | for (i = 0; i < image->nr_segments; i++) | ||
63 | if (image->segment[i].mem < __pa(_end)) | ||
64 | return -ETXTBSY; | ||
65 | |||
66 | /* | ||
67 | * For non-LPAR, we absolutely can not overwrite the mmu hash | ||
68 | * table, since we are still using the bolted entries in it to | ||
69 | * do the copy. Check that here. | ||
70 | * | ||
71 | * It is safe if the end is below the start of the blocked | ||
72 | * region (end <= low), or if the beginning is after the | ||
73 | * end of the blocked region (begin >= high). Use the | ||
74 | * boolean identity !(a || b) === (!a && !b). | ||
75 | */ | ||
76 | if (htab_address) { | ||
77 | low = __pa(htab_address); | ||
78 | high = low + (htab_hash_mask + 1) * HASH_GROUP_SIZE; | ||
79 | |||
80 | for (i = 0; i < image->nr_segments; i++) { | ||
81 | begin = image->segment[i].mem; | ||
82 | end = begin + image->segment[i].memsz; | ||
83 | |||
84 | if ((begin < high) && (end > low)) | ||
85 | return -ETXTBSY; | ||
86 | } | ||
87 | } | ||
88 | |||
89 | /* We also should not overwrite the tce tables */ | ||
90 | for (node = of_find_node_by_type(NULL, "pci"); node != NULL; | ||
91 | node = of_find_node_by_type(node, "pci")) { | ||
92 | basep = (unsigned long *)get_property(node, "linux,tce-base", | ||
93 | NULL); | ||
94 | sizep = (unsigned int *)get_property(node, "linux,tce-size", | ||
95 | NULL); | ||
96 | if (basep == NULL || sizep == NULL) | ||
97 | continue; | ||
98 | |||
99 | low = *basep; | ||
100 | high = low + (*sizep); | ||
101 | |||
102 | for (i = 0; i < image->nr_segments; i++) { | ||
103 | begin = image->segment[i].mem; | ||
104 | end = begin + image->segment[i].memsz; | ||
105 | |||
106 | if ((begin < high) && (end > low)) | ||
107 | return -ETXTBSY; | ||
108 | } | ||
109 | } | ||
110 | |||
111 | return 0; | ||
112 | } | ||
113 | |||
114 | void machine_kexec_cleanup(struct kimage *image) | ||
115 | { | ||
116 | /* we do nothing in prepare that needs to be undone */ | ||
117 | } | ||
118 | |||
119 | #define IND_FLAGS (IND_DESTINATION | IND_INDIRECTION | IND_DONE | IND_SOURCE) | ||
120 | |||
121 | static void copy_segments(unsigned long ind) | ||
122 | { | ||
123 | unsigned long entry; | ||
124 | unsigned long *ptr; | ||
125 | void *dest; | ||
126 | void *addr; | ||
127 | |||
128 | /* | ||
129 | * We rely on kexec_load to create a lists that properly | ||
130 | * initializes these pointers before they are used. | ||
131 | * We will still crash if the list is wrong, but at least | ||
132 | * the compiler will be quiet. | ||
133 | */ | ||
134 | ptr = NULL; | ||
135 | dest = NULL; | ||
136 | |||
137 | for (entry = ind; !(entry & IND_DONE); entry = *ptr++) { | ||
138 | addr = __va(entry & PAGE_MASK); | ||
139 | |||
140 | switch (entry & IND_FLAGS) { | ||
141 | case IND_DESTINATION: | ||
142 | dest = addr; | ||
143 | break; | ||
144 | case IND_INDIRECTION: | ||
145 | ptr = addr; | ||
146 | break; | ||
147 | case IND_SOURCE: | ||
148 | copy_page(dest, addr); | ||
149 | dest += PAGE_SIZE; | ||
150 | } | ||
151 | } | ||
152 | } | ||
153 | |||
154 | void kexec_copy_flush(struct kimage *image) | ||
155 | { | ||
156 | long i, nr_segments = image->nr_segments; | ||
157 | struct kexec_segment ranges[KEXEC_SEGMENT_MAX]; | ||
158 | |||
159 | /* save the ranges on the stack to efficiently flush the icache */ | ||
160 | memcpy(ranges, image->segment, sizeof(ranges)); | ||
161 | |||
162 | /* | ||
163 | * After this call we may not use anything allocated in dynamic | ||
164 | * memory, including *image. | ||
165 | * | ||
166 | * Only globals and the stack are allowed. | ||
167 | */ | ||
168 | copy_segments(image->head); | ||
169 | |||
170 | /* | ||
171 | * we need to clear the icache for all dest pages sometime, | ||
172 | * including ones that were in place on the original copy | ||
173 | */ | ||
174 | for (i = 0; i < nr_segments; i++) | ||
175 | flush_icache_range(ranges[i].mem + KERNELBASE, | ||
176 | ranges[i].mem + KERNELBASE + | ||
177 | ranges[i].memsz); | ||
178 | } | ||
179 | |||
180 | #ifdef CONFIG_SMP | ||
181 | |||
182 | /* FIXME: we should schedule this function to be called on all cpus based | ||
183 | * on calling the interrupts, but we would like to call it off irq level | ||
184 | * so that the interrupt controller is clean. | ||
185 | */ | ||
186 | void kexec_smp_down(void *arg) | ||
187 | { | ||
188 | if (ppc_md.kexec_cpu_down) | ||
189 | ppc_md.kexec_cpu_down(0, 1); | ||
190 | |||
191 | local_irq_disable(); | ||
192 | kexec_smp_wait(); | ||
193 | /* NOTREACHED */ | ||
194 | } | ||
195 | |||
196 | static void kexec_prepare_cpus(void) | ||
197 | { | ||
198 | int my_cpu, i, notified=-1; | ||
199 | |||
200 | smp_call_function(kexec_smp_down, NULL, 0, /* wait */0); | ||
201 | my_cpu = get_cpu(); | ||
202 | |||
203 | /* check the others cpus are now down (via paca hw cpu id == -1) */ | ||
204 | for (i=0; i < NR_CPUS; i++) { | ||
205 | if (i == my_cpu) | ||
206 | continue; | ||
207 | |||
208 | while (paca[i].hw_cpu_id != -1) { | ||
209 | barrier(); | ||
210 | if (!cpu_possible(i)) { | ||
211 | printk("kexec: cpu %d hw_cpu_id %d is not" | ||
212 | " possible, ignoring\n", | ||
213 | i, paca[i].hw_cpu_id); | ||
214 | break; | ||
215 | } | ||
216 | if (!cpu_online(i)) { | ||
217 | /* Fixme: this can be spinning in | ||
218 | * pSeries_secondary_wait with a paca | ||
219 | * waiting for it to go online. | ||
220 | */ | ||
221 | printk("kexec: cpu %d hw_cpu_id %d is not" | ||
222 | " online, ignoring\n", | ||
223 | i, paca[i].hw_cpu_id); | ||
224 | break; | ||
225 | } | ||
226 | if (i != notified) { | ||
227 | printk( "kexec: waiting for cpu %d (physical" | ||
228 | " %d) to go down\n", | ||
229 | i, paca[i].hw_cpu_id); | ||
230 | notified = i; | ||
231 | } | ||
232 | } | ||
233 | } | ||
234 | |||
235 | /* after we tell the others to go down */ | ||
236 | if (ppc_md.kexec_cpu_down) | ||
237 | ppc_md.kexec_cpu_down(0, 0); | ||
238 | |||
239 | put_cpu(); | ||
240 | |||
241 | local_irq_disable(); | ||
242 | } | ||
243 | |||
244 | #else /* ! SMP */ | ||
245 | |||
246 | static void kexec_prepare_cpus(void) | ||
247 | { | ||
248 | /* | ||
249 | * move the secondarys to us so that we can copy | ||
250 | * the new kernel 0-0x100 safely | ||
251 | * | ||
252 | * do this if kexec in setup.c ? | ||
253 | * | ||
254 | * We need to release the cpus if we are ever going from an | ||
255 | * UP to an SMP kernel. | ||
256 | */ | ||
257 | smp_release_cpus(); | ||
258 | if (ppc_md.kexec_cpu_down) | ||
259 | ppc_md.kexec_cpu_down(0, 0); | ||
260 | local_irq_disable(); | ||
261 | } | ||
262 | |||
263 | #endif /* SMP */ | ||
264 | |||
265 | /* | ||
266 | * kexec thread structure and stack. | ||
267 | * | ||
268 | * We need to make sure that this is 16384-byte aligned due to the | ||
269 | * way process stacks are handled. It also must be statically allocated | ||
270 | * or allocated as part of the kimage, because everything else may be | ||
271 | * overwritten when we copy the kexec image. We piggyback on the | ||
272 | * "init_task" linker section here to statically allocate a stack. | ||
273 | * | ||
274 | * We could use a smaller stack if we don't care about anything using | ||
275 | * current, but that audit has not been performed. | ||
276 | */ | ||
277 | union thread_union kexec_stack | ||
278 | __attribute__((__section__(".data.init_task"))) = { }; | ||
279 | |||
280 | /* Our assembly helper, in kexec_stub.S */ | ||
281 | extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start, | ||
282 | void *image, void *control, | ||
283 | void (*clear_all)(void)) ATTRIB_NORET; | ||
284 | |||
285 | /* too late to fail here */ | ||
286 | void machine_kexec(struct kimage *image) | ||
287 | { | ||
288 | |||
289 | /* prepare control code if any */ | ||
290 | |||
291 | /* shutdown other cpus into our wait loop and quiesce interrupts */ | ||
292 | kexec_prepare_cpus(); | ||
293 | |||
294 | /* switch to a staticly allocated stack. Based on irq stack code. | ||
295 | * XXX: the task struct will likely be invalid once we do the copy! | ||
296 | */ | ||
297 | kexec_stack.thread_info.task = current_thread_info()->task; | ||
298 | kexec_stack.thread_info.flags = 0; | ||
299 | |||
300 | /* Some things are best done in assembly. Finding globals with | ||
301 | * a toc is easier in C, so pass in what we can. | ||
302 | */ | ||
303 | kexec_sequence(&kexec_stack, image->start, image, | ||
304 | page_address(image->control_code_page), | ||
305 | ppc_md.hpte_clear_all); | ||
306 | /* NOTREACHED */ | ||
307 | } | ||
308 | |||
309 | /* Values we need to export to the second kernel via the device tree. */ | ||
310 | static unsigned long htab_base, htab_size, kernel_end; | ||
311 | |||
312 | static struct property htab_base_prop = { | ||
313 | .name = "linux,htab-base", | ||
314 | .length = sizeof(unsigned long), | ||
315 | .value = (unsigned char *)&htab_base, | ||
316 | }; | ||
317 | |||
318 | static struct property htab_size_prop = { | ||
319 | .name = "linux,htab-size", | ||
320 | .length = sizeof(unsigned long), | ||
321 | .value = (unsigned char *)&htab_size, | ||
322 | }; | ||
323 | |||
324 | static struct property kernel_end_prop = { | ||
325 | .name = "linux,kernel-end", | ||
326 | .length = sizeof(unsigned long), | ||
327 | .value = (unsigned char *)&kernel_end, | ||
328 | }; | ||
329 | |||
330 | static void __init export_htab_values(void) | ||
331 | { | ||
332 | struct device_node *node; | ||
333 | |||
334 | node = of_find_node_by_path("/chosen"); | ||
335 | if (!node) | ||
336 | return; | ||
337 | |||
338 | kernel_end = __pa(_end); | ||
339 | prom_add_property(node, &kernel_end_prop); | ||
340 | |||
341 | /* On machines with no htab htab_address is NULL */ | ||
342 | if (NULL == htab_address) | ||
343 | goto out; | ||
344 | |||
345 | htab_base = __pa(htab_address); | ||
346 | prom_add_property(node, &htab_base_prop); | ||
347 | |||
348 | htab_size = 1UL << ppc64_pft_size; | ||
349 | prom_add_property(node, &htab_size_prop); | ||
350 | |||
351 | out: | ||
352 | of_node_put(node); | ||
353 | } | ||
354 | |||
355 | void __init kexec_setup(void) | ||
356 | { | ||
357 | export_htab_values(); | ||
358 | } | ||