aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
author Nicolas Pitre <nicolas.pitre@linaro.org> 2011-02-21 01:06:45 -0500
committer Russell King <rmk+kernel@arm.linux.org.uk> 2011-02-23 12:24:22 -0500
commit 6d7d0ae51574943bf571d269da3243257a2d15db (patch)
tree 4899352ea39e5e52941ba41eceb4df3de4734b68 /arch
parent 425fc47adb5bb69f76285be77a09a3341a30799e (diff)
ARM: 6750/1: improvements to compressed/head.S
In the case of a conflict between the memory used by the compressed kernel with its decompressor code and the memory used for the decompressed kernel, we currently store the latter after the former and relocate it afterwards. It would be more efficient to do this the other way around, i.e. relocate the compressed data up front instead, resulting in a smaller copy. That also has the advantage of making the code smaller and more straightforward. Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Diffstat (limited to 'arch')
-rw-r--r-- arch/arm/boot/compressed/head.S 239
1 files changed, 110 insertions, 129 deletions
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 920f4dbd4883..39859216af00 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -174,9 +174,7 @@ not_angel:
174 */ 174 */
175 175
176 .text 176 .text
177 adr r0, LC0 177
178 ldmia r0, {r1, r2, r3, r5, r6, r11, ip}
179 ldr sp, [r0, #28]
180#ifdef CONFIG_AUTO_ZRELADDR 178#ifdef CONFIG_AUTO_ZRELADDR
181 @ determine final kernel image address 179 @ determine final kernel image address
182 mov r4, pc 180 mov r4, pc
@@ -185,35 +183,108 @@ not_angel:
185#else 183#else
186 ldr r4, =zreladdr 184 ldr r4, =zreladdr
187#endif 185#endif
188 subs r0, r0, r1 @ calculate the delta offset
189 186
190 @ if delta is zero, we are 187 bl cache_on
191 beq not_relocated @ running at the address we 188
192 @ were linked at. 189restart: adr r0, LC0
190 ldmia r0, {r1, r2, r3, r5, r6, r9, r11, r12}
191 ldr sp, [r0, #32]
192
193 /*
194 * We might be running at a different address. We need
195 * to fix up various pointers.
196 */
197 sub r0, r0, r1 @ calculate the delta offset
198 add r5, r5, r0 @ _start
199 add r6, r6, r0 @ _edata
193 200
201#ifndef CONFIG_ZBOOT_ROM
202 /* malloc space is above the relocated stack (64k max) */
203 add sp, sp, r0
204 add r10, sp, #0x10000
205#else
194 /* 206 /*
195 * We're running at a different address. We need to fix 207 * With ZBOOT_ROM the bss/stack is non relocatable,
196 * up various pointers: 208 * but someone could still run this code from RAM,
197 * r5 - zImage base address (_start) 209 * in which case our reference is _edata.
198 * r6 - size of decompressed image
199 * r11 - GOT start
200 * ip - GOT end
201 */ 210 */
202 add r5, r5, r0 211 mov r10, r6
212#endif
213
214/*
215 * Check to see if we will overwrite ourselves.
216 * r4 = final kernel address
217 * r5 = start of this image
218 * r9 = size of decompressed image
219 * r10 = end of this image, including bss/stack/malloc space if non XIP
220 * We basically want:
221 * r4 >= r10 -> OK
222 * r4 + image length <= r5 -> OK
223 */
224 cmp r4, r10
225 bhs wont_overwrite
226 add r10, r4, r9
227 cmp r10, r5
228 bls wont_overwrite
229
230/*
231 * Relocate ourselves past the end of the decompressed kernel.
232 * r5 = start of this image
233 * r6 = _edata
234 * r10 = end of the decompressed kernel
235 * Because we always copy ahead, we need to do it from the end and go
236 * backward in case the source and destination overlap.
237 */
238 /* Round up to next 256-byte boundary. */
239 add r10, r10, #256
240 bic r10, r10, #255
241
242 sub r9, r6, r5 @ size to copy
243 add r9, r9, #31 @ rounded up to a multiple
244 bic r9, r9, #31 @ ... of 32 bytes
245 add r6, r9, r5
246 add r9, r9, r10
247
2481: ldmdb r6!, {r0 - r3, r10 - r12, lr}
249 cmp r6, r5
250 stmdb r9!, {r0 - r3, r10 - r12, lr}
251 bhi 1b
252
253 /* Preserve offset to relocated code. */
254 sub r6, r9, r6
255
256 bl cache_clean_flush
257
258 adr r0, BSYM(restart)
259 add r0, r0, r6
260 mov pc, r0
261
262wont_overwrite:
263/*
264 * If delta is zero, we are running at the address we were linked at.
265 * r0 = delta
266 * r2 = BSS start
267 * r3 = BSS end
268 * r4 = kernel execution address
269 * r7 = architecture ID
270 * r8 = atags pointer
271 * r11 = GOT start
272 * r12 = GOT end
273 * sp = stack pointer
274 */
275 teq r0, #0
276 beq not_relocated
203 add r11, r11, r0 277 add r11, r11, r0
204 add ip, ip, r0 278 add r12, r12, r0
205 279
206#ifndef CONFIG_ZBOOT_ROM 280#ifndef CONFIG_ZBOOT_ROM
207 /* 281 /*
208 * If we're running fully PIC === CONFIG_ZBOOT_ROM = n, 282 * If we're running fully PIC === CONFIG_ZBOOT_ROM = n,
209 * we need to fix up pointers into the BSS region. 283 * we need to fix up pointers into the BSS region.
210 * r2 - BSS start 284 * Note that the stack pointer has already been fixed up.
211 * r3 - BSS end
212 * sp - stack pointer
213 */ 285 */
214 add r2, r2, r0 286 add r2, r2, r0
215 add r3, r3, r0 287 add r3, r3, r0
216 add sp, sp, r0
217 288
218 /* 289 /*
219 * Relocate all entries in the GOT table. 290 * Relocate all entries in the GOT table.
@@ -221,7 +292,7 @@ not_angel:
2211: ldr r1, [r11, #0] @ relocate entries in the GOT 2921: ldr r1, [r11, #0] @ relocate entries in the GOT
222 add r1, r1, r0 @ table. This fixes up the 293 add r1, r1, r0 @ table. This fixes up the
223 str r1, [r11], #4 @ C references. 294 str r1, [r11], #4 @ C references.
224 cmp r11, ip 295 cmp r11, r12
225 blo 1b 296 blo 1b
226#else 297#else
227 298
@@ -234,7 +305,7 @@ not_angel:
234 cmphs r3, r1 @ _end < entry 305 cmphs r3, r1 @ _end < entry
235 addlo r1, r1, r0 @ table. This fixes up the 306 addlo r1, r1, r0 @ table. This fixes up the
236 str r1, [r11], #4 @ C references. 307 str r1, [r11], #4 @ C references.
237 cmp r11, ip 308 cmp r11, r12
238 blo 1b 309 blo 1b
239#endif 310#endif
240 311
@@ -246,76 +317,24 @@ not_relocated: mov r0, #0
246 cmp r2, r3 317 cmp r2, r3
247 blo 1b 318 blo 1b
248 319
249 /*
250 * The C runtime environment should now be setup
251 * sufficiently. Turn the cache on, set up some
252 * pointers, and start decompressing.
253 */
254 bl cache_on
255
256 mov r1, sp @ malloc space above stack
257 add r2, sp, #0x10000 @ 64k max
258
259/* 320/*
260 * Check to see if we will overwrite ourselves. 321 * The C runtime environment should now be setup sufficiently.
261 * r4 = final kernel address 322 * Set up some pointers, and start decompressing.
262 * r5 = start of this image 323 * r4 = kernel execution address
263 * r6 = size of decompressed image 324 * r7 = architecture ID
264 * r2 = end of malloc space (and therefore this image) 325 * r8 = atags pointer
265 * We basically want:
266 * r4 >= r2 -> OK
267 * r4 + image length <= r5 -> OK
268 */ 326 */
269 cmp r4, r2 327 mov r0, r4
270 bhs wont_overwrite 328 mov r1, sp @ malloc space above stack
271 add r0, r4, r6 329 add r2, sp, #0x10000 @ 64k max
272 cmp r0, r5
273 bls wont_overwrite
274
275 mov r5, r2 @ decompress after malloc space
276 mov r0, r5
277 mov r3, r7 330 mov r3, r7
278 bl decompress_kernel 331 bl decompress_kernel
279
280 add r0, r0, #127 + 128 @ alignment + stack
281 bic r0, r0, #127 @ align the kernel length
282/*
283 * r0 = decompressed kernel length
284 * r1-r3 = unused
285 * r4 = kernel execution address
286 * r5 = decompressed kernel start
287 * r7 = architecture ID
288 * r8 = atags pointer
289 * r9-r12,r14 = corrupted
290 */
291 add r1, r5, r0 @ end of decompressed kernel
292 adr r2, reloc_start
293 ldr r3, LC1
294 add r3, r2, r3
2951: ldmia r2!, {r9 - r12, r14} @ copy relocation code
296 stmia r1!, {r9 - r12, r14}
297 ldmia r2!, {r9 - r12, r14}
298 stmia r1!, {r9 - r12, r14}
299 cmp r2, r3
300 blo 1b
301 mov sp, r1
302 add sp, sp, #128 @ relocate the stack
303
304 bl cache_clean_flush 332 bl cache_clean_flush
305 ARM( add pc, r5, r0 ) @ call relocation code 333 bl cache_off
306 THUMB( add r12, r5, r0 ) 334 mov r0, #0 @ must be zero
307 THUMB( mov pc, r12 ) @ call relocation code 335 mov r1, r7 @ restore architecture number
308 336 mov r2, r8 @ restore atags pointer
309/* 337 mov pc, r4 @ call kernel
310 * We're not in danger of overwriting ourselves. Do this the simple way.
311 *
312 * r4 = kernel execution address
313 * r7 = architecture ID
314 */
315wont_overwrite: mov r0, r4
316 mov r3, r7
317 bl decompress_kernel
318 b call_kernel
319 338
320 .align 2 339 .align 2
321 .type LC0, #object 340 .type LC0, #object
@@ -323,11 +342,11 @@ LC0: .word LC0 @ r1
323 .word __bss_start @ r2 342 .word __bss_start @ r2
324 .word _end @ r3 343 .word _end @ r3
325 .word _start @ r5 344 .word _start @ r5
326 .word _image_size @ r6 345 .word _edata @ r6
346 .word _image_size @ r9
327 .word _got_start @ r11 347 .word _got_start @ r11
328 .word _got_end @ ip 348 .word _got_end @ ip
329 .word user_stack_end @ sp 349 .word user_stack_end @ sp
330LC1: .word reloc_end - reloc_start
331 .size LC0, . - LC0 350 .size LC0, . - LC0
332 351
333#ifdef CONFIG_ARCH_RPC 352#ifdef CONFIG_ARCH_RPC
@@ -353,7 +372,7 @@ params: ldr r0, =0x10000100 @ params_phys for RPC
353 * On exit, 372 * On exit,
354 * r0, r1, r2, r3, r9, r10, r12 corrupted 373 * r0, r1, r2, r3, r9, r10, r12 corrupted
355 * This routine must preserve: 374 * This routine must preserve:
356 * r4, r5, r6, r7, r8 375 * r4, r7, r8
357 */ 376 */
358 .align 5 377 .align 5
359cache_on: mov r3, #8 @ cache_on function 378cache_on: mov r3, #8 @ cache_on function
@@ -551,43 +570,6 @@ __common_mmu_cache_on:
551#endif 570#endif
552 571
553/* 572/*
554 * All code following this line is relocatable. It is relocated by
555 * the above code to the end of the decompressed kernel image and
556 * executed there. During this time, we have no stacks.
557 *
558 * r0 = decompressed kernel length
559 * r1-r3 = unused
560 * r4 = kernel execution address
561 * r5 = decompressed kernel start
562 * r7 = architecture ID
563 * r8 = atags pointer
564 * r9-r12,r14 = corrupted
565 */
566 .align 5
567reloc_start: add r9, r5, r0
568 sub r9, r9, #128 @ do not copy the stack
569 debug_reloc_start
570 mov r1, r4
5711:
572 .rept 4
573 ldmia r5!, {r0, r2, r3, r10 - r12, r14} @ relocate kernel
574 stmia r1!, {r0, r2, r3, r10 - r12, r14}
575 .endr
576
577 cmp r5, r9
578 blo 1b
579 mov sp, r1
580 add sp, sp, #128 @ relocate the stack
581 debug_reloc_end
582
583call_kernel: bl cache_clean_flush
584 bl cache_off
585 mov r0, #0 @ must be zero
586 mov r1, r7 @ restore architecture number
587 mov r2, r8 @ restore atags pointer
588 mov pc, r4 @ call kernel
589
590/*
591 * Here follow the relocatable cache support functions for the 573 * Here follow the relocatable cache support functions for the
592 * various processors. This is a generic hook for locating an 574 * various processors. This is a generic hook for locating an
593 * entry and jumping to an instruction at the specified offset 575 * entry and jumping to an instruction at the specified offset
@@ -791,7 +773,7 @@ proc_types:
791 * On exit, 773 * On exit,
792 * r0, r1, r2, r3, r9, r12 corrupted 774 * r0, r1, r2, r3, r9, r12 corrupted
793 * This routine must preserve: 775 * This routine must preserve:
794 * r4, r6, r7 776 * r4, r7, r8
795 */ 777 */
796 .align 5 778 .align 5
797cache_off: mov r3, #12 @ cache_off function 779cache_off: mov r3, #12 @ cache_off function
@@ -866,7 +848,7 @@ __armv3_mmu_cache_off:
866 * On exit, 848 * On exit,
867 * r1, r2, r3, r9, r10, r11, r12 corrupted 849 * r1, r2, r3, r9, r10, r11, r12 corrupted
868 * This routine must preserve: 850 * This routine must preserve:
869 * r0, r4, r5, r6, r7 851 * r4, r6, r7, r8
870 */ 852 */
871 .align 5 853 .align 5
872cache_clean_flush: 854cache_clean_flush:
@@ -1088,7 +1070,6 @@ memdump: mov r12, r0
1088#endif 1070#endif
1089 1071
1090 .ltorg 1072 .ltorg
1091reloc_end:
1092 1073
1093 .align 1074 .align
1094 .section ".stack", "aw", %nobits 1075 .section ".stack", "aw", %nobits