about summary refs log tree commit diff stats
path: root/arch/arm/kernel/head.S
diff options
context:
space:
mode:
authorSricharan R <r.sricharan@ti.com>2013-07-29 10:56:22 -0400
committerSantosh Shilimkar <santosh.shilimkar@ti.com>2013-10-10 20:25:06 -0400
commitf52bb722547f43caeaecbcc62db9f3c3b80ead9b (patch)
tree99c0fbf6fb94053b5ed81ebfc84ee7010871f05d /arch/arm/kernel/head.S
parentc1a5f4f6733e39c3b780ff14ad6a68252475e7a9 (diff)
ARM: mm: Correct virt_to_phys patching for 64 bit physical addresses
The current phys_to_virt patching mechanism works only for 32 bit physical addresses and this patch extends the idea for 64bit physical addresses. The 64bit v2p patching mechanism patches the higher 8 bits of physical address with a constant using 'mov' instruction and lower 32bits are patched using 'add'. While this is correct, in those platforms where the lowmem addressable physical memory spawns across 4GB boundary, a carry bit can be produced as a result of addition of lower 32bits. This has to be taken in to account and added in to the upper. The patched __pv_offset and va are added in lower 32bits, where __pv_offset can be in two's complement form when PA_START < VA_START and that can result in a false carry bit. e.g 1) PA = 0x80000000; VA = 0xC0000000 __pv_offset = PA - VA = 0xC0000000 (2's complement) 2) PA = 0x2 80000000; VA = 0xC000000 __pv_offset = PA - VA = 0x1 C0000000 So adding __pv_offset + VA should never result in a true overflow for (1). So in order to differentiate between a true carry, a __pv_offset is extended to 64bit and the upper 32bits will have 0xffffffff if __pv_offset is 2's complement. So 'mvn #0' is inserted instead of 'mov' while patching for the same reason. Since mov, add, sub instruction are to patched with different constants inside the same stub, the rotation field of the opcode is using to differentiate between them. So the above examples for v2p translation becomes for VA=0xC0000000, 1) PA[63:32] = 0xffffffff PA[31:0] = VA + 0xC0000000 --> results in a carry PA[63:32] = PA[63:32] + carry PA[63:0] = 0x0 80000000 2) PA[63:32] = 0x1 PA[31:0] = VA + 0xC0000000 --> results in a carry PA[63:32] = PA[63:32] + carry PA[63:0] = 0x2 80000000 The above ideas were suggested by Nicolas Pitre <nico@linaro.org> as part of the review of first and second versions of the subject patch. There is no corresponding change on the phys_to_virt() side, because computations on the upper 32-bits would be discarded anyway. 
Cc: Russell King <linux@arm.linux.org.uk> Reviewed-by: Nicolas Pitre <nico@linaro.org> Signed-off-by: Sricharan R <r.sricharan@ti.com> Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Diffstat (limited to 'arch/arm/kernel/head.S')
-rw-r--r-- arch/arm/kernel/head.S | 63
1 file changed, 47 insertions, 16 deletions
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 2c7cc1e03473..54547947a4e9 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -536,6 +536,14 @@ ENTRY(fixup_smp)
536 ldmfd sp!, {r4 - r6, pc} 536 ldmfd sp!, {r4 - r6, pc}
537ENDPROC(fixup_smp) 537ENDPROC(fixup_smp)
538 538
539#ifdef __ARMEB__
540#define LOW_OFFSET 0x4
541#define HIGH_OFFSET 0x0
542#else
543#define LOW_OFFSET 0x0
544#define HIGH_OFFSET 0x4
545#endif
546
539#ifdef CONFIG_ARM_PATCH_PHYS_VIRT 547#ifdef CONFIG_ARM_PATCH_PHYS_VIRT
540 548
541/* __fixup_pv_table - patch the stub instructions with the delta between 549/* __fixup_pv_table - patch the stub instructions with the delta between
@@ -546,17 +554,20 @@ ENDPROC(fixup_smp)
546 __HEAD 554 __HEAD
547__fixup_pv_table: 555__fixup_pv_table:
548 adr r0, 1f 556 adr r0, 1f
549 ldmia r0, {r3-r5, r7} 557 ldmia r0, {r3-r7}
550 sub r3, r0, r3 @ PHYS_OFFSET - PAGE_OFFSET 558 mvn ip, #0
559 subs r3, r0, r3 @ PHYS_OFFSET - PAGE_OFFSET
551 add r4, r4, r3 @ adjust table start address 560 add r4, r4, r3 @ adjust table start address
552 add r5, r5, r3 @ adjust table end address 561 add r5, r5, r3 @ adjust table end address
553 add r7, r7, r3 @ adjust __pv_phys_offset address 562 add r6, r6, r3 @ adjust __pv_phys_offset address
554 str r8, [r7] @ save computed PHYS_OFFSET to __pv_phys_offset 563 add r7, r7, r3 @ adjust __pv_offset address
564 str r8, [r6, #LOW_OFFSET] @ save computed PHYS_OFFSET to __pv_phys_offset
565 strcc ip, [r7, #HIGH_OFFSET] @ save to __pv_offset high bits
555 mov r6, r3, lsr #24 @ constant for add/sub instructions 566 mov r6, r3, lsr #24 @ constant for add/sub instructions
556 teq r3, r6, lsl #24 @ must be 16MiB aligned 567 teq r3, r6, lsl #24 @ must be 16MiB aligned
557THUMB( it ne @ cross section branch ) 568THUMB( it ne @ cross section branch )
558 bne __error 569 bne __error
559 str r6, [r7, #4] @ save to __pv_offset 570 str r3, [r7, #LOW_OFFSET] @ save to __pv_offset low bits
560 b __fixup_a_pv_table 571 b __fixup_a_pv_table
561ENDPROC(__fixup_pv_table) 572ENDPROC(__fixup_pv_table)
562 573
@@ -565,10 +576,19 @@ ENDPROC(__fixup_pv_table)
565 .long __pv_table_begin 576 .long __pv_table_begin
566 .long __pv_table_end 577 .long __pv_table_end
5672: .long __pv_phys_offset 5782: .long __pv_phys_offset
579 .long __pv_offset
568 580
569 .text 581 .text
570__fixup_a_pv_table: 582__fixup_a_pv_table:
583 adr r0, 3f
584 ldr r6, [r0]
585 add r6, r6, r3
586 ldr r0, [r6, #HIGH_OFFSET] @ pv_offset high word
587 ldr r6, [r6, #LOW_OFFSET] @ pv_offset low word
588 mov r6, r6, lsr #24
589 cmn r0, #1
571#ifdef CONFIG_THUMB2_KERNEL 590#ifdef CONFIG_THUMB2_KERNEL
591 moveq r0, #0x200000 @ set bit 21, mov to mvn instruction
572 lsls r6, #24 592 lsls r6, #24
573 beq 2f 593 beq 2f
574 clz r7, r6 594 clz r7, r6
@@ -582,18 +602,28 @@ __fixup_a_pv_table:
582 b 2f 602 b 2f
5831: add r7, r3 6031: add r7, r3
584 ldrh ip, [r7, #2] 604 ldrh ip, [r7, #2]
585 and ip, 0x8f00 605 tst ip, #0x4000
586 orr ip, r6 @ mask in offset bits 31-24 606 and ip, #0x8f00
607 orrne ip, r6 @ mask in offset bits 31-24
608 orreq ip, r0 @ mask in offset bits 7-0
587 strh ip, [r7, #2] 609 strh ip, [r7, #2]
610 ldrheq ip, [r7]
611 biceq ip, #0x20
612 orreq ip, ip, r0, lsr #16
613 strheq ip, [r7]
5882: cmp r4, r5 6142: cmp r4, r5
589 ldrcc r7, [r4], #4 @ use branch for delay slot 615 ldrcc r7, [r4], #4 @ use branch for delay slot
590 bcc 1b 616 bcc 1b
591 bx lr 617 bx lr
592#else 618#else
619 moveq r0, #0x400000 @ set bit 22, mov to mvn instruction
593 b 2f 620 b 2f
5941: ldr ip, [r7, r3] 6211: ldr ip, [r7, r3]
595 bic ip, ip, #0x000000ff 622 bic ip, ip, #0x000000ff
596 orr ip, ip, r6 @ mask in offset bits 31-24 623 tst ip, #0xf00 @ check the rotation field
624 orrne ip, ip, r6 @ mask in offset bits 31-24
625 biceq ip, ip, #0x400000 @ clear bit 22
626 orreq ip, ip, r0 @ mask in offset bits 7-0
597 str ip, [r7, r3] 627 str ip, [r7, r3]
5982: cmp r4, r5 6282: cmp r4, r5
599 ldrcc r7, [r4], #4 @ use branch for delay slot 629 ldrcc r7, [r4], #4 @ use branch for delay slot
@@ -602,28 +632,29 @@ __fixup_a_pv_table:
602#endif 632#endif
603ENDPROC(__fixup_a_pv_table) 633ENDPROC(__fixup_a_pv_table)
604 634
6353: .long __pv_offset
636
605ENTRY(fixup_pv_table) 637ENTRY(fixup_pv_table)
606 stmfd sp!, {r4 - r7, lr} 638 stmfd sp!, {r4 - r7, lr}
607 ldr r2, 2f @ get address of __pv_phys_offset
608 mov r3, #0 @ no offset 639 mov r3, #0 @ no offset
609 mov r4, r0 @ r0 = table start 640 mov r4, r0 @ r0 = table start
610 add r5, r0, r1 @ r1 = table size 641 add r5, r0, r1 @ r1 = table size
611 ldr r6, [r2, #4] @ get __pv_offset
612 bl __fixup_a_pv_table 642 bl __fixup_a_pv_table
613 ldmfd sp!, {r4 - r7, pc} 643 ldmfd sp!, {r4 - r7, pc}
614ENDPROC(fixup_pv_table) 644ENDPROC(fixup_pv_table)
615 645
616 .align
6172: .long __pv_phys_offset
618
619 .data 646 .data
620 .globl __pv_phys_offset 647 .globl __pv_phys_offset
621 .type __pv_phys_offset, %object 648 .type __pv_phys_offset, %object
622__pv_phys_offset: 649__pv_phys_offset:
623 .long 0 650 .quad 0
624 .size __pv_phys_offset, . - __pv_phys_offset 651 .size __pv_phys_offset, . -__pv_phys_offset
652
653 .globl __pv_offset
654 .type __pv_offset, %object
625__pv_offset: 655__pv_offset:
626 .long 0 656 .quad 0
657 .size __pv_offset, . -__pv_offset
627#endif 658#endif
628 659
629#include "head-common.S" 660#include "head-common.S"