diff options
author | Sricharan R <r.sricharan@ti.com> | 2013-07-29 10:56:22 -0400 |
---|---|---|
committer | Santosh Shilimkar <santosh.shilimkar@ti.com> | 2013-10-10 20:25:06 -0400 |
commit | f52bb722547f43caeaecbcc62db9f3c3b80ead9b (patch) | |
tree | 99c0fbf6fb94053b5ed81ebfc84ee7010871f05d /arch/arm/kernel | |
parent | c1a5f4f6733e39c3b780ff14ad6a68252475e7a9 (diff) |
ARM: mm: Correct virt_to_phys patching for 64 bit physical addresses
The current phys_to_virt patching mechanism works only for 32 bit
physical addresses and this patch extends the idea for 64bit physical
addresses.
The 64bit v2p patching mechanism patches the higher 8 bits of physical
address with a constant using 'mov' instruction and lower 32bits are patched
using 'add'. While this is correct, on those platforms where the lowmem addressable
physical memory spans across the 4GB boundary, a carry bit can be produced as a
result of the addition of the lower 32 bits. This has to be taken into account and added
into the upper 32 bits. The patched __pv_offset and va are added in the lower 32 bits, where
__pv_offset can be in two's complement form when PA_START < VA_START and that can
result in a false carry bit.
e.g
1) PA = 0x80000000; VA = 0xC0000000
__pv_offset = PA - VA = 0xC0000000 (2's complement)
2) PA = 0x2 80000000; VA = 0xC0000000
__pv_offset = PA - VA = 0x1 C0000000
So adding __pv_offset + VA should never result in a true overflow for (1).
So in order to differentiate between a true carry, a __pv_offset is extended
to 64bit and the upper 32bits will have 0xffffffff if __pv_offset is
2's complement. So 'mvn #0' is inserted instead of 'mov' while patching
for the same reason. Since the mov, add and sub instructions are to be patched
with different constants inside the same stub, the rotation field
of the opcode is used to differentiate between them.
So the above examples for v2p translation become, for VA=0xC0000000:
1) PA[63:32] = 0xffffffff
PA[31:0] = VA + 0xC0000000 --> results in a carry
PA[63:32] = PA[63:32] + carry
PA[63:0] = 0x0 80000000
2) PA[63:32] = 0x1
PA[31:0] = VA + 0xC0000000 --> results in a carry
PA[63:32] = PA[63:32] + carry
PA[63:0] = 0x2 80000000
The above ideas were suggested by Nicolas Pitre <nico@linaro.org> as
part of the review of first and second versions of the subject patch.
There is no corresponding change on the phys_to_virt() side, because
computations on the upper 32-bits would be discarded anyway.
Cc: Russell King <linux@arm.linux.org.uk>
Reviewed-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Sricharan R <r.sricharan@ti.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Diffstat (limited to 'arch/arm/kernel')
-rw-r--r-- | arch/arm/kernel/armksyms.c | 1 | ||||
-rw-r--r-- | arch/arm/kernel/head.S | 63 |
2 files changed, 48 insertions, 16 deletions
diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index 60d3b738d420..1f031ddd0667 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c | |||
@@ -155,4 +155,5 @@ EXPORT_SYMBOL(__gnu_mcount_nc); | |||
155 | 155 | ||
156 | #ifdef CONFIG_ARM_PATCH_PHYS_VIRT | 156 | #ifdef CONFIG_ARM_PATCH_PHYS_VIRT |
157 | EXPORT_SYMBOL(__pv_phys_offset); | 157 | EXPORT_SYMBOL(__pv_phys_offset); |
158 | EXPORT_SYMBOL(__pv_offset); | ||
158 | #endif | 159 | #endif |
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 2c7cc1e03473..54547947a4e9 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S | |||
@@ -536,6 +536,14 @@ ENTRY(fixup_smp) | |||
536 | ldmfd sp!, {r4 - r6, pc} | 536 | ldmfd sp!, {r4 - r6, pc} |
537 | ENDPROC(fixup_smp) | 537 | ENDPROC(fixup_smp) |
538 | 538 | ||
539 | #ifdef __ARMEB__ | || (review: the patch as committed read `__ARMEB_` — one trailing underscore — a macro the compiler never defines, so the big-endian LOW_OFFSET/HIGH_OFFSET pair was dead code; corrected to the standard `__ARMEB__` predefine)
540 | #define LOW_OFFSET 0x4 | ||
541 | #define HIGH_OFFSET 0x0 | ||
542 | #else | ||
543 | #define LOW_OFFSET 0x0 | ||
544 | #define HIGH_OFFSET 0x4 | ||
545 | #endif | ||
546 | |||
539 | #ifdef CONFIG_ARM_PATCH_PHYS_VIRT | 547 | #ifdef CONFIG_ARM_PATCH_PHYS_VIRT |
540 | 548 | ||
541 | /* __fixup_pv_table - patch the stub instructions with the delta between | 549 | /* __fixup_pv_table - patch the stub instructions with the delta between |
@@ -546,17 +554,20 @@ ENDPROC(fixup_smp) | |||
546 | __HEAD | 554 | __HEAD |
547 | __fixup_pv_table: | 555 | __fixup_pv_table: |
548 | adr r0, 1f | 556 | adr r0, 1f |
549 | ldmia r0, {r3-r5, r7} | 557 | ldmia r0, {r3-r7} |
550 | sub r3, r0, r3 @ PHYS_OFFSET - PAGE_OFFSET | 558 | mvn ip, #0 |
559 | subs r3, r0, r3 @ PHYS_OFFSET - PAGE_OFFSET | ||
551 | add r4, r4, r3 @ adjust table start address | 560 | add r4, r4, r3 @ adjust table start address |
552 | add r5, r5, r3 @ adjust table end address | 561 | add r5, r5, r3 @ adjust table end address |
553 | add r7, r7, r3 @ adjust __pv_phys_offset address | 562 | add r6, r6, r3 @ adjust __pv_phys_offset address |
554 | str r8, [r7] @ save computed PHYS_OFFSET to __pv_phys_offset | 563 | add r7, r7, r3 @ adjust __pv_offset address |
564 | str r8, [r6, #LOW_OFFSET] @ save computed PHYS_OFFSET to __pv_phys_offset | ||
565 | strcc ip, [r7, #HIGH_OFFSET] @ save to __pv_offset high bits | ||
555 | mov r6, r3, lsr #24 @ constant for add/sub instructions | 566 | mov r6, r3, lsr #24 @ constant for add/sub instructions |
556 | teq r3, r6, lsl #24 @ must be 16MiB aligned | 567 | teq r3, r6, lsl #24 @ must be 16MiB aligned |
557 | THUMB( it ne @ cross section branch ) | 568 | THUMB( it ne @ cross section branch ) |
558 | bne __error | 569 | bne __error |
559 | str r6, [r7, #4] @ save to __pv_offset | 570 | str r3, [r7, #LOW_OFFSET] @ save to __pv_offset low bits |
560 | b __fixup_a_pv_table | 571 | b __fixup_a_pv_table |
561 | ENDPROC(__fixup_pv_table) | 572 | ENDPROC(__fixup_pv_table) |
562 | 573 | ||
@@ -565,10 +576,19 @@ ENDPROC(__fixup_pv_table) | |||
565 | .long __pv_table_begin | 576 | .long __pv_table_begin |
566 | .long __pv_table_end | 577 | .long __pv_table_end |
567 | 2: .long __pv_phys_offset | 578 | 2: .long __pv_phys_offset |
579 | .long __pv_offset | ||
568 | 580 | ||
569 | .text | 581 | .text |
570 | __fixup_a_pv_table: | 582 | __fixup_a_pv_table: |
583 | adr r0, 3f | ||
584 | ldr r6, [r0] | ||
585 | add r6, r6, r3 | ||
586 | ldr r0, [r6, #HIGH_OFFSET] @ pv_offset high word | ||
587 | ldr r6, [r6, #LOW_OFFSET] @ pv_offset low word | ||
588 | mov r6, r6, lsr #24 | ||
589 | cmn r0, #1 | ||
571 | #ifdef CONFIG_THUMB2_KERNEL | 590 | #ifdef CONFIG_THUMB2_KERNEL |
591 | moveq r0, #0x200000 @ set bit 21, mov to mvn instruction | ||
572 | lsls r6, #24 | 592 | lsls r6, #24 |
573 | beq 2f | 593 | beq 2f |
574 | clz r7, r6 | 594 | clz r7, r6 |
@@ -582,18 +602,28 @@ __fixup_a_pv_table: | |||
582 | b 2f | 602 | b 2f |
583 | 1: add r7, r3 | 603 | 1: add r7, r3 |
584 | ldrh ip, [r7, #2] | 604 | ldrh ip, [r7, #2] |
585 | and ip, 0x8f00 | 605 | tst ip, #0x4000 |
586 | orr ip, r6 @ mask in offset bits 31-24 | 606 | and ip, #0x8f00 |
607 | orrne ip, r6 @ mask in offset bits 31-24 | ||
608 | orreq ip, r0 @ mask in offset bits 7-0 | ||
587 | strh ip, [r7, #2] | 609 | strh ip, [r7, #2] |
610 | ldrheq ip, [r7] | ||
611 | biceq ip, #0x20 | ||
612 | orreq ip, ip, r0, lsr #16 | ||
613 | strheq ip, [r7] | ||
588 | 2: cmp r4, r5 | 614 | 2: cmp r4, r5 |
589 | ldrcc r7, [r4], #4 @ use branch for delay slot | 615 | ldrcc r7, [r4], #4 @ use branch for delay slot |
590 | bcc 1b | 616 | bcc 1b |
591 | bx lr | 617 | bx lr |
592 | #else | 618 | #else |
619 | moveq r0, #0x400000 @ set bit 22, mov to mvn instruction | ||
593 | b 2f | 620 | b 2f |
594 | 1: ldr ip, [r7, r3] | 621 | 1: ldr ip, [r7, r3] |
595 | bic ip, ip, #0x000000ff | 622 | bic ip, ip, #0x000000ff |
596 | orr ip, ip, r6 @ mask in offset bits 31-24 | 623 | tst ip, #0xf00 @ check the rotation field |
624 | orrne ip, ip, r6 @ mask in offset bits 31-24 | ||
625 | biceq ip, ip, #0x400000 @ clear bit 22 | ||
626 | orreq ip, ip, r0 @ mask in offset bits 7-0 | ||
597 | str ip, [r7, r3] | 627 | str ip, [r7, r3] |
598 | 2: cmp r4, r5 | 628 | 2: cmp r4, r5 |
599 | ldrcc r7, [r4], #4 @ use branch for delay slot | 629 | ldrcc r7, [r4], #4 @ use branch for delay slot |
@@ -602,28 +632,29 @@ __fixup_a_pv_table: | |||
602 | #endif | 632 | #endif |
603 | ENDPROC(__fixup_a_pv_table) | 633 | ENDPROC(__fixup_a_pv_table) |
604 | 634 | ||
635 | 3: .long __pv_offset | ||
636 | |||
605 | ENTRY(fixup_pv_table) | 637 | ENTRY(fixup_pv_table) |
606 | stmfd sp!, {r4 - r7, lr} | 638 | stmfd sp!, {r4 - r7, lr} |
607 | ldr r2, 2f @ get address of __pv_phys_offset | ||
608 | mov r3, #0 @ no offset | 639 | mov r3, #0 @ no offset |
609 | mov r4, r0 @ r0 = table start | 640 | mov r4, r0 @ r0 = table start |
610 | add r5, r0, r1 @ r1 = table size | 641 | add r5, r0, r1 @ r1 = table size |
611 | ldr r6, [r2, #4] @ get __pv_offset | ||
612 | bl __fixup_a_pv_table | 642 | bl __fixup_a_pv_table |
613 | ldmfd sp!, {r4 - r7, pc} | 643 | ldmfd sp!, {r4 - r7, pc} |
614 | ENDPROC(fixup_pv_table) | 644 | ENDPROC(fixup_pv_table) |
615 | 645 | ||
616 | .align | ||
617 | 2: .long __pv_phys_offset | ||
618 | |||
619 | .data | 646 | .data |
620 | .globl __pv_phys_offset | 647 | .globl __pv_phys_offset |
621 | .type __pv_phys_offset, %object | 648 | .type __pv_phys_offset, %object |
622 | __pv_phys_offset: | 649 | __pv_phys_offset: |
623 | .long 0 | 650 | .quad 0 |
624 | .size __pv_phys_offset, . - __pv_phys_offset | 651 | .size __pv_phys_offset, . -__pv_phys_offset |
652 | |||
653 | .globl __pv_offset | ||
654 | .type __pv_offset, %object | ||
625 | __pv_offset: | 655 | __pv_offset: |
626 | .long 0 | 656 | .quad 0 |
657 | .size __pv_offset, . -__pv_offset | ||
627 | #endif | 658 | #endif |
628 | 659 | ||
629 | #include "head-common.S" | 660 | #include "head-common.S" |