aboutsummaryrefslogtreecommitdiffstats
path: root/arch/arm/kernel/entry-armv.S
diff options
context:
space:
mode:
authorNicolas Pitre <nico@org.rmk.(none)>2005-04-29 17:08:33 -0400
committerRussell King <rmk+kernel@arm.linux.org.uk>2005-04-29 17:08:33 -0400
commit2d2669b62984b8d76b05a6a045390a3250317d21 (patch)
tree822f62adf59f2e6302a16289cc99b0f9b873cfb4 /arch/arm/kernel/entry-armv.S
parent3a1e501511a1e2c665c566939047794dcf86466b (diff)
[PATCH] ARM: 2651/3: kernel helpers for NPTL support
Patch from Nicolas Pitre This patch entirely reworks the kernel assistance for NPTL on ARM. In particular this provides an efficient way to retrieve the TLS value and perform atomic operations without any instruction emulation nor special system call. This even allows for pre ARMv6 binaries to be forward compatible with SMP systems without any penalty. The problematic and performance critical operations are performed through segment of kernel provided user code reachable from user space at a fixed address in kernel memory. Those fixed entry points are within the vector page so we basically get it for free as no extra memory page is required and nothing else may be mapped at that location anyway. This is different from (but doesn't preclude) a full blown VDSO implementation, however a VDSO would prevent some assembly tricks with constants that allows for efficient branching to those code segments. And since those code segments only use a few cycles before returning to user code, the overhead of a VDSO far call would add a significant overhead to such minimalistic operations. The ARM_NR_set_tls syscall also changed number. This is done for two reasons: 1) this patch changes the way the TLS value was previously meant to be retrieved, therefore we ensure whatever library using the old way gets fixed (they only exist in private tree at the moment since the NPTL work is still progressing). 2) the previous number was allocated in a range causing an undefined instruction trap on kernels not supporting that syscall and it was determined that allocating it in a range returning -ENOSYS would be much nicer for libraries trying to determine if the feature is present or not. Signed-off-by: Nicolas Pitre Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Diffstat (limited to 'arch/arm/kernel/entry-armv.S')
-rw-r--r--arch/arm/kernel/entry-armv.S213
1 files changed, 212 insertions, 1 deletions
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 2a5c3fe09a95..080df907f242 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -269,6 +269,12 @@ __pabt_svc:
269 add r5, sp, #S_PC 269 add r5, sp, #S_PC
270 ldmia r7, {r2 - r4} @ Get USR pc, cpsr 270 ldmia r7, {r2 - r4} @ Get USR pc, cpsr
271 271
272#if __LINUX_ARM_ARCH__ < 6
273 @ make sure our user space atomic helper is aborted
274 cmp r2, #VIRT_OFFSET
275 bichs r3, r3, #PSR_Z_BIT
276#endif
277
272 @ 278 @
273 @ We are now ready to fill in the remaining blanks on the stack: 279 @ We are now ready to fill in the remaining blanks on the stack:
274 @ 280 @
@@ -499,8 +505,12 @@ ENTRY(__switch_to)
499 mra r4, r5, acc0 505 mra r4, r5, acc0
500 stmia ip, {r4, r5} 506 stmia ip, {r4, r5}
501#endif 507#endif
508#ifdef CONFIG_HAS_TLS_REG
509 mcr p15, 0, r3, c13, c0, 3 @ set TLS register
510#else
502 mov r4, #0xffff0fff 511 mov r4, #0xffff0fff
503 str r3, [r4, #-3] @ Set TLS ptr 512 str r3, [r4, #-15] @ TLS val at 0xffff0ff0
513#endif
504 mcr p15, 0, r6, c3, c0, 0 @ Set domain register 514 mcr p15, 0, r6, c3, c0, 0 @ Set domain register
505#ifdef CONFIG_VFP 515#ifdef CONFIG_VFP
506 @ Always disable VFP so we can lazily save/restore the old 516 @ Always disable VFP so we can lazily save/restore the old
@@ -519,6 +529,207 @@ ENTRY(__switch_to)
519 ldmib r2, {r4 - sl, fp, sp, pc} @ Load all regs saved previously 529 ldmib r2, {r4 - sl, fp, sp, pc} @ Load all regs saved previously
520 530
521 __INIT 531 __INIT
532
533/*
534 * User helpers.
535 *
536 * These are segment of kernel provided user code reachable from user space
537 * at a fixed address in kernel memory. This is used to provide user space
538 * with some operations which require kernel help because of unimplemented
539 * native feature and/or instructions in many ARM CPUs. The idea is for
540 * this code to be executed directly in user mode for best efficiency but
541 * which is too intimate with the kernel counter part to be left to user
542 * libraries. In fact this code might even differ from one CPU to another
543 * depending on the available instruction set and restrictions like on
544 * SMP systems. In other words, the kernel reserves the right to change
545 * this code as needed without warning. Only the entry points and their
546 * results are guaranteed to be stable.
547 *
548 * Each segment is 32-byte aligned and will be moved to the top of the high
549 * vector page. New segments (if ever needed) must be added in front of
550 * existing ones. This mechanism should be used only for things that are
551 * really small and justified, and not be abused freely.
552 *
553 * User space is expected to implement those things inline when optimizing
554 * for a processor that has the necessary native support, but only if such
555 * resulting binaries are already to be incompatible with earlier ARM
556 * processors due to the use of unsupported instructions other than what
557 * is provided here. In other words don't make binaries unable to run on
558 * earlier processors just for the sake of not using these kernel helpers
559 * if your compiled code is not going to use the new instructions for other
560 * purpose.
561 */
562
563 .align 5
564 .globl __kuser_helper_start
565__kuser_helper_start:
566
567/*
568 * Reference prototype:
569 *
570 * int __kernel_cmpxchg(int oldval, int newval, int *ptr)
571 *
572 * Input:
573 *
574 * r0 = oldval
575 * r1 = newval
576 * r2 = ptr
577 * lr = return address
578 *
579 * Output:
580 *
581 * r0 = returned value (zero or non-zero)
582 * C flag = set if r0 == 0, clear if r0 != 0
583 *
584 * Clobbered:
585 *
586 * r3, ip, flags
587 *
588 * Definition and user space usage example:
589 *
590 * typedef int (__kernel_cmpxchg_t)(int oldval, int newval, int *ptr);
591 * #define __kernel_cmpxchg (*(__kernel_cmpxchg_t *)0xffff0fc0)
592 *
593 * Atomically store newval in *ptr if *ptr is equal to oldval for user space.
594 * Return zero if *ptr was changed or non-zero if no exchange happened.
595 * The C flag is also set if *ptr was changed to allow for assembly
596 * optimization in the calling code.
597 *
598 * For example, a user space atomic_add implementation could look like this:
599 *
600 * #define atomic_add(ptr, val) \
601 * ({ register unsigned int *__ptr asm("r2") = (ptr); \
602 * register unsigned int __result asm("r1"); \
603 * asm volatile ( \
604 * "1: @ atomic_add\n\t" \
605 * "ldr r0, [r2]\n\t" \
606 * "mov r3, #0xffff0fff\n\t" \
607 * "add lr, pc, #4\n\t" \
608 * "add r1, r0, %2\n\t" \
609 * "add pc, r3, #(0xffff0fc0 - 0xffff0fff)\n\t" \
610 * "bcc 1b" \
611 * : "=&r" (__result) \
612 * : "r" (__ptr), "rIL" (val) \
613 * : "r0","r3","ip","lr","cc","memory" ); \
614 * __result; })
615 */
616
617__kuser_cmpxchg: @ 0xffff0fc0
618
619#if __LINUX_ARM_ARCH__ < 6
620
621#ifdef CONFIG_SMP /* sanity check */
622#error "CONFIG_SMP on a machine supporting pre-ARMv6 processors?"
623#endif
624
625 /*
626 * Theory of operation:
627 *
628 * We set the Z flag before loading oldval. If ever an exception
629 * occurs we can not be sure the loaded value will still be the same
630 * when the exception returns, therefore the user exception handler
631 * will clear the Z flag whenever the interrupted user code was
632 * actually from the kernel address space (see the usr_entry macro).
633 *
634 * The post-increment on the str is used to prevent a race with an
635 * exception happening just after the str instruction which would
636 * clear the Z flag although the exchange was done.
637 */
638 teq ip, ip @ set Z flag
639 ldr ip, [r2] @ load current val
640 add r3, r2, #1 @ prepare store ptr
641 teqeq ip, r0 @ compare with oldval if still allowed
642 streq r1, [r3, #-1]! @ store newval if still allowed
643 subs r0, r2, r3 @ if r2 == r3 the str occured
644 mov pc, lr
645
646#else
647
648 ldrex r3, [r2]
649 subs r3, r3, r0
650 strexeq r3, r1, [r2]
651 rsbs r0, r3, #0
652 mov pc, lr
653
654#endif
655
656 .align 5
657
658/*
659 * Reference prototype:
660 *
661 * int __kernel_get_tls(void)
662 *
663 * Input:
664 *
665 * lr = return address
666 *
667 * Output:
668 *
669 * r0 = TLS value
670 *
671 * Clobbered:
672 *
673 * the Z flag might be lost
674 *
675 * Definition and user space usage example:
676 *
677 * typedef int (__kernel_get_tls_t)(void);
678 * #define __kernel_get_tls (*(__kernel_get_tls_t *)0xffff0fe0)
679 *
680 * Get the TLS value as previously set via the __ARM_NR_set_tls syscall.
681 *
682 * This could be used as follows:
683 *
684 * #define __kernel_get_tls() \
685 * ({ register unsigned int __val asm("r0"); \
686 * asm( "mov r0, #0xffff0fff; mov lr, pc; sub pc, r0, #31" \
687 * : "=r" (__val) : : "lr","cc" ); \
688 * __val; })
689 */
690
691__kuser_get_tls: @ 0xffff0fe0
692
693#ifndef CONFIG_HAS_TLS_REG
694
695#ifdef CONFIG_SMP /* sanity check */
696#error "CONFIG_SMP without CONFIG_HAS_TLS_REG is wrong"
697#endif
698
699 ldr r0, [pc, #(16 - 8)] @ TLS stored at 0xffff0ff0
700 mov pc, lr
701
702#else
703
704 mrc p15, 0, r0, c13, c0, 3 @ read TLS register
705 mov pc, lr
706
707#endif
708
709 .rep 5
710 .word 0 @ pad up to __kuser_helper_version
711 .endr
712
713/*
714 * Reference declaration:
715 *
716 * extern unsigned int __kernel_helper_version;
717 *
718 * Definition and user space usage example:
719 *
720 * #define __kernel_helper_version (*(unsigned int *)0xffff0ffc)
721 *
722 * User space may read this to determine the curent number of helpers
723 * available.
724 */
725
726__kuser_helper_version: @ 0xffff0ffc
727 .word ((__kuser_helper_end - __kuser_helper_start) >> 5)
728
729 .globl __kuser_helper_end
730__kuser_helper_end:
731
732
522/* 733/*
523 * Vector stubs. 734 * Vector stubs.
524 * 735 *