author     Benjamin Herrenschmidt <benh@kernel.crashing.org>   2012-03-06 02:27:59 -0500
committer  Benjamin Herrenschmidt <benh@kernel.crashing.org>   2012-03-08 21:25:06 -0500
commit     7230c5644188cd9e3fb380cc97dde00c464a3ba7
tree       8e71a0a2e8167e21b46e96165b7dd53fa7e7b7f2   /arch/powerpc/kernel/entry_64.S
parent     d9ada91ae2969ae6b6dc3574fd08a6ebda5df766
powerpc: Rework lazy-interrupt handling
The current implementation of lazy interrupt handling has some issues that
this tries to address. We don't do the various workarounds we need to do
when re-enabling interrupts in some cases such as when returning from an
interrupt, and thus we may still lose or get delayed decrementer or
doorbell interrupts.

The current scheme also makes it much harder to handle the external "edge"
interrupts provided by some BookE processors when using the EPR facility
(External Proxy) and the Freescale Hypervisor.

Additionally, we tend to keep interrupts hard disabled in a number of
cases, such as decrementer interrupts, external interrupts, or when a
masked decrementer interrupt is pending. This is sub-optimal.

This is an attempt at fixing it all in one go by reworking the way we do
the lazy interrupt disabling from the ground up.

The base idea is to replace the "hard_enabled" field with an
"irq_happened" field in which we store a bit mask of which interrupts
occurred while soft-disabled.

When re-enabling, either via arch_local_irq_restore() or when returning
from an interrupt, we can now decide what to do by testing bits in that
field.

We then implement replaying of the missed interrupts either by re-using
the existing exception frame (in the exception exit case) or via the
creation of a new one from an assembly trampoline (in the
arch_local_irq_enable case). This removes the need to play with the
decrementer to try to create fake interrupts, among other things.

In addition, this adds a few refinements:

 - We no longer hard disable decrementer interrupts that occur while
   soft-disabled. We now simply bump the decrementer back to max (on
   BookS) or leave it stopped (on BookE) and continue with hard
   interrupts enabled, which means that we'll potentially get better
   sample quality from performance monitor interrupts.

 - Timer, decrementer and doorbell interrupts now hard-enable shortly
   after removing the source of the interrupt, which means they no
   longer run entirely hard disabled. Again, this will improve perf
   sample quality.

 - On Book3E 64-bit, we now make the performance monitor interrupt act
   as an NMI like Book3S (the necessary C code for that to work appears
   to already be present in the FSL perf code, notably calling nmi_enter
   instead of irq_enter). (This also fixes a bug where BookE perfmon
   interrupts could clobber r14 ... oops.)

 - We could make "masked" decrementer interrupts act as NMIs when doing
   timer-based perf sampling to improve the sample quality.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---

v2: - Add hard-enable to decrementer, timer and doorbells
    - Fix CR clobber in masked irq handling on BookE
    - Make embedded perf interrupt act as an NMI
    - Add a PACA_HAPPENED_EE_EDGE for use by FSL if they want to
      retrigger an interrupt without preventing hard-enable

v3: - Fix or vs. ori bug on Book3E
    - Fix enabling of interrupts for some exceptions on Book3E

v4: - Fix resend of doorbells on return from interrupt on Book3E

v5: - Rebased on top of my latest series, which involves some significant
      rework of some aspects of the patch.

v6: - 32-bit compile fix
    - More compile fixes with various .config combos
    - Factor out the asm code to soft-disable interrupts
    - Remove the C wrapper around preempt_schedule_irq

v7: - Fix a bug with hard irq state tracking on native power7
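To make the bitmask-and-replay scheme described in the commit message concrete,
here is a minimal C sketch of the idea. It is an illustration only, not the
patch's code: of the names used, only PACA_IRQ_HARD_DIS appears in the diff
below; the other bit names, the struct and the replay_*() helpers are
hypothetical stand-ins.

/*
 * Minimal sketch of the lazy-disable bookkeeping described above.
 * Illustration only: apart from PACA_IRQ_HARD_DIS (used in the diff
 * below), the bit names, the struct and the replay_*() helpers are
 * hypothetical stand-ins, not the kernel's actual definitions.
 */
#define PACA_IRQ_HARD_DIS  0x01  /* interrupts were hard-disabled */
#define PACA_IRQ_DEC       0x02  /* decrementer fired while soft-disabled */
#define PACA_IRQ_EE        0x04  /* external interrupt fired */
#define PACA_IRQ_DBELL     0x08  /* doorbell fired (BookE) */

struct paca_sketch {
	unsigned char soft_enabled;   /* lazy (soft) interrupt mask state */
	unsigned char irq_happened;   /* what was latched while soft-disabled */
};

static struct paca_sketch paca;

static void replay_external(void)    { /* would end up in do_IRQ() */ }
static void replay_decrementer(void) { /* would end up in timer_interrupt() */ }
static void replay_doorbell(void)    { /* would end up in doorbell_exception() */ }

/* Low-level interrupt entry taken while soft-disabled: latch and return. */
static void masked_interrupt(unsigned char source_bit)
{
	paca.irq_happened |= source_bit | PACA_IRQ_HARD_DIS;
	/* ...hard-disable (or bump the decrementer) and return from exception... */
}

/* arch_local_irq_restore()-style re-enable path. */
static void sketch_irq_restore(int enable)
{
	if (!enable) {                /* soft-disabling is the easy case */
		paca.soft_enabled = 0;
		return;
	}
	if (!paca.irq_happened) {     /* nothing latched: just flip the flag */
		paca.soft_enabled = 1;
		return;
	}
	/*
	 * Something was latched: replay it. The real code does this by
	 * building an exception frame from an assembly trampoline (or by
	 * re-using the existing frame on the exception-exit path).
	 */
	if (paca.irq_happened & PACA_IRQ_EE)
		replay_external();
	if (paca.irq_happened & PACA_IRQ_DEC)
		replay_decrementer();
	if (paca.irq_happened & PACA_IRQ_DBELL)
		replay_doorbell();
	paca.irq_happened = 0;
	paca.soft_enabled = 1;
}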
Diffstat (limited to 'arch/powerpc/kernel/entry_64.S')
-rw-r--r--  arch/powerpc/kernel/entry_64.S | 153
1 file changed, 119 insertions(+), 34 deletions(-)
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index c513beb78b3..f8a7a1a1a9f 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -32,6 +32,7 @@
 #include <asm/ptrace.h>
 #include <asm/irqflags.h>
 #include <asm/ftrace.h>
+#include <asm/hw_irq.h>
 
 /*
  * System calls.
@@ -583,18 +584,72 @@ _GLOBAL(ret_from_except_lite)
 	bne	do_work
 #endif /* !CONFIG_PREEMPT */
 
+	.globl	fast_exc_return_irq
+fast_exc_return_irq:
 restore:
+	/*
+	 * This is the main kernel exit path, we first check if we
+	 * have to change our interrupt state.
+	 */
 	ld	r5,SOFTE(r1)
-	TRACE_AND_RESTORE_IRQ(r5);
+	lbz	r6,PACASOFTIRQEN(r13)
+	cmpwi	cr1,r5,0
+	cmpw	cr0,r5,r6
+	beq	cr0,4f
+
+	/* We do, handle disable first, which is easy */
+	bne	cr1,3f;
+	li	r0,0
+	stb	r0,PACASOFTIRQEN(r13);
+	TRACE_DISABLE_INTS
+	b	4f
 
-	/* extract EE bit and use it to restore paca->hard_enabled */
-	ld	r3,_MSR(r1)
-	rldicl	r4,r3,49,63		/* r0 = (r3 >> 15) & 1 */
-	stb	r4,PACAHARDIRQEN(r13)
+3:	/*
+	 * We are about to soft-enable interrupts (we are hard disabled
+	 * at this point). We check if there's anything that needs to
+	 * be replayed first.
+	 */
+	lbz	r0,PACAIRQHAPPENED(r13)
+	cmpwi	cr0,r0,0
+	bne-	restore_check_irq_replay
+
+	/*
+	 * Get here when nothing happened while soft-disabled, just
+	 * soft-enable and move-on. We will hard-enable as a side
+	 * effect of rfi
+	 */
+restore_no_replay:
+	TRACE_ENABLE_INTS
+	li	r0,1
+	stb	r0,PACASOFTIRQEN(r13);
 
+	/*
+	 * Final return path. BookE is handled in a different file
+	 */
+4:
 #ifdef CONFIG_PPC_BOOK3E
 	b	.exception_return_book3e
 #else
+	/*
+	 * Clear the reservation. If we know the CPU tracks the address of
+	 * the reservation then we can potentially save some cycles and use
+	 * a larx. On POWER6 and POWER7 this is significantly faster.
+	 */
+BEGIN_FTR_SECTION
+	stdcx.	r0,0,r1		/* to clear the reservation */
+FTR_SECTION_ELSE
+	ldarx	r4,0,r1
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
+
+	/*
+	 * Some code path such as load_up_fpu or altivec return directly
+	 * here. They run entirely hard disabled and do not alter the
+	 * interrupt state. They also don't use lwarx/stwcx. and thus
+	 * are known not to leave dangling reservations.
+	 */
+	.globl	fast_exception_return
+fast_exception_return:
+	ld	r3,_MSR(r1)
 	ld	r4,_CTR(r1)
 	ld	r0,_LINK(r1)
 	mtctr	r4
@@ -608,17 +663,6 @@ restore:
 	beq-	unrecov_restore
 
 	/*
-	 * Clear the reservation. If we know the CPU tracks the address of
-	 * the reservation then we can potentially save some cycles and use
-	 * a larx. On POWER6 and POWER7 this is significantly faster.
-	 */
-BEGIN_FTR_SECTION
-	stdcx.	r0,0,r1		/* to clear the reservation */
-FTR_SECTION_ELSE
-	ldarx	r4,0,r1
-ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
-
-	/*
 	 * Clear RI before restoring r13. If we are returning to
 	 * userspace and we take an exception after restoring r13,
 	 * we end up corrupting the userspace r13 value.
@@ -629,7 +673,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
 
 	/*
 	 * r13 is our per cpu area, only restore it if we are returning to
-	 * userspace
+	 * userspace the value stored in the stack frame may belong to
+	 * another CPU.
 	 */
 	andi.	r0,r3,MSR_PR
 	beq	1f
@@ -654,6 +699,55 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
 
 #endif /* CONFIG_PPC_BOOK3E */
 
+	/*
+	 * Something did happen, check if a re-emit is needed
+	 * (this also clears paca->irq_happened)
+	 */
+restore_check_irq_replay:
+	/* XXX: We could implement a fast path here where we check
+	 * for irq_happened being just 0x01, in which case we can
+	 * clear it and return. That means that we would potentially
+	 * miss a decrementer having wrapped all the way around.
+	 *
+	 * Still, this might be useful for things like hash_page
+	 */
+	bl	.__check_irq_replay
+	cmpwi	cr0,r3,0
+	beq	restore_no_replay
+
+	/*
+	 * We need to re-emit an interrupt. We do so by re-using our
+	 * existing exception frame. We first change the trap value,
+	 * but we need to ensure we preserve the low nibble of it
+	 */
+	ld	r4,_TRAP(r1)
+	clrldi	r4,r4,60
+	or	r4,r4,r3
+	std	r4,_TRAP(r1)
+
+	/*
+	 * Then find the right handler and call it. Interrupts are
+	 * still soft-disabled and we keep them that way.
+	 */
+	cmpwi	cr0,r3,0x500
+	bne	1f
+	addi	r3,r1,STACK_FRAME_OVERHEAD;
+	bl	.do_IRQ
+	b	.ret_from_except
+1:	cmpwi	cr0,r3,0x900
+	bne	1f
+	addi	r3,r1,STACK_FRAME_OVERHEAD;
+	bl	.timer_interrupt
+	b	.ret_from_except
+#ifdef CONFIG_PPC_BOOK3E
+1:	cmpwi	cr0,r3,0x280
+	bne	1f
+	addi	r3,r1,STACK_FRAME_OVERHEAD;
+	bl	.doorbell_exception
+	b	.ret_from_except
+#endif /* CONFIG_PPC_BOOK3E */
+1:	b	.ret_from_except /* What else to do here ? */
+
 do_work:
 #ifdef CONFIG_PREEMPT
 	andi.	r0,r3,MSR_PR	/* Returning to user mode? */
@@ -666,18 +760,11 @@ do_work:
 	crandc	eq,cr1*4+eq,eq
 	bne	restore
 
-	/* Here we are preempting the current task.
-	 *
-	 * Ensure interrupts are soft-disabled. We also properly mark
-	 * the PACA to reflect the fact that they are hard-disabled
-	 * and trace the change
+	/*
+	 * Here we are preempting the current task. We want to make
+	 * sure we are soft-disabled first
 	 */
-	li	r0,0
-	stb	r0,PACASOFTIRQEN(r13)
-	stb	r0,PACAHARDIRQEN(r13)
-	TRACE_DISABLE_INTS
-
-	/* Call the scheduler with soft IRQs off */
+	SOFT_DISABLE_INTS(r3,r4)
 1:	bl	.preempt_schedule_irq
 
 	/* Hard-disable interrupts again (and update PACA) */
@@ -687,8 +774,8 @@ do_work:
 	ld	r10,PACAKMSR(r13) /* Get kernel MSR without EE */
 	mtmsrd	r10,1
 #endif /* CONFIG_PPC_BOOK3E */
-	li	r0,0
-	stb	r0,PACAHARDIRQEN(r13)
+	li	r0,PACA_IRQ_HARD_DIS
+	stb	r0,PACAIRQHAPPENED(r13)
 
 	/* Re-test flags and eventually loop */
 	clrrdi	r9,r1,THREAD_SHIFT
@@ -710,14 +797,12 @@ user_work:
 
 	andi.	r0,r4,_TIF_NEED_RESCHED
 	beq	1f
-	li	r5,1
-	TRACE_AND_RESTORE_IRQ(r5);
+	bl	.restore_interrupts
 	bl	.schedule
 	b	.ret_from_except_lite
 
 1:	bl	.save_nvgprs
-	li	r5,1
-	TRACE_AND_RESTORE_IRQ(r5);
+	bl	.restore_interrupts
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	.do_notify_resume
 	b	.ret_from_except
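For context on the bl .__check_irq_replay / trap-rewrite sequence added in
restore_check_irq_replay above, the following is a hedged C sketch of what
such a helper could look like, matching how the assembly consumes its return
value: 0 means nothing to replay, otherwise it is the trap vector to re-emit
(0x500 external, 0x900 decrementer, 0x280 doorbell on Book3E). The real
__check_irq_replay() lives in the C irq code and differs in detail; the bit
names here are the illustrative ones from the sketch after the commit
message, not the kernel's actual definitions.

/*
 * Hedged sketch of a __check_irq_replay()-style helper. Returns the trap
 * vector of one pending interrupt and clears its bit, or 0 when nothing
 * is left to replay. Not the kernel's actual implementation.
 */
static unsigned int sketch_check_irq_replay(void)
{
	unsigned char happened = paca.irq_happened;

	/*
	 * We are about to replay with a real exception frame, so drop the
	 * "hard disabled" marker; the real code does more here (e.g. it
	 * also re-checks the decrementer).
	 */
	paca.irq_happened = happened & ~PACA_IRQ_HARD_DIS;

	if (happened & PACA_IRQ_EE) {
		paca.irq_happened &= ~PACA_IRQ_EE;
		return 0x500;		/* external interrupt -> do_IRQ */
	}
	if (happened & PACA_IRQ_DEC) {
		paca.irq_happened &= ~PACA_IRQ_DEC;
		return 0x900;		/* decrementer -> timer_interrupt */
	}
	if (happened & PACA_IRQ_DBELL) {
		paca.irq_happened &= ~PACA_IRQ_DBELL;
		return 0x280;		/* doorbell (Book3E) -> doorbell_exception */
	}
	return 0;			/* nothing left: soft-enable and return */
}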