Diffstat (limited to 'arch')
-rw-r--r--   arch/Kconfig                          |  14
-rw-r--r--   arch/sparc/include/asm/jump_label.h   |   1
-rw-r--r--   arch/x86/Makefile_32.cpu              |  13
-rw-r--r--   arch/x86/kernel/alternative.c         |  69
-rw-r--r--   arch/x86/kernel/apic/io_apic.c        |   2
5 files changed, 43 insertions, 56 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 53d7f619a1b9..8bf0fa652eb6 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -42,6 +42,20 @@ config KPROBES
 	  for kernel debugging, non-intrusive instrumentation and testing.
 	  If in doubt, say "N".
 
+config JUMP_LABEL
+	bool "Optimize trace point call sites"
+	depends on HAVE_ARCH_JUMP_LABEL
+	help
+	  If it is detected that the compiler has support for "asm goto",
+	  the kernel will compile trace point locations with just a
+	  nop instruction. When trace points are enabled, the nop will
+	  be converted to a jump to the trace function. This technique
+	  lowers overhead and stress on the branch prediction of the
+	  processor.
+
+	  On i386, options added to the compiler flags may increase
+	  the size of the kernel slightly.
+
 config OPTPROBES
 	def_bool y
 	depends on KPROBES && HAVE_OPTPROBES
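The help text above describes the "asm goto" technique: the call site compiles to a single nop, and the branch target is recorded in a table so the nop can later be patched into a jump. Below is a minimal userspace sketch of that idea, not the kernel's jump label implementation; the STATIC_BRANCH macro, the __my_jump_table section name and the my_jump_entry struct are invented for illustration. It assumes gcc with asm goto support (>= 4.5) on x86-64 and GNU ld, which provides the __start_/__stop_ section bound symbols.

	/* Sketch: record a patchable 5-byte nop per call site, like a trace point. */
	#include <stdio.h>

	struct my_jump_entry {
		void *code;	/* address of the 5-byte nop at the call site */
		void *target;	/* address of the out-of-line slow-path label */
	};

	/* Section bounds generated by GNU ld for sections whose names are
	 * valid C identifiers. */
	extern struct my_jump_entry __start___my_jump_table[];
	extern struct my_jump_entry __stop___my_jump_table[];

	#define STATIC_BRANCH(label)						\
		asm goto("1: .byte 0x0f, 0x1f, 0x44, 0x00, 0x00\n\t"	/* 5-byte P6 nop */ \
			 ".pushsection __my_jump_table, \"aw\"\n\t"		\
			 ".quad 1b, %l[" #label "]\n\t"	/* record site and target */ \
			 ".popsection\n\t"					\
			 : : : : label)

	static void possibly_traced(void)
	{
		STATIC_BRANCH(do_trace);	/* compiles to a single nop */
		return;				/* fast path: fall straight through */
	do_trace:
		puts("slow path: the trace function would be called here");
	}

	int main(void)
	{
		struct my_jump_entry *e;

		possibly_traced();	/* nop not patched, so nothing is printed */

		/* This is the table the kernel would walk when a trace point is
		 * enabled, rewriting each recorded nop into "jmp target". */
		for (e = __start___my_jump_table; e < __stop___my_jump_table; e++)
			printf("site %p -> target %p\n", e->code, e->target);
		return 0;
	}

The runtime patching itself (turning the recorded nop into a jmp) is what the kernel's jump label machinery does via text poking; it is omitted here.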
diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h
index 65c0d3029796..427d4684e0d2 100644
--- a/arch/sparc/include/asm/jump_label.h
+++ b/arch/sparc/include/asm/jump_label.h
@@ -13,6 +13,7 @@
 		"nop\n\t" \
 		"nop\n\t" \
 		".pushsection __jump_table, \"a\"\n\t" \
+		".align 4\n\t" \
 		".word 1b, %l[" #label "], %c0\n\t" \
 		".popsection \n\t" \
 		: : "i" (key) : : label);\
diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu
index 1255d953c65d..f2ee1abb1df9 100644
--- a/arch/x86/Makefile_32.cpu
+++ b/arch/x86/Makefile_32.cpu
@@ -51,7 +51,18 @@ cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic,$(call tune,i686))
 # prologue (push %ebp, mov %esp, %ebp) which breaks the function graph
 # tracer assumptions. For i686, generic, core2 this is set by the
 # compiler anyway
-cflags-$(CONFIG_FUNCTION_GRAPH_TRACER) += $(call cc-option,-maccumulate-outgoing-args)
+ifeq ($(CONFIG_FUNCTION_GRAPH_TRACER), y)
+ADD_ACCUMULATE_OUTGOING_ARGS := y
+endif
+
+# Work around to a bug with asm goto with first implementations of it
+# in gcc causing gcc to mess up the push and pop of the stack in some
+# uses of asm goto.
+ifeq ($(CONFIG_JUMP_LABEL), y)
+ADD_ACCUMULATE_OUTGOING_ARGS := y
+endif
+
+cflags-$(ADD_ACCUMULATE_OUTGOING_ARGS) += $(call cc-option,-maccumulate-outgoing-args)
 
 # Bug fix for binutils: this option is required in order to keep
 # binutils from generating NOPL instructions against our will.
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 5ceeca382820..5079f24c955a 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -644,65 +644,26 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
 
 #if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
 
-unsigned char ideal_nop5[IDEAL_NOP_SIZE_5];
+#ifdef CONFIG_X86_64
+unsigned char ideal_nop5[5] = { 0x66, 0x66, 0x66, 0x66, 0x90 };
+#else
+unsigned char ideal_nop5[5] = { 0x3e, 0x8d, 0x74, 0x26, 0x00 };
+#endif
 
 void __init arch_init_ideal_nop5(void)
 {
-	extern const unsigned char ftrace_test_p6nop[];
-	extern const unsigned char ftrace_test_nop5[];
-	extern const unsigned char ftrace_test_jmp[];
-	int faulted = 0;
-
 	/*
-	 * There is no good nop for all x86 archs.
-	 * We will default to using the P6_NOP5, but first we
-	 * will test to make sure that the nop will actually
-	 * work on this CPU. If it faults, we will then
-	 * go to a lesser efficient 5 byte nop. If that fails
-	 * we then just use a jmp as our nop. This isn't the most
-	 * efficient nop, but we can not use a multi part nop
-	 * since we would then risk being preempted in the middle
-	 * of that nop, and if we enabled tracing then, it might
-	 * cause a system crash.
+	 * There is no good nop for all x86 archs.  This selection
+	 * algorithm should be unified with the one in find_nop_table(),
+	 * but this should be good enough for now.
 	 *
-	 * TODO: check the cpuid to determine the best nop.
+	 * For cases other than the ones below, use the safe (as in
+	 * always functional) defaults above.
 	 */
-	asm volatile (
-		"ftrace_test_jmp:"
-		"jmp ftrace_test_p6nop\n"
-		"nop\n"
-		"nop\n"
-		"nop\n"  /* 2 byte jmp + 3 bytes */
-		"ftrace_test_p6nop:"
-		P6_NOP5
-		"jmp 1f\n"
-		"ftrace_test_nop5:"
-		".byte 0x66,0x66,0x66,0x66,0x90\n"
-		"1:"
-		".section .fixup, \"ax\"\n"
-		"2:	movl $1, %0\n"
-		"	jmp ftrace_test_nop5\n"
-		"3:	movl $2, %0\n"
-		"	jmp 1b\n"
-		".previous\n"
-		_ASM_EXTABLE(ftrace_test_p6nop, 2b)
-		_ASM_EXTABLE(ftrace_test_nop5, 3b)
-		: "=r"(faulted) : "0" (faulted));
-
-	switch (faulted) {
-	case 0:
-		pr_info("converting mcount calls to 0f 1f 44 00 00\n");
-		memcpy(ideal_nop5, ftrace_test_p6nop, IDEAL_NOP_SIZE_5);
-		break;
-	case 1:
-		pr_info("converting mcount calls to 66 66 66 66 90\n");
-		memcpy(ideal_nop5, ftrace_test_nop5, IDEAL_NOP_SIZE_5);
-		break;
-	case 2:
-		pr_info("converting mcount calls to jmp . + 5\n");
-		memcpy(ideal_nop5, ftrace_test_jmp, IDEAL_NOP_SIZE_5);
-		break;
-	}
-
+#ifdef CONFIG_X86_64
+	/* Don't use these on 32 bits due to broken virtualizers */
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+		memcpy(ideal_nop5, p6_nops[5], 5);
+#endif
 }
 #endif
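The hunk above drops the old runtime fault-and-fallback probing and instead hard-codes always-safe 5-byte nops, overriding them with the P6 nop only on 64-bit Intel CPUs. A hedged userspace sketch of that same selection idea is shown below; the function name init_ideal_nop5 is invented, only the byte sequences come from the patch, and the vendor check uses the standard cpuid leaf 0 rather than the kernel's boot_cpu_data.

	/* Pick a 5-byte "ideal" nop based on CPU vendor, defaulting to
	 * encodings that work everywhere. */
	#include <cpuid.h>
	#include <stdio.h>
	#include <string.h>

	static unsigned char ideal_nop5[5];

	static void init_ideal_nop5(void)
	{
		unsigned int eax, ebx, ecx, edx;
		char vendor[13] = { 0 };

		/* Safe defaults, mirroring the patch: "66 66 66 66 90" on 64-bit,
		 * "3e 8d 74 26 00" (a ds-prefixed lea, one instruction) on 32-bit. */
	#ifdef __x86_64__
		static const unsigned char def[5] = { 0x66, 0x66, 0x66, 0x66, 0x90 };
	#else
		static const unsigned char def[5] = { 0x3e, 0x8d, 0x74, 0x26, 0x00 };
	#endif
		memcpy(ideal_nop5, def, 5);

		if (!__get_cpuid(0, &eax, &ebx, &ecx, &edx))
			return;
		/* Vendor string is returned in EBX, EDX, ECX order. */
		memcpy(vendor + 0, &ebx, 4);
		memcpy(vendor + 4, &edx, 4);
		memcpy(vendor + 8, &ecx, 4);

	#ifdef __x86_64__
		/* On 64-bit Intel, prefer the P6 nop "0f 1f 44 00 00". */
		if (!strcmp(vendor, "GenuineIntel")) {
			static const unsigned char p6[5] = { 0x0f, 0x1f, 0x44, 0x00, 0x00 };
			memcpy(ideal_nop5, p6, 5);
		}
	#endif
	}

	int main(void)
	{
		init_ideal_nop5();
		for (int i = 0; i < 5; i++)
			printf("%02x ", ideal_nop5[i]);
		putchar('\n');
		return 0;
	}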
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 0929191d83cf..7cc0a721f628 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3109,7 +3109,7 @@ void destroy_irq(unsigned int irq)
 
 	irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE);
 
-	if (intr_remapping_enabled)
+	if (irq_remapped(cfg))
 		free_irte(irq);
 	raw_spin_lock_irqsave(&vector_lock, flags);
 	__clear_irq_vector(irq, cfg);
