diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-06-02 19:17:03 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-06-02 19:17:03 -0400 |
commit | 63004afa718b1506fe9a286075b3b2d8c6ca2b9b (patch) | |
tree | 2ca957b939f36c7b6a8d85e162fec9d5a4bcca99 | |
parent | f309532bf3e1cc1b787403d84e3039812a7dbe50 (diff) | |
parent | 40b46a7d2938589a5abab132a7824fd17ae18f62 (diff) |
Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull straggler x86 fixes from Peter Anvin:
"Three groups of patches:
- EFI boot stub documentation and the ability to print error messages;
- Removal of PTRACE_ARCH_PRCTL for x32 (obsolete interface which
should never have been ported, and the port is broken and
potentially dangerous.)
- ftrace stack corruption fixes. I'm not super-happy about the
technical implementation, but it is probably the least invasive in
the short term. In the future I would like a single method for
nesting the debug stack, however."
* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86, x32, ptrace: Remove PTRACE_ARCH_PRCTL for x32
x86, efi: Add EFI boot stub documentation
x86, efi; Add EFI boot stub console support
x86, efi: Only close open files in error path
ftrace/x86: Do not change stacks in DEBUG when calling lockdep
x86: Allow nesting of the debug stack IDT setting
x86: Reset the debug_stack update counter
ftrace: Use breakpoint method to update ftrace caller
ftrace: Synchronize variable setting with breakpoints
-rw-r--r-- | Documentation/x86/efi-stub.txt | 65 | ||||
-rw-r--r-- | arch/x86/Kconfig | 2 | ||||
-rw-r--r-- | arch/x86/boot/compressed/eboot.c | 87 | ||||
-rw-r--r-- | arch/x86/boot/compressed/eboot.h | 6 | ||||
-rw-r--r-- | arch/x86/include/asm/ftrace.h | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/common.c | 8 | ||||
-rw-r--r-- | arch/x86/kernel/entry_64.S | 44 | ||||
-rw-r--r-- | arch/x86/kernel/ftrace.c | 102 | ||||
-rw-r--r-- | arch/x86/kernel/nmi.c | 6 | ||||
-rw-r--r-- | arch/x86/kernel/ptrace.c | 6 | ||||
-rw-r--r-- | arch/x86/kernel/traps.c | 8 |
11 files changed, 297 insertions, 39 deletions
diff --git a/Documentation/x86/efi-stub.txt b/Documentation/x86/efi-stub.txt new file mode 100644 index 000000000000..44e6bb6ead10 --- /dev/null +++ b/Documentation/x86/efi-stub.txt | |||
@@ -0,0 +1,65 @@ | |||
1 | The EFI Boot Stub | ||
2 | --------------------------- | ||
3 | |||
4 | On the x86 platform, a bzImage can masquerade as a PE/COFF image, | ||
5 | thereby convincing EFI firmware loaders to load it as an EFI | ||
6 | executable. The code that modifies the bzImage header, along with the | ||
7 | EFI-specific entry point that the firmware loader jumps to are | ||
8 | collectively known as the "EFI boot stub", and live in | ||
9 | arch/x86/boot/header.S and arch/x86/boot/compressed/eboot.c, | ||
10 | respectively. | ||
11 | |||
12 | By using the EFI boot stub it's possible to boot a Linux kernel | ||
13 | without the use of a conventional EFI boot loader, such as grub or | ||
14 | elilo. Since the EFI boot stub performs the jobs of a boot loader, in | ||
15 | a certain sense it *IS* the boot loader. | ||
16 | |||
17 | The EFI boot stub is enabled with the CONFIG_EFI_STUB kernel option. | ||
18 | |||
19 | |||
20 | **** How to install bzImage.efi | ||
21 | |||
22 | The bzImage located in arch/x86/boot/bzImage must be copied to the EFI | ||
23 | System Partition (ESP) and renamed with the extension ".efi". Without | ||
24 | the extension the EFI firmware loader will refuse to execute it. It's | ||
25 | not possible to execute bzImage.efi from the usual Linux file systems | ||
26 | because EFI firmware doesn't have support for them. | ||
27 | |||
28 | |||
29 | **** Passing kernel parameters from the EFI shell | ||
30 | |||
31 | Arguments to the kernel can be passed after bzImage.efi, e.g. | ||
32 | |||
33 | fs0:> bzImage.efi console=ttyS0 root=/dev/sda4 | ||
34 | |||
35 | |||
36 | **** The "initrd=" option | ||
37 | |||
38 | Like most boot loaders, the EFI stub allows the user to specify | ||
39 | multiple initrd files using the "initrd=" option. This is the only EFI | ||
40 | stub-specific command line parameter, everything else is passed to the | ||
41 | kernel when it boots. | ||
42 | |||
43 | The path to the initrd file must be an absolute path from the | ||
44 | beginning of the ESP; relative path names do not work. Also, the path | ||
45 | is an EFI-style path and directory elements must be separated with | ||
46 | backslashes (\). For example, given the following directory layout, | ||
47 | |||
48 | fs0:> | ||
49 | Kernels\ | ||
50 | bzImage.efi | ||
51 | initrd-large.img | ||
52 | |||
53 | Ramdisks\ | ||
54 | initrd-small.img | ||
55 | initrd-medium.img | ||
56 | |||
57 | to boot with the initrd-large.img file if the current working | ||
58 | directory is fs0:\Kernels, the following command must be used, | ||
59 | |||
60 | fs0:\Kernels> bzImage.efi initrd=\Kernels\initrd-large.img | ||
61 | |||
62 | Notice how bzImage.efi can be specified with a relative path. That's | ||
63 | because the image we're executing is interpreted by the EFI shell, | ||
64 | which understands relative paths, whereas the rest of the command line | ||
65 | is passed to bzImage.efi. | ||
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d700811785ea..c70684f859e1 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -1506,6 +1506,8 @@ config EFI_STUB | |||
1506 | This kernel feature allows a bzImage to be loaded directly | 1506 | This kernel feature allows a bzImage to be loaded directly |
1507 | by EFI firmware without the use of a bootloader. | 1507 | by EFI firmware without the use of a bootloader. |
1508 | 1508 | ||
1509 | See Documentation/x86/efi-stub.txt for more information. | ||
1510 | |||
1509 | config SECCOMP | 1511 | config SECCOMP |
1510 | def_bool y | 1512 | def_bool y |
1511 | prompt "Enable seccomp to safely compute untrusted bytecode" | 1513 | prompt "Enable seccomp to safely compute untrusted bytecode" |
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 2c14e76bb4c7..4e85f5f85837 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c | |||
@@ -16,6 +16,26 @@ | |||
16 | 16 | ||
17 | static efi_system_table_t *sys_table; | 17 | static efi_system_table_t *sys_table; |
18 | 18 | ||
19 | static void efi_printk(char *str) | ||
20 | { | ||
21 | char *s8; | ||
22 | |||
23 | for (s8 = str; *s8; s8++) { | ||
24 | struct efi_simple_text_output_protocol *out; | ||
25 | efi_char16_t ch[2] = { 0 }; | ||
26 | |||
27 | ch[0] = *s8; | ||
28 | out = (struct efi_simple_text_output_protocol *)sys_table->con_out; | ||
29 | |||
30 | if (*s8 == '\n') { | ||
31 | efi_char16_t nl[2] = { '\r', 0 }; | ||
32 | efi_call_phys2(out->output_string, out, nl); | ||
33 | } | ||
34 | |||
35 | efi_call_phys2(out->output_string, out, ch); | ||
36 | } | ||
37 | } | ||
38 | |||
19 | static efi_status_t __get_map(efi_memory_desc_t **map, unsigned long *map_size, | 39 | static efi_status_t __get_map(efi_memory_desc_t **map, unsigned long *map_size, |
20 | unsigned long *desc_size) | 40 | unsigned long *desc_size) |
21 | { | 41 | { |
@@ -531,8 +551,10 @@ static efi_status_t handle_ramdisks(efi_loaded_image_t *image, | |||
531 | EFI_LOADER_DATA, | 551 | EFI_LOADER_DATA, |
532 | nr_initrds * sizeof(*initrds), | 552 | nr_initrds * sizeof(*initrds), |
533 | &initrds); | 553 | &initrds); |
534 | if (status != EFI_SUCCESS) | 554 | if (status != EFI_SUCCESS) { |
555 | efi_printk("Failed to alloc mem for initrds\n"); | ||
535 | goto fail; | 556 | goto fail; |
557 | } | ||
536 | 558 | ||
537 | str = (char *)(unsigned long)hdr->cmd_line_ptr; | 559 | str = (char *)(unsigned long)hdr->cmd_line_ptr; |
538 | for (i = 0; i < nr_initrds; i++) { | 560 | for (i = 0; i < nr_initrds; i++) { |
@@ -575,32 +597,42 @@ static efi_status_t handle_ramdisks(efi_loaded_image_t *image, | |||
575 | 597 | ||
576 | status = efi_call_phys3(boottime->handle_protocol, | 598 | status = efi_call_phys3(boottime->handle_protocol, |
577 | image->device_handle, &fs_proto, &io); | 599 | image->device_handle, &fs_proto, &io); |
578 | if (status != EFI_SUCCESS) | 600 | if (status != EFI_SUCCESS) { |
601 | efi_printk("Failed to handle fs_proto\n"); | ||
579 | goto free_initrds; | 602 | goto free_initrds; |
603 | } | ||
580 | 604 | ||
581 | status = efi_call_phys2(io->open_volume, io, &fh); | 605 | status = efi_call_phys2(io->open_volume, io, &fh); |
582 | if (status != EFI_SUCCESS) | 606 | if (status != EFI_SUCCESS) { |
607 | efi_printk("Failed to open volume\n"); | ||
583 | goto free_initrds; | 608 | goto free_initrds; |
609 | } | ||
584 | } | 610 | } |
585 | 611 | ||
586 | status = efi_call_phys5(fh->open, fh, &h, filename_16, | 612 | status = efi_call_phys5(fh->open, fh, &h, filename_16, |
587 | EFI_FILE_MODE_READ, (u64)0); | 613 | EFI_FILE_MODE_READ, (u64)0); |
588 | if (status != EFI_SUCCESS) | 614 | if (status != EFI_SUCCESS) { |
615 | efi_printk("Failed to open initrd file\n"); | ||
589 | goto close_handles; | 616 | goto close_handles; |
617 | } | ||
590 | 618 | ||
591 | initrd->handle = h; | 619 | initrd->handle = h; |
592 | 620 | ||
593 | info_sz = 0; | 621 | info_sz = 0; |
594 | status = efi_call_phys4(h->get_info, h, &info_guid, | 622 | status = efi_call_phys4(h->get_info, h, &info_guid, |
595 | &info_sz, NULL); | 623 | &info_sz, NULL); |
596 | if (status != EFI_BUFFER_TOO_SMALL) | 624 | if (status != EFI_BUFFER_TOO_SMALL) { |
625 | efi_printk("Failed to get initrd info size\n"); | ||
597 | goto close_handles; | 626 | goto close_handles; |
627 | } | ||
598 | 628 | ||
599 | grow: | 629 | grow: |
600 | status = efi_call_phys3(sys_table->boottime->allocate_pool, | 630 | status = efi_call_phys3(sys_table->boottime->allocate_pool, |
601 | EFI_LOADER_DATA, info_sz, &info); | 631 | EFI_LOADER_DATA, info_sz, &info); |
602 | if (status != EFI_SUCCESS) | 632 | if (status != EFI_SUCCESS) { |
633 | efi_printk("Failed to alloc mem for initrd info\n"); | ||
603 | goto close_handles; | 634 | goto close_handles; |
635 | } | ||
604 | 636 | ||
605 | status = efi_call_phys4(h->get_info, h, &info_guid, | 637 | status = efi_call_phys4(h->get_info, h, &info_guid, |
606 | &info_sz, info); | 638 | &info_sz, info); |
@@ -612,8 +644,10 @@ grow: | |||
612 | file_sz = info->file_size; | 644 | file_sz = info->file_size; |
613 | efi_call_phys1(sys_table->boottime->free_pool, info); | 645 | efi_call_phys1(sys_table->boottime->free_pool, info); |
614 | 646 | ||
615 | if (status != EFI_SUCCESS) | 647 | if (status != EFI_SUCCESS) { |
648 | efi_printk("Failed to get initrd info\n"); | ||
616 | goto close_handles; | 649 | goto close_handles; |
650 | } | ||
617 | 651 | ||
618 | initrd->size = file_sz; | 652 | initrd->size = file_sz; |
619 | initrd_total += file_sz; | 653 | initrd_total += file_sz; |
@@ -629,11 +663,14 @@ grow: | |||
629 | */ | 663 | */ |
630 | status = high_alloc(initrd_total, 0x1000, | 664 | status = high_alloc(initrd_total, 0x1000, |
631 | &initrd_addr, hdr->initrd_addr_max); | 665 | &initrd_addr, hdr->initrd_addr_max); |
632 | if (status != EFI_SUCCESS) | 666 | if (status != EFI_SUCCESS) { |
667 | efi_printk("Failed to alloc highmem for initrds\n"); | ||
633 | goto close_handles; | 668 | goto close_handles; |
669 | } | ||
634 | 670 | ||
635 | /* We've run out of free low memory. */ | 671 | /* We've run out of free low memory. */ |
636 | if (initrd_addr > hdr->initrd_addr_max) { | 672 | if (initrd_addr > hdr->initrd_addr_max) { |
673 | efi_printk("We've run out of free low memory\n"); | ||
637 | status = EFI_INVALID_PARAMETER; | 674 | status = EFI_INVALID_PARAMETER; |
638 | goto free_initrd_total; | 675 | goto free_initrd_total; |
639 | } | 676 | } |
@@ -652,8 +689,10 @@ grow: | |||
652 | status = efi_call_phys3(fh->read, | 689 | status = efi_call_phys3(fh->read, |
653 | initrds[j].handle, | 690 | initrds[j].handle, |
654 | &chunksize, addr); | 691 | &chunksize, addr); |
655 | if (status != EFI_SUCCESS) | 692 | if (status != EFI_SUCCESS) { |
693 | efi_printk("Failed to read initrd\n"); | ||
656 | goto free_initrd_total; | 694 | goto free_initrd_total; |
695 | } | ||
657 | addr += chunksize; | 696 | addr += chunksize; |
658 | size -= chunksize; | 697 | size -= chunksize; |
659 | } | 698 | } |
@@ -674,7 +713,7 @@ free_initrd_total: | |||
674 | low_free(initrd_total, initrd_addr); | 713 | low_free(initrd_total, initrd_addr); |
675 | 714 | ||
676 | close_handles: | 715 | close_handles: |
677 | for (k = j; k < nr_initrds; k++) | 716 | for (k = j; k < i; k++) |
678 | efi_call_phys1(fh->close, initrds[k].handle); | 717 | efi_call_phys1(fh->close, initrds[k].handle); |
679 | free_initrds: | 718 | free_initrds: |
680 | efi_call_phys1(sys_table->boottime->free_pool, initrds); | 719 | efi_call_phys1(sys_table->boottime->free_pool, initrds); |
@@ -732,8 +771,10 @@ static efi_status_t make_boot_params(struct boot_params *boot_params, | |||
732 | options_size++; /* NUL termination */ | 771 | options_size++; /* NUL termination */ |
733 | 772 | ||
734 | status = low_alloc(options_size, 1, &cmdline); | 773 | status = low_alloc(options_size, 1, &cmdline); |
735 | if (status != EFI_SUCCESS) | 774 | if (status != EFI_SUCCESS) { |
775 | efi_printk("Failed to alloc mem for cmdline\n"); | ||
736 | goto fail; | 776 | goto fail; |
777 | } | ||
737 | 778 | ||
738 | s1 = (u8 *)(unsigned long)cmdline; | 779 | s1 = (u8 *)(unsigned long)cmdline; |
739 | s2 = (u16 *)options; | 780 | s2 = (u16 *)options; |
@@ -895,12 +936,16 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table) | |||
895 | 936 | ||
896 | status = efi_call_phys3(sys_table->boottime->handle_protocol, | 937 | status = efi_call_phys3(sys_table->boottime->handle_protocol, |
897 | handle, &proto, (void *)&image); | 938 | handle, &proto, (void *)&image); |
898 | if (status != EFI_SUCCESS) | 939 | if (status != EFI_SUCCESS) { |
940 | efi_printk("Failed to get handle for LOADED_IMAGE_PROTOCOL\n"); | ||
899 | goto fail; | 941 | goto fail; |
942 | } | ||
900 | 943 | ||
901 | status = low_alloc(0x4000, 1, (unsigned long *)&boot_params); | 944 | status = low_alloc(0x4000, 1, (unsigned long *)&boot_params); |
902 | if (status != EFI_SUCCESS) | 945 | if (status != EFI_SUCCESS) { |
946 | efi_printk("Failed to alloc lowmem for boot params\n"); | ||
903 | goto fail; | 947 | goto fail; |
948 | } | ||
904 | 949 | ||
905 | memset(boot_params, 0x0, 0x4000); | 950 | memset(boot_params, 0x0, 0x4000); |
906 | 951 | ||
@@ -933,8 +978,10 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table) | |||
933 | if (status != EFI_SUCCESS) { | 978 | if (status != EFI_SUCCESS) { |
934 | status = low_alloc(hdr->init_size, hdr->kernel_alignment, | 979 | status = low_alloc(hdr->init_size, hdr->kernel_alignment, |
935 | &start); | 980 | &start); |
936 | if (status != EFI_SUCCESS) | 981 | if (status != EFI_SUCCESS) { |
982 | efi_printk("Failed to alloc mem for kernel\n"); | ||
937 | goto fail; | 983 | goto fail; |
984 | } | ||
938 | } | 985 | } |
939 | 986 | ||
940 | hdr->code32_start = (__u32)start; | 987 | hdr->code32_start = (__u32)start; |
@@ -945,19 +992,25 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table) | |||
945 | status = efi_call_phys3(sys_table->boottime->allocate_pool, | 992 | status = efi_call_phys3(sys_table->boottime->allocate_pool, |
946 | EFI_LOADER_DATA, sizeof(*gdt), | 993 | EFI_LOADER_DATA, sizeof(*gdt), |
947 | (void **)&gdt); | 994 | (void **)&gdt); |
948 | if (status != EFI_SUCCESS) | 995 | if (status != EFI_SUCCESS) { |
996 | efi_printk("Failed to alloc mem for gdt structure\n"); | ||
949 | goto fail; | 997 | goto fail; |
998 | } | ||
950 | 999 | ||
951 | gdt->size = 0x800; | 1000 | gdt->size = 0x800; |
952 | status = low_alloc(gdt->size, 8, (unsigned long *)&gdt->address); | 1001 | status = low_alloc(gdt->size, 8, (unsigned long *)&gdt->address); |
953 | if (status != EFI_SUCCESS) | 1002 | if (status != EFI_SUCCESS) { |
1003 | efi_printk("Failed to alloc mem for gdt\n"); | ||
954 | goto fail; | 1004 | goto fail; |
1005 | } | ||
955 | 1006 | ||
956 | status = efi_call_phys3(sys_table->boottime->allocate_pool, | 1007 | status = efi_call_phys3(sys_table->boottime->allocate_pool, |
957 | EFI_LOADER_DATA, sizeof(*idt), | 1008 | EFI_LOADER_DATA, sizeof(*idt), |
958 | (void **)&idt); | 1009 | (void **)&idt); |
959 | if (status != EFI_SUCCESS) | 1010 | if (status != EFI_SUCCESS) { |
1011 | efi_printk("Failed to alloc mem for idt structure\n"); | ||
960 | goto fail; | 1012 | goto fail; |
1013 | } | ||
961 | 1014 | ||
962 | idt->size = 0; | 1015 | idt->size = 0; |
963 | idt->address = 0; | 1016 | idt->address = 0; |
diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h index 39251663e65b..3b6e15627c55 100644 --- a/arch/x86/boot/compressed/eboot.h +++ b/arch/x86/boot/compressed/eboot.h | |||
@@ -58,4 +58,10 @@ struct efi_uga_draw_protocol { | |||
58 | void *blt; | 58 | void *blt; |
59 | }; | 59 | }; |
60 | 60 | ||
61 | struct efi_simple_text_output_protocol { | ||
62 | void *reset; | ||
63 | void *output_string; | ||
64 | void *test_string; | ||
65 | }; | ||
66 | |||
61 | #endif /* BOOT_COMPRESSED_EBOOT_H */ | 67 | #endif /* BOOT_COMPRESSED_EBOOT_H */ |
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 18d9005d9e4f..b0767bc08740 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h | |||
@@ -34,7 +34,7 @@ | |||
34 | 34 | ||
35 | #ifndef __ASSEMBLY__ | 35 | #ifndef __ASSEMBLY__ |
36 | extern void mcount(void); | 36 | extern void mcount(void); |
37 | extern int modifying_ftrace_code; | 37 | extern atomic_t modifying_ftrace_code; |
38 | 38 | ||
39 | static inline unsigned long ftrace_call_adjust(unsigned long addr) | 39 | static inline unsigned long ftrace_call_adjust(unsigned long addr) |
40 | { | 40 | { |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 82f29e70d058..6b9333b429ba 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -1101,14 +1101,20 @@ int is_debug_stack(unsigned long addr) | |||
1101 | addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ)); | 1101 | addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ)); |
1102 | } | 1102 | } |
1103 | 1103 | ||
1104 | static DEFINE_PER_CPU(u32, debug_stack_use_ctr); | ||
1105 | |||
1104 | void debug_stack_set_zero(void) | 1106 | void debug_stack_set_zero(void) |
1105 | { | 1107 | { |
1108 | this_cpu_inc(debug_stack_use_ctr); | ||
1106 | load_idt((const struct desc_ptr *)&nmi_idt_descr); | 1109 | load_idt((const struct desc_ptr *)&nmi_idt_descr); |
1107 | } | 1110 | } |
1108 | 1111 | ||
1109 | void debug_stack_reset(void) | 1112 | void debug_stack_reset(void) |
1110 | { | 1113 | { |
1111 | load_idt((const struct desc_ptr *)&idt_descr); | 1114 | if (WARN_ON(!this_cpu_read(debug_stack_use_ctr))) |
1115 | return; | ||
1116 | if (this_cpu_dec_return(debug_stack_use_ctr) == 0) | ||
1117 | load_idt((const struct desc_ptr *)&idt_descr); | ||
1112 | } | 1118 | } |
1113 | 1119 | ||
1114 | #else /* CONFIG_X86_64 */ | 1120 | #else /* CONFIG_X86_64 */ |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 320852d02026..7d65133b51be 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -191,6 +191,44 @@ ENDPROC(native_usergs_sysret64) | |||
191 | .endm | 191 | .endm |
192 | 192 | ||
193 | /* | 193 | /* |
194 | * When dynamic function tracer is enabled it will add a breakpoint | ||
195 | * to all locations that it is about to modify, sync CPUs, update | ||
196 | * all the code, sync CPUs, then remove the breakpoints. In this time | ||
197 | * if lockdep is enabled, it might jump back into the debug handler | ||
198 | * outside the updating of the IST protection. (TRACE_IRQS_ON/OFF). | ||
199 | * | ||
200 | * We need to change the IDT table before calling TRACE_IRQS_ON/OFF to | ||
201 | * make sure the stack pointer does not get reset back to the top | ||
202 | * of the debug stack, and instead just reuses the current stack. | ||
203 | */ | ||
204 | #if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS) | ||
205 | |||
206 | .macro TRACE_IRQS_OFF_DEBUG | ||
207 | call debug_stack_set_zero | ||
208 | TRACE_IRQS_OFF | ||
209 | call debug_stack_reset | ||
210 | .endm | ||
211 | |||
212 | .macro TRACE_IRQS_ON_DEBUG | ||
213 | call debug_stack_set_zero | ||
214 | TRACE_IRQS_ON | ||
215 | call debug_stack_reset | ||
216 | .endm | ||
217 | |||
218 | .macro TRACE_IRQS_IRETQ_DEBUG offset=ARGOFFSET | ||
219 | bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */ | ||
220 | jnc 1f | ||
221 | TRACE_IRQS_ON_DEBUG | ||
222 | 1: | ||
223 | .endm | ||
224 | |||
225 | #else | ||
226 | # define TRACE_IRQS_OFF_DEBUG TRACE_IRQS_OFF | ||
227 | # define TRACE_IRQS_ON_DEBUG TRACE_IRQS_ON | ||
228 | # define TRACE_IRQS_IRETQ_DEBUG TRACE_IRQS_IRETQ | ||
229 | #endif | ||
230 | |||
231 | /* | ||
194 | * C code is not supposed to know about undefined top of stack. Every time | 232 | * C code is not supposed to know about undefined top of stack. Every time |
195 | a C function with a pt_regs argument is called from the SYSCALL based | 233 | a C function with a pt_regs argument is called from the SYSCALL based |
196 | * fast path FIXUP_TOP_OF_STACK is needed. | 234 | * fast path FIXUP_TOP_OF_STACK is needed. |
@@ -1098,7 +1136,7 @@ ENTRY(\sym) | |||
1098 | subq $ORIG_RAX-R15, %rsp | 1136 | subq $ORIG_RAX-R15, %rsp |
1099 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 | 1137 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 |
1100 | call save_paranoid | 1138 | call save_paranoid |
1101 | TRACE_IRQS_OFF | 1139 | TRACE_IRQS_OFF_DEBUG |
1102 | movq %rsp,%rdi /* pt_regs pointer */ | 1140 | movq %rsp,%rdi /* pt_regs pointer */ |
1103 | xorl %esi,%esi /* no error code */ | 1141 | xorl %esi,%esi /* no error code */ |
1104 | subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist) | 1142 | subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist) |
@@ -1393,7 +1431,7 @@ paranoidzeroentry machine_check *machine_check_vector(%rip) | |||
1393 | ENTRY(paranoid_exit) | 1431 | ENTRY(paranoid_exit) |
1394 | DEFAULT_FRAME | 1432 | DEFAULT_FRAME |
1395 | DISABLE_INTERRUPTS(CLBR_NONE) | 1433 | DISABLE_INTERRUPTS(CLBR_NONE) |
1396 | TRACE_IRQS_OFF | 1434 | TRACE_IRQS_OFF_DEBUG |
1397 | testl %ebx,%ebx /* swapgs needed? */ | 1435 | testl %ebx,%ebx /* swapgs needed? */ |
1398 | jnz paranoid_restore | 1436 | jnz paranoid_restore |
1399 | testl $3,CS(%rsp) | 1437 | testl $3,CS(%rsp) |
@@ -1404,7 +1442,7 @@ paranoid_swapgs: | |||
1404 | RESTORE_ALL 8 | 1442 | RESTORE_ALL 8 |
1405 | jmp irq_return | 1443 | jmp irq_return |
1406 | paranoid_restore: | 1444 | paranoid_restore: |
1407 | TRACE_IRQS_IRETQ 0 | 1445 | TRACE_IRQS_IRETQ_DEBUG 0 |
1408 | RESTORE_ALL 8 | 1446 | RESTORE_ALL 8 |
1409 | jmp irq_return | 1447 | jmp irq_return |
1410 | paranoid_userspace: | 1448 | paranoid_userspace: |
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 32ff36596ab1..c3a7cb4bf6e6 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c | |||
@@ -100,7 +100,7 @@ static const unsigned char *ftrace_nop_replace(void) | |||
100 | } | 100 | } |
101 | 101 | ||
102 | static int | 102 | static int |
103 | ftrace_modify_code(unsigned long ip, unsigned const char *old_code, | 103 | ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code, |
104 | unsigned const char *new_code) | 104 | unsigned const char *new_code) |
105 | { | 105 | { |
106 | unsigned char replaced[MCOUNT_INSN_SIZE]; | 106 | unsigned char replaced[MCOUNT_INSN_SIZE]; |
@@ -141,7 +141,20 @@ int ftrace_make_nop(struct module *mod, | |||
141 | old = ftrace_call_replace(ip, addr); | 141 | old = ftrace_call_replace(ip, addr); |
142 | new = ftrace_nop_replace(); | 142 | new = ftrace_nop_replace(); |
143 | 143 | ||
144 | return ftrace_modify_code(rec->ip, old, new); | 144 | /* |
145 | * On boot up, and when modules are loaded, the MCOUNT_ADDR | ||
146 | * is converted to a nop, and will never become MCOUNT_ADDR | ||
147 | * again. This code is either running before SMP (on boot up) | ||
148 | * or before the code will ever be executed (module load). | ||
149 | * We do not want to use the breakpoint version in this case, | ||
150 | * just modify the code directly. | ||
151 | */ | ||
152 | if (addr == MCOUNT_ADDR) | ||
153 | return ftrace_modify_code_direct(rec->ip, old, new); | ||
154 | |||
155 | /* Normal cases use add_brk_on_nop */ | ||
156 | WARN_ONCE(1, "invalid use of ftrace_make_nop"); | ||
157 | return -EINVAL; | ||
145 | } | 158 | } |
146 | 159 | ||
147 | int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) | 160 | int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) |
@@ -152,9 +165,47 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) | |||
152 | old = ftrace_nop_replace(); | 165 | old = ftrace_nop_replace(); |
153 | new = ftrace_call_replace(ip, addr); | 166 | new = ftrace_call_replace(ip, addr); |
154 | 167 | ||
155 | return ftrace_modify_code(rec->ip, old, new); | 168 | /* Should only be called when module is loaded */ |
169 | return ftrace_modify_code_direct(rec->ip, old, new); | ||
156 | } | 170 | } |
157 | 171 | ||
172 | /* | ||
173 | * The modifying_ftrace_code is used to tell the breakpoint | ||
174 | * handler to call ftrace_int3_handler(). If it fails to | ||
175 | * call this handler for a breakpoint added by ftrace, then | ||
176 | * the kernel may crash. | ||
177 | * | ||
178 | * As atomic_writes on x86 do not need a barrier, we do not | ||
179 | * need to add smp_mb()s for this to work. It is also considered | ||
180 | * that we can not read the modifying_ftrace_code before | ||
181 | * executing the breakpoint. That would be quite remarkable if | ||
182 | * it could do that. Here's the flow that is required: | ||
183 | * | ||
184 | * CPU-0 CPU-1 | ||
185 | * | ||
186 | * atomic_inc(mfc); | ||
187 | * write int3s | ||
188 | * <trap-int3> // implicit (r)mb | ||
189 | * if (atomic_read(mfc)) | ||
190 | * call ftrace_int3_handler() | ||
191 | * | ||
192 | * Then when we are finished: | ||
193 | * | ||
194 | * atomic_dec(mfc); | ||
195 | * | ||
196 | * If we hit a breakpoint that was not set by ftrace, it does not | ||
197 | * matter if ftrace_int3_handler() is called or not. It will | ||
198 | * simply be ignored. But it is crucial that a ftrace nop/caller | ||
199 | * breakpoint is handled. No other user should ever place a | ||
200 | * breakpoint on an ftrace nop/caller location. It must only | ||
201 | * be done by this code. | ||
202 | */ | ||
203 | atomic_t modifying_ftrace_code __read_mostly; | ||
204 | |||
205 | static int | ||
206 | ftrace_modify_code(unsigned long ip, unsigned const char *old_code, | ||
207 | unsigned const char *new_code); | ||
208 | |||
158 | int ftrace_update_ftrace_func(ftrace_func_t func) | 209 | int ftrace_update_ftrace_func(ftrace_func_t func) |
159 | { | 210 | { |
160 | unsigned long ip = (unsigned long)(&ftrace_call); | 211 | unsigned long ip = (unsigned long)(&ftrace_call); |
@@ -163,13 +214,17 @@ int ftrace_update_ftrace_func(ftrace_func_t func) | |||
163 | 214 | ||
164 | memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE); | 215 | memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE); |
165 | new = ftrace_call_replace(ip, (unsigned long)func); | 216 | new = ftrace_call_replace(ip, (unsigned long)func); |
217 | |||
218 | /* See comment above by declaration of modifying_ftrace_code */ | ||
219 | atomic_inc(&modifying_ftrace_code); | ||
220 | |||
166 | ret = ftrace_modify_code(ip, old, new); | 221 | ret = ftrace_modify_code(ip, old, new); |
167 | 222 | ||
223 | atomic_dec(&modifying_ftrace_code); | ||
224 | |||
168 | return ret; | 225 | return ret; |
169 | } | 226 | } |
170 | 227 | ||
171 | int modifying_ftrace_code __read_mostly; | ||
172 | |||
173 | /* | 228 | /* |
174 | * A breakpoint was added to the code address we are about to | 229 | * A breakpoint was added to the code address we are about to |
175 | * modify, and this is the handle that will just skip over it. | 230 | * modify, and this is the handle that will just skip over it. |
@@ -489,13 +544,46 @@ void ftrace_replace_code(int enable) | |||
489 | } | 544 | } |
490 | } | 545 | } |
491 | 546 | ||
547 | static int | ||
548 | ftrace_modify_code(unsigned long ip, unsigned const char *old_code, | ||
549 | unsigned const char *new_code) | ||
550 | { | ||
551 | int ret; | ||
552 | |||
553 | ret = add_break(ip, old_code); | ||
554 | if (ret) | ||
555 | goto out; | ||
556 | |||
557 | run_sync(); | ||
558 | |||
559 | ret = add_update_code(ip, new_code); | ||
560 | if (ret) | ||
561 | goto fail_update; | ||
562 | |||
563 | run_sync(); | ||
564 | |||
565 | ret = ftrace_write(ip, new_code, 1); | ||
566 | if (ret) { | ||
567 | ret = -EPERM; | ||
568 | goto out; | ||
569 | } | ||
570 | run_sync(); | ||
571 | out: | ||
572 | return ret; | ||
573 | |||
574 | fail_update: | ||
575 | probe_kernel_write((void *)ip, &old_code[0], 1); | ||
576 | goto out; | ||
577 | } | ||
578 | |||
492 | void arch_ftrace_update_code(int command) | 579 | void arch_ftrace_update_code(int command) |
493 | { | 580 | { |
494 | modifying_ftrace_code++; | 581 | /* See comment above by declaration of modifying_ftrace_code */ |
582 | atomic_inc(&modifying_ftrace_code); | ||
495 | 583 | ||
496 | ftrace_modify_all_code(command); | 584 | ftrace_modify_all_code(command); |
497 | 585 | ||
498 | modifying_ftrace_code--; | 586 | atomic_dec(&modifying_ftrace_code); |
499 | } | 587 | } |
500 | 588 | ||
501 | int __init ftrace_dyn_arch_init(void *data) | 589 | int __init ftrace_dyn_arch_init(void *data) |
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 90875279ef3d..a0b2f84457be 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c | |||
@@ -444,14 +444,16 @@ static inline void nmi_nesting_preprocess(struct pt_regs *regs) | |||
444 | */ | 444 | */ |
445 | if (unlikely(is_debug_stack(regs->sp))) { | 445 | if (unlikely(is_debug_stack(regs->sp))) { |
446 | debug_stack_set_zero(); | 446 | debug_stack_set_zero(); |
447 | __get_cpu_var(update_debug_stack) = 1; | 447 | this_cpu_write(update_debug_stack, 1); |
448 | } | 448 | } |
449 | } | 449 | } |
450 | 450 | ||
451 | static inline void nmi_nesting_postprocess(void) | 451 | static inline void nmi_nesting_postprocess(void) |
452 | { | 452 | { |
453 | if (unlikely(__get_cpu_var(update_debug_stack))) | 453 | if (unlikely(this_cpu_read(update_debug_stack))) { |
454 | debug_stack_reset(); | 454 | debug_stack_reset(); |
455 | this_cpu_write(update_debug_stack, 0); | ||
456 | } | ||
455 | } | 457 | } |
456 | #endif | 458 | #endif |
457 | 459 | ||
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 13b1990c7c58..c4c6a5c2bf0f 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -1211,12 +1211,6 @@ static long x32_arch_ptrace(struct task_struct *child, | |||
1211 | 0, sizeof(struct user_i387_struct), | 1211 | 0, sizeof(struct user_i387_struct), |
1212 | datap); | 1212 | datap); |
1213 | 1213 | ||
1214 | /* normal 64bit interface to access TLS data. | ||
1215 | Works just like arch_prctl, except that the arguments | ||
1216 | are reversed. */ | ||
1217 | case PTRACE_ARCH_PRCTL: | ||
1218 | return do_arch_prctl(child, data, addr); | ||
1219 | |||
1220 | default: | 1214 | default: |
1221 | return compat_ptrace_request(child, request, addr, data); | 1215 | return compat_ptrace_request(child, request, addr, data); |
1222 | } | 1216 | } |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index ff08457a025d..05b31d92f69c 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -303,8 +303,12 @@ gp_in_kernel: | |||
303 | dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code) | 303 | dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code) |
304 | { | 304 | { |
305 | #ifdef CONFIG_DYNAMIC_FTRACE | 305 | #ifdef CONFIG_DYNAMIC_FTRACE |
306 | /* ftrace must be first, everything else may cause a recursive crash */ | 306 | /* |
307 | if (unlikely(modifying_ftrace_code) && ftrace_int3_handler(regs)) | 307 | * ftrace must be first, everything else may cause a recursive crash. |
308 | * See note by declaration of modifying_ftrace_code in ftrace.c | ||
309 | */ | ||
310 | if (unlikely(atomic_read(&modifying_ftrace_code)) && | ||
311 | ftrace_int3_handler(regs)) | ||
308 | return; | 312 | return; |
309 | #endif | 313 | #endif |
310 | #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP | 314 | #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP |