diff options
| -rw-r--r-- | Documentation/x86/efi-stub.txt | 65 | ||||
| -rw-r--r-- | arch/x86/Kconfig | 2 | ||||
| -rw-r--r-- | arch/x86/boot/compressed/eboot.c | 87 | ||||
| -rw-r--r-- | arch/x86/boot/compressed/eboot.h | 6 | ||||
| -rw-r--r-- | arch/x86/include/asm/ftrace.h | 2 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/common.c | 8 | ||||
| -rw-r--r-- | arch/x86/kernel/entry_64.S | 44 | ||||
| -rw-r--r-- | arch/x86/kernel/ftrace.c | 102 | ||||
| -rw-r--r-- | arch/x86/kernel/nmi.c | 6 | ||||
| -rw-r--r-- | arch/x86/kernel/ptrace.c | 6 | ||||
| -rw-r--r-- | arch/x86/kernel/traps.c | 8 |
11 files changed, 297 insertions, 39 deletions
diff --git a/Documentation/x86/efi-stub.txt b/Documentation/x86/efi-stub.txt new file mode 100644 index 000000000000..44e6bb6ead10 --- /dev/null +++ b/Documentation/x86/efi-stub.txt | |||
| @@ -0,0 +1,65 @@ | |||
| 1 | The EFI Boot Stub | ||
| 2 | --------------------------- | ||
| 3 | |||
| 4 | On the x86 platform, a bzImage can masquerade as a PE/COFF image, | ||
| 5 | thereby convincing EFI firmware loaders to load it as an EFI | ||
| 6 | executable. The code that modifies the bzImage header, along with the | ||
| 7 | EFI-specific entry point that the firmware loader jumps to are | ||
| 8 | collectively known as the "EFI boot stub", and live in | ||
| 9 | arch/x86/boot/header.S and arch/x86/boot/compressed/eboot.c, | ||
| 10 | respectively. | ||
| 11 | |||
| 12 | By using the EFI boot stub it's possible to boot a Linux kernel | ||
| 13 | without the use of a conventional EFI boot loader, such as grub or | ||
| 14 | elilo. Since the EFI boot stub performs the jobs of a boot loader, in | ||
| 15 | a certain sense it *IS* the boot loader. | ||
| 16 | |||
| 17 | The EFI boot stub is enabled with the CONFIG_EFI_STUB kernel option. | ||
| 18 | |||
| 19 | |||
| 20 | **** How to install bzImage.efi | ||
| 21 | |||
| 22 | The bzImage located in arch/x86/boot/bzImage must be copied to the EFI | ||
| 23 | System Partition (ESP) and renamed with the extension ".efi". Without | ||
| 24 | the extension the EFI firmware loader will refuse to execute it. It's | ||
| 25 | not possible to execute bzImage.efi from the usual Linux file systems | ||
| 26 | because EFI firmware doesn't have support for them. | ||
| 27 | |||
| 28 | |||
| 29 | **** Passing kernel parameters from the EFI shell | ||
| 30 | |||
| 31 | Arguments to the kernel can be passed after bzImage.efi, e.g. | ||
| 32 | |||
| 33 | fs0:> bzImage.efi console=ttyS0 root=/dev/sda4 | ||
| 34 | |||
| 35 | |||
| 36 | **** The "initrd=" option | ||
| 37 | |||
| 38 | Like most boot loaders, the EFI stub allows the user to specify | ||
| 39 | multiple initrd files using the "initrd=" option. This is the only EFI | ||
| 40 | stub-specific command line parameter; everything else is passed to the | ||
| 41 | kernel when it boots. | ||
| 42 | |||
| 43 | The path to the initrd file must be an absolute path from the | ||
| 44 | beginning of the ESP; relative path names do not work. Also, the path | ||
| 45 | is an EFI-style path and directory elements must be separated with | ||
| 46 | backslashes (\). For example, given the following directory layout, | ||
| 47 | |||
| 48 | fs0:> | ||
| 49 | Kernels\ | ||
| 50 | bzImage.efi | ||
| 51 | initrd-large.img | ||
| 52 | |||
| 53 | Ramdisks\ | ||
| 54 | initrd-small.img | ||
| 55 | initrd-medium.img | ||
| 56 | |||
| 57 | to boot with the initrd-large.img file if the current working | ||
| 58 | directory is fs0:\Kernels, the following command must be used, | ||
| 59 | |||
| 60 | fs0:\Kernels> bzImage.efi initrd=\Kernels\initrd-large.img | ||
| 61 | |||
| 62 | Notice how bzImage.efi can be specified with a relative path. That's | ||
| 63 | because the image we're executing is interpreted by the EFI shell, | ||
| 64 | which understands relative paths, whereas the rest of the command line | ||
| 65 | is passed to bzImage.efi. | ||
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d700811785ea..c70684f859e1 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
| @@ -1506,6 +1506,8 @@ config EFI_STUB | |||
| 1506 | This kernel feature allows a bzImage to be loaded directly | 1506 | This kernel feature allows a bzImage to be loaded directly |
| 1507 | by EFI firmware without the use of a bootloader. | 1507 | by EFI firmware without the use of a bootloader. |
| 1508 | 1508 | ||
| 1509 | See Documentation/x86/efi-stub.txt for more information. | ||
| 1510 | |||
| 1509 | config SECCOMP | 1511 | config SECCOMP |
| 1510 | def_bool y | 1512 | def_bool y |
| 1511 | prompt "Enable seccomp to safely compute untrusted bytecode" | 1513 | prompt "Enable seccomp to safely compute untrusted bytecode" |
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 2c14e76bb4c7..4e85f5f85837 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c | |||
| @@ -16,6 +16,26 @@ | |||
| 16 | 16 | ||
| 17 | static efi_system_table_t *sys_table; | 17 | static efi_system_table_t *sys_table; |
| 18 | 18 | ||
| 19 | static void efi_printk(char *str) | ||
| 20 | { | ||
| 21 | char *s8; | ||
| 22 | |||
| 23 | for (s8 = str; *s8; s8++) { | ||
| 24 | struct efi_simple_text_output_protocol *out; | ||
| 25 | efi_char16_t ch[2] = { 0 }; | ||
| 26 | |||
| 27 | ch[0] = *s8; | ||
| 28 | out = (struct efi_simple_text_output_protocol *)sys_table->con_out; | ||
| 29 | |||
| 30 | if (*s8 == '\n') { | ||
| 31 | efi_char16_t nl[2] = { '\r', 0 }; | ||
| 32 | efi_call_phys2(out->output_string, out, nl); | ||
| 33 | } | ||
| 34 | |||
| 35 | efi_call_phys2(out->output_string, out, ch); | ||
| 36 | } | ||
| 37 | } | ||
| 38 | |||
| 19 | static efi_status_t __get_map(efi_memory_desc_t **map, unsigned long *map_size, | 39 | static efi_status_t __get_map(efi_memory_desc_t **map, unsigned long *map_size, |
| 20 | unsigned long *desc_size) | 40 | unsigned long *desc_size) |
| 21 | { | 41 | { |
| @@ -531,8 +551,10 @@ static efi_status_t handle_ramdisks(efi_loaded_image_t *image, | |||
| 531 | EFI_LOADER_DATA, | 551 | EFI_LOADER_DATA, |
| 532 | nr_initrds * sizeof(*initrds), | 552 | nr_initrds * sizeof(*initrds), |
| 533 | &initrds); | 553 | &initrds); |
| 534 | if (status != EFI_SUCCESS) | 554 | if (status != EFI_SUCCESS) { |
| 555 | efi_printk("Failed to alloc mem for initrds\n"); | ||
| 535 | goto fail; | 556 | goto fail; |
| 557 | } | ||
| 536 | 558 | ||
| 537 | str = (char *)(unsigned long)hdr->cmd_line_ptr; | 559 | str = (char *)(unsigned long)hdr->cmd_line_ptr; |
| 538 | for (i = 0; i < nr_initrds; i++) { | 560 | for (i = 0; i < nr_initrds; i++) { |
| @@ -575,32 +597,42 @@ static efi_status_t handle_ramdisks(efi_loaded_image_t *image, | |||
| 575 | 597 | ||
| 576 | status = efi_call_phys3(boottime->handle_protocol, | 598 | status = efi_call_phys3(boottime->handle_protocol, |
| 577 | image->device_handle, &fs_proto, &io); | 599 | image->device_handle, &fs_proto, &io); |
| 578 | if (status != EFI_SUCCESS) | 600 | if (status != EFI_SUCCESS) { |
| 601 | efi_printk("Failed to handle fs_proto\n"); | ||
| 579 | goto free_initrds; | 602 | goto free_initrds; |
| 603 | } | ||
| 580 | 604 | ||
| 581 | status = efi_call_phys2(io->open_volume, io, &fh); | 605 | status = efi_call_phys2(io->open_volume, io, &fh); |
| 582 | if (status != EFI_SUCCESS) | 606 | if (status != EFI_SUCCESS) { |
| 607 | efi_printk("Failed to open volume\n"); | ||
| 583 | goto free_initrds; | 608 | goto free_initrds; |
| 609 | } | ||
| 584 | } | 610 | } |
| 585 | 611 | ||
| 586 | status = efi_call_phys5(fh->open, fh, &h, filename_16, | 612 | status = efi_call_phys5(fh->open, fh, &h, filename_16, |
| 587 | EFI_FILE_MODE_READ, (u64)0); | 613 | EFI_FILE_MODE_READ, (u64)0); |
| 588 | if (status != EFI_SUCCESS) | 614 | if (status != EFI_SUCCESS) { |
| 615 | efi_printk("Failed to open initrd file\n"); | ||
| 589 | goto close_handles; | 616 | goto close_handles; |
| 617 | } | ||
| 590 | 618 | ||
| 591 | initrd->handle = h; | 619 | initrd->handle = h; |
| 592 | 620 | ||
| 593 | info_sz = 0; | 621 | info_sz = 0; |
| 594 | status = efi_call_phys4(h->get_info, h, &info_guid, | 622 | status = efi_call_phys4(h->get_info, h, &info_guid, |
| 595 | &info_sz, NULL); | 623 | &info_sz, NULL); |
| 596 | if (status != EFI_BUFFER_TOO_SMALL) | 624 | if (status != EFI_BUFFER_TOO_SMALL) { |
| 625 | efi_printk("Failed to get initrd info size\n"); | ||
| 597 | goto close_handles; | 626 | goto close_handles; |
| 627 | } | ||
| 598 | 628 | ||
| 599 | grow: | 629 | grow: |
| 600 | status = efi_call_phys3(sys_table->boottime->allocate_pool, | 630 | status = efi_call_phys3(sys_table->boottime->allocate_pool, |
| 601 | EFI_LOADER_DATA, info_sz, &info); | 631 | EFI_LOADER_DATA, info_sz, &info); |
| 602 | if (status != EFI_SUCCESS) | 632 | if (status != EFI_SUCCESS) { |
| 633 | efi_printk("Failed to alloc mem for initrd info\n"); | ||
| 603 | goto close_handles; | 634 | goto close_handles; |
| 635 | } | ||
| 604 | 636 | ||
| 605 | status = efi_call_phys4(h->get_info, h, &info_guid, | 637 | status = efi_call_phys4(h->get_info, h, &info_guid, |
| 606 | &info_sz, info); | 638 | &info_sz, info); |
| @@ -612,8 +644,10 @@ grow: | |||
| 612 | file_sz = info->file_size; | 644 | file_sz = info->file_size; |
| 613 | efi_call_phys1(sys_table->boottime->free_pool, info); | 645 | efi_call_phys1(sys_table->boottime->free_pool, info); |
| 614 | 646 | ||
| 615 | if (status != EFI_SUCCESS) | 647 | if (status != EFI_SUCCESS) { |
| 648 | efi_printk("Failed to get initrd info\n"); | ||
| 616 | goto close_handles; | 649 | goto close_handles; |
| 650 | } | ||
| 617 | 651 | ||
| 618 | initrd->size = file_sz; | 652 | initrd->size = file_sz; |
| 619 | initrd_total += file_sz; | 653 | initrd_total += file_sz; |
| @@ -629,11 +663,14 @@ grow: | |||
| 629 | */ | 663 | */ |
| 630 | status = high_alloc(initrd_total, 0x1000, | 664 | status = high_alloc(initrd_total, 0x1000, |
| 631 | &initrd_addr, hdr->initrd_addr_max); | 665 | &initrd_addr, hdr->initrd_addr_max); |
| 632 | if (status != EFI_SUCCESS) | 666 | if (status != EFI_SUCCESS) { |
| 667 | efi_printk("Failed to alloc highmem for initrds\n"); | ||
| 633 | goto close_handles; | 668 | goto close_handles; |
| 669 | } | ||
| 634 | 670 | ||
| 635 | /* We've run out of free low memory. */ | 671 | /* We've run out of free low memory. */ |
| 636 | if (initrd_addr > hdr->initrd_addr_max) { | 672 | if (initrd_addr > hdr->initrd_addr_max) { |
| 673 | efi_printk("We've run out of free low memory\n"); | ||
| 637 | status = EFI_INVALID_PARAMETER; | 674 | status = EFI_INVALID_PARAMETER; |
| 638 | goto free_initrd_total; | 675 | goto free_initrd_total; |
| 639 | } | 676 | } |
| @@ -652,8 +689,10 @@ grow: | |||
| 652 | status = efi_call_phys3(fh->read, | 689 | status = efi_call_phys3(fh->read, |
| 653 | initrds[j].handle, | 690 | initrds[j].handle, |
| 654 | &chunksize, addr); | 691 | &chunksize, addr); |
| 655 | if (status != EFI_SUCCESS) | 692 | if (status != EFI_SUCCESS) { |
| 693 | efi_printk("Failed to read initrd\n"); | ||
| 656 | goto free_initrd_total; | 694 | goto free_initrd_total; |
| 695 | } | ||
| 657 | addr += chunksize; | 696 | addr += chunksize; |
| 658 | size -= chunksize; | 697 | size -= chunksize; |
| 659 | } | 698 | } |
| @@ -674,7 +713,7 @@ free_initrd_total: | |||
| 674 | low_free(initrd_total, initrd_addr); | 713 | low_free(initrd_total, initrd_addr); |
| 675 | 714 | ||
| 676 | close_handles: | 715 | close_handles: |
| 677 | for (k = j; k < nr_initrds; k++) | 716 | for (k = j; k < i; k++) |
| 678 | efi_call_phys1(fh->close, initrds[k].handle); | 717 | efi_call_phys1(fh->close, initrds[k].handle); |
| 679 | free_initrds: | 718 | free_initrds: |
| 680 | efi_call_phys1(sys_table->boottime->free_pool, initrds); | 719 | efi_call_phys1(sys_table->boottime->free_pool, initrds); |
| @@ -732,8 +771,10 @@ static efi_status_t make_boot_params(struct boot_params *boot_params, | |||
| 732 | options_size++; /* NUL termination */ | 771 | options_size++; /* NUL termination */ |
| 733 | 772 | ||
| 734 | status = low_alloc(options_size, 1, &cmdline); | 773 | status = low_alloc(options_size, 1, &cmdline); |
| 735 | if (status != EFI_SUCCESS) | 774 | if (status != EFI_SUCCESS) { |
| 775 | efi_printk("Failed to alloc mem for cmdline\n"); | ||
| 736 | goto fail; | 776 | goto fail; |
| 777 | } | ||
| 737 | 778 | ||
| 738 | s1 = (u8 *)(unsigned long)cmdline; | 779 | s1 = (u8 *)(unsigned long)cmdline; |
| 739 | s2 = (u16 *)options; | 780 | s2 = (u16 *)options; |
| @@ -895,12 +936,16 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table) | |||
| 895 | 936 | ||
| 896 | status = efi_call_phys3(sys_table->boottime->handle_protocol, | 937 | status = efi_call_phys3(sys_table->boottime->handle_protocol, |
| 897 | handle, &proto, (void *)&image); | 938 | handle, &proto, (void *)&image); |
| 898 | if (status != EFI_SUCCESS) | 939 | if (status != EFI_SUCCESS) { |
| 940 | efi_printk("Failed to get handle for LOADED_IMAGE_PROTOCOL\n"); | ||
| 899 | goto fail; | 941 | goto fail; |
| 942 | } | ||
| 900 | 943 | ||
| 901 | status = low_alloc(0x4000, 1, (unsigned long *)&boot_params); | 944 | status = low_alloc(0x4000, 1, (unsigned long *)&boot_params); |
| 902 | if (status != EFI_SUCCESS) | 945 | if (status != EFI_SUCCESS) { |
| 946 | efi_printk("Failed to alloc lowmem for boot params\n"); | ||
| 903 | goto fail; | 947 | goto fail; |
| 948 | } | ||
| 904 | 949 | ||
| 905 | memset(boot_params, 0x0, 0x4000); | 950 | memset(boot_params, 0x0, 0x4000); |
| 906 | 951 | ||
| @@ -933,8 +978,10 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table) | |||
| 933 | if (status != EFI_SUCCESS) { | 978 | if (status != EFI_SUCCESS) { |
| 934 | status = low_alloc(hdr->init_size, hdr->kernel_alignment, | 979 | status = low_alloc(hdr->init_size, hdr->kernel_alignment, |
| 935 | &start); | 980 | &start); |
| 936 | if (status != EFI_SUCCESS) | 981 | if (status != EFI_SUCCESS) { |
| 982 | efi_printk("Failed to alloc mem for kernel\n"); | ||
| 937 | goto fail; | 983 | goto fail; |
| 984 | } | ||
| 938 | } | 985 | } |
| 939 | 986 | ||
| 940 | hdr->code32_start = (__u32)start; | 987 | hdr->code32_start = (__u32)start; |
| @@ -945,19 +992,25 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table) | |||
| 945 | status = efi_call_phys3(sys_table->boottime->allocate_pool, | 992 | status = efi_call_phys3(sys_table->boottime->allocate_pool, |
| 946 | EFI_LOADER_DATA, sizeof(*gdt), | 993 | EFI_LOADER_DATA, sizeof(*gdt), |
| 947 | (void **)&gdt); | 994 | (void **)&gdt); |
| 948 | if (status != EFI_SUCCESS) | 995 | if (status != EFI_SUCCESS) { |
| 996 | efi_printk("Failed to alloc mem for gdt structure\n"); | ||
| 949 | goto fail; | 997 | goto fail; |
| 998 | } | ||
| 950 | 999 | ||
| 951 | gdt->size = 0x800; | 1000 | gdt->size = 0x800; |
| 952 | status = low_alloc(gdt->size, 8, (unsigned long *)&gdt->address); | 1001 | status = low_alloc(gdt->size, 8, (unsigned long *)&gdt->address); |
| 953 | if (status != EFI_SUCCESS) | 1002 | if (status != EFI_SUCCESS) { |
| 1003 | efi_printk("Failed to alloc mem for gdt\n"); | ||
| 954 | goto fail; | 1004 | goto fail; |
| 1005 | } | ||
| 955 | 1006 | ||
| 956 | status = efi_call_phys3(sys_table->boottime->allocate_pool, | 1007 | status = efi_call_phys3(sys_table->boottime->allocate_pool, |
| 957 | EFI_LOADER_DATA, sizeof(*idt), | 1008 | EFI_LOADER_DATA, sizeof(*idt), |
| 958 | (void **)&idt); | 1009 | (void **)&idt); |
| 959 | if (status != EFI_SUCCESS) | 1010 | if (status != EFI_SUCCESS) { |
| 1011 | efi_printk("Failed to alloc mem for idt structure\n"); | ||
| 960 | goto fail; | 1012 | goto fail; |
| 1013 | } | ||
| 961 | 1014 | ||
| 962 | idt->size = 0; | 1015 | idt->size = 0; |
| 963 | idt->address = 0; | 1016 | idt->address = 0; |
diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h index 39251663e65b..3b6e15627c55 100644 --- a/arch/x86/boot/compressed/eboot.h +++ b/arch/x86/boot/compressed/eboot.h | |||
| @@ -58,4 +58,10 @@ struct efi_uga_draw_protocol { | |||
| 58 | void *blt; | 58 | void *blt; |
| 59 | }; | 59 | }; |
| 60 | 60 | ||
| 61 | struct efi_simple_text_output_protocol { | ||
| 62 | void *reset; | ||
| 63 | void *output_string; | ||
| 64 | void *test_string; | ||
| 65 | }; | ||
| 66 | |||
| 61 | #endif /* BOOT_COMPRESSED_EBOOT_H */ | 67 | #endif /* BOOT_COMPRESSED_EBOOT_H */ |
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 18d9005d9e4f..b0767bc08740 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h | |||
| @@ -34,7 +34,7 @@ | |||
| 34 | 34 | ||
| 35 | #ifndef __ASSEMBLY__ | 35 | #ifndef __ASSEMBLY__ |
| 36 | extern void mcount(void); | 36 | extern void mcount(void); |
| 37 | extern int modifying_ftrace_code; | 37 | extern atomic_t modifying_ftrace_code; |
| 38 | 38 | ||
| 39 | static inline unsigned long ftrace_call_adjust(unsigned long addr) | 39 | static inline unsigned long ftrace_call_adjust(unsigned long addr) |
| 40 | { | 40 | { |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 82f29e70d058..6b9333b429ba 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
| @@ -1101,14 +1101,20 @@ int is_debug_stack(unsigned long addr) | |||
| 1101 | addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ)); | 1101 | addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ)); |
| 1102 | } | 1102 | } |
| 1103 | 1103 | ||
| 1104 | static DEFINE_PER_CPU(u32, debug_stack_use_ctr); | ||
| 1105 | |||
| 1104 | void debug_stack_set_zero(void) | 1106 | void debug_stack_set_zero(void) |
| 1105 | { | 1107 | { |
| 1108 | this_cpu_inc(debug_stack_use_ctr); | ||
| 1106 | load_idt((const struct desc_ptr *)&nmi_idt_descr); | 1109 | load_idt((const struct desc_ptr *)&nmi_idt_descr); |
| 1107 | } | 1110 | } |
| 1108 | 1111 | ||
| 1109 | void debug_stack_reset(void) | 1112 | void debug_stack_reset(void) |
| 1110 | { | 1113 | { |
| 1111 | load_idt((const struct desc_ptr *)&idt_descr); | 1114 | if (WARN_ON(!this_cpu_read(debug_stack_use_ctr))) |
| 1115 | return; | ||
| 1116 | if (this_cpu_dec_return(debug_stack_use_ctr) == 0) | ||
| 1117 | load_idt((const struct desc_ptr *)&idt_descr); | ||
| 1112 | } | 1118 | } |
| 1113 | 1119 | ||
| 1114 | #else /* CONFIG_X86_64 */ | 1120 | #else /* CONFIG_X86_64 */ |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 320852d02026..7d65133b51be 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
| @@ -191,6 +191,44 @@ ENDPROC(native_usergs_sysret64) | |||
| 191 | .endm | 191 | .endm |
| 192 | 192 | ||
| 193 | /* | 193 | /* |
| 194 | * When dynamic function tracer is enabled it will add a breakpoint | ||
| 195 | * to all locations that it is about to modify, sync CPUs, update | ||
| 196 | * all the code, sync CPUs, then remove the breakpoints. In this time | ||
| 197 | * if lockdep is enabled, it might jump back into the debug handler | ||
| 198 | * outside the updating of the IST protection. (TRACE_IRQS_ON/OFF). | ||
| 199 | * | ||
| 200 | * We need to change the IDT table before calling TRACE_IRQS_ON/OFF to | ||
| 201 | * make sure the stack pointer does not get reset back to the top | ||
| 202 | * of the debug stack, and instead just reuses the current stack. | ||
| 203 | */ | ||
| 204 | #if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS) | ||
| 205 | |||
| 206 | .macro TRACE_IRQS_OFF_DEBUG | ||
| 207 | call debug_stack_set_zero | ||
| 208 | TRACE_IRQS_OFF | ||
| 209 | call debug_stack_reset | ||
| 210 | .endm | ||
| 211 | |||
| 212 | .macro TRACE_IRQS_ON_DEBUG | ||
| 213 | call debug_stack_set_zero | ||
| 214 | TRACE_IRQS_ON | ||
| 215 | call debug_stack_reset | ||
| 216 | .endm | ||
| 217 | |||
| 218 | .macro TRACE_IRQS_IRETQ_DEBUG offset=ARGOFFSET | ||
| 219 | bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */ | ||
| 220 | jnc 1f | ||
| 221 | TRACE_IRQS_ON_DEBUG | ||
| 222 | 1: | ||
| 223 | .endm | ||
| 224 | |||
| 225 | #else | ||
| 226 | # define TRACE_IRQS_OFF_DEBUG TRACE_IRQS_OFF | ||
| 227 | # define TRACE_IRQS_ON_DEBUG TRACE_IRQS_ON | ||
| 228 | # define TRACE_IRQS_IRETQ_DEBUG TRACE_IRQS_IRETQ | ||
| 229 | #endif | ||
| 230 | |||
| 231 | /* | ||
| 194 | * C code is not supposed to know about undefined top of stack. Every time | 232 | * C code is not supposed to know about undefined top of stack. Every time |
| 195 | * a C function with an pt_regs argument is called from the SYSCALL based | 233 | * a C function with an pt_regs argument is called from the SYSCALL based |
| 196 | * fast path FIXUP_TOP_OF_STACK is needed. | 234 | * fast path FIXUP_TOP_OF_STACK is needed. |
| @@ -1098,7 +1136,7 @@ ENTRY(\sym) | |||
| 1098 | subq $ORIG_RAX-R15, %rsp | 1136 | subq $ORIG_RAX-R15, %rsp |
| 1099 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 | 1137 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 |
| 1100 | call save_paranoid | 1138 | call save_paranoid |
| 1101 | TRACE_IRQS_OFF | 1139 | TRACE_IRQS_OFF_DEBUG |
| 1102 | movq %rsp,%rdi /* pt_regs pointer */ | 1140 | movq %rsp,%rdi /* pt_regs pointer */ |
| 1103 | xorl %esi,%esi /* no error code */ | 1141 | xorl %esi,%esi /* no error code */ |
| 1104 | subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist) | 1142 | subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist) |
| @@ -1393,7 +1431,7 @@ paranoidzeroentry machine_check *machine_check_vector(%rip) | |||
| 1393 | ENTRY(paranoid_exit) | 1431 | ENTRY(paranoid_exit) |
| 1394 | DEFAULT_FRAME | 1432 | DEFAULT_FRAME |
| 1395 | DISABLE_INTERRUPTS(CLBR_NONE) | 1433 | DISABLE_INTERRUPTS(CLBR_NONE) |
| 1396 | TRACE_IRQS_OFF | 1434 | TRACE_IRQS_OFF_DEBUG |
| 1397 | testl %ebx,%ebx /* swapgs needed? */ | 1435 | testl %ebx,%ebx /* swapgs needed? */ |
| 1398 | jnz paranoid_restore | 1436 | jnz paranoid_restore |
| 1399 | testl $3,CS(%rsp) | 1437 | testl $3,CS(%rsp) |
| @@ -1404,7 +1442,7 @@ paranoid_swapgs: | |||
| 1404 | RESTORE_ALL 8 | 1442 | RESTORE_ALL 8 |
| 1405 | jmp irq_return | 1443 | jmp irq_return |
| 1406 | paranoid_restore: | 1444 | paranoid_restore: |
| 1407 | TRACE_IRQS_IRETQ 0 | 1445 | TRACE_IRQS_IRETQ_DEBUG 0 |
| 1408 | RESTORE_ALL 8 | 1446 | RESTORE_ALL 8 |
| 1409 | jmp irq_return | 1447 | jmp irq_return |
| 1410 | paranoid_userspace: | 1448 | paranoid_userspace: |
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 32ff36596ab1..c3a7cb4bf6e6 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c | |||
| @@ -100,7 +100,7 @@ static const unsigned char *ftrace_nop_replace(void) | |||
| 100 | } | 100 | } |
| 101 | 101 | ||
| 102 | static int | 102 | static int |
| 103 | ftrace_modify_code(unsigned long ip, unsigned const char *old_code, | 103 | ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code, |
| 104 | unsigned const char *new_code) | 104 | unsigned const char *new_code) |
| 105 | { | 105 | { |
| 106 | unsigned char replaced[MCOUNT_INSN_SIZE]; | 106 | unsigned char replaced[MCOUNT_INSN_SIZE]; |
| @@ -141,7 +141,20 @@ int ftrace_make_nop(struct module *mod, | |||
| 141 | old = ftrace_call_replace(ip, addr); | 141 | old = ftrace_call_replace(ip, addr); |
| 142 | new = ftrace_nop_replace(); | 142 | new = ftrace_nop_replace(); |
| 143 | 143 | ||
| 144 | return ftrace_modify_code(rec->ip, old, new); | 144 | /* |
| 145 | * On boot up, and when modules are loaded, the MCOUNT_ADDR | ||
| 146 | * is converted to a nop, and will never become MCOUNT_ADDR | ||
| 147 | * again. This code is either running before SMP (on boot up) | ||
| 148 | * or before the code will ever be executed (module load). | ||
| 149 | * We do not want to use the breakpoint version in this case, | ||
| 150 | * just modify the code directly. | ||
| 151 | */ | ||
| 152 | if (addr == MCOUNT_ADDR) | ||
| 153 | return ftrace_modify_code_direct(rec->ip, old, new); | ||
| 154 | |||
| 155 | /* Normal cases use add_brk_on_nop */ | ||
| 156 | WARN_ONCE(1, "invalid use of ftrace_make_nop"); | ||
| 157 | return -EINVAL; | ||
| 145 | } | 158 | } |
| 146 | 159 | ||
| 147 | int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) | 160 | int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) |
| @@ -152,9 +165,47 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) | |||
| 152 | old = ftrace_nop_replace(); | 165 | old = ftrace_nop_replace(); |
| 153 | new = ftrace_call_replace(ip, addr); | 166 | new = ftrace_call_replace(ip, addr); |
| 154 | 167 | ||
| 155 | return ftrace_modify_code(rec->ip, old, new); | 168 | /* Should only be called when module is loaded */ |
| 169 | return ftrace_modify_code_direct(rec->ip, old, new); | ||
| 156 | } | 170 | } |
| 157 | 171 | ||
| 172 | /* | ||
| 173 | * The modifying_ftrace_code is used to tell the breakpoint | ||
| 174 | * handler to call ftrace_int3_handler(). If it fails to | ||
| 175 | * call this handler for a breakpoint added by ftrace, then | ||
| 176 | * the kernel may crash. | ||
| 177 | * | ||
| 178 | * As atomic_writes on x86 do not need a barrier, we do not | ||
| 179 | * need to add smp_mb()s for this to work. It is also considered | ||
| 180 | * that we can not read the modifying_ftrace_code before | ||
| 181 | * executing the breakpoint. That would be quite remarkable if | ||
| 182 | * it could do that. Here's the flow that is required: | ||
| 183 | * | ||
| 184 | * CPU-0 CPU-1 | ||
| 185 | * | ||
| 186 | * atomic_inc(mfc); | ||
| 187 | * write int3s | ||
| 188 | * <trap-int3> // implicit (r)mb | ||
| 189 | * if (atomic_read(mfc)) | ||
| 190 | * call ftrace_int3_handler() | ||
| 191 | * | ||
| 192 | * Then when we are finished: | ||
| 193 | * | ||
| 194 | * atomic_dec(mfc); | ||
| 195 | * | ||
| 196 | * If we hit a breakpoint that was not set by ftrace, it does not | ||
| 197 | * matter if ftrace_int3_handler() is called or not. It will | ||
| 198 | * simply be ignored. But it is crucial that a ftrace nop/caller | ||
| 199 | * breakpoint is handled. No other user should ever place a | ||
| 200 | * breakpoint on an ftrace nop/caller location. It must only | ||
| 201 | * be done by this code. | ||
| 202 | */ | ||
| 203 | atomic_t modifying_ftrace_code __read_mostly; | ||
| 204 | |||
| 205 | static int | ||
| 206 | ftrace_modify_code(unsigned long ip, unsigned const char *old_code, | ||
| 207 | unsigned const char *new_code); | ||
| 208 | |||
| 158 | int ftrace_update_ftrace_func(ftrace_func_t func) | 209 | int ftrace_update_ftrace_func(ftrace_func_t func) |
| 159 | { | 210 | { |
| 160 | unsigned long ip = (unsigned long)(&ftrace_call); | 211 | unsigned long ip = (unsigned long)(&ftrace_call); |
| @@ -163,13 +214,17 @@ int ftrace_update_ftrace_func(ftrace_func_t func) | |||
| 163 | 214 | ||
| 164 | memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE); | 215 | memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE); |
| 165 | new = ftrace_call_replace(ip, (unsigned long)func); | 216 | new = ftrace_call_replace(ip, (unsigned long)func); |
| 217 | |||
| 218 | /* See comment above by declaration of modifying_ftrace_code */ | ||
| 219 | atomic_inc(&modifying_ftrace_code); | ||
| 220 | |||
| 166 | ret = ftrace_modify_code(ip, old, new); | 221 | ret = ftrace_modify_code(ip, old, new); |
| 167 | 222 | ||
| 223 | atomic_dec(&modifying_ftrace_code); | ||
| 224 | |||
| 168 | return ret; | 225 | return ret; |
| 169 | } | 226 | } |
| 170 | 227 | ||
| 171 | int modifying_ftrace_code __read_mostly; | ||
| 172 | |||
| 173 | /* | 228 | /* |
| 174 | * A breakpoint was added to the code address we are about to | 229 | * A breakpoint was added to the code address we are about to |
| 175 | * modify, and this is the handle that will just skip over it. | 230 | * modify, and this is the handle that will just skip over it. |
| @@ -489,13 +544,46 @@ void ftrace_replace_code(int enable) | |||
| 489 | } | 544 | } |
| 490 | } | 545 | } |
| 491 | 546 | ||
| 547 | static int | ||
| 548 | ftrace_modify_code(unsigned long ip, unsigned const char *old_code, | ||
| 549 | unsigned const char *new_code) | ||
| 550 | { | ||
| 551 | int ret; | ||
| 552 | |||
| 553 | ret = add_break(ip, old_code); | ||
| 554 | if (ret) | ||
| 555 | goto out; | ||
| 556 | |||
| 557 | run_sync(); | ||
| 558 | |||
| 559 | ret = add_update_code(ip, new_code); | ||
| 560 | if (ret) | ||
| 561 | goto fail_update; | ||
| 562 | |||
| 563 | run_sync(); | ||
| 564 | |||
| 565 | ret = ftrace_write(ip, new_code, 1); | ||
| 566 | if (ret) { | ||
| 567 | ret = -EPERM; | ||
| 568 | goto out; | ||
| 569 | } | ||
| 570 | run_sync(); | ||
| 571 | out: | ||
| 572 | return ret; | ||
| 573 | |||
| 574 | fail_update: | ||
| 575 | probe_kernel_write((void *)ip, &old_code[0], 1); | ||
| 576 | goto out; | ||
| 577 | } | ||
| 578 | |||
| 492 | void arch_ftrace_update_code(int command) | 579 | void arch_ftrace_update_code(int command) |
| 493 | { | 580 | { |
| 494 | modifying_ftrace_code++; | 581 | /* See comment above by declaration of modifying_ftrace_code */ |
| 582 | atomic_inc(&modifying_ftrace_code); | ||
| 495 | 583 | ||
| 496 | ftrace_modify_all_code(command); | 584 | ftrace_modify_all_code(command); |
| 497 | 585 | ||
| 498 | modifying_ftrace_code--; | 586 | atomic_dec(&modifying_ftrace_code); |
| 499 | } | 587 | } |
| 500 | 588 | ||
| 501 | int __init ftrace_dyn_arch_init(void *data) | 589 | int __init ftrace_dyn_arch_init(void *data) |
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 90875279ef3d..a0b2f84457be 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c | |||
| @@ -444,14 +444,16 @@ static inline void nmi_nesting_preprocess(struct pt_regs *regs) | |||
| 444 | */ | 444 | */ |
| 445 | if (unlikely(is_debug_stack(regs->sp))) { | 445 | if (unlikely(is_debug_stack(regs->sp))) { |
| 446 | debug_stack_set_zero(); | 446 | debug_stack_set_zero(); |
| 447 | __get_cpu_var(update_debug_stack) = 1; | 447 | this_cpu_write(update_debug_stack, 1); |
| 448 | } | 448 | } |
| 449 | } | 449 | } |
| 450 | 450 | ||
| 451 | static inline void nmi_nesting_postprocess(void) | 451 | static inline void nmi_nesting_postprocess(void) |
| 452 | { | 452 | { |
| 453 | if (unlikely(__get_cpu_var(update_debug_stack))) | 453 | if (unlikely(this_cpu_read(update_debug_stack))) { |
| 454 | debug_stack_reset(); | 454 | debug_stack_reset(); |
| 455 | this_cpu_write(update_debug_stack, 0); | ||
| 456 | } | ||
| 455 | } | 457 | } |
| 456 | #endif | 458 | #endif |
| 457 | 459 | ||
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 13b1990c7c58..c4c6a5c2bf0f 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
| @@ -1211,12 +1211,6 @@ static long x32_arch_ptrace(struct task_struct *child, | |||
| 1211 | 0, sizeof(struct user_i387_struct), | 1211 | 0, sizeof(struct user_i387_struct), |
| 1212 | datap); | 1212 | datap); |
| 1213 | 1213 | ||
| 1214 | /* normal 64bit interface to access TLS data. | ||
| 1215 | Works just like arch_prctl, except that the arguments | ||
| 1216 | are reversed. */ | ||
| 1217 | case PTRACE_ARCH_PRCTL: | ||
| 1218 | return do_arch_prctl(child, data, addr); | ||
| 1219 | |||
| 1220 | default: | 1214 | default: |
| 1221 | return compat_ptrace_request(child, request, addr, data); | 1215 | return compat_ptrace_request(child, request, addr, data); |
| 1222 | } | 1216 | } |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index ff08457a025d..05b31d92f69c 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
| @@ -303,8 +303,12 @@ gp_in_kernel: | |||
| 303 | dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code) | 303 | dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code) |
| 304 | { | 304 | { |
| 305 | #ifdef CONFIG_DYNAMIC_FTRACE | 305 | #ifdef CONFIG_DYNAMIC_FTRACE |
| 306 | /* ftrace must be first, everything else may cause a recursive crash */ | 306 | /* |
| 307 | if (unlikely(modifying_ftrace_code) && ftrace_int3_handler(regs)) | 307 | * ftrace must be first, everything else may cause a recursive crash. |
| 308 | * See note by declaration of modifying_ftrace_code in ftrace.c | ||
| 309 | */ | ||
| 310 | if (unlikely(atomic_read(&modifying_ftrace_code)) && | ||
| 311 | ftrace_int3_handler(regs)) | ||
| 308 | return; | 312 | return; |
| 309 | #endif | 313 | #endif |
| 310 | #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP | 314 | #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP |
