Diffstat (limited to 'arch/x86/kernel')
54 files changed, 1440 insertions(+), 879 deletions(-)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 5d4502c8b983..cdb1b70ddad0 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -16,6 +16,10 @@ CFLAGS_REMOVE_ftrace.o = -pg
16 | CFLAGS_REMOVE_early_printk.o = -pg | 16 | CFLAGS_REMOVE_early_printk.o = -pg |
17 | endif | 17 | endif |
18 | 18 | ||
19 | KASAN_SANITIZE_head$(BITS).o := n | ||
20 | KASAN_SANITIZE_dumpstack.o := n | ||
21 | KASAN_SANITIZE_dumpstack_$(BITS).o := n | ||
22 | |||
19 | CFLAGS_irq.o := -I$(src)/../include/asm/trace | 23 | CFLAGS_irq.o := -I$(src)/../include/asm/trace |
20 | 24 | ||
21 | obj-y := process_$(BITS).o signal.o entry_$(BITS).o | 25 | obj-y := process_$(BITS).o signal.o entry_$(BITS).o |
@@ -63,6 +67,7 @@ obj-$(CONFIG_X86_MPPARSE) += mpparse.o
63 | obj-y += apic/ | 67 | obj-y += apic/ |
64 | obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o | 68 | obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o |
65 | obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o | 69 | obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o |
70 | obj-$(CONFIG_LIVEPATCH) += livepatch.o | ||
66 | obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o | 71 | obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o |
67 | obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o | 72 | obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o |
68 | obj-$(CONFIG_X86_TSC) += trace_clock.o | 73 | obj-$(CONFIG_X86_TSC) += trace_clock.o |
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index b9e30daa0881..3d525c6124f6 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -613,6 +613,11 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp)
613 | { | 613 | { |
614 | int rc, irq, trigger, polarity; | 614 | int rc, irq, trigger, polarity; |
615 | 615 | ||
616 | if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) { | ||
617 | *irqp = gsi; | ||
618 | return 0; | ||
619 | } | ||
620 | |||
616 | rc = acpi_get_override_irq(gsi, &trigger, &polarity); | 621 | rc = acpi_get_override_irq(gsi, &trigger, &polarity); |
617 | if (rc == 0) { | 622 | if (rc == 0) { |
618 | trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE; | 623 | trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE; |
@@ -653,6 +658,7 @@ static int acpi_register_gsi_pic(struct device *dev, u32 gsi,
653 | return gsi; | 658 | return gsi; |
654 | } | 659 | } |
655 | 660 | ||
661 | #ifdef CONFIG_X86_LOCAL_APIC | ||
656 | static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi, | 662 | static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi, |
657 | int trigger, int polarity) | 663 | int trigger, int polarity) |
658 | { | 664 | { |
@@ -675,6 +681,7 @@ static void acpi_unregister_gsi_ioapic(u32 gsi)
675 | mutex_unlock(&acpi_ioapic_lock); | 681 | mutex_unlock(&acpi_ioapic_lock); |
676 | #endif | 682 | #endif |
677 | } | 683 | } |
684 | #endif | ||
678 | 685 | ||
679 | int (*__acpi_register_gsi)(struct device *dev, u32 gsi, | 686 | int (*__acpi_register_gsi)(struct device *dev, u32 gsi, |
680 | int trigger, int polarity) = acpi_register_gsi_pic; | 687 | int trigger, int polarity) = acpi_register_gsi_pic; |
@@ -843,13 +850,7 @@ int acpi_ioapic_registered(acpi_handle handle, u32 gsi_base)
843 | 850 | ||
844 | static int __init acpi_parse_sbf(struct acpi_table_header *table) | 851 | static int __init acpi_parse_sbf(struct acpi_table_header *table) |
845 | { | 852 | { |
846 | struct acpi_table_boot *sb; | 853 | struct acpi_table_boot *sb = (struct acpi_table_boot *)table; |
847 | |||
848 | sb = (struct acpi_table_boot *)table; | ||
849 | if (!sb) { | ||
850 | printk(KERN_WARNING PREFIX "Unable to map SBF\n"); | ||
851 | return -ENODEV; | ||
852 | } | ||
853 | 854 | ||
854 | sbf_port = sb->cmos_index; /* Save CMOS port */ | 855 | sbf_port = sb->cmos_index; /* Save CMOS port */ |
855 | 856 | ||
@@ -863,13 +864,7 @@ static struct resource *hpet_res __initdata;
863 | 864 | ||
864 | static int __init acpi_parse_hpet(struct acpi_table_header *table) | 865 | static int __init acpi_parse_hpet(struct acpi_table_header *table) |
865 | { | 866 | { |
866 | struct acpi_table_hpet *hpet_tbl; | 867 | struct acpi_table_hpet *hpet_tbl = (struct acpi_table_hpet *)table; |
867 | |||
868 | hpet_tbl = (struct acpi_table_hpet *)table; | ||
869 | if (!hpet_tbl) { | ||
870 | printk(KERN_WARNING PREFIX "Unable to map HPET\n"); | ||
871 | return -ENODEV; | ||
872 | } | ||
873 | 868 | ||
874 | if (hpet_tbl->address.space_id != ACPI_SPACE_MEM) { | 869 | if (hpet_tbl->address.space_id != ACPI_SPACE_MEM) { |
875 | printk(KERN_WARNING PREFIX "HPET timers must be located in " | 870 | printk(KERN_WARNING PREFIX "HPET timers must be located in " |
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 31368207837c..d1daead5fcdd 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -78,7 +78,7 @@ int x86_acpi_suspend_lowlevel(void)
78 | 78 | ||
79 | header->pmode_cr0 = read_cr0(); | 79 | header->pmode_cr0 = read_cr0(); |
80 | if (__this_cpu_read(cpu_info.cpuid_level) >= 0) { | 80 | if (__this_cpu_read(cpu_info.cpuid_level) >= 0) { |
81 | header->pmode_cr4 = read_cr4(); | 81 | header->pmode_cr4 = __read_cr4(); |
82 | header->pmode_behavior |= (1 << WAKEUP_BEHAVIOR_RESTORE_CR4); | 82 | header->pmode_behavior |= (1 << WAKEUP_BEHAVIOR_RESTORE_CR4); |
83 | } | 83 | } |
84 | if (!rdmsr_safe(MSR_IA32_MISC_ENABLE, | 84 | if (!rdmsr_safe(MSR_IA32_MISC_ENABLE, |
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index b708738d016e..6a7c23ff21d3 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -135,14 +135,6 @@ static inline void apbt_clear_mapping(void)
135 | apbt_virt_address = NULL; | 135 | apbt_virt_address = NULL; |
136 | } | 136 | } |
137 | 137 | ||
138 | /* | ||
139 | * APBT timer interrupt enable / disable | ||
140 | */ | ||
141 | static inline int is_apbt_capable(void) | ||
142 | { | ||
143 | return apbt_virt_address ? 1 : 0; | ||
144 | } | ||
145 | |||
146 | static int __init apbt_clockevent_register(void) | 138 | static int __init apbt_clockevent_register(void) |
147 | { | 139 | { |
148 | struct sfi_timer_table_entry *mtmr; | 140 | struct sfi_timer_table_entry *mtmr; |
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 29b5b18afa27..ad3639ae1b9b 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -134,9 +134,6 @@ static inline void imcr_apic_to_pic(void)
134 | */ | 134 | */ |
135 | static int force_enable_local_apic __initdata; | 135 | static int force_enable_local_apic __initdata; |
136 | 136 | ||
137 | /* Control whether x2APIC mode is enabled or not */ | ||
138 | static bool nox2apic __initdata; | ||
139 | |||
140 | /* | 137 | /* |
141 | * APIC command line parameters | 138 | * APIC command line parameters |
142 | */ | 139 | */ |
@@ -161,33 +158,6 @@ static __init int setup_apicpmtimer(char *s)
161 | __setup("apicpmtimer", setup_apicpmtimer); | 158 | __setup("apicpmtimer", setup_apicpmtimer); |
162 | #endif | 159 | #endif |
163 | 160 | ||
164 | int x2apic_mode; | ||
165 | #ifdef CONFIG_X86_X2APIC | ||
166 | /* x2apic enabled before OS handover */ | ||
167 | int x2apic_preenabled; | ||
168 | static int x2apic_disabled; | ||
169 | static int __init setup_nox2apic(char *str) | ||
170 | { | ||
171 | if (x2apic_enabled()) { | ||
172 | int apicid = native_apic_msr_read(APIC_ID); | ||
173 | |||
174 | if (apicid >= 255) { | ||
175 | pr_warning("Apicid: %08x, cannot enforce nox2apic\n", | ||
176 | apicid); | ||
177 | return 0; | ||
178 | } | ||
179 | |||
180 | pr_warning("x2apic already enabled. will disable it\n"); | ||
181 | } else | ||
182 | setup_clear_cpu_cap(X86_FEATURE_X2APIC); | ||
183 | |||
184 | nox2apic = true; | ||
185 | |||
186 | return 0; | ||
187 | } | ||
188 | early_param("nox2apic", setup_nox2apic); | ||
189 | #endif | ||
190 | |||
191 | unsigned long mp_lapic_addr; | 161 | unsigned long mp_lapic_addr; |
192 | int disable_apic; | 162 | int disable_apic; |
193 | /* Disable local APIC timer from the kernel commandline or via dmi quirk */ | 163 | /* Disable local APIC timer from the kernel commandline or via dmi quirk */ |
@@ -1475,7 +1445,7 @@ void setup_local_APIC(void)
1475 | #endif | 1445 | #endif |
1476 | } | 1446 | } |
1477 | 1447 | ||
1478 | void end_local_APIC_setup(void) | 1448 | static void end_local_APIC_setup(void) |
1479 | { | 1449 | { |
1480 | lapic_setup_esr(); | 1450 | lapic_setup_esr(); |
1481 | 1451 | ||
@@ -1492,116 +1462,183 @@ void end_local_APIC_setup(void)
1492 | apic_pm_activate(); | 1462 | apic_pm_activate(); |
1493 | } | 1463 | } |
1494 | 1464 | ||
1495 | void __init bsp_end_local_APIC_setup(void) | 1465 | /* |
1466 | * APIC setup function for application processors. Called from smpboot.c | ||
1467 | */ | ||
1468 | void apic_ap_setup(void) | ||
1496 | { | 1469 | { |
1470 | setup_local_APIC(); | ||
1497 | end_local_APIC_setup(); | 1471 | end_local_APIC_setup(); |
1498 | |||
1499 | /* | ||
1500 | * Now that local APIC setup is completed for BP, configure the fault | ||
1501 | * handling for interrupt remapping. | ||
1502 | */ | ||
1503 | irq_remap_enable_fault_handling(); | ||
1504 | |||
1505 | } | 1472 | } |
1506 | 1473 | ||
1507 | #ifdef CONFIG_X86_X2APIC | 1474 | #ifdef CONFIG_X86_X2APIC |
1508 | /* | 1475 | int x2apic_mode; |
1509 | * Need to disable xapic and x2apic at the same time and then enable xapic mode | ||
1510 | */ | ||
1511 | static inline void __disable_x2apic(u64 msr) | ||
1512 | { | ||
1513 | wrmsrl(MSR_IA32_APICBASE, | ||
1514 | msr & ~(X2APIC_ENABLE | XAPIC_ENABLE)); | ||
1515 | wrmsrl(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE); | ||
1516 | } | ||
1517 | 1476 | ||
1518 | static __init void disable_x2apic(void) | 1477 | enum { |
1478 | X2APIC_OFF, | ||
1479 | X2APIC_ON, | ||
1480 | X2APIC_DISABLED, | ||
1481 | }; | ||
1482 | static int x2apic_state; | ||
1483 | |||
1484 | static inline void __x2apic_disable(void) | ||
1519 | { | 1485 | { |
1520 | u64 msr; | 1486 | u64 msr; |
1521 | 1487 | ||
1522 | if (!cpu_has_x2apic) | 1488 | if (cpu_has_apic) |
1523 | return; | 1489 | return; |
1524 | 1490 | ||
1525 | rdmsrl(MSR_IA32_APICBASE, msr); | 1491 | rdmsrl(MSR_IA32_APICBASE, msr); |
1526 | if (msr & X2APIC_ENABLE) { | 1492 | if (!(msr & X2APIC_ENABLE)) |
1527 | u32 x2apic_id = read_apic_id(); | 1493 | return; |
1528 | 1494 | /* Disable xapic and x2apic first and then reenable xapic mode */ | |
1529 | if (x2apic_id >= 255) | 1495 | wrmsrl(MSR_IA32_APICBASE, msr & ~(X2APIC_ENABLE | XAPIC_ENABLE)); |
1530 | panic("Cannot disable x2apic, id: %08x\n", x2apic_id); | 1496 | wrmsrl(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE); |
1497 | printk_once(KERN_INFO "x2apic disabled\n"); | ||
1498 | } | ||
1531 | 1499 | ||
1532 | pr_info("Disabling x2apic\n"); | 1500 | static inline void __x2apic_enable(void) |
1533 | __disable_x2apic(msr); | 1501 | { |
1502 | u64 msr; | ||
1534 | 1503 | ||
1535 | if (nox2apic) { | 1504 | rdmsrl(MSR_IA32_APICBASE, msr); |
1536 | clear_cpu_cap(&cpu_data(0), X86_FEATURE_X2APIC); | 1505 | if (msr & X2APIC_ENABLE) |
1537 | setup_clear_cpu_cap(X86_FEATURE_X2APIC); | 1506 | return; |
1538 | } | 1507 | wrmsrl(MSR_IA32_APICBASE, msr | X2APIC_ENABLE); |
1508 | printk_once(KERN_INFO "x2apic enabled\n"); | ||
1509 | } | ||
1539 | 1510 | ||
1540 | x2apic_disabled = 1; | 1511 | static int __init setup_nox2apic(char *str) |
1541 | x2apic_mode = 0; | 1512 | { |
1513 | if (x2apic_enabled()) { | ||
1514 | int apicid = native_apic_msr_read(APIC_ID); | ||
1542 | 1515 | ||
1543 | register_lapic_address(mp_lapic_addr); | 1516 | if (apicid >= 255) { |
1517 | pr_warning("Apicid: %08x, cannot enforce nox2apic\n", | ||
1518 | apicid); | ||
1519 | return 0; | ||
1520 | } | ||
1521 | pr_warning("x2apic already enabled.\n"); | ||
1522 | __x2apic_disable(); | ||
1544 | } | 1523 | } |
1524 | setup_clear_cpu_cap(X86_FEATURE_X2APIC); | ||
1525 | x2apic_state = X2APIC_DISABLED; | ||
1526 | x2apic_mode = 0; | ||
1527 | return 0; | ||
1545 | } | 1528 | } |
1529 | early_param("nox2apic", setup_nox2apic); | ||
1546 | 1530 | ||
1547 | void check_x2apic(void) | 1531 | /* Called from cpu_init() to enable x2apic on (secondary) cpus */ |
1532 | void x2apic_setup(void) | ||
1548 | { | 1533 | { |
1549 | if (x2apic_enabled()) { | 1534 | /* |
1550 | pr_info("x2apic enabled by BIOS, switching to x2apic ops\n"); | 1535 | * If x2apic is not in ON state, disable it if already enabled |
1551 | x2apic_preenabled = x2apic_mode = 1; | 1536 | * from BIOS. |
1537 | */ | ||
1538 | if (x2apic_state != X2APIC_ON) { | ||
1539 | __x2apic_disable(); | ||
1540 | return; | ||
1552 | } | 1541 | } |
1542 | __x2apic_enable(); | ||
1553 | } | 1543 | } |
1554 | 1544 | ||
1555 | void enable_x2apic(void) | 1545 | static __init void x2apic_disable(void) |
1556 | { | 1546 | { |
1557 | u64 msr; | 1547 | u32 x2apic_id; |
1558 | 1548 | ||
1559 | rdmsrl(MSR_IA32_APICBASE, msr); | 1549 | if (x2apic_state != X2APIC_ON) |
1560 | if (x2apic_disabled) { | 1550 | goto out; |
1561 | __disable_x2apic(msr); | 1551 | |
1552 | x2apic_id = read_apic_id(); | ||
1553 | if (x2apic_id >= 255) | ||
1554 | panic("Cannot disable x2apic, id: %08x\n", x2apic_id); | ||
1555 | |||
1556 | __x2apic_disable(); | ||
1557 | register_lapic_address(mp_lapic_addr); | ||
1558 | out: | ||
1559 | x2apic_state = X2APIC_DISABLED; | ||
1560 | x2apic_mode = 0; | ||
1561 | } | ||
1562 | |||
1563 | static __init void x2apic_enable(void) | ||
1564 | { | ||
1565 | if (x2apic_state != X2APIC_OFF) | ||
1562 | return; | 1566 | return; |
1563 | } | ||
1564 | 1567 | ||
1565 | if (!x2apic_mode) | 1568 | x2apic_mode = 1; |
1569 | x2apic_state = X2APIC_ON; | ||
1570 | __x2apic_enable(); | ||
1571 | } | ||
1572 | |||
1573 | static __init void try_to_enable_x2apic(int remap_mode) | ||
1574 | { | ||
1575 | if (x2apic_state == X2APIC_DISABLED) | ||
1566 | return; | 1576 | return; |
1567 | 1577 | ||
1568 | if (!(msr & X2APIC_ENABLE)) { | 1578 | if (remap_mode != IRQ_REMAP_X2APIC_MODE) { |
1569 | printk_once(KERN_INFO "Enabling x2apic\n"); | 1579 | /* IR is required if there is APIC ID > 255 even when running |
1570 | wrmsrl(MSR_IA32_APICBASE, msr | X2APIC_ENABLE); | 1580 | * under KVM |
1581 | */ | ||
1582 | if (max_physical_apicid > 255 || | ||
1583 | !hypervisor_x2apic_available()) { | ||
1584 | pr_info("x2apic: IRQ remapping doesn't support X2APIC mode\n"); | ||
1585 | x2apic_disable(); | ||
1586 | return; | ||
1587 | } | ||
1588 | |||
1589 | /* | ||
1590 | * without IR all CPUs can be addressed by IOAPIC/MSI | ||
1591 | * only in physical mode | ||
1592 | */ | ||
1593 | x2apic_phys = 1; | ||
1571 | } | 1594 | } |
1595 | x2apic_enable(); | ||
1572 | } | 1596 | } |
1573 | #endif /* CONFIG_X86_X2APIC */ | ||
1574 | 1597 | ||
1575 | int __init enable_IR(void) | 1598 | void __init check_x2apic(void) |
1576 | { | 1599 | { |
1577 | #ifdef CONFIG_IRQ_REMAP | 1600 | if (x2apic_enabled()) { |
1578 | if (!irq_remapping_supported()) { | 1601 | pr_info("x2apic: enabled by BIOS, switching to x2apic ops\n"); |
1579 | pr_debug("intr-remapping not supported\n"); | 1602 | x2apic_mode = 1; |
1580 | return -1; | 1603 | x2apic_state = X2APIC_ON; |
1604 | } else if (!cpu_has_x2apic) { | ||
1605 | x2apic_state = X2APIC_DISABLED; | ||
1581 | } | 1606 | } |
1607 | } | ||
1608 | #else /* CONFIG_X86_X2APIC */ | ||
1609 | static int __init validate_x2apic(void) | ||
1610 | { | ||
1611 | if (!apic_is_x2apic_enabled()) | ||
1612 | return 0; | ||
1613 | /* | ||
1614 | * Checkme: Can we simply turn off x2apic here instead of panic? | ||
1615 | */ | ||
1616 | panic("BIOS has enabled x2apic but kernel doesn't support x2apic, please disable x2apic in BIOS.\n"); | ||
1617 | } | ||
1618 | early_initcall(validate_x2apic); | ||
1582 | 1619 | ||
1583 | if (!x2apic_preenabled && skip_ioapic_setup) { | 1620 | static inline void try_to_enable_x2apic(int remap_mode) { } |
1584 | pr_info("Skipped enabling intr-remap because of skipping " | 1621 | static inline void __x2apic_enable(void) { } |
1585 | "io-apic setup\n"); | 1622 | #endif /* !CONFIG_X86_X2APIC */ |
1623 | |||
1624 | static int __init try_to_enable_IR(void) | ||
1625 | { | ||
1626 | #ifdef CONFIG_X86_IO_APIC | ||
1627 | if (!x2apic_enabled() && skip_ioapic_setup) { | ||
1628 | pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n"); | ||
1586 | return -1; | 1629 | return -1; |
1587 | } | 1630 | } |
1588 | |||
1589 | return irq_remapping_enable(); | ||
1590 | #endif | 1631 | #endif |
1591 | return -1; | 1632 | return irq_remapping_enable(); |
1592 | } | 1633 | } |
1593 | 1634 | ||
1594 | void __init enable_IR_x2apic(void) | 1635 | void __init enable_IR_x2apic(void) |
1595 | { | 1636 | { |
1596 | unsigned long flags; | 1637 | unsigned long flags; |
1597 | int ret, x2apic_enabled = 0; | 1638 | int ret, ir_stat; |
1598 | int hardware_init_ret; | ||
1599 | |||
1600 | /* Make sure irq_remap_ops are initialized */ | ||
1601 | setup_irq_remapping_ops(); | ||
1602 | 1639 | ||
1603 | hardware_init_ret = irq_remapping_prepare(); | 1640 | ir_stat = irq_remapping_prepare(); |
1604 | if (hardware_init_ret && !x2apic_supported()) | 1641 | if (ir_stat < 0 && !x2apic_supported()) |
1605 | return; | 1642 | return; |
1606 | 1643 | ||
1607 | ret = save_ioapic_entries(); | 1644 | ret = save_ioapic_entries(); |
@@ -1614,49 +1651,13 @@ void __init enable_IR_x2apic(void)
1614 | legacy_pic->mask_all(); | 1651 | legacy_pic->mask_all(); |
1615 | mask_ioapic_entries(); | 1652 | mask_ioapic_entries(); |
1616 | 1653 | ||
1617 | if (x2apic_preenabled && nox2apic) | 1654 | /* If irq_remapping_prepare() succeded, try to enable it */ |
1618 | disable_x2apic(); | 1655 | if (ir_stat >= 0) |
1619 | 1656 | ir_stat = try_to_enable_IR(); | |
1620 | if (hardware_init_ret) | 1657 | /* ir_stat contains the remap mode or an error code */ |
1621 | ret = -1; | 1658 | try_to_enable_x2apic(ir_stat); |
1622 | else | ||
1623 | ret = enable_IR(); | ||
1624 | |||
1625 | if (!x2apic_supported()) | ||
1626 | goto skip_x2apic; | ||
1627 | |||
1628 | if (ret < 0) { | ||
1629 | /* IR is required if there is APIC ID > 255 even when running | ||
1630 | * under KVM | ||
1631 | */ | ||
1632 | if (max_physical_apicid > 255 || | ||
1633 | !hypervisor_x2apic_available()) { | ||
1634 | if (x2apic_preenabled) | ||
1635 | disable_x2apic(); | ||
1636 | goto skip_x2apic; | ||
1637 | } | ||
1638 | /* | ||
1639 | * without IR all CPUs can be addressed by IOAPIC/MSI | ||
1640 | * only in physical mode | ||
1641 | */ | ||
1642 | x2apic_force_phys(); | ||
1643 | } | ||
1644 | 1659 | ||
1645 | if (ret == IRQ_REMAP_XAPIC_MODE) { | 1660 | if (ir_stat < 0) |
1646 | pr_info("x2apic not enabled, IRQ remapping is in xapic mode\n"); | ||
1647 | goto skip_x2apic; | ||
1648 | } | ||
1649 | |||
1650 | x2apic_enabled = 1; | ||
1651 | |||
1652 | if (x2apic_supported() && !x2apic_mode) { | ||
1653 | x2apic_mode = 1; | ||
1654 | enable_x2apic(); | ||
1655 | pr_info("Enabled x2apic\n"); | ||
1656 | } | ||
1657 | |||
1658 | skip_x2apic: | ||
1659 | if (ret < 0) /* IR enabling failed */ | ||
1660 | restore_ioapic_entries(); | 1661 | restore_ioapic_entries(); |
1661 | legacy_pic->restore_mask(); | 1662 | legacy_pic->restore_mask(); |
1662 | local_irq_restore(flags); | 1663 | local_irq_restore(flags); |
@@ -1847,82 +1848,8 @@ void __init register_lapic_address(unsigned long address)
1847 | } | 1848 | } |
1848 | } | 1849 | } |
1849 | 1850 | ||
1850 | /* | ||
1851 | * This initializes the IO-APIC and APIC hardware if this is | ||
1852 | * a UP kernel. | ||
1853 | */ | ||
1854 | int apic_version[MAX_LOCAL_APIC]; | 1851 | int apic_version[MAX_LOCAL_APIC]; |
1855 | 1852 | ||
1856 | int __init APIC_init_uniprocessor(void) | ||
1857 | { | ||
1858 | if (disable_apic) { | ||
1859 | pr_info("Apic disabled\n"); | ||
1860 | return -1; | ||
1861 | } | ||
1862 | #ifdef CONFIG_X86_64 | ||
1863 | if (!cpu_has_apic) { | ||
1864 | disable_apic = 1; | ||
1865 | pr_info("Apic disabled by BIOS\n"); | ||
1866 | return -1; | ||
1867 | } | ||
1868 | #else | ||
1869 | if (!smp_found_config && !cpu_has_apic) | ||
1870 | return -1; | ||
1871 | |||
1872 | /* | ||
1873 | * Complain if the BIOS pretends there is one. | ||
1874 | */ | ||
1875 | if (!cpu_has_apic && | ||
1876 | APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { | ||
1877 | pr_err("BIOS bug, local APIC 0x%x not detected!...\n", | ||
1878 | boot_cpu_physical_apicid); | ||
1879 | return -1; | ||
1880 | } | ||
1881 | #endif | ||
1882 | |||
1883 | default_setup_apic_routing(); | ||
1884 | |||
1885 | verify_local_APIC(); | ||
1886 | connect_bsp_APIC(); | ||
1887 | |||
1888 | #ifdef CONFIG_X86_64 | ||
1889 | apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid)); | ||
1890 | #else | ||
1891 | /* | ||
1892 | * Hack: In case of kdump, after a crash, kernel might be booting | ||
1893 | * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid | ||
1894 | * might be zero if read from MP tables. Get it from LAPIC. | ||
1895 | */ | ||
1896 | # ifdef CONFIG_CRASH_DUMP | ||
1897 | boot_cpu_physical_apicid = read_apic_id(); | ||
1898 | # endif | ||
1899 | #endif | ||
1900 | physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); | ||
1901 | setup_local_APIC(); | ||
1902 | |||
1903 | #ifdef CONFIG_X86_IO_APIC | ||
1904 | /* | ||
1905 | * Now enable IO-APICs, actually call clear_IO_APIC | ||
1906 | * We need clear_IO_APIC before enabling error vector | ||
1907 | */ | ||
1908 | if (!skip_ioapic_setup && nr_ioapics) | ||
1909 | enable_IO_APIC(); | ||
1910 | #endif | ||
1911 | |||
1912 | bsp_end_local_APIC_setup(); | ||
1913 | |||
1914 | #ifdef CONFIG_X86_IO_APIC | ||
1915 | if (smp_found_config && !skip_ioapic_setup && nr_ioapics) | ||
1916 | setup_IO_APIC(); | ||
1917 | else { | ||
1918 | nr_ioapics = 0; | ||
1919 | } | ||
1920 | #endif | ||
1921 | |||
1922 | x86_init.timers.setup_percpu_clockev(); | ||
1923 | return 0; | ||
1924 | } | ||
1925 | |||
1926 | /* | 1853 | /* |
1927 | * Local APIC interrupts | 1854 | * Local APIC interrupts |
1928 | */ | 1855 | */ |
@@ -2027,7 +1954,7 @@ __visible void smp_trace_error_interrupt(struct pt_regs *regs)
2027 | /** | 1954 | /** |
2028 | * connect_bsp_APIC - attach the APIC to the interrupt system | 1955 | * connect_bsp_APIC - attach the APIC to the interrupt system |
2029 | */ | 1956 | */ |
2030 | void __init connect_bsp_APIC(void) | 1957 | static void __init connect_bsp_APIC(void) |
2031 | { | 1958 | { |
2032 | #ifdef CONFIG_X86_32 | 1959 | #ifdef CONFIG_X86_32 |
2033 | if (pic_mode) { | 1960 | if (pic_mode) { |
@@ -2274,6 +2201,100 @@ void __init apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v))
2274 | } | 2201 | } |
2275 | } | 2202 | } |
2276 | 2203 | ||
2204 | static void __init apic_bsp_up_setup(void) | ||
2205 | { | ||
2206 | #ifdef CONFIG_X86_64 | ||
2207 | apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid)); | ||
2208 | #else | ||
2209 | /* | ||
2210 | * Hack: In case of kdump, after a crash, kernel might be booting | ||
2211 | * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid | ||
2212 | * might be zero if read from MP tables. Get it from LAPIC. | ||
2213 | */ | ||
2214 | # ifdef CONFIG_CRASH_DUMP | ||
2215 | boot_cpu_physical_apicid = read_apic_id(); | ||
2216 | # endif | ||
2217 | #endif | ||
2218 | physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); | ||
2219 | } | ||
2220 | |||
2221 | /** | ||
2222 | * apic_bsp_setup - Setup function for local apic and io-apic | ||
2223 | * @upmode: Force UP mode (for APIC_init_uniprocessor) | ||
2224 | * | ||
2225 | * Returns: | ||
2226 | * apic_id of BSP APIC | ||
2227 | */ | ||
2228 | int __init apic_bsp_setup(bool upmode) | ||
2229 | { | ||
2230 | int id; | ||
2231 | |||
2232 | connect_bsp_APIC(); | ||
2233 | if (upmode) | ||
2234 | apic_bsp_up_setup(); | ||
2235 | setup_local_APIC(); | ||
2236 | |||
2237 | if (x2apic_mode) | ||
2238 | id = apic_read(APIC_LDR); | ||
2239 | else | ||
2240 | id = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); | ||
2241 | |||
2242 | enable_IO_APIC(); | ||
2243 | end_local_APIC_setup(); | ||
2244 | irq_remap_enable_fault_handling(); | ||
2245 | setup_IO_APIC(); | ||
2246 | /* Setup local timer */ | ||
2247 | x86_init.timers.setup_percpu_clockev(); | ||
2248 | return id; | ||
2249 | } | ||
2250 | |||
2251 | /* | ||
2252 | * This initializes the IO-APIC and APIC hardware if this is | ||
2253 | * a UP kernel. | ||
2254 | */ | ||
2255 | int __init APIC_init_uniprocessor(void) | ||
2256 | { | ||
2257 | if (disable_apic) { | ||
2258 | pr_info("Apic disabled\n"); | ||
2259 | return -1; | ||
2260 | } | ||
2261 | #ifdef CONFIG_X86_64 | ||
2262 | if (!cpu_has_apic) { | ||
2263 | disable_apic = 1; | ||
2264 | pr_info("Apic disabled by BIOS\n"); | ||
2265 | return -1; | ||
2266 | } | ||
2267 | #else | ||
2268 | if (!smp_found_config && !cpu_has_apic) | ||
2269 | return -1; | ||
2270 | |||
2271 | /* | ||
2272 | * Complain if the BIOS pretends there is one. | ||
2273 | */ | ||
2274 | if (!cpu_has_apic && | ||
2275 | APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { | ||
2276 | pr_err("BIOS bug, local APIC 0x%x not detected!...\n", | ||
2277 | boot_cpu_physical_apicid); | ||
2278 | return -1; | ||
2279 | } | ||
2280 | #endif | ||
2281 | |||
2282 | if (!smp_found_config) | ||
2283 | disable_ioapic_support(); | ||
2284 | |||
2285 | default_setup_apic_routing(); | ||
2286 | verify_local_APIC(); | ||
2287 | apic_bsp_setup(true); | ||
2288 | return 0; | ||
2289 | } | ||
2290 | |||
2291 | #ifdef CONFIG_UP_LATE_INIT | ||
2292 | void __init up_late_init(void) | ||
2293 | { | ||
2294 | APIC_init_uniprocessor(); | ||
2295 | } | ||
2296 | #endif | ||
2297 | |||
2277 | /* | 2298 | /* |
2278 | * Power management | 2299 | * Power management |
2279 | */ | 2300 | */ |
@@ -2359,9 +2380,9 @@ static void lapic_resume(void)
2359 | mask_ioapic_entries(); | 2380 | mask_ioapic_entries(); |
2360 | legacy_pic->mask_all(); | 2381 | legacy_pic->mask_all(); |
2361 | 2382 | ||
2362 | if (x2apic_mode) | 2383 | if (x2apic_mode) { |
2363 | enable_x2apic(); | 2384 | __x2apic_enable(); |
2364 | else { | 2385 | } else { |
2365 | /* | 2386 | /* |
2366 | * Make sure the APICBASE points to the right address | 2387 | * Make sure the APICBASE points to the right address |
2367 | * | 2388 | * |
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 3f5f60406ab1..f4dc2462a1ac 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1507,7 +1507,10 @@ void __init enable_IO_APIC(void)
1507 | int i8259_apic, i8259_pin; | 1507 | int i8259_apic, i8259_pin; |
1508 | int apic, pin; | 1508 | int apic, pin; |
1509 | 1509 | ||
1510 | if (!nr_legacy_irqs()) | 1510 | if (skip_ioapic_setup) |
1511 | nr_ioapics = 0; | ||
1512 | |||
1513 | if (!nr_legacy_irqs() || !nr_ioapics) | ||
1511 | return; | 1514 | return; |
1512 | 1515 | ||
1513 | for_each_ioapic_pin(apic, pin) { | 1516 | for_each_ioapic_pin(apic, pin) { |
@@ -2295,7 +2298,7 @@ static inline void __init check_timer(void)
2295 | } | 2298 | } |
2296 | local_irq_disable(); | 2299 | local_irq_disable(); |
2297 | apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); | 2300 | apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); |
2298 | if (x2apic_preenabled) | 2301 | if (apic_is_x2apic_enabled()) |
2299 | apic_printk(APIC_QUIET, KERN_INFO | 2302 | apic_printk(APIC_QUIET, KERN_INFO |
2300 | "Perhaps problem with the pre-enabled x2apic mode\n" | 2303 | "Perhaps problem with the pre-enabled x2apic mode\n" |
2301 | "Try booting with x2apic and interrupt-remapping disabled in the bios.\n"); | 2304 | "Try booting with x2apic and interrupt-remapping disabled in the bios.\n"); |
@@ -2373,9 +2376,9 @@ void __init setup_IO_APIC(void)
2373 | { | 2376 | { |
2374 | int ioapic; | 2377 | int ioapic; |
2375 | 2378 | ||
2376 | /* | 2379 | if (skip_ioapic_setup || !nr_ioapics) |
2377 | * calling enable_IO_APIC() is moved to setup_local_APIC for BP | 2380 | return; |
2378 | */ | 2381 | |
2379 | io_apic_irqs = nr_legacy_irqs() ? ~PIC_IRQS : ~0UL; | 2382 | io_apic_irqs = nr_legacy_irqs() ? ~PIC_IRQS : ~0UL; |
2380 | 2383 | ||
2381 | apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); | 2384 | apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 15c5df92f74e..a220239cea65 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -869,3 +869,22 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
869 | 869 | ||
870 | return false; | 870 | return false; |
871 | } | 871 | } |
872 | |||
873 | void set_dr_addr_mask(unsigned long mask, int dr) | ||
874 | { | ||
875 | if (!cpu_has_bpext) | ||
876 | return; | ||
877 | |||
878 | switch (dr) { | ||
879 | case 0: | ||
880 | wrmsr(MSR_F16H_DR0_ADDR_MASK, mask, 0); | ||
881 | break; | ||
882 | case 1: | ||
883 | case 2: | ||
884 | case 3: | ||
885 | wrmsr(MSR_F16H_DR1_ADDR_MASK - 1 + dr, mask, 0); | ||
886 | break; | ||
887 | default: | ||
888 | break; | ||
889 | } | ||
890 | } | ||
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c6049650c093..2346c95c6ab1 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -19,6 +19,7 @@
19 | #include <asm/archrandom.h> | 19 | #include <asm/archrandom.h> |
20 | #include <asm/hypervisor.h> | 20 | #include <asm/hypervisor.h> |
21 | #include <asm/processor.h> | 21 | #include <asm/processor.h> |
22 | #include <asm/tlbflush.h> | ||
22 | #include <asm/debugreg.h> | 23 | #include <asm/debugreg.h> |
23 | #include <asm/sections.h> | 24 | #include <asm/sections.h> |
24 | #include <asm/vsyscall.h> | 25 | #include <asm/vsyscall.h> |
@@ -278,7 +279,7 @@ __setup("nosmep", setup_disable_smep);
278 | static __always_inline void setup_smep(struct cpuinfo_x86 *c) | 279 | static __always_inline void setup_smep(struct cpuinfo_x86 *c) |
279 | { | 280 | { |
280 | if (cpu_has(c, X86_FEATURE_SMEP)) | 281 | if (cpu_has(c, X86_FEATURE_SMEP)) |
281 | set_in_cr4(X86_CR4_SMEP); | 282 | cr4_set_bits(X86_CR4_SMEP); |
282 | } | 283 | } |
283 | 284 | ||
284 | static __init int setup_disable_smap(char *arg) | 285 | static __init int setup_disable_smap(char *arg) |
@@ -298,9 +299,9 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
298 | 299 | ||
299 | if (cpu_has(c, X86_FEATURE_SMAP)) { | 300 | if (cpu_has(c, X86_FEATURE_SMAP)) { |
300 | #ifdef CONFIG_X86_SMAP | 301 | #ifdef CONFIG_X86_SMAP |
301 | set_in_cr4(X86_CR4_SMAP); | 302 | cr4_set_bits(X86_CR4_SMAP); |
302 | #else | 303 | #else |
303 | clear_in_cr4(X86_CR4_SMAP); | 304 | cr4_clear_bits(X86_CR4_SMAP); |
304 | #endif | 305 | #endif |
305 | } | 306 | } |
306 | } | 307 | } |
@@ -491,17 +492,18 @@ u16 __read_mostly tlb_lld_2m[NR_INFO];
491 | u16 __read_mostly tlb_lld_4m[NR_INFO]; | 492 | u16 __read_mostly tlb_lld_4m[NR_INFO]; |
492 | u16 __read_mostly tlb_lld_1g[NR_INFO]; | 493 | u16 __read_mostly tlb_lld_1g[NR_INFO]; |
493 | 494 | ||
494 | void cpu_detect_tlb(struct cpuinfo_x86 *c) | 495 | static void cpu_detect_tlb(struct cpuinfo_x86 *c) |
495 | { | 496 | { |
496 | if (this_cpu->c_detect_tlb) | 497 | if (this_cpu->c_detect_tlb) |
497 | this_cpu->c_detect_tlb(c); | 498 | this_cpu->c_detect_tlb(c); |
498 | 499 | ||
499 | printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" | 500 | pr_info("Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n", |
500 | "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d, 1GB %d\n", | ||
501 | tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES], | 501 | tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES], |
502 | tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES], | 502 | tlb_lli_4m[ENTRIES]); |
503 | tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES], | 503 | |
504 | tlb_lld_1g[ENTRIES]); | 504 | pr_info("Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d, 1GB %d\n", |
505 | tlb_lld_4k[ENTRIES], tlb_lld_2m[ENTRIES], | ||
506 | tlb_lld_4m[ENTRIES], tlb_lld_1g[ENTRIES]); | ||
505 | } | 507 | } |
506 | 508 | ||
507 | void detect_ht(struct cpuinfo_x86 *c) | 509 | void detect_ht(struct cpuinfo_x86 *c) |
@@ -1294,6 +1296,12 @@ void cpu_init(void)
1294 | wait_for_master_cpu(cpu); | 1296 | wait_for_master_cpu(cpu); |
1295 | 1297 | ||
1296 | /* | 1298 | /* |
1299 | * Initialize the CR4 shadow before doing anything that could | ||
1300 | * try to read it. | ||
1301 | */ | ||
1302 | cr4_init_shadow(); | ||
1303 | |||
1304 | /* | ||
1297 | * Load microcode on this cpu if a valid microcode is available. | 1305 | * Load microcode on this cpu if a valid microcode is available. |
1298 | * This is early microcode loading procedure. | 1306 | * This is early microcode loading procedure. |
1299 | */ | 1307 | */ |
@@ -1312,7 +1320,7 @@ void cpu_init(void)
1312 | 1320 | ||
1313 | pr_debug("Initializing CPU#%d\n", cpu); | 1321 | pr_debug("Initializing CPU#%d\n", cpu); |
1314 | 1322 | ||
1315 | clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); | 1323 | cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); |
1316 | 1324 | ||
1317 | /* | 1325 | /* |
1318 | * Initialize the per-CPU GDT with the boot GDT, | 1326 | * Initialize the per-CPU GDT with the boot GDT, |
@@ -1332,7 +1340,7 @@ void cpu_init(void)
1332 | barrier(); | 1340 | barrier(); |
1333 | 1341 | ||
1334 | x86_configure_nx(); | 1342 | x86_configure_nx(); |
1335 | enable_x2apic(); | 1343 | x2apic_setup(); |
1336 | 1344 | ||
1337 | /* | 1345 | /* |
1338 | * set up and load the per-CPU TSS | 1346 | * set up and load the per-CPU TSS |
@@ -1388,12 +1396,18 @@ void cpu_init(void)
1388 | 1396 | ||
1389 | wait_for_master_cpu(cpu); | 1397 | wait_for_master_cpu(cpu); |
1390 | 1398 | ||
1399 | /* | ||
1400 | * Initialize the CR4 shadow before doing anything that could | ||
1401 | * try to read it. | ||
1402 | */ | ||
1403 | cr4_init_shadow(); | ||
1404 | |||
1391 | show_ucode_info_early(); | 1405 | show_ucode_info_early(); |
1392 | 1406 | ||
1393 | printk(KERN_INFO "Initializing CPU#%d\n", cpu); | 1407 | printk(KERN_INFO "Initializing CPU#%d\n", cpu); |
1394 | 1408 | ||
1395 | if (cpu_feature_enabled(X86_FEATURE_VME) || cpu_has_tsc || cpu_has_de) | 1409 | if (cpu_feature_enabled(X86_FEATURE_VME) || cpu_has_tsc || cpu_has_de) |
1396 | clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); | 1410 | cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); |
1397 | 1411 | ||
1398 | load_current_idt(); | 1412 | load_current_idt(); |
1399 | switch_to_new_gdt(cpu); | 1413 | switch_to_new_gdt(cpu); |
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 9cc6b6f25f42..50163fa9034f 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -487,10 +487,8 @@ static void init_intel(struct cpuinfo_x86 *c)
487 | 487 | ||
488 | rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); | 488 | rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); |
489 | if ((epb & 0xF) == ENERGY_PERF_BIAS_PERFORMANCE) { | 489 | if ((epb & 0xF) == ENERGY_PERF_BIAS_PERFORMANCE) { |
490 | printk_once(KERN_WARNING "ENERGY_PERF_BIAS:" | 490 | pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n"); |
491 | " Set to 'normal', was 'performance'\n" | 491 | pr_warn_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n"); |
492 | "ENERGY_PERF_BIAS: View and update with" | ||
493 | " x86_energy_perf_policy(8)\n"); | ||
494 | epb = (epb & ~0xF) | ENERGY_PERF_BIAS_NORMAL; | 492 | epb = (epb & ~0xF) | ENERGY_PERF_BIAS_NORMAL; |
495 | wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); | 493 | wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); |
496 | } | 494 | } |
@@ -567,8 +565,8 @@ static const struct _tlb_table intel_tlb_table[] = {
567 | { 0xb2, TLB_INST_4K, 64, " TLB_INST 4KByte pages, 4-way set associative" }, | 565 | { 0xb2, TLB_INST_4K, 64, " TLB_INST 4KByte pages, 4-way set associative" }, |
568 | { 0xb3, TLB_DATA_4K, 128, " TLB_DATA 4 KByte pages, 4-way set associative" }, | 566 | { 0xb3, TLB_DATA_4K, 128, " TLB_DATA 4 KByte pages, 4-way set associative" }, |
569 | { 0xb4, TLB_DATA_4K, 256, " TLB_DATA 4 KByte pages, 4-way associative" }, | 567 | { 0xb4, TLB_DATA_4K, 256, " TLB_DATA 4 KByte pages, 4-way associative" }, |
570 | { 0xb5, TLB_INST_4K, 64, " TLB_INST 4 KByte pages, 8-way set ssociative" }, | 568 | { 0xb5, TLB_INST_4K, 64, " TLB_INST 4 KByte pages, 8-way set associative" }, |
571 | { 0xb6, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 8-way set ssociative" }, | 569 | { 0xb6, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 8-way set associative" }, |
572 | { 0xba, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way associative" }, | 570 | { 0xba, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way associative" }, |
573 | { 0xc0, TLB_DATA_4K_4M, 8, " TLB_DATA 4 KByte and 4 MByte pages, 4-way associative" }, | 571 | { 0xc0, TLB_DATA_4K_4M, 8, " TLB_DATA 4 KByte and 4 MByte pages, 4-way associative" }, |
574 | { 0xc1, STLB_4K_2M, 1024, " STLB 4 KByte and 2 MByte pages, 8-way associative" }, | 572 | { 0xc1, STLB_4K_2M, 1024, " STLB 4 KByte and 2 MByte pages, 8-way associative" }, |
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index c7035073dfc1..659643376dbf 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -952,20 +952,18 @@ static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf,
952 | static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf, | 952 | static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf, |
953 | int type, char *buf) | 953 | int type, char *buf) |
954 | { | 954 | { |
955 | ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf; | 955 | const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map); |
956 | int n = 0; | 956 | int ret; |
957 | 957 | ||
958 | if (len > 1) { | 958 | if (type) |
959 | const struct cpumask *mask; | 959 | ret = scnprintf(buf, PAGE_SIZE - 1, "%*pbl", |
960 | 960 | cpumask_pr_args(mask)); | |
961 | mask = to_cpumask(this_leaf->shared_cpu_map); | 961 | else |
962 | n = type ? | 962 | ret = scnprintf(buf, PAGE_SIZE - 1, "%*pb", |
963 | cpulist_scnprintf(buf, len-2, mask) : | 963 | cpumask_pr_args(mask)); |
964 | cpumask_scnprintf(buf, len-2, mask); | 964 | buf[ret++] = '\n'; |
965 | buf[n++] = '\n'; | 965 | buf[ret] = '\0'; |
966 | buf[n] = '\0'; | 966 | return ret; |
967 | } | ||
968 | return n; | ||
969 | } | 967 | } |
970 | 968 | ||
971 | static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf, | 969 | static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf, |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index d2c611699cd9..3c036cb4a370 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -43,6 +43,8 @@
43 | #include <linux/export.h> | 43 | #include <linux/export.h> |
44 | 44 | ||
45 | #include <asm/processor.h> | 45 | #include <asm/processor.h> |
46 | #include <asm/traps.h> | ||
47 | #include <asm/tlbflush.h> | ||
46 | #include <asm/mce.h> | 48 | #include <asm/mce.h> |
47 | #include <asm/msr.h> | 49 | #include <asm/msr.h> |
48 | 50 | ||
@@ -115,7 +117,7 @@ static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
115 | * CPU/chipset specific EDAC code can register a notifier call here to print | 117 | * CPU/chipset specific EDAC code can register a notifier call here to print |
116 | * MCE errors in a human-readable form. | 118 | * MCE errors in a human-readable form. |
117 | */ | 119 | */ |
118 | ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); | 120 | static ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); |
119 | 121 | ||
120 | /* Do initial initialization of a struct mce */ | 122 | /* Do initial initialization of a struct mce */ |
121 | void mce_setup(struct mce *m) | 123 | void mce_setup(struct mce *m) |
@@ -150,14 +152,11 @@ static struct mce_log mcelog = {
150 | void mce_log(struct mce *mce) | 152 | void mce_log(struct mce *mce) |
151 | { | 153 | { |
152 | unsigned next, entry; | 154 | unsigned next, entry; |
153 | int ret = 0; | ||
154 | 155 | ||
155 | /* Emit the trace record: */ | 156 | /* Emit the trace record: */ |
156 | trace_mce_record(mce); | 157 | trace_mce_record(mce); |
157 | 158 | ||
158 | ret = atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce); | 159 | atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce); |
159 | if (ret == NOTIFY_STOP) | ||
160 | return; | ||
161 | 160 | ||
162 | mce->finished = 0; | 161 | mce->finished = 0; |
163 | wmb(); | 162 | wmb(); |
@@ -311,7 +310,7 @@ static void wait_for_panic(void)
311 | panic("Panicing machine check CPU died"); | 310 | panic("Panicing machine check CPU died"); |
312 | } | 311 | } |
313 | 312 | ||
314 | static void mce_panic(char *msg, struct mce *final, char *exp) | 313 | static void mce_panic(const char *msg, struct mce *final, char *exp) |
315 | { | 314 | { |
316 | int i, apei_err = 0; | 315 | int i, apei_err = 0; |
317 | 316 | ||
@@ -529,7 +528,7 @@ static void mce_schedule_work(void)
529 | schedule_work(this_cpu_ptr(&mce_work)); | 528 | schedule_work(this_cpu_ptr(&mce_work)); |
530 | } | 529 | } |
531 | 530 | ||
532 | DEFINE_PER_CPU(struct irq_work, mce_irq_work); | 531 | static DEFINE_PER_CPU(struct irq_work, mce_irq_work); |
533 | 532 | ||
534 | static void mce_irq_work_cb(struct irq_work *entry) | 533 | static void mce_irq_work_cb(struct irq_work *entry) |
535 | { | 534 | { |
@@ -735,7 +734,7 @@ static atomic_t mce_callin;
735 | /* | 734 | /* |
736 | * Check if a timeout waiting for other CPUs happened. | 735 | * Check if a timeout waiting for other CPUs happened. |
737 | */ | 736 | */ |
738 | static int mce_timed_out(u64 *t) | 737 | static int mce_timed_out(u64 *t, const char *msg) |
739 | { | 738 | { |
740 | /* | 739 | /* |
741 | * The others already did panic for some reason. | 740 | * The others already did panic for some reason. |
@@ -750,8 +749,7 @@ static int mce_timed_out(u64 *t)
750 | goto out; | 749 | goto out; |
751 | if ((s64)*t < SPINUNIT) { | 750 | if ((s64)*t < SPINUNIT) { |
752 | if (mca_cfg.tolerant <= 1) | 751 | if (mca_cfg.tolerant <= 1) |
753 | mce_panic("Timeout synchronizing machine check over CPUs", | 752 | mce_panic(msg, NULL, NULL); |
754 | NULL, NULL); | ||
755 | cpu_missing = 1; | 753 | cpu_missing = 1; |
756 | return 1; | 754 | return 1; |
757 | } | 755 | } |
@@ -867,7 +865,8 @@ static int mce_start(int *no_way_out)
867 | * Wait for everyone. | 865 | * Wait for everyone. |
868 | */ | 866 | */ |
869 | while (atomic_read(&mce_callin) != cpus) { | 867 | while (atomic_read(&mce_callin) != cpus) { |
870 | if (mce_timed_out(&timeout)) { | 868 | if (mce_timed_out(&timeout, |
869 | "Timeout: Not all CPUs entered broadcast exception handler")) { | ||
871 | atomic_set(&global_nwo, 0); | 870 | atomic_set(&global_nwo, 0); |
872 | return -1; | 871 | return -1; |
873 | } | 872 | } |
@@ -892,7 +891,8 @@ static int mce_start(int *no_way_out)
892 | * only seen by one CPU before cleared, avoiding duplicates. | 891 | * only seen by one CPU before cleared, avoiding duplicates. |
893 | */ | 892 | */ |
894 | while (atomic_read(&mce_executing) < order) { | 893 | while (atomic_read(&mce_executing) < order) { |
895 | if (mce_timed_out(&timeout)) { | 894 | if (mce_timed_out(&timeout, |
895 | "Timeout: Subject CPUs unable to finish machine check processing")) { | ||
896 | atomic_set(&global_nwo, 0); | 896 | atomic_set(&global_nwo, 0); |
897 | return -1; | 897 | return -1; |
898 | } | 898 | } |
@@ -936,7 +936,8 @@ static int mce_end(int order)
936 | * loops. | 936 | * loops. |
937 | */ | 937 | */ |
938 | while (atomic_read(&mce_executing) <= cpus) { | 938 | while (atomic_read(&mce_executing) <= cpus) { |
939 | if (mce_timed_out(&timeout)) | 939 | if (mce_timed_out(&timeout, |
940 | "Timeout: Monarch CPU unable to finish machine check processing")) | ||
940 | goto reset; | 941 | goto reset; |
941 | ndelay(SPINUNIT); | 942 | ndelay(SPINUNIT); |
942 | } | 943 | } |
@@ -949,7 +950,8 @@ static int mce_end(int order)
949 | * Subject: Wait for Monarch to finish. | 950 | * Subject: Wait for Monarch to finish. |
950 | */ | 951 | */ |
951 | while (atomic_read(&mce_executing) != 0) { | 952 | while (atomic_read(&mce_executing) != 0) { |
952 | if (mce_timed_out(&timeout)) | 953 | if (mce_timed_out(&timeout, |
954 | "Timeout: Monarch CPU did not finish machine check processing")) | ||
953 | goto reset; | 955 | goto reset; |
954 | ndelay(SPINUNIT); | 956 | ndelay(SPINUNIT); |
955 | } | 957 | } |
@@ -1003,51 +1005,6 @@ static void mce_clear_state(unsigned long *toclear)
1003 | } | 1005 | } |
1004 | 1006 | ||
1005 | /* | 1007 | /* |
1006 | * Need to save faulting physical address associated with a process | ||
1007 | * in the machine check handler some place where we can grab it back | ||
1008 | * later in mce_notify_process() | ||
1009 | */ | ||
1010 | #define MCE_INFO_MAX 16 | ||
1011 | |||
1012 | struct mce_info { | ||
1013 | atomic_t inuse; | ||
1014 | struct task_struct *t; | ||
1015 | __u64 paddr; | ||
1016 | int restartable; | ||
1017 | } mce_info[MCE_INFO_MAX]; | ||
1018 | |||
1019 | static void mce_save_info(__u64 addr, int c) | ||
1020 | { | ||
1021 | struct mce_info *mi; | ||
1022 | |||
1023 | for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++) { | ||
1024 | if (atomic_cmpxchg(&mi->inuse, 0, 1) == 0) { | ||
1025 | mi->t = current; | ||
1026 | mi->paddr = addr; | ||
1027 | mi->restartable = c; | ||
1028 | return; | ||
1029 | } | ||
1030 | } | ||
1031 | |||
1032 | mce_panic("Too many concurrent recoverable errors", NULL, NULL); | ||
1033 | } | ||
1034 | |||
1035 | static struct mce_info *mce_find_info(void) | ||
1036 | { | ||
1037 | struct mce_info *mi; | ||
1038 | |||
1039 | for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++) | ||
1040 | if (atomic_read(&mi->inuse) && mi->t == current) | ||
1041 | return mi; | ||
1042 | return NULL; | ||
1043 | } | ||
1044 | |||
1045 | static void mce_clear_info(struct mce_info *mi) | ||
1046 | { | ||
1047 | atomic_set(&mi->inuse, 0); | ||
1048 | } | ||
1049 | |||
1050 | /* | ||
1051 | * The actual machine check handler. This only handles real | 1008 | * The actual machine check handler. This only handles real |
1052 | * exceptions when something got corrupted coming in through int 18. | 1009 | * exceptions when something got corrupted coming in through int 18. |
1053 | * | 1010 | * |
@@ -1063,6 +1020,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1063 | { | 1020 | { |
1064 | struct mca_config *cfg = &mca_cfg; | 1021 | struct mca_config *cfg = &mca_cfg; |
1065 | struct mce m, *final; | 1022 | struct mce m, *final; |
1023 | enum ctx_state prev_state; | ||
1066 | int i; | 1024 | int i; |
1067 | int worst = 0; | 1025 | int worst = 0; |
1068 | int severity; | 1026 | int severity; |
@@ -1084,6 +1042,10 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1084 | DECLARE_BITMAP(toclear, MAX_NR_BANKS); | 1042 | DECLARE_BITMAP(toclear, MAX_NR_BANKS); |
1085 | DECLARE_BITMAP(valid_banks, MAX_NR_BANKS); | 1043 | DECLARE_BITMAP(valid_banks, MAX_NR_BANKS); |
1086 | char *msg = "Unknown"; | 1044 | char *msg = "Unknown"; |
1045 | u64 recover_paddr = ~0ull; | ||
1046 | int flags = MF_ACTION_REQUIRED; | ||
1047 | |||
1048 | prev_state = ist_enter(regs); | ||
1087 | 1049 | ||
1088 | this_cpu_inc(mce_exception_count); | 1050 | this_cpu_inc(mce_exception_count); |
1089 | 1051 | ||
@@ -1203,9 +1165,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1203 | if (no_way_out) | 1165 | if (no_way_out) |
1204 | mce_panic("Fatal machine check on current CPU", &m, msg); | 1166 | mce_panic("Fatal machine check on current CPU", &m, msg); |
1205 | if (worst == MCE_AR_SEVERITY) { | 1167 | if (worst == MCE_AR_SEVERITY) { |
1206 | /* schedule action before return to userland */ | 1168 | recover_paddr = m.addr; |
1207 | mce_save_info(m.addr, m.mcgstatus & MCG_STATUS_RIPV); | 1169 | if (!(m.mcgstatus & MCG_STATUS_RIPV)) |
1208 | set_thread_flag(TIF_MCE_NOTIFY); | 1170 | flags |= MF_MUST_KILL; |
1209 | } else if (kill_it) { | 1171 | } else if (kill_it) { |
1210 | force_sig(SIGBUS, current); | 1172 | force_sig(SIGBUS, current); |
1211 | } | 1173 | } |
@@ -1216,6 +1178,27 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1216 | mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); | 1178 | mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); |
1217 | out: | 1179 | out: |
1218 | sync_core(); | 1180 | sync_core(); |
1181 | |||
1182 | if (recover_paddr == ~0ull) | ||
1183 | goto done; | ||
1184 | |||
1185 | pr_err("Uncorrected hardware memory error in user-access at %llx", | ||
1186 | recover_paddr); | ||
1187 | /* | ||
1188 | * We must call memory_failure() here even if the current process is | ||
1189 | * doomed. We still need to mark the page as poisoned and alert any | ||
1190 | * other users of the page. | ||
1191 | */ | ||
1192 | ist_begin_non_atomic(regs); | ||
1193 | local_irq_enable(); | ||
1194 | if (memory_failure(recover_paddr >> PAGE_SHIFT, MCE_VECTOR, flags) < 0) { | ||
1195 | pr_err("Memory error not recovered"); | ||
1196 | force_sig(SIGBUS, current); | ||
1197 | } | ||
1198 | local_irq_disable(); | ||
1199 | ist_end_non_atomic(); | ||
1200 | done: | ||
1201 | ist_exit(regs, prev_state); | ||
1219 | } | 1202 | } |
1220 | EXPORT_SYMBOL_GPL(do_machine_check); | 1203 | EXPORT_SYMBOL_GPL(do_machine_check); |
1221 | 1204 | ||
@@ -1233,42 +1216,6 @@ int memory_failure(unsigned long pfn, int vector, int flags)
1233 | #endif | 1216 | #endif |
1234 | 1217 | ||
1235 | /* | 1218 | /* |
1236 | * Called in process context that interrupted by MCE and marked with | ||
1237 | * TIF_MCE_NOTIFY, just before returning to erroneous userland. | ||
1238 | * This code is allowed to sleep. | ||
1239 | * Attempt possible recovery such as calling the high level VM handler to | ||
1240 | * process any corrupted pages, and kill/signal current process if required. | ||
1241 | * Action required errors are handled here. | ||
1242 | */ | ||
1243 | void mce_notify_process(void) | ||
1244 | { | ||
1245 | unsigned long pfn; | ||
1246 | struct mce_info *mi = mce_find_info(); | ||
1247 | int flags = MF_ACTION_REQUIRED; | ||
1248 | |||
1249 | if (!mi) | ||
1250 | mce_panic("Lost physical address for unconsumed uncorrectable error", NULL, NULL); | ||
1251 | pfn = mi->paddr >> PAGE_SHIFT; | ||
1252 | |||
1253 | clear_thread_flag(TIF_MCE_NOTIFY); | ||
1254 | |||
1255 | pr_err("Uncorrected hardware memory error in user-access at %llx", | ||
1256 | mi->paddr); | ||
1257 | /* | ||
1258 | * We must call memory_failure() here even if the current process is | ||
1259 | * doomed. We still need to mark the page as poisoned and alert any | ||
1260 | * other users of the page. | ||
1261 | */ | ||
1262 | if (!mi->restartable) | ||
1263 | flags |= MF_MUST_KILL; | ||
1264 | if (memory_failure(pfn, MCE_VECTOR, flags) < 0) { | ||
1265 | pr_err("Memory error not recovered"); | ||
1266 | force_sig(SIGBUS, current); | ||
1267 | } | ||
1268 | mce_clear_info(mi); | ||
1269 | } | ||
1270 | |||
1271 | /* | ||
1272 | * Action optional processing happens here (picking up | 1219 | * Action optional processing happens here (picking up |
1273 | * from the list of faulting pages that do_machine_check() | 1220 | * from the list of faulting pages that do_machine_check() |
1274 | * placed into the "ring"). | 1221 | * placed into the "ring"). |
@@ -1503,7 +1450,7 @@ static void __mcheck_cpu_init_generic(void)
1503 | bitmap_fill(all_banks, MAX_NR_BANKS); | 1450 | bitmap_fill(all_banks, MAX_NR_BANKS); |
1504 | machine_check_poll(MCP_UC | m_fl, &all_banks); | 1451 | machine_check_poll(MCP_UC | m_fl, &all_banks); |
1505 | 1452 | ||
1506 | set_in_cr4(X86_CR4_MCE); | 1453 | cr4_set_bits(X86_CR4_MCE); |
1507 | 1454 | ||
1508 | rdmsrl(MSR_IA32_MCG_CAP, cap); | 1455 | rdmsrl(MSR_IA32_MCG_CAP, cap); |
1509 | if (cap & MCG_CTL_P) | 1456 | if (cap & MCG_CTL_P) |
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c
index a3042989398c..737b0ad4e61a 100644
--- a/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@@ -8,6 +8,8 @@
8 | #include <linux/smp.h> | 8 | #include <linux/smp.h> |
9 | 9 | ||
10 | #include <asm/processor.h> | 10 | #include <asm/processor.h> |
11 | #include <asm/traps.h> | ||
12 | #include <asm/tlbflush.h> | ||
11 | #include <asm/mce.h> | 13 | #include <asm/mce.h> |
12 | #include <asm/msr.h> | 14 | #include <asm/msr.h> |
13 | 15 | ||
@@ -17,8 +19,11 @@ int mce_p5_enabled __read_mostly;
17 | /* Machine check handler for Pentium class Intel CPUs: */ | 19 | /* Machine check handler for Pentium class Intel CPUs: */ |
18 | static void pentium_machine_check(struct pt_regs *regs, long error_code) | 20 | static void pentium_machine_check(struct pt_regs *regs, long error_code) |
19 | { | 21 | { |
22 | enum ctx_state prev_state; | ||
20 | u32 loaddr, hi, lotype; | 23 | u32 loaddr, hi, lotype; |
21 | 24 | ||
25 | prev_state = ist_enter(regs); | ||
26 | |||
22 | rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi); | 27 | rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi); |
23 | rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi); | 28 | rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi); |
24 | 29 | ||
@@ -33,6 +38,8 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code)
33 | } | 38 | } |
34 | 39 | ||
35 | add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); | 40 | add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); |
41 | |||
42 | ist_exit(regs, prev_state); | ||
36 | } | 43 | } |
37 | 44 | ||
38 | /* Set up machine check reporting for processors with Intel style MCE: */ | 45 | /* Set up machine check reporting for processors with Intel style MCE: */ |
@@ -59,7 +66,7 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
59 | "Intel old style machine check architecture supported.\n"); | 66 | "Intel old style machine check architecture supported.\n"); |
60 | 67 | ||
61 | /* Enable MCE: */ | 68 | /* Enable MCE: */ |
62 | set_in_cr4(X86_CR4_MCE); | 69 | cr4_set_bits(X86_CR4_MCE); |
63 | printk(KERN_INFO | 70 | printk(KERN_INFO |
64 | "Intel old style machine check reporting enabled on CPU#%d.\n", | 71 | "Intel old style machine check reporting enabled on CPU#%d.\n", |
65 | smp_processor_id()); | 72 | smp_processor_id()); |
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c
index 7dc5564d0cdf..44f138296fbe 100644
--- a/arch/x86/kernel/cpu/mcheck/winchip.c
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c
@@ -7,14 +7,20 @@
7 | #include <linux/types.h> | 7 | #include <linux/types.h> |
8 | 8 | ||
9 | #include <asm/processor.h> | 9 | #include <asm/processor.h> |
10 | #include <asm/traps.h> | ||
11 | #include <asm/tlbflush.h> | ||
10 | #include <asm/mce.h> | 12 | #include <asm/mce.h> |
11 | #include <asm/msr.h> | 13 | #include <asm/msr.h> |
12 | 14 | ||
13 | /* Machine check handler for WinChip C6: */ | 15 | /* Machine check handler for WinChip C6: */ |
14 | static void winchip_machine_check(struct pt_regs *regs, long error_code) | 16 | static void winchip_machine_check(struct pt_regs *regs, long error_code) |
15 | { | 17 | { |
18 | enum ctx_state prev_state = ist_enter(regs); | ||
19 | |||
16 | printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); | 20 | printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); |
17 | add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); | 21 | add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); |
22 | |||
23 | ist_exit(regs, prev_state); | ||
18 | } | 24 | } |
19 | 25 | ||
20 | /* Set up machine check reporting on the Winchip C6 series */ | 26 | /* Set up machine check reporting on the Winchip C6 series */ |
@@ -31,7 +37,7 @@ void winchip_mcheck_init(struct cpuinfo_x86 *c)
31 | lo &= ~(1<<4); /* Enable MCE */ | 37 | lo &= ~(1<<4); /* Enable MCE */ |
32 | wrmsr(MSR_IDT_FCR1, lo, hi); | 38 | wrmsr(MSR_IDT_FCR1, lo, hi); |
33 | 39 | ||
34 | set_in_cr4(X86_CR4_MCE); | 40 | cr4_set_bits(X86_CR4_MCE); |
35 | 41 | ||
36 | printk(KERN_INFO | 42 | printk(KERN_INFO |
37 | "Winchip machine check reporting enabled on CPU#0.\n"); | 43 | "Winchip machine check reporting enabled on CPU#0.\n"); |
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 15c29096136b..36a83617eb21 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -552,7 +552,7 @@ static int __init microcode_init(void)
552 | int error; | 552 | int error; |
553 | 553 | ||
554 | if (paravirt_enabled() || dis_ucode_ldr) | 554 | if (paravirt_enabled() || dis_ucode_ldr) |
555 | return 0; | 555 | return -EINVAL; |
556 | 556 | ||
557 | if (c->x86_vendor == X86_VENDOR_INTEL) | 557 | if (c->x86_vendor == X86_VENDOR_INTEL) |
558 | microcode_ops = init_intel_microcode(); | 558 | microcode_ops = init_intel_microcode(); |
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index c6826d1e8082..746e7fd08aad 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c | |||
@@ -196,6 +196,11 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, | |||
196 | struct microcode_header_intel mc_header; | 196 | struct microcode_header_intel mc_header; |
197 | unsigned int mc_size; | 197 | unsigned int mc_size; |
198 | 198 | ||
199 | if (leftover < sizeof(mc_header)) { | ||
200 | pr_err("error! Truncated header in microcode data file\n"); | ||
201 | break; | ||
202 | } | ||
203 | |||
199 | if (get_ucode_data(&mc_header, ucode_ptr, sizeof(mc_header))) | 204 | if (get_ucode_data(&mc_header, ucode_ptr, sizeof(mc_header))) |
200 | break; | 205 | break; |
201 | 206 | ||
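The added length check protects the walk over a concatenated microcode file: before reading a header (and the total size stored in it), the loop must confirm that a full header is still left in the buffer. A hedged sketch of the resulting loop shape, using the helper names visible in the hunk above:

while (leftover) {
	struct microcode_header_intel mc_header;
	unsigned int mc_size;

	if (leftover < sizeof(mc_header))	/* truncated trailing header */
		break;

	if (get_ucode_data(&mc_header, ucode_ptr, sizeof(mc_header)))
		break;

	mc_size = get_totalsize(&mc_header);
	if (!mc_size || mc_size > leftover)	/* size field must fit the remainder */
		break;

	/* ... validate and save this update ... */

	ucode_ptr += mc_size;
	leftover  -= mc_size;
}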
diff --git a/arch/x86/kernel/cpu/microcode/intel_early.c b/arch/x86/kernel/cpu/microcode/intel_early.c index ec9df6f9cd47..420eb933189c 100644 --- a/arch/x86/kernel/cpu/microcode/intel_early.c +++ b/arch/x86/kernel/cpu/microcode/intel_early.c | |||
@@ -321,7 +321,11 @@ get_matching_model_microcode(int cpu, unsigned long start, | |||
321 | unsigned int mc_saved_count = mc_saved_data->mc_saved_count; | 321 | unsigned int mc_saved_count = mc_saved_data->mc_saved_count; |
322 | int i; | 322 | int i; |
323 | 323 | ||
324 | while (leftover) { | 324 | while (leftover && mc_saved_count < ARRAY_SIZE(mc_saved_tmp)) { |
325 | |||
326 | if (leftover < sizeof(mc_header)) | ||
327 | break; | ||
328 | |||
325 | mc_header = (struct microcode_header_intel *)ucode_ptr; | 329 | mc_header = (struct microcode_header_intel *)ucode_ptr; |
326 | 330 | ||
327 | mc_size = get_totalsize(mc_header); | 331 | mc_size = get_totalsize(mc_header); |
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c index 9e451b0876b5..f8c81ba0b465 100644 --- a/arch/x86/kernel/cpu/mtrr/cyrix.c +++ b/arch/x86/kernel/cpu/mtrr/cyrix.c | |||
@@ -138,8 +138,8 @@ static void prepare_set(void) | |||
138 | 138 | ||
139 | /* Save value of CR4 and clear Page Global Enable (bit 7) */ | 139 | /* Save value of CR4 and clear Page Global Enable (bit 7) */ |
140 | if (cpu_has_pge) { | 140 | if (cpu_has_pge) { |
141 | cr4 = read_cr4(); | 141 | cr4 = __read_cr4(); |
142 | write_cr4(cr4 & ~X86_CR4_PGE); | 142 | __write_cr4(cr4 & ~X86_CR4_PGE); |
143 | } | 143 | } |
144 | 144 | ||
145 | /* | 145 | /* |
@@ -171,7 +171,7 @@ static void post_set(void) | |||
171 | 171 | ||
172 | /* Restore value of CR4 */ | 172 | /* Restore value of CR4 */ |
173 | if (cpu_has_pge) | 173 | if (cpu_has_pge) |
174 | write_cr4(cr4); | 174 | __write_cr4(cr4); |
175 | } | 175 | } |
176 | 176 | ||
177 | static void cyrix_set_arr(unsigned int reg, unsigned long base, | 177 | static void cyrix_set_arr(unsigned int reg, unsigned long base, |
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 0e25a1bc5ab5..7d74f7b3c6ba 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c | |||
@@ -678,8 +678,8 @@ static void prepare_set(void) __acquires(set_atomicity_lock) | |||
678 | 678 | ||
679 | /* Save value of CR4 and clear Page Global Enable (bit 7) */ | 679 | /* Save value of CR4 and clear Page Global Enable (bit 7) */ |
680 | if (cpu_has_pge) { | 680 | if (cpu_has_pge) { |
681 | cr4 = read_cr4(); | 681 | cr4 = __read_cr4(); |
682 | write_cr4(cr4 & ~X86_CR4_PGE); | 682 | __write_cr4(cr4 & ~X86_CR4_PGE); |
683 | } | 683 | } |
684 | 684 | ||
685 | /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */ | 685 | /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */ |
@@ -708,7 +708,7 @@ static void post_set(void) __releases(set_atomicity_lock) | |||
708 | 708 | ||
709 | /* Restore value of CR4 */ | 709 | /* Restore value of CR4 */ |
710 | if (cpu_has_pge) | 710 | if (cpu_has_pge) |
711 | write_cr4(cr4); | 711 | __write_cr4(cr4); |
712 | raw_spin_unlock(&set_atomicity_lock); | 712 | raw_spin_unlock(&set_atomicity_lock); |
713 | } | 713 | } |
714 | 714 | ||
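Both the Cyrix and generic MTRR paths keep the same bracket around the actual range update, now spelled with the double-underscore CR4 accessors: save CR4, drop PGE so global TLB entries can be flushed, reprogram the ranges, then restore the saved value. Condensed into a sketch with hypothetical names:

static unsigned long example_saved_cr4;

static void example_prepare_set(void)
{
	if (cpu_has_pge) {
		example_saved_cr4 = __read_cr4();
		__write_cr4(example_saved_cr4 & ~X86_CR4_PGE);	/* drop global pages */
	}
	/* ... flush TLBs/caches and update the range registers ... */
}

static void example_post_set(void)
{
	/* ... re-enable caching ... */
	if (cpu_has_pge)
		__write_cr4(example_saved_cr4);			/* PGE back on */
}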
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 143e5f5dc855..b71a7f86d68a 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -31,6 +31,8 @@ | |||
31 | #include <asm/nmi.h> | 31 | #include <asm/nmi.h> |
32 | #include <asm/smp.h> | 32 | #include <asm/smp.h> |
33 | #include <asm/alternative.h> | 33 | #include <asm/alternative.h> |
34 | #include <asm/mmu_context.h> | ||
35 | #include <asm/tlbflush.h> | ||
34 | #include <asm/timer.h> | 36 | #include <asm/timer.h> |
35 | #include <asm/desc.h> | 37 | #include <asm/desc.h> |
36 | #include <asm/ldt.h> | 38 | #include <asm/ldt.h> |
@@ -43,6 +45,8 @@ DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { | |||
43 | .enabled = 1, | 45 | .enabled = 1, |
44 | }; | 46 | }; |
45 | 47 | ||
48 | struct static_key rdpmc_always_available = STATIC_KEY_INIT_FALSE; | ||
49 | |||
46 | u64 __read_mostly hw_cache_event_ids | 50 | u64 __read_mostly hw_cache_event_ids |
47 | [PERF_COUNT_HW_CACHE_MAX] | 51 | [PERF_COUNT_HW_CACHE_MAX] |
48 | [PERF_COUNT_HW_CACHE_OP_MAX] | 52 | [PERF_COUNT_HW_CACHE_OP_MAX] |
@@ -1327,8 +1331,6 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) | |||
1327 | break; | 1331 | break; |
1328 | 1332 | ||
1329 | case CPU_STARTING: | 1333 | case CPU_STARTING: |
1330 | if (x86_pmu.attr_rdpmc) | ||
1331 | set_in_cr4(X86_CR4_PCE); | ||
1332 | if (x86_pmu.cpu_starting) | 1334 | if (x86_pmu.cpu_starting) |
1333 | x86_pmu.cpu_starting(cpu); | 1335 | x86_pmu.cpu_starting(cpu); |
1334 | break; | 1336 | break; |
@@ -1804,14 +1806,44 @@ static int x86_pmu_event_init(struct perf_event *event) | |||
1804 | event->destroy(event); | 1806 | event->destroy(event); |
1805 | } | 1807 | } |
1806 | 1808 | ||
1809 | if (ACCESS_ONCE(x86_pmu.attr_rdpmc)) | ||
1810 | event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED; | ||
1811 | |||
1807 | return err; | 1812 | return err; |
1808 | } | 1813 | } |
1809 | 1814 | ||
1815 | static void refresh_pce(void *ignored) | ||
1816 | { | ||
1817 | if (current->mm) | ||
1818 | load_mm_cr4(current->mm); | ||
1819 | } | ||
1820 | |||
1821 | static void x86_pmu_event_mapped(struct perf_event *event) | ||
1822 | { | ||
1823 | if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED)) | ||
1824 | return; | ||
1825 | |||
1826 | if (atomic_inc_return(¤t->mm->context.perf_rdpmc_allowed) == 1) | ||
1827 | on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1); | ||
1828 | } | ||
1829 | |||
1830 | static void x86_pmu_event_unmapped(struct perf_event *event) | ||
1831 | { | ||
1832 | if (!current->mm) | ||
1833 | return; | ||
1834 | |||
1835 | if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED)) | ||
1836 | return; | ||
1837 | |||
1838 | if (atomic_dec_and_test(¤t->mm->context.perf_rdpmc_allowed)) | ||
1839 | on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1); | ||
1840 | } | ||
1841 | |||
1810 | static int x86_pmu_event_idx(struct perf_event *event) | 1842 | static int x86_pmu_event_idx(struct perf_event *event) |
1811 | { | 1843 | { |
1812 | int idx = event->hw.idx; | 1844 | int idx = event->hw.idx; |
1813 | 1845 | ||
1814 | if (!x86_pmu.attr_rdpmc) | 1846 | if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED)) |
1815 | return 0; | 1847 | return 0; |
1816 | 1848 | ||
1817 | if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) { | 1849 | if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) { |
@@ -1829,16 +1861,6 @@ static ssize_t get_attr_rdpmc(struct device *cdev, | |||
1829 | return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc); | 1861 | return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc); |
1830 | } | 1862 | } |
1831 | 1863 | ||
1832 | static void change_rdpmc(void *info) | ||
1833 | { | ||
1834 | bool enable = !!(unsigned long)info; | ||
1835 | |||
1836 | if (enable) | ||
1837 | set_in_cr4(X86_CR4_PCE); | ||
1838 | else | ||
1839 | clear_in_cr4(X86_CR4_PCE); | ||
1840 | } | ||
1841 | |||
1842 | static ssize_t set_attr_rdpmc(struct device *cdev, | 1864 | static ssize_t set_attr_rdpmc(struct device *cdev, |
1843 | struct device_attribute *attr, | 1865 | struct device_attribute *attr, |
1844 | const char *buf, size_t count) | 1866 | const char *buf, size_t count) |
@@ -1850,14 +1872,27 @@ static ssize_t set_attr_rdpmc(struct device *cdev, | |||
1850 | if (ret) | 1872 | if (ret) |
1851 | return ret; | 1873 | return ret; |
1852 | 1874 | ||
1875 | if (val > 2) | ||
1876 | return -EINVAL; | ||
1877 | |||
1853 | if (x86_pmu.attr_rdpmc_broken) | 1878 | if (x86_pmu.attr_rdpmc_broken) |
1854 | return -ENOTSUPP; | 1879 | return -ENOTSUPP; |
1855 | 1880 | ||
1856 | if (!!val != !!x86_pmu.attr_rdpmc) { | 1881 | if ((val == 2) != (x86_pmu.attr_rdpmc == 2)) { |
1857 | x86_pmu.attr_rdpmc = !!val; | 1882 | /* |
1858 | on_each_cpu(change_rdpmc, (void *)val, 1); | 1883 | * Changing into or out of always available, aka |
1884 | * perf-event-bypassing mode. This path is extremely slow, | ||
1885 | * but only root can trigger it, so it's okay. | ||
1886 | */ | ||
1887 | if (val == 2) | ||
1888 | static_key_slow_inc(&rdpmc_always_available); | ||
1889 | else | ||
1890 | static_key_slow_dec(&rdpmc_always_available); | ||
1891 | on_each_cpu(refresh_pce, NULL, 1); | ||
1859 | } | 1892 | } |
1860 | 1893 | ||
1894 | x86_pmu.attr_rdpmc = val; | ||
1895 | |||
1861 | return count; | 1896 | return count; |
1862 | } | 1897 | } |
1863 | 1898 | ||
@@ -1900,6 +1935,9 @@ static struct pmu pmu = { | |||
1900 | 1935 | ||
1901 | .event_init = x86_pmu_event_init, | 1936 | .event_init = x86_pmu_event_init, |
1902 | 1937 | ||
1938 | .event_mapped = x86_pmu_event_mapped, | ||
1939 | .event_unmapped = x86_pmu_event_unmapped, | ||
1940 | |||
1903 | .add = x86_pmu_add, | 1941 | .add = x86_pmu_add, |
1904 | .del = x86_pmu_del, | 1942 | .del = x86_pmu_del, |
1905 | .start = x86_pmu_start, | 1943 | .start = x86_pmu_start, |
@@ -1914,13 +1952,15 @@ static struct pmu pmu = { | |||
1914 | .flush_branch_stack = x86_pmu_flush_branch_stack, | 1952 | .flush_branch_stack = x86_pmu_flush_branch_stack, |
1915 | }; | 1953 | }; |
1916 | 1954 | ||
1917 | void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) | 1955 | void arch_perf_update_userpage(struct perf_event *event, |
1956 | struct perf_event_mmap_page *userpg, u64 now) | ||
1918 | { | 1957 | { |
1919 | struct cyc2ns_data *data; | 1958 | struct cyc2ns_data *data; |
1920 | 1959 | ||
1921 | userpg->cap_user_time = 0; | 1960 | userpg->cap_user_time = 0; |
1922 | userpg->cap_user_time_zero = 0; | 1961 | userpg->cap_user_time_zero = 0; |
1923 | userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc; | 1962 | userpg->cap_user_rdpmc = |
1963 | !!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED); | ||
1924 | userpg->pmc_width = x86_pmu.cntval_bits; | 1964 | userpg->pmc_width = x86_pmu.cntval_bits; |
1925 | 1965 | ||
1926 | if (!sched_clock_stable()) | 1966 | if (!sched_clock_stable()) |
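Taken together, the perf_event.c hunks move RDPMC enablement from a global CR4 toggle to a per-mm decision: mapping an RDPMC-capable event bumps mm->context.perf_rdpmc_allowed, and refresh_pce() reloads CR4.PCE on every CPU currently running that mm. The decision the reload relies on, assuming a load_mm_cr4()-style helper as referenced above (rdpmc_always_available corresponds to writing 2 to the sysfs attribute):

static inline void example_load_mm_cr4(struct mm_struct *mm)
{
	if (static_key_false(&rdpmc_always_available) ||
	    atomic_read(&mm->context.perf_rdpmc_allowed))
		cr4_set_bits(X86_CR4_PCE);	/* let userspace execute rdpmc */
	else
		cr4_clear_bits(X86_CR4_PCE);	/* userspace rdpmc will fault */
}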
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 4e6cdb0ddc70..df525d2be1e8 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h | |||
@@ -71,6 +71,8 @@ struct event_constraint { | |||
71 | #define PERF_X86_EVENT_COMMITTED 0x8 /* event passed commit_txn */ | 71 | #define PERF_X86_EVENT_COMMITTED 0x8 /* event passed commit_txn */ |
72 | #define PERF_X86_EVENT_PEBS_LD_HSW 0x10 /* haswell style datala, load */ | 72 | #define PERF_X86_EVENT_PEBS_LD_HSW 0x10 /* haswell style datala, load */ |
73 | #define PERF_X86_EVENT_PEBS_NA_HSW 0x20 /* haswell style datala, unknown */ | 73 | #define PERF_X86_EVENT_PEBS_NA_HSW 0x20 /* haswell style datala, unknown */ |
74 | #define PERF_X86_EVENT_RDPMC_ALLOWED 0x40 /* grant rdpmc permission */ | ||
75 | |||
74 | 76 | ||
75 | struct amd_nb { | 77 | struct amd_nb { |
76 | int nb_id; /* NorthBridge id */ | 78 | int nb_id; /* NorthBridge id */ |
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 944bf019b74f..498b6d967138 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -2431,6 +2431,7 @@ __init int intel_pmu_init(void) | |||
2431 | break; | 2431 | break; |
2432 | 2432 | ||
2433 | case 55: /* 22nm Atom "Silvermont" */ | 2433 | case 55: /* 22nm Atom "Silvermont" */ |
2434 | case 76: /* 14nm Atom "Airmont" */ | ||
2434 | case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */ | 2435 | case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */ |
2435 | memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, | 2436 | memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, |
2436 | sizeof(hw_cache_event_ids)); | 2437 | sizeof(hw_cache_event_ids)); |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c index 6e434f8e5fc8..c4bb8b8e5017 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c +++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c | |||
@@ -142,7 +142,7 @@ static inline u64 rapl_scale(u64 v) | |||
142 | * or use ldexp(count, -32). | 142 | * or use ldexp(count, -32). |
143 | * Watts = Joules/Time delta | 143 | * Watts = Joules/Time delta |
144 | */ | 144 | */ |
145 | return v << (32 - __this_cpu_read(rapl_pmu->hw_unit)); | 145 | return v << (32 - __this_cpu_read(rapl_pmu)->hw_unit); |
146 | } | 146 | } |
147 | 147 | ||
148 | static u64 rapl_event_update(struct perf_event *event) | 148 | static u64 rapl_event_update(struct perf_event *event) |
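The rapl_scale() fix only corrects how the per-CPU pointer is dereferenced; the arithmetic is unchanged. Raw counts are in units of 2^-hw_unit Joules, and shifting left by (32 - hw_unit) re-expresses them as a 32.32 fixed-point Joule value. A worked check with a common hw_unit of 16 (each count is 2^-16 J):

u64 counts = 1 << 16;			/* one Joule worth of raw counts */
u64 scaled = counts << (32 - 16);	/* what rapl_scale() returns */
/* scaled == 1ULL << 32, i.e. exactly 1.0 J in 32.32 fixed point */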
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 10b8d3eaaf15..c635b8b49e93 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c | |||
@@ -840,7 +840,6 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id | |||
840 | box->phys_id = phys_id; | 840 | box->phys_id = phys_id; |
841 | box->pci_dev = pdev; | 841 | box->pci_dev = pdev; |
842 | box->pmu = pmu; | 842 | box->pmu = pmu; |
843 | uncore_box_init(box); | ||
844 | pci_set_drvdata(pdev, box); | 843 | pci_set_drvdata(pdev, box); |
845 | 844 | ||
846 | raw_spin_lock(&uncore_box_lock); | 845 | raw_spin_lock(&uncore_box_lock); |
@@ -1004,10 +1003,8 @@ static int uncore_cpu_starting(int cpu) | |||
1004 | pmu = &type->pmus[j]; | 1003 | pmu = &type->pmus[j]; |
1005 | box = *per_cpu_ptr(pmu->box, cpu); | 1004 | box = *per_cpu_ptr(pmu->box, cpu); |
1006 | /* called by uncore_cpu_init? */ | 1005 | /* called by uncore_cpu_init? */ |
1007 | if (box && box->phys_id >= 0) { | 1006 | if (box && box->phys_id >= 0) |
1008 | uncore_box_init(box); | ||
1009 | continue; | 1007 | continue; |
1010 | } | ||
1011 | 1008 | ||
1012 | for_each_online_cpu(k) { | 1009 | for_each_online_cpu(k) { |
1013 | exist = *per_cpu_ptr(pmu->box, k); | 1010 | exist = *per_cpu_ptr(pmu->box, k); |
@@ -1023,10 +1020,8 @@ static int uncore_cpu_starting(int cpu) | |||
1023 | } | 1020 | } |
1024 | } | 1021 | } |
1025 | 1022 | ||
1026 | if (box) { | 1023 | if (box) |
1027 | box->phys_id = phys_id; | 1024 | box->phys_id = phys_id; |
1028 | uncore_box_init(box); | ||
1029 | } | ||
1030 | } | 1025 | } |
1031 | } | 1026 | } |
1032 | return 0; | 1027 | return 0; |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index 863d9b02563e..6c8c1e7e69d8 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h | |||
@@ -257,6 +257,14 @@ static inline int uncore_num_counters(struct intel_uncore_box *box) | |||
257 | return box->pmu->type->num_counters; | 257 | return box->pmu->type->num_counters; |
258 | } | 258 | } |
259 | 259 | ||
260 | static inline void uncore_box_init(struct intel_uncore_box *box) | ||
261 | { | ||
262 | if (!test_and_set_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) { | ||
263 | if (box->pmu->type->ops->init_box) | ||
264 | box->pmu->type->ops->init_box(box); | ||
265 | } | ||
266 | } | ||
267 | |||
260 | static inline void uncore_disable_box(struct intel_uncore_box *box) | 268 | static inline void uncore_disable_box(struct intel_uncore_box *box) |
261 | { | 269 | { |
262 | if (box->pmu->type->ops->disable_box) | 270 | if (box->pmu->type->ops->disable_box) |
@@ -265,6 +273,8 @@ static inline void uncore_disable_box(struct intel_uncore_box *box) | |||
265 | 273 | ||
266 | static inline void uncore_enable_box(struct intel_uncore_box *box) | 274 | static inline void uncore_enable_box(struct intel_uncore_box *box) |
267 | { | 275 | { |
276 | uncore_box_init(box); | ||
277 | |||
268 | if (box->pmu->type->ops->enable_box) | 278 | if (box->pmu->type->ops->enable_box) |
269 | box->pmu->type->ops->enable_box(box); | 279 | box->pmu->type->ops->enable_box(box); |
270 | } | 280 | } |
@@ -287,14 +297,6 @@ static inline u64 uncore_read_counter(struct intel_uncore_box *box, | |||
287 | return box->pmu->type->ops->read_counter(box, event); | 297 | return box->pmu->type->ops->read_counter(box, event); |
288 | } | 298 | } |
289 | 299 | ||
290 | static inline void uncore_box_init(struct intel_uncore_box *box) | ||
291 | { | ||
292 | if (!test_and_set_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) { | ||
293 | if (box->pmu->type->ops->init_box) | ||
294 | box->pmu->type->ops->init_box(box); | ||
295 | } | ||
296 | } | ||
297 | |||
298 | static inline bool uncore_box_is_fake(struct intel_uncore_box *box) | 300 | static inline bool uncore_box_is_fake(struct intel_uncore_box *box) |
299 | { | 301 | { |
300 | return (box->phys_id < 0); | 302 | return (box->phys_id < 0); |
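Moving uncore_box_init() next to uncore_enable_box() (and calling it from there) makes box initialization lazy: it now happens on first enable rather than at PCI probe or CPU-starting time, and the test_and_set_bit() on UNCORE_BOX_FLAG_INITIATED guarantees init_box() runs at most once even if several paths race to enable the box. The same idempotent-init idiom in isolation, as a sketch:

static void example_init_once(unsigned long *flags, void (*init)(void *), void *arg)
{
	/* test_and_set_bit() is atomic and returns the previous bit value,
	 * so exactly one caller performs the one-time initialization */
	if (!test_and_set_bit(0, flags) && init)
		init(arg);
}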
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index b74ebc7c4402..cf3df1d8d039 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c | |||
@@ -265,7 +265,10 @@ int __die(const char *str, struct pt_regs *regs, long err) | |||
265 | printk("SMP "); | 265 | printk("SMP "); |
266 | #endif | 266 | #endif |
267 | #ifdef CONFIG_DEBUG_PAGEALLOC | 267 | #ifdef CONFIG_DEBUG_PAGEALLOC |
268 | printk("DEBUG_PAGEALLOC"); | 268 | printk("DEBUG_PAGEALLOC "); |
269 | #endif | ||
270 | #ifdef CONFIG_KASAN | ||
271 | printk("KASAN"); | ||
269 | #endif | 272 | #endif |
270 | printk("\n"); | 273 | printk("\n"); |
271 | if (notify_die(DIE_OOPS, str, regs, err, | 274 | if (notify_die(DIE_OOPS, str, regs, err, |
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index dd2f07ae9d0c..46201deee923 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -184,9 +184,9 @@ void __init e820_print_map(char *who) | |||
184 | * overwritten in the same location, starting at biosmap. | 184 | * overwritten in the same location, starting at biosmap. |
185 | * | 185 | * |
186 | * The integer pointed to by pnr_map must be valid on entry (the | 186 | * The integer pointed to by pnr_map must be valid on entry (the |
187 | * current number of valid entries located at biosmap) and will | 187 | * current number of valid entries located at biosmap). If the |
188 | * be updated on return, with the new number of valid entries | 188 | * sanitizing succeeds, *pnr_map will be updated with the new |
189 | * (something no more than max_nr_map.) | 189 | * number of valid entries (something no more than max_nr_map). |
190 | * | 190 | * |
191 | * The return value from sanitize_e820_map() is zero if it | 191 | * The return value from sanitize_e820_map() is zero if it |
192 | * successfully 'sanitized' the map entries passed in, and is -1 | 192 | * successfully 'sanitized' the map entries passed in, and is -1 |
@@ -561,23 +561,15 @@ u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type, | |||
561 | 561 | ||
562 | void __init update_e820(void) | 562 | void __init update_e820(void) |
563 | { | 563 | { |
564 | u32 nr_map; | 564 | if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map)) |
565 | |||
566 | nr_map = e820.nr_map; | ||
567 | if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map)) | ||
568 | return; | 565 | return; |
569 | e820.nr_map = nr_map; | ||
570 | printk(KERN_INFO "e820: modified physical RAM map:\n"); | 566 | printk(KERN_INFO "e820: modified physical RAM map:\n"); |
571 | e820_print_map("modified"); | 567 | e820_print_map("modified"); |
572 | } | 568 | } |
573 | static void __init update_e820_saved(void) | 569 | static void __init update_e820_saved(void) |
574 | { | 570 | { |
575 | u32 nr_map; | 571 | sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map), |
576 | 572 | &e820_saved.nr_map); | |
577 | nr_map = e820_saved.nr_map; | ||
578 | if (sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map), &nr_map)) | ||
579 | return; | ||
580 | e820_saved.nr_map = nr_map; | ||
581 | } | 573 | } |
582 | #define MAX_GAP_END 0x100000000ull | 574 | #define MAX_GAP_END 0x100000000ull |
583 | /* | 575 | /* |
@@ -898,11 +890,9 @@ early_param("memmap", parse_memmap_opt); | |||
898 | void __init finish_e820_parsing(void) | 890 | void __init finish_e820_parsing(void) |
899 | { | 891 | { |
900 | if (userdef) { | 892 | if (userdef) { |
901 | u32 nr = e820.nr_map; | 893 | if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), |
902 | 894 | &e820.nr_map) < 0) | |
903 | if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0) | ||
904 | early_panic("Invalid user supplied memory map"); | 895 | early_panic("Invalid user supplied memory map"); |
905 | e820.nr_map = nr; | ||
906 | 896 | ||
907 | printk(KERN_INFO "e820: user-defined physical RAM map:\n"); | 897 | printk(KERN_INFO "e820: user-defined physical RAM map:\n"); |
908 | e820_print_map("user"); | 898 | e820_print_map("user"); |
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 01d1c187c9f9..a62536a1be88 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/usb/ehci_def.h> | 19 | #include <linux/usb/ehci_def.h> |
20 | #include <linux/efi.h> | 20 | #include <linux/efi.h> |
21 | #include <asm/efi.h> | 21 | #include <asm/efi.h> |
22 | #include <asm/pci_x86.h> | ||
22 | 23 | ||
23 | /* Simple VGA output */ | 24 | /* Simple VGA output */ |
24 | #define VGABASE (__ISA_IO_base + 0xb8000) | 25 | #define VGABASE (__ISA_IO_base + 0xb8000) |
@@ -76,7 +77,7 @@ static struct console early_vga_console = { | |||
76 | 77 | ||
77 | /* Serial functions loosely based on a similar package from Klaus P. Gerlicher */ | 78 | /* Serial functions loosely based on a similar package from Klaus P. Gerlicher */ |
78 | 79 | ||
79 | static int early_serial_base = 0x3f8; /* ttyS0 */ | 80 | static unsigned long early_serial_base = 0x3f8; /* ttyS0 */ |
80 | 81 | ||
81 | #define XMTRDY 0x20 | 82 | #define XMTRDY 0x20 |
82 | 83 | ||
@@ -94,13 +95,40 @@ static int early_serial_base = 0x3f8; /* ttyS0 */ | |||
94 | #define DLL 0 /* Divisor Latch Low */ | 95 | #define DLL 0 /* Divisor Latch Low */ |
95 | #define DLH 1 /* Divisor latch High */ | 96 | #define DLH 1 /* Divisor latch High */ |
96 | 97 | ||
98 | static void mem32_serial_out(unsigned long addr, int offset, int value) | ||
99 | { | ||
100 | uint32_t *vaddr = (uint32_t *)addr; | ||
101 | /* shift implied by pointer type */ | ||
102 | writel(value, vaddr + offset); | ||
103 | } | ||
104 | |||
105 | static unsigned int mem32_serial_in(unsigned long addr, int offset) | ||
106 | { | ||
107 | uint32_t *vaddr = (uint32_t *)addr; | ||
108 | /* shift implied by pointer type */ | ||
109 | return readl(vaddr + offset); | ||
110 | } | ||
111 | |||
112 | static unsigned int io_serial_in(unsigned long addr, int offset) | ||
113 | { | ||
114 | return inb(addr + offset); | ||
115 | } | ||
116 | |||
117 | static void io_serial_out(unsigned long addr, int offset, int value) | ||
118 | { | ||
119 | outb(value, addr + offset); | ||
120 | } | ||
121 | |||
122 | static unsigned int (*serial_in)(unsigned long addr, int offset) = io_serial_in; | ||
123 | static void (*serial_out)(unsigned long addr, int offset, int value) = io_serial_out; | ||
124 | |||
97 | static int early_serial_putc(unsigned char ch) | 125 | static int early_serial_putc(unsigned char ch) |
98 | { | 126 | { |
99 | unsigned timeout = 0xffff; | 127 | unsigned timeout = 0xffff; |
100 | 128 | ||
101 | while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout) | 129 | while ((serial_in(early_serial_base, LSR) & XMTRDY) == 0 && --timeout) |
102 | cpu_relax(); | 130 | cpu_relax(); |
103 | outb(ch, early_serial_base + TXR); | 131 | serial_out(early_serial_base, TXR, ch); |
104 | return timeout ? 0 : -1; | 132 | return timeout ? 0 : -1; |
105 | } | 133 | } |
106 | 134 | ||
@@ -114,13 +142,28 @@ static void early_serial_write(struct console *con, const char *s, unsigned n) | |||
114 | } | 142 | } |
115 | } | 143 | } |
116 | 144 | ||
145 | static __init void early_serial_hw_init(unsigned divisor) | ||
146 | { | ||
147 | unsigned char c; | ||
148 | |||
149 | serial_out(early_serial_base, LCR, 0x3); /* 8n1 */ | ||
150 | serial_out(early_serial_base, IER, 0); /* no interrupt */ | ||
151 | serial_out(early_serial_base, FCR, 0); /* no fifo */ | ||
152 | serial_out(early_serial_base, MCR, 0x3); /* DTR + RTS */ | ||
153 | |||
154 | c = serial_in(early_serial_base, LCR); | ||
155 | serial_out(early_serial_base, LCR, c | DLAB); | ||
156 | serial_out(early_serial_base, DLL, divisor & 0xff); | ||
157 | serial_out(early_serial_base, DLH, (divisor >> 8) & 0xff); | ||
158 | serial_out(early_serial_base, LCR, c & ~DLAB); | ||
159 | } | ||
160 | |||
117 | #define DEFAULT_BAUD 9600 | 161 | #define DEFAULT_BAUD 9600 |
118 | 162 | ||
119 | static __init void early_serial_init(char *s) | 163 | static __init void early_serial_init(char *s) |
120 | { | 164 | { |
121 | unsigned char c; | ||
122 | unsigned divisor; | 165 | unsigned divisor; |
123 | unsigned baud = DEFAULT_BAUD; | 166 | unsigned long baud = DEFAULT_BAUD; |
124 | char *e; | 167 | char *e; |
125 | 168 | ||
126 | if (*s == ',') | 169 | if (*s == ',') |
@@ -145,24 +188,124 @@ static __init void early_serial_init(char *s) | |||
145 | s++; | 188 | s++; |
146 | } | 189 | } |
147 | 190 | ||
148 | outb(0x3, early_serial_base + LCR); /* 8n1 */ | 191 | if (*s) { |
149 | outb(0, early_serial_base + IER); /* no interrupt */ | 192 | if (kstrtoul(s, 0, &baud) < 0 || baud == 0) |
150 | outb(0, early_serial_base + FCR); /* no fifo */ | 193 | baud = DEFAULT_BAUD; |
151 | outb(0x3, early_serial_base + MCR); /* DTR + RTS */ | 194 | } |
195 | |||
196 | /* Convert from baud to divisor value */ | ||
197 | divisor = 115200 / baud; | ||
198 | |||
199 | /* These will always be IO based ports */ | ||
200 | serial_in = io_serial_in; | ||
201 | serial_out = io_serial_out; | ||
202 | |||
203 | /* Set up the HW */ | ||
204 | early_serial_hw_init(divisor); | ||
205 | } | ||
206 | |||
207 | #ifdef CONFIG_PCI | ||
208 | /* | ||
209 | * early_pci_serial_init() | ||
210 | * | ||
211 | * This function is invoked when the early_printk param starts with "pciserial" | ||
212 | * The rest of the param should be ",B:D.F,baud" where B, D & F describe the | ||
213 | * location of a PCI device that must be a UART device. | ||
214 | */ | ||
215 | static __init void early_pci_serial_init(char *s) | ||
216 | { | ||
217 | unsigned divisor; | ||
218 | unsigned long baud = DEFAULT_BAUD; | ||
219 | u8 bus, slot, func; | ||
220 | uint32_t classcode, bar0; | ||
221 | uint16_t cmdreg; | ||
222 | char *e; | ||
223 | |||
224 | |||
225 | /* | ||
226 | * First, parse the param to get the BDF values | ||
227 | */ | ||
228 | if (*s == ',') | ||
229 | ++s; | ||
230 | |||
231 | if (*s == 0) | ||
232 | return; | ||
233 | |||
234 | bus = (u8)simple_strtoul(s, &e, 16); | ||
235 | s = e; | ||
236 | if (*s != ':') | ||
237 | return; | ||
238 | ++s; | ||
239 | slot = (u8)simple_strtoul(s, &e, 16); | ||
240 | s = e; | ||
241 | if (*s != '.') | ||
242 | return; | ||
243 | ++s; | ||
244 | func = (u8)simple_strtoul(s, &e, 16); | ||
245 | s = e; | ||
246 | |||
247 | /* A baud rate might follow */ | ||
248 | if (*s == ',') | ||
249 | s++; | ||
250 | |||
251 | /* | ||
252 | * Second, find the device from the BDF | ||
253 | */ | ||
254 | cmdreg = read_pci_config(bus, slot, func, PCI_COMMAND); | ||
255 | classcode = read_pci_config(bus, slot, func, PCI_CLASS_REVISION); | ||
256 | bar0 = read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_0); | ||
257 | |||
258 | /* | ||
259 | * Verify it is a UART type device | ||
260 | */ | ||
261 | if (((classcode >> 16 != PCI_CLASS_COMMUNICATION_MODEM) && | ||
262 | (classcode >> 16 != PCI_CLASS_COMMUNICATION_SERIAL)) || | ||
263 | (((classcode >> 8) & 0xff) != 0x02)) /* 16550 I/F at BAR0 */ | ||
264 | return; | ||
265 | |||
266 | /* | ||
267 | * Determine if it is IO or memory mapped | ||
268 | */ | ||
269 | if (bar0 & 0x01) { | ||
270 | /* it is IO mapped */ | ||
271 | serial_in = io_serial_in; | ||
272 | serial_out = io_serial_out; | ||
273 | early_serial_base = bar0&0xfffffffc; | ||
274 | write_pci_config(bus, slot, func, PCI_COMMAND, | ||
275 | cmdreg|PCI_COMMAND_IO); | ||
276 | } else { | ||
277 | /* It is memory mapped - assume 32-bit alignment */ | ||
278 | serial_in = mem32_serial_in; | ||
279 | serial_out = mem32_serial_out; | ||
280 | /* WARNING! assuming the address is always in the first 4G */ | ||
281 | early_serial_base = | ||
282 | (unsigned long)early_ioremap(bar0 & 0xfffffff0, 0x10); | ||
283 | write_pci_config(bus, slot, func, PCI_COMMAND, | ||
284 | cmdreg|PCI_COMMAND_MEMORY); | ||
285 | } | ||
152 | 286 | ||
287 | /* | ||
288 | * Lastly, initialize the hardware | ||
289 | */ | ||
153 | if (*s) { | 290 | if (*s) { |
154 | baud = simple_strtoul(s, &e, 0); | 291 | if (strcmp(s, "nocfg") == 0) |
155 | if (baud == 0 || s == e) | 292 | /* Sometimes, we want to leave the UART alone |
293 | * and assume the BIOS has set it up correctly. | ||
294 | * "nocfg" tells us this is the case, and we | ||
295 | * should do no more setup. | ||
296 | */ | ||
297 | return; | ||
298 | if (kstrtoul(s, 0, &baud) < 0 || baud == 0) | ||
156 | baud = DEFAULT_BAUD; | 299 | baud = DEFAULT_BAUD; |
157 | } | 300 | } |
158 | 301 | ||
302 | /* Convert from baud to divisor value */ | ||
159 | divisor = 115200 / baud; | 303 | divisor = 115200 / baud; |
160 | c = inb(early_serial_base + LCR); | 304 | |
161 | outb(c | DLAB, early_serial_base + LCR); | 305 | /* Set up the HW */ |
162 | outb(divisor & 0xff, early_serial_base + DLL); | 306 | early_serial_hw_init(divisor); |
163 | outb((divisor >> 8) & 0xff, early_serial_base + DLH); | ||
164 | outb(c & ~DLAB, early_serial_base + LCR); | ||
165 | } | 307 | } |
308 | #endif | ||
166 | 309 | ||
167 | static struct console early_serial_console = { | 310 | static struct console early_serial_console = { |
168 | .name = "earlyser", | 311 | .name = "earlyser", |
@@ -210,6 +353,13 @@ static int __init setup_early_printk(char *buf) | |||
210 | early_serial_init(buf + 4); | 353 | early_serial_init(buf + 4); |
211 | early_console_register(&early_serial_console, keep); | 354 | early_console_register(&early_serial_console, keep); |
212 | } | 355 | } |
356 | #ifdef CONFIG_PCI | ||
357 | if (!strncmp(buf, "pciserial", 9)) { | ||
358 | early_pci_serial_init(buf + 9); | ||
359 | early_console_register(&early_serial_console, keep); | ||
360 | buf += 9; /* Keep from matching the above "serial" */ | ||
361 | } | ||
362 | #endif | ||
213 | if (!strncmp(buf, "vga", 3) && | 363 | if (!strncmp(buf, "vga", 3) && |
214 | boot_params.screen_info.orig_video_isVGA == 1) { | 364 | boot_params.screen_info.orig_video_isVGA == 1) { |
215 | max_xpos = boot_params.screen_info.orig_video_cols; | 365 | max_xpos = boot_params.screen_info.orig_video_cols; |
@@ -226,11 +376,6 @@ static int __init setup_early_printk(char *buf) | |||
226 | early_console_register(&xenboot_console, keep); | 376 | early_console_register(&xenboot_console, keep); |
227 | #endif | 377 | #endif |
228 | #ifdef CONFIG_EARLY_PRINTK_INTEL_MID | 378 | #ifdef CONFIG_EARLY_PRINTK_INTEL_MID |
229 | if (!strncmp(buf, "mrst", 4)) { | ||
230 | mrst_early_console_init(); | ||
231 | early_console_register(&early_mrst_console, keep); | ||
232 | } | ||
233 | |||
234 | if (!strncmp(buf, "hsu", 3)) { | 379 | if (!strncmp(buf, "hsu", 3)) { |
235 | hsu_early_console_init(buf + 3); | 380 | hsu_early_console_init(buf + 3); |
236 | early_console_register(&early_hsu_console, keep); | 381 | early_console_register(&early_hsu_console, keep); |
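With the pciserial option, the early console can sit behind a PCI UART, including memory-mapped ones, while the legacy mrst hook is dropped. Assuming a 16550-class device at bus 0, device 0x18, function 1 (a hypothetical location), a plausible command line would be:

	earlyprintk=pciserial,00:18.1,115200

or "nocfg" in place of the baud rate to leave the firmware's UART setup untouched. The divisor programming is shared with the legacy I/O-port path: divisor = 115200 / baud, so 115200 baud yields a divisor of 1 and the default 9600 yields 12.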
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 000d4199b03e..31e2d5bf3e38 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -982,6 +982,9 @@ ENTRY(xen_hypervisor_callback) | |||
982 | ENTRY(xen_do_upcall) | 982 | ENTRY(xen_do_upcall) |
983 | 1: mov %esp, %eax | 983 | 1: mov %esp, %eax |
984 | call xen_evtchn_do_upcall | 984 | call xen_evtchn_do_upcall |
985 | #ifndef CONFIG_PREEMPT | ||
986 | call xen_maybe_preempt_hcall | ||
987 | #endif | ||
985 | jmp ret_from_intr | 988 | jmp ret_from_intr |
986 | CFI_ENDPROC | 989 | CFI_ENDPROC |
987 | ENDPROC(xen_hypervisor_callback) | 990 | ENDPROC(xen_hypervisor_callback) |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 9ebaf63ba182..10074ad9ebf8 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -143,7 +143,8 @@ ENDPROC(native_usergs_sysret64) | |||
143 | movq \tmp,RSP+\offset(%rsp) | 143 | movq \tmp,RSP+\offset(%rsp) |
144 | movq $__USER_DS,SS+\offset(%rsp) | 144 | movq $__USER_DS,SS+\offset(%rsp) |
145 | movq $__USER_CS,CS+\offset(%rsp) | 145 | movq $__USER_CS,CS+\offset(%rsp) |
146 | movq $-1,RCX+\offset(%rsp) | 146 | movq RIP+\offset(%rsp),\tmp /* get rip */ |
147 | movq \tmp,RCX+\offset(%rsp) /* copy it to rcx as sysret would do */ | ||
147 | movq R11+\offset(%rsp),\tmp /* get eflags */ | 148 | movq R11+\offset(%rsp),\tmp /* get eflags */ |
148 | movq \tmp,EFLAGS+\offset(%rsp) | 149 | movq \tmp,EFLAGS+\offset(%rsp) |
149 | .endm | 150 | .endm |
@@ -155,27 +156,6 @@ ENDPROC(native_usergs_sysret64) | |||
155 | movq \tmp,R11+\offset(%rsp) | 156 | movq \tmp,R11+\offset(%rsp) |
156 | .endm | 157 | .endm |
157 | 158 | ||
158 | .macro FAKE_STACK_FRAME child_rip | ||
159 | /* push in order ss, rsp, eflags, cs, rip */ | ||
160 | xorl %eax, %eax | ||
161 | pushq_cfi $__KERNEL_DS /* ss */ | ||
162 | /*CFI_REL_OFFSET ss,0*/ | ||
163 | pushq_cfi %rax /* rsp */ | ||
164 | CFI_REL_OFFSET rsp,0 | ||
165 | pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_FIXED) /* eflags - interrupts on */ | ||
166 | /*CFI_REL_OFFSET rflags,0*/ | ||
167 | pushq_cfi $__KERNEL_CS /* cs */ | ||
168 | /*CFI_REL_OFFSET cs,0*/ | ||
169 | pushq_cfi \child_rip /* rip */ | ||
170 | CFI_REL_OFFSET rip,0 | ||
171 | pushq_cfi %rax /* orig rax */ | ||
172 | .endm | ||
173 | |||
174 | .macro UNFAKE_STACK_FRAME | ||
175 | addq $8*6, %rsp | ||
176 | CFI_ADJUST_CFA_OFFSET -(6*8) | ||
177 | .endm | ||
178 | |||
179 | /* | 159 | /* |
180 | * initial frame state for interrupts (and exceptions without error code) | 160 | * initial frame state for interrupts (and exceptions without error code) |
181 | */ | 161 | */ |
@@ -238,51 +218,6 @@ ENDPROC(native_usergs_sysret64) | |||
238 | CFI_REL_OFFSET r15, R15+\offset | 218 | CFI_REL_OFFSET r15, R15+\offset |
239 | .endm | 219 | .endm |
240 | 220 | ||
241 | /* save partial stack frame */ | ||
242 | .macro SAVE_ARGS_IRQ | ||
243 | cld | ||
244 | /* start from rbp in pt_regs and jump over */ | ||
245 | movq_cfi rdi, (RDI-RBP) | ||
246 | movq_cfi rsi, (RSI-RBP) | ||
247 | movq_cfi rdx, (RDX-RBP) | ||
248 | movq_cfi rcx, (RCX-RBP) | ||
249 | movq_cfi rax, (RAX-RBP) | ||
250 | movq_cfi r8, (R8-RBP) | ||
251 | movq_cfi r9, (R9-RBP) | ||
252 | movq_cfi r10, (R10-RBP) | ||
253 | movq_cfi r11, (R11-RBP) | ||
254 | |||
255 | /* Save rbp so that we can unwind from get_irq_regs() */ | ||
256 | movq_cfi rbp, 0 | ||
257 | |||
258 | /* Save previous stack value */ | ||
259 | movq %rsp, %rsi | ||
260 | |||
261 | leaq -RBP(%rsp),%rdi /* arg1 for handler */ | ||
262 | testl $3, CS-RBP(%rsi) | ||
263 | je 1f | ||
264 | SWAPGS | ||
265 | /* | ||
266 | * irq_count is used to check if a CPU is already on an interrupt stack | ||
267 | * or not. While this is essentially redundant with preempt_count it is | ||
268 | * a little cheaper to use a separate counter in the PDA (short of | ||
269 | * moving irq_enter into assembly, which would be too much work) | ||
270 | */ | ||
271 | 1: incl PER_CPU_VAR(irq_count) | ||
272 | cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp | ||
273 | CFI_DEF_CFA_REGISTER rsi | ||
274 | |||
275 | /* Store previous stack value */ | ||
276 | pushq %rsi | ||
277 | CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \ | ||
278 | 0x77 /* DW_OP_breg7 */, 0, \ | ||
279 | 0x06 /* DW_OP_deref */, \ | ||
280 | 0x08 /* DW_OP_const1u */, SS+8-RBP, \ | ||
281 | 0x22 /* DW_OP_plus */ | ||
282 | /* We entered an interrupt context - irqs are off: */ | ||
283 | TRACE_IRQS_OFF | ||
284 | .endm | ||
285 | |||
286 | ENTRY(save_paranoid) | 221 | ENTRY(save_paranoid) |
287 | XCPT_FRAME 1 RDI+8 | 222 | XCPT_FRAME 1 RDI+8 |
288 | cld | 223 | cld |
@@ -426,15 +361,12 @@ system_call_fastpath: | |||
426 | * Has incomplete stack frame and undefined top of stack. | 361 | * Has incomplete stack frame and undefined top of stack. |
427 | */ | 362 | */ |
428 | ret_from_sys_call: | 363 | ret_from_sys_call: |
429 | movl $_TIF_ALLWORK_MASK,%edi | 364 | testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
430 | /* edi: flagmask */ | 365 | jnz int_ret_from_sys_call_fixup /* Go the the slow path */ |
431 | sysret_check: | 366 | |
432 | LOCKDEP_SYS_EXIT | 367 | LOCKDEP_SYS_EXIT |
433 | DISABLE_INTERRUPTS(CLBR_NONE) | 368 | DISABLE_INTERRUPTS(CLBR_NONE) |
434 | TRACE_IRQS_OFF | 369 | TRACE_IRQS_OFF |
435 | movl TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET),%edx | ||
436 | andl %edi,%edx | ||
437 | jnz sysret_careful | ||
438 | CFI_REMEMBER_STATE | 370 | CFI_REMEMBER_STATE |
439 | /* | 371 | /* |
440 | * sysretq will re-enable interrupts: | 372 | * sysretq will re-enable interrupts: |
@@ -448,49 +380,10 @@ sysret_check: | |||
448 | USERGS_SYSRET64 | 380 | USERGS_SYSRET64 |
449 | 381 | ||
450 | CFI_RESTORE_STATE | 382 | CFI_RESTORE_STATE |
451 | /* Handle reschedules */ | ||
452 | /* edx: work, edi: workmask */ | ||
453 | sysret_careful: | ||
454 | bt $TIF_NEED_RESCHED,%edx | ||
455 | jnc sysret_signal | ||
456 | TRACE_IRQS_ON | ||
457 | ENABLE_INTERRUPTS(CLBR_NONE) | ||
458 | pushq_cfi %rdi | ||
459 | SCHEDULE_USER | ||
460 | popq_cfi %rdi | ||
461 | jmp sysret_check | ||
462 | 383 | ||
463 | /* Handle a signal */ | 384 | int_ret_from_sys_call_fixup: |
464 | sysret_signal: | ||
465 | TRACE_IRQS_ON | ||
466 | ENABLE_INTERRUPTS(CLBR_NONE) | ||
467 | #ifdef CONFIG_AUDITSYSCALL | ||
468 | bt $TIF_SYSCALL_AUDIT,%edx | ||
469 | jc sysret_audit | ||
470 | #endif | ||
471 | /* | ||
472 | * We have a signal, or exit tracing or single-step. | ||
473 | * These all wind up with the iret return path anyway, | ||
474 | * so just join that path right now. | ||
475 | */ | ||
476 | FIXUP_TOP_OF_STACK %r11, -ARGOFFSET | 385 | FIXUP_TOP_OF_STACK %r11, -ARGOFFSET |
477 | jmp int_check_syscall_exit_work | 386 | jmp int_ret_from_sys_call |
478 | |||
479 | #ifdef CONFIG_AUDITSYSCALL | ||
480 | /* | ||
481 | * Return fast path for syscall audit. Call __audit_syscall_exit() | ||
482 | * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT | ||
483 | * masked off. | ||
484 | */ | ||
485 | sysret_audit: | ||
486 | movq RAX-ARGOFFSET(%rsp),%rsi /* second arg, syscall return value */ | ||
487 | cmpq $-MAX_ERRNO,%rsi /* is it < -MAX_ERRNO? */ | ||
488 | setbe %al /* 1 if so, 0 if not */ | ||
489 | movzbl %al,%edi /* zero-extend that into %edi */ | ||
490 | call __audit_syscall_exit | ||
491 | movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi | ||
492 | jmp sysret_check | ||
493 | #endif /* CONFIG_AUDITSYSCALL */ | ||
494 | 387 | ||
495 | /* Do syscall tracing */ | 388 | /* Do syscall tracing */ |
496 | tracesys: | 389 | tracesys: |
@@ -626,19 +519,6 @@ END(\label) | |||
626 | FORK_LIKE vfork | 519 | FORK_LIKE vfork |
627 | FIXED_FRAME stub_iopl, sys_iopl | 520 | FIXED_FRAME stub_iopl, sys_iopl |
628 | 521 | ||
629 | ENTRY(ptregscall_common) | ||
630 | DEFAULT_FRAME 1 8 /* offset 8: return address */ | ||
631 | RESTORE_TOP_OF_STACK %r11, 8 | ||
632 | movq_cfi_restore R15+8, r15 | ||
633 | movq_cfi_restore R14+8, r14 | ||
634 | movq_cfi_restore R13+8, r13 | ||
635 | movq_cfi_restore R12+8, r12 | ||
636 | movq_cfi_restore RBP+8, rbp | ||
637 | movq_cfi_restore RBX+8, rbx | ||
638 | ret $REST_SKIP /* pop extended registers */ | ||
639 | CFI_ENDPROC | ||
640 | END(ptregscall_common) | ||
641 | |||
642 | ENTRY(stub_execve) | 522 | ENTRY(stub_execve) |
643 | CFI_STARTPROC | 523 | CFI_STARTPROC |
644 | addq $8, %rsp | 524 | addq $8, %rsp |
@@ -779,7 +659,48 @@ END(interrupt) | |||
779 | /* reserve pt_regs for scratch regs and rbp */ | 659 | /* reserve pt_regs for scratch regs and rbp */ |
780 | subq $ORIG_RAX-RBP, %rsp | 660 | subq $ORIG_RAX-RBP, %rsp |
781 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP | 661 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP |
782 | SAVE_ARGS_IRQ | 662 | cld |
663 | /* start from rbp in pt_regs and jump over */ | ||
664 | movq_cfi rdi, (RDI-RBP) | ||
665 | movq_cfi rsi, (RSI-RBP) | ||
666 | movq_cfi rdx, (RDX-RBP) | ||
667 | movq_cfi rcx, (RCX-RBP) | ||
668 | movq_cfi rax, (RAX-RBP) | ||
669 | movq_cfi r8, (R8-RBP) | ||
670 | movq_cfi r9, (R9-RBP) | ||
671 | movq_cfi r10, (R10-RBP) | ||
672 | movq_cfi r11, (R11-RBP) | ||
673 | |||
674 | /* Save rbp so that we can unwind from get_irq_regs() */ | ||
675 | movq_cfi rbp, 0 | ||
676 | |||
677 | /* Save previous stack value */ | ||
678 | movq %rsp, %rsi | ||
679 | |||
680 | leaq -RBP(%rsp),%rdi /* arg1 for handler */ | ||
681 | testl $3, CS-RBP(%rsi) | ||
682 | je 1f | ||
683 | SWAPGS | ||
684 | /* | ||
685 | * irq_count is used to check if a CPU is already on an interrupt stack | ||
686 | * or not. While this is essentially redundant with preempt_count it is | ||
687 | * a little cheaper to use a separate counter in the PDA (short of | ||
688 | * moving irq_enter into assembly, which would be too much work) | ||
689 | */ | ||
690 | 1: incl PER_CPU_VAR(irq_count) | ||
691 | cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp | ||
692 | CFI_DEF_CFA_REGISTER rsi | ||
693 | |||
694 | /* Store previous stack value */ | ||
695 | pushq %rsi | ||
696 | CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \ | ||
697 | 0x77 /* DW_OP_breg7 */, 0, \ | ||
698 | 0x06 /* DW_OP_deref */, \ | ||
699 | 0x08 /* DW_OP_const1u */, SS+8-RBP, \ | ||
700 | 0x22 /* DW_OP_plus */ | ||
701 | /* We entered an interrupt context - irqs are off: */ | ||
702 | TRACE_IRQS_OFF | ||
703 | |||
783 | call \func | 704 | call \func |
784 | .endm | 705 | .endm |
785 | 706 | ||
@@ -831,6 +752,60 @@ retint_swapgs: /* return to user-space */ | |||
831 | */ | 752 | */ |
832 | DISABLE_INTERRUPTS(CLBR_ANY) | 753 | DISABLE_INTERRUPTS(CLBR_ANY) |
833 | TRACE_IRQS_IRETQ | 754 | TRACE_IRQS_IRETQ |
755 | |||
756 | /* | ||
757 | * Try to use SYSRET instead of IRET if we're returning to | ||
758 | * a completely clean 64-bit userspace context. | ||
759 | */ | ||
760 | movq (RCX-R11)(%rsp), %rcx | ||
761 | cmpq %rcx,(RIP-R11)(%rsp) /* RCX == RIP */ | ||
762 | jne opportunistic_sysret_failed | ||
763 | |||
764 | /* | ||
765 | * On Intel CPUs, sysret with non-canonical RCX/RIP will #GP | ||
766 | * in kernel space. This essentially lets the user take over | ||
767 | * the kernel, since userspace controls RSP. It's not worth | ||
768 | * testing for canonicalness exactly -- this check detects any | ||
769 | * of the 17 high bits set, which is true for non-canonical | ||
770 | * or kernel addresses. (This will pessimize vsyscall=native. | ||
771 | * Big deal.) | ||
772 | * | ||
773 | * If virtual addresses ever become wider, this will need | ||
774 | * to be updated to remain correct on both old and new CPUs. | ||
775 | */ | ||
776 | .ifne __VIRTUAL_MASK_SHIFT - 47 | ||
777 | .error "virtual address width changed -- sysret checks need update" | ||
778 | .endif | ||
779 | shr $__VIRTUAL_MASK_SHIFT, %rcx | ||
780 | jnz opportunistic_sysret_failed | ||
781 | |||
782 | cmpq $__USER_CS,(CS-R11)(%rsp) /* CS must match SYSRET */ | ||
783 | jne opportunistic_sysret_failed | ||
784 | |||
785 | movq (R11-ARGOFFSET)(%rsp), %r11 | ||
786 | cmpq %r11,(EFLAGS-ARGOFFSET)(%rsp) /* R11 == RFLAGS */ | ||
787 | jne opportunistic_sysret_failed | ||
788 | |||
789 | testq $X86_EFLAGS_RF,%r11 /* sysret can't restore RF */ | ||
790 | jnz opportunistic_sysret_failed | ||
791 | |||
792 | /* nothing to check for RSP */ | ||
793 | |||
794 | cmpq $__USER_DS,(SS-ARGOFFSET)(%rsp) /* SS must match SYSRET */ | ||
795 | jne opportunistic_sysret_failed | ||
796 | |||
797 | /* | ||
798 | * We win! This label is here just for ease of understanding | ||
799 | * perf profiles. Nothing jumps here. | ||
800 | */ | ||
801 | irq_return_via_sysret: | ||
802 | CFI_REMEMBER_STATE | ||
803 | RESTORE_ARGS 1,8,1 | ||
804 | movq (RSP-RIP)(%rsp),%rsp | ||
805 | USERGS_SYSRET64 | ||
806 | CFI_RESTORE_STATE | ||
807 | |||
808 | opportunistic_sysret_failed: | ||
834 | SWAPGS | 809 | SWAPGS |
835 | jmp restore_args | 810 | jmp restore_args |
836 | 811 | ||
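The opportunistic-SYSRET path added above only fires when the saved frame is something SYSRET can reproduce exactly; otherwise it falls back to the IRET-based return. Expressed as C over the saved register frame, the checks amount to roughly:

static bool example_sysret_ok(const struct pt_regs *regs)
{
	if (regs->cx != regs->ip)		/* SYSRET reloads RIP from RCX */
		return false;
	if (regs->cx >> __VIRTUAL_MASK_SHIFT)	/* any high bit set: not a clean user address */
		return false;
	if (regs->cs != __USER_CS || regs->ss != __USER_DS)
		return false;
	if (regs->r11 != regs->flags)		/* SYSRET reloads RFLAGS from R11 */
		return false;
	if (regs->flags & X86_EFLAGS_RF)	/* SYSRET cannot restore RF */
		return false;
	return true;				/* nothing to check for RSP */
}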
@@ -1048,6 +1023,11 @@ ENTRY(\sym) | |||
1048 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 | 1023 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 |
1049 | 1024 | ||
1050 | .if \paranoid | 1025 | .if \paranoid |
1026 | .if \paranoid == 1 | ||
1027 | CFI_REMEMBER_STATE | ||
1028 | testl $3, CS(%rsp) /* If coming from userspace, switch */ | ||
1029 | jnz 1f /* stacks. */ | ||
1030 | .endif | ||
1051 | call save_paranoid | 1031 | call save_paranoid |
1052 | .else | 1032 | .else |
1053 | call error_entry | 1033 | call error_entry |
@@ -1088,6 +1068,36 @@ ENTRY(\sym) | |||
1088 | jmp error_exit /* %ebx: no swapgs flag */ | 1068 | jmp error_exit /* %ebx: no swapgs flag */ |
1089 | .endif | 1069 | .endif |
1090 | 1070 | ||
1071 | .if \paranoid == 1 | ||
1072 | CFI_RESTORE_STATE | ||
1073 | /* | ||
1074 | * Paranoid entry from userspace. Switch stacks and treat it | ||
1075 | * as a normal entry. This means that paranoid handlers | ||
1076 | * run in real process context if user_mode(regs). | ||
1077 | */ | ||
1078 | 1: | ||
1079 | call error_entry | ||
1080 | |||
1081 | DEFAULT_FRAME 0 | ||
1082 | |||
1083 | movq %rsp,%rdi /* pt_regs pointer */ | ||
1084 | call sync_regs | ||
1085 | movq %rax,%rsp /* switch stack */ | ||
1086 | |||
1087 | movq %rsp,%rdi /* pt_regs pointer */ | ||
1088 | |||
1089 | .if \has_error_code | ||
1090 | movq ORIG_RAX(%rsp),%rsi /* get error code */ | ||
1091 | movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ | ||
1092 | .else | ||
1093 | xorl %esi,%esi /* no error code */ | ||
1094 | .endif | ||
1095 | |||
1096 | call \do_sym | ||
1097 | |||
1098 | jmp error_exit /* %ebx: no swapgs flag */ | ||
1099 | .endif | ||
1100 | |||
1091 | CFI_ENDPROC | 1101 | CFI_ENDPROC |
1092 | END(\sym) | 1102 | END(\sym) |
1093 | .endm | 1103 | .endm |
@@ -1108,7 +1118,7 @@ idtentry overflow do_overflow has_error_code=0 | |||
1108 | idtentry bounds do_bounds has_error_code=0 | 1118 | idtentry bounds do_bounds has_error_code=0 |
1109 | idtentry invalid_op do_invalid_op has_error_code=0 | 1119 | idtentry invalid_op do_invalid_op has_error_code=0 |
1110 | idtentry device_not_available do_device_not_available has_error_code=0 | 1120 | idtentry device_not_available do_device_not_available has_error_code=0 |
1111 | idtentry double_fault do_double_fault has_error_code=1 paranoid=1 | 1121 | idtentry double_fault do_double_fault has_error_code=1 paranoid=2 |
1112 | idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0 | 1122 | idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0 |
1113 | idtentry invalid_TSS do_invalid_TSS has_error_code=1 | 1123 | idtentry invalid_TSS do_invalid_TSS has_error_code=1 |
1114 | idtentry segment_not_present do_segment_not_present has_error_code=1 | 1124 | idtentry segment_not_present do_segment_not_present has_error_code=1 |
@@ -1198,6 +1208,9 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) | |||
1198 | popq %rsp | 1208 | popq %rsp |
1199 | CFI_DEF_CFA_REGISTER rsp | 1209 | CFI_DEF_CFA_REGISTER rsp |
1200 | decl PER_CPU_VAR(irq_count) | 1210 | decl PER_CPU_VAR(irq_count) |
1211 | #ifndef CONFIG_PREEMPT | ||
1212 | call xen_maybe_preempt_hcall | ||
1213 | #endif | ||
1201 | jmp error_exit | 1214 | jmp error_exit |
1202 | CFI_ENDPROC | 1215 | CFI_ENDPROC |
1203 | END(xen_do_hypervisor_callback) | 1216 | END(xen_do_hypervisor_callback) |
@@ -1289,16 +1302,14 @@ idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector( | |||
1289 | #endif | 1302 | #endif |
1290 | 1303 | ||
1291 | /* | 1304 | /* |
1292 | * "Paranoid" exit path from exception stack. | 1305 | * "Paranoid" exit path from exception stack. This is invoked |
1293 | * Paranoid because this is used by NMIs and cannot take | 1306 | * only on return from non-NMI IST interrupts that came |
1294 | * any kernel state for granted. | 1307 | * from kernel space. |
1295 | * We don't do kernel preemption checks here, because only | ||
1296 | * NMI should be common and it does not enable IRQs and | ||
1297 | * cannot get reschedule ticks. | ||
1298 | * | 1308 | * |
1299 | * "trace" is 0 for the NMI handler only, because irq-tracing | 1309 | * We may be returning to very strange contexts (e.g. very early |
1300 | * is fundamentally NMI-unsafe. (we cannot change the soft and | 1310 | * in syscall entry), so checking for preemption here would |
1301 | * hard flags at once, atomically) | 1311 | * be complicated. Fortunately, there's no good reason |
1312 | * to try to handle preemption here. | ||
1302 | */ | 1313 | */ |
1303 | 1314 | ||
1304 | /* ebx: no swapgs flag */ | 1315 | /* ebx: no swapgs flag */ |
@@ -1308,43 +1319,14 @@ ENTRY(paranoid_exit) | |||
1308 | TRACE_IRQS_OFF_DEBUG | 1319 | TRACE_IRQS_OFF_DEBUG |
1309 | testl %ebx,%ebx /* swapgs needed? */ | 1320 | testl %ebx,%ebx /* swapgs needed? */ |
1310 | jnz paranoid_restore | 1321 | jnz paranoid_restore |
1311 | testl $3,CS(%rsp) | ||
1312 | jnz paranoid_userspace | ||
1313 | paranoid_swapgs: | ||
1314 | TRACE_IRQS_IRETQ 0 | 1322 | TRACE_IRQS_IRETQ 0 |
1315 | SWAPGS_UNSAFE_STACK | 1323 | SWAPGS_UNSAFE_STACK |
1316 | RESTORE_ALL 8 | 1324 | RESTORE_ALL 8 |
1317 | jmp irq_return | 1325 | INTERRUPT_RETURN |
1318 | paranoid_restore: | 1326 | paranoid_restore: |
1319 | TRACE_IRQS_IRETQ_DEBUG 0 | 1327 | TRACE_IRQS_IRETQ_DEBUG 0 |
1320 | RESTORE_ALL 8 | 1328 | RESTORE_ALL 8 |
1321 | jmp irq_return | 1329 | INTERRUPT_RETURN |
1322 | paranoid_userspace: | ||
1323 | GET_THREAD_INFO(%rcx) | ||
1324 | movl TI_flags(%rcx),%ebx | ||
1325 | andl $_TIF_WORK_MASK,%ebx | ||
1326 | jz paranoid_swapgs | ||
1327 | movq %rsp,%rdi /* &pt_regs */ | ||
1328 | call sync_regs | ||
1329 | movq %rax,%rsp /* switch stack for scheduling */ | ||
1330 | testl $_TIF_NEED_RESCHED,%ebx | ||
1331 | jnz paranoid_schedule | ||
1332 | movl %ebx,%edx /* arg3: thread flags */ | ||
1333 | TRACE_IRQS_ON | ||
1334 | ENABLE_INTERRUPTS(CLBR_NONE) | ||
1335 | xorl %esi,%esi /* arg2: oldset */ | ||
1336 | movq %rsp,%rdi /* arg1: &pt_regs */ | ||
1337 | call do_notify_resume | ||
1338 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
1339 | TRACE_IRQS_OFF | ||
1340 | jmp paranoid_userspace | ||
1341 | paranoid_schedule: | ||
1342 | TRACE_IRQS_ON | ||
1343 | ENABLE_INTERRUPTS(CLBR_ANY) | ||
1344 | SCHEDULE_USER | ||
1345 | DISABLE_INTERRUPTS(CLBR_ANY) | ||
1346 | TRACE_IRQS_OFF | ||
1347 | jmp paranoid_userspace | ||
1348 | CFI_ENDPROC | 1330 | CFI_ENDPROC |
1349 | END(paranoid_exit) | 1331 | END(paranoid_exit) |
1350 | 1332 | ||
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index d6c1b9836995..2911ef3a9f1c 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c | |||
@@ -31,6 +31,7 @@ static void __init i386_default_early_setup(void) | |||
31 | 31 | ||
32 | asmlinkage __visible void __init i386_start_kernel(void) | 32 | asmlinkage __visible void __init i386_start_kernel(void) |
33 | { | 33 | { |
34 | cr4_init_shadow(); | ||
34 | sanitize_boot_params(&boot_params); | 35 | sanitize_boot_params(&boot_params); |
35 | 36 | ||
36 | /* Call the subarch specific early setup function */ | 37 | /* Call the subarch specific early setup function */ |
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index eda1a865641e..c4f8d4659070 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <asm/bios_ebda.h> | 27 | #include <asm/bios_ebda.h> |
28 | #include <asm/bootparam_utils.h> | 28 | #include <asm/bootparam_utils.h> |
29 | #include <asm/microcode.h> | 29 | #include <asm/microcode.h> |
30 | #include <asm/kasan.h> | ||
30 | 31 | ||
31 | /* | 32 | /* |
32 | * Manage page tables very early on. | 33 | * Manage page tables very early on. |
@@ -46,7 +47,7 @@ static void __init reset_early_page_tables(void) | |||
46 | 47 | ||
47 | next_early_pgt = 0; | 48 | next_early_pgt = 0; |
48 | 49 | ||
49 | write_cr3(__pa(early_level4_pgt)); | 50 | write_cr3(__pa_nodebug(early_level4_pgt)); |
50 | } | 51 | } |
51 | 52 | ||
52 | /* Create a new PMD entry */ | 53 | /* Create a new PMD entry */ |
@@ -59,7 +60,7 @@ int __init early_make_pgtable(unsigned long address) | |||
59 | pmdval_t pmd, *pmd_p; | 60 | pmdval_t pmd, *pmd_p; |
60 | 61 | ||
61 | /* Invalid address or early pgt is done ? */ | 62 | /* Invalid address or early pgt is done ? */ |
62 | if (physaddr >= MAXMEM || read_cr3() != __pa(early_level4_pgt)) | 63 | if (physaddr >= MAXMEM || read_cr3() != __pa_nodebug(early_level4_pgt)) |
63 | return -1; | 64 | return -1; |
64 | 65 | ||
65 | again: | 66 | again: |
@@ -155,9 +156,13 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) | |||
155 | (__START_KERNEL & PGDIR_MASK))); | 156 | (__START_KERNEL & PGDIR_MASK))); |
156 | BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END); | 157 | BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END); |
157 | 158 | ||
159 | cr4_init_shadow(); | ||
160 | |||
158 | /* Kill off the identity-map trampoline */ | 161 | /* Kill off the identity-map trampoline */ |
159 | reset_early_page_tables(); | 162 | reset_early_page_tables(); |
160 | 163 | ||
164 | kasan_map_early_shadow(early_level4_pgt); | ||
165 | |||
161 | /* clear bss before set_intr_gate with early_idt_handler */ | 166 | /* clear bss before set_intr_gate with early_idt_handler */ |
162 | clear_bss(); | 167 | clear_bss(); |
163 | 168 | ||
@@ -179,6 +184,8 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) | |||
179 | /* set init_level4_pgt kernel high mapping*/ | 184 | /* set init_level4_pgt kernel high mapping*/ |
180 | init_level4_pgt[511] = early_level4_pgt[511]; | 185 | init_level4_pgt[511] = early_level4_pgt[511]; |
181 | 186 | ||
187 | kasan_map_early_shadow(init_level4_pgt); | ||
188 | |||
182 | x86_64_start_reservations(real_mode_data); | 189 | x86_64_start_reservations(real_mode_data); |
183 | } | 190 | } |
184 | 191 | ||
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index a468c0a65c42..6fd514d9f69a 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -514,8 +514,38 @@ ENTRY(phys_base) | |||
514 | /* This must match the first entry in level2_kernel_pgt */ | 514 | /* This must match the first entry in level2_kernel_pgt */ |
515 | .quad 0x0000000000000000 | 515 | .quad 0x0000000000000000 |
516 | 516 | ||
517 | #ifdef CONFIG_KASAN | ||
518 | #define FILL(VAL, COUNT) \ | ||
519 | .rept (COUNT) ; \ | ||
520 | .quad (VAL) ; \ | ||
521 | .endr | ||
522 | |||
523 | NEXT_PAGE(kasan_zero_pte) | ||
524 | FILL(kasan_zero_page - __START_KERNEL_map + _KERNPG_TABLE, 512) | ||
525 | NEXT_PAGE(kasan_zero_pmd) | ||
526 | FILL(kasan_zero_pte - __START_KERNEL_map + _KERNPG_TABLE, 512) | ||
527 | NEXT_PAGE(kasan_zero_pud) | ||
528 | FILL(kasan_zero_pmd - __START_KERNEL_map + _KERNPG_TABLE, 512) | ||
529 | |||
530 | #undef FILL | ||
531 | #endif | ||
532 | |||
533 | |||
517 | #include "../../x86/xen/xen-head.S" | 534 | #include "../../x86/xen/xen-head.S" |
518 | 535 | ||
519 | __PAGE_ALIGNED_BSS | 536 | __PAGE_ALIGNED_BSS |
520 | NEXT_PAGE(empty_zero_page) | 537 | NEXT_PAGE(empty_zero_page) |
521 | .skip PAGE_SIZE | 538 | .skip PAGE_SIZE |
539 | |||
540 | #ifdef CONFIG_KASAN | ||
541 | /* | ||
542 | * This page is used as the early shadow. We don't use empty_zero_page | ||
543 | * at early stages; stack instrumentation could write some garbage | ||
544 | * to this page. | ||
545 | * Later we reuse it as the zero shadow for large ranges of memory | ||
546 | * that are allowed to be accessed, but not instrumented by kasan | ||
547 | * (vmalloc/vmemmap ...). | ||
548 | */ | ||
549 | NEXT_PAGE(kasan_zero_page) | ||
550 | .skip PAGE_SIZE | ||
551 | #endif | ||
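The kasan_zero_pud/pmd/pte tables above point every entry at the same underlying pages, so the entire early shadow region reads as zero. A minimal sketch of the address mapping KASAN relies on, assuming the usual 1/8 scale and an illustrative shadow offset (the real offset comes from the kernel configuration):

#include <stdint.h>

/* Illustrative only: each 8 bytes of kernel address space map to one
 * shadow byte.  The offset below is an assumed example value. */
#define KASAN_SHADOW_SCALE_SHIFT 3
#define KASAN_SHADOW_OFFSET      0xdffffc0000000000UL /* assumed */

static inline uint8_t *kasan_mem_to_shadow_sketch(const void *addr)
{
	return (uint8_t *)(((uint64_t)addr >> KASAN_SHADOW_SCALE_SHIFT)
			   + KASAN_SHADOW_OFFSET);
}

/* A shadow byte of 0 means "all 8 bytes addressable"; during early boot
 * every lookup lands in kasan_zero_page, so all accesses pass. */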
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 319bcb9372fe..3acbff4716b0 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
@@ -168,7 +168,7 @@ static void _hpet_print_config(const char *function, int line) | |||
168 | #define hpet_print_config() \ | 168 | #define hpet_print_config() \ |
169 | do { \ | 169 | do { \ |
170 | if (hpet_verbose) \ | 170 | if (hpet_verbose) \ |
171 | _hpet_print_config(__FUNCTION__, __LINE__); \ | 171 | _hpet_print_config(__func__, __LINE__); \ |
172 | } while (0) | 172 | } while (0) |
173 | 173 | ||
174 | /* | 174 | /* |
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 3d5fb509bdeb..7114ba220fd4 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c | |||
@@ -126,6 +126,8 @@ int arch_install_hw_breakpoint(struct perf_event *bp) | |||
126 | *dr7 |= encode_dr7(i, info->len, info->type); | 126 | *dr7 |= encode_dr7(i, info->len, info->type); |
127 | 127 | ||
128 | set_debugreg(*dr7, 7); | 128 | set_debugreg(*dr7, 7); |
129 | if (info->mask) | ||
130 | set_dr_addr_mask(info->mask, i); | ||
129 | 131 | ||
130 | return 0; | 132 | return 0; |
131 | } | 133 | } |
@@ -161,29 +163,8 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) | |||
161 | *dr7 &= ~__encode_dr7(i, info->len, info->type); | 163 | *dr7 &= ~__encode_dr7(i, info->len, info->type); |
162 | 164 | ||
163 | set_debugreg(*dr7, 7); | 165 | set_debugreg(*dr7, 7); |
164 | } | 166 | if (info->mask) |
165 | 167 | set_dr_addr_mask(0, i); | |
166 | static int get_hbp_len(u8 hbp_len) | ||
167 | { | ||
168 | unsigned int len_in_bytes = 0; | ||
169 | |||
170 | switch (hbp_len) { | ||
171 | case X86_BREAKPOINT_LEN_1: | ||
172 | len_in_bytes = 1; | ||
173 | break; | ||
174 | case X86_BREAKPOINT_LEN_2: | ||
175 | len_in_bytes = 2; | ||
176 | break; | ||
177 | case X86_BREAKPOINT_LEN_4: | ||
178 | len_in_bytes = 4; | ||
179 | break; | ||
180 | #ifdef CONFIG_X86_64 | ||
181 | case X86_BREAKPOINT_LEN_8: | ||
182 | len_in_bytes = 8; | ||
183 | break; | ||
184 | #endif | ||
185 | } | ||
186 | return len_in_bytes; | ||
187 | } | 168 | } |
188 | 169 | ||
189 | /* | 170 | /* |
@@ -196,7 +177,7 @@ int arch_check_bp_in_kernelspace(struct perf_event *bp) | |||
196 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); | 177 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); |
197 | 178 | ||
198 | va = info->address; | 179 | va = info->address; |
199 | len = get_hbp_len(info->len); | 180 | len = bp->attr.bp_len; |
200 | 181 | ||
201 | return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); | 182 | return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); |
202 | } | 183 | } |
@@ -277,6 +258,8 @@ static int arch_build_bp_info(struct perf_event *bp) | |||
277 | } | 258 | } |
278 | 259 | ||
279 | /* Len */ | 260 | /* Len */ |
261 | info->mask = 0; | ||
262 | |||
280 | switch (bp->attr.bp_len) { | 263 | switch (bp->attr.bp_len) { |
281 | case HW_BREAKPOINT_LEN_1: | 264 | case HW_BREAKPOINT_LEN_1: |
282 | info->len = X86_BREAKPOINT_LEN_1; | 265 | info->len = X86_BREAKPOINT_LEN_1; |
@@ -293,11 +276,17 @@ static int arch_build_bp_info(struct perf_event *bp) | |||
293 | break; | 276 | break; |
294 | #endif | 277 | #endif |
295 | default: | 278 | default: |
296 | return -EINVAL; | 279 | if (!is_power_of_2(bp->attr.bp_len)) |
280 | return -EINVAL; | ||
281 | if (!cpu_has_bpext) | ||
282 | return -EOPNOTSUPP; | ||
283 | info->mask = bp->attr.bp_len - 1; | ||
284 | info->len = X86_BREAKPOINT_LEN_1; | ||
297 | } | 285 | } |
298 | 286 | ||
299 | return 0; | 287 | return 0; |
300 | } | 288 | } |
289 | |||
301 | /* | 290 | /* |
302 | * Validate the arch-specific HW Breakpoint register settings | 291 | * Validate the arch-specific HW Breakpoint register settings |
303 | */ | 292 | */ |
@@ -312,11 +301,11 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) | |||
312 | if (ret) | 301 | if (ret) |
313 | return ret; | 302 | return ret; |
314 | 303 | ||
315 | ret = -EINVAL; | ||
316 | |||
317 | switch (info->len) { | 304 | switch (info->len) { |
318 | case X86_BREAKPOINT_LEN_1: | 305 | case X86_BREAKPOINT_LEN_1: |
319 | align = 0; | 306 | align = 0; |
307 | if (info->mask) | ||
308 | align = info->mask; | ||
320 | break; | 309 | break; |
321 | case X86_BREAKPOINT_LEN_2: | 310 | case X86_BREAKPOINT_LEN_2: |
322 | align = 1; | 311 | align = 1; |
@@ -330,7 +319,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) | |||
330 | break; | 319 | break; |
331 | #endif | 320 | #endif |
332 | default: | 321 | default: |
333 | return ret; | 322 | WARN_ON_ONCE(1); |
334 | } | 323 | } |
335 | 324 | ||
336 | /* | 325 | /* |
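The new default branch in arch_build_bp_info() accepts any power-of-two breakpoint length by programming a DR address mask (the AMD breakpoint-extension feature gated by cpu_has_bpext). A minimal sketch of the length-to-mask rule, assuming only what the hunk above shows:

#include <stdbool.h>
#include <stdint.h>

/* A non-standard length must be a power of two; the hardware then matches
 * any address inside the naturally aligned region of that size. */
static bool bp_len_to_mask(uint64_t bp_len, uint64_t *mask)
{
	if (bp_len == 0 || (bp_len & (bp_len - 1)))
		return false;			/* not a power of two */
	*mask = bp_len - 1;			/* e.g. len 16 -> mask 0xf */
	return true;
}

With bp_len = 16 the mask is 0xf, and arch_validate_hwbkpt_settings() reuses that same mask as the required alignment.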
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index a9a4229f6161..d5651fce0b71 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c | |||
@@ -13,12 +13,26 @@ | |||
13 | #include <asm/sigcontext.h> | 13 | #include <asm/sigcontext.h> |
14 | #include <asm/processor.h> | 14 | #include <asm/processor.h> |
15 | #include <asm/math_emu.h> | 15 | #include <asm/math_emu.h> |
16 | #include <asm/tlbflush.h> | ||
16 | #include <asm/uaccess.h> | 17 | #include <asm/uaccess.h> |
17 | #include <asm/ptrace.h> | 18 | #include <asm/ptrace.h> |
18 | #include <asm/i387.h> | 19 | #include <asm/i387.h> |
19 | #include <asm/fpu-internal.h> | 20 | #include <asm/fpu-internal.h> |
20 | #include <asm/user.h> | 21 | #include <asm/user.h> |
21 | 22 | ||
23 | static DEFINE_PER_CPU(bool, in_kernel_fpu); | ||
24 | |||
25 | void kernel_fpu_disable(void) | ||
26 | { | ||
27 | WARN_ON(this_cpu_read(in_kernel_fpu)); | ||
28 | this_cpu_write(in_kernel_fpu, true); | ||
29 | } | ||
30 | |||
31 | void kernel_fpu_enable(void) | ||
32 | { | ||
33 | this_cpu_write(in_kernel_fpu, false); | ||
34 | } | ||
35 | |||
22 | /* | 36 | /* |
23 | * Were we in an interrupt that interrupted kernel mode? | 37 | * Were we in an interrupt that interrupted kernel mode? |
24 | * | 38 | * |
@@ -33,6 +47,9 @@ | |||
33 | */ | 47 | */ |
34 | static inline bool interrupted_kernel_fpu_idle(void) | 48 | static inline bool interrupted_kernel_fpu_idle(void) |
35 | { | 49 | { |
50 | if (this_cpu_read(in_kernel_fpu)) | ||
51 | return false; | ||
52 | |||
36 | if (use_eager_fpu()) | 53 | if (use_eager_fpu()) |
37 | return __thread_has_fpu(current); | 54 | return __thread_has_fpu(current); |
38 | 55 | ||
@@ -73,10 +90,10 @@ void __kernel_fpu_begin(void) | |||
73 | { | 90 | { |
74 | struct task_struct *me = current; | 91 | struct task_struct *me = current; |
75 | 92 | ||
93 | this_cpu_write(in_kernel_fpu, true); | ||
94 | |||
76 | if (__thread_has_fpu(me)) { | 95 | if (__thread_has_fpu(me)) { |
77 | __thread_clear_has_fpu(me); | ||
78 | __save_init_fpu(me); | 96 | __save_init_fpu(me); |
79 | /* We do 'stts()' in __kernel_fpu_end() */ | ||
80 | } else if (!use_eager_fpu()) { | 97 | } else if (!use_eager_fpu()) { |
81 | this_cpu_write(fpu_owner_task, NULL); | 98 | this_cpu_write(fpu_owner_task, NULL); |
82 | clts(); | 99 | clts(); |
@@ -86,19 +103,16 @@ EXPORT_SYMBOL(__kernel_fpu_begin); | |||
86 | 103 | ||
87 | void __kernel_fpu_end(void) | 104 | void __kernel_fpu_end(void) |
88 | { | 105 | { |
89 | if (use_eager_fpu()) { | 106 | struct task_struct *me = current; |
90 | /* | 107 | |
91 | * For eager fpu, most the time, tsk_used_math() is true. | 108 | if (__thread_has_fpu(me)) { |
92 | * Restore the user math as we are done with the kernel usage. | 109 | if (WARN_ON(restore_fpu_checking(me))) |
93 | * At few instances during thread exit, signal handling etc, | 110 | drop_init_fpu(me); |
94 | * tsk_used_math() is false. Those few places will take proper | 111 | } else if (!use_eager_fpu()) { |
95 | * actions, so we don't need to restore the math here. | ||
96 | */ | ||
97 | if (likely(tsk_used_math(current))) | ||
98 | math_state_restore(); | ||
99 | } else { | ||
100 | stts(); | 112 | stts(); |
101 | } | 113 | } |
114 | |||
115 | this_cpu_write(in_kernel_fpu, false); | ||
102 | } | 116 | } |
103 | EXPORT_SYMBOL(__kernel_fpu_end); | 117 | EXPORT_SYMBOL(__kernel_fpu_end); |
104 | 118 | ||
@@ -180,7 +194,7 @@ void fpu_init(void) | |||
180 | if (cpu_has_xmm) | 194 | if (cpu_has_xmm) |
181 | cr4_mask |= X86_CR4_OSXMMEXCPT; | 195 | cr4_mask |= X86_CR4_OSXMMEXCPT; |
182 | if (cr4_mask) | 196 | if (cr4_mask) |
183 | set_in_cr4(cr4_mask); | 197 | cr4_set_bits(cr4_mask); |
184 | 198 | ||
185 | cr0 = read_cr0(); | 199 | cr0 = read_cr0(); |
186 | cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */ | 200 | cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */ |
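The per-CPU in_kernel_fpu flag closes the window where an interrupt could start its own FPU section while __kernel_fpu_begin()/__kernel_fpu_end() are mid save/restore. A minimal sketch of the expected caller pattern, using the public wrappers; the work inside is a placeholder:

/* Hedged sketch of an in-kernel FPU user; do_simd_work() is hypothetical. */
static void do_simd_work(void)
{
	if (!irq_fpu_usable())		/* e.g. we interrupted another FPU section */
		return;			/* fall back to a non-SIMD path instead */

	kernel_fpu_begin();		/* sets in_kernel_fpu, saves user state */
	/* ... SSE/AVX computation ... */
	kernel_fpu_end();		/* restores state, clears in_kernel_fpu */
}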
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 705ef8d48e2d..67b1cbe0093a 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
@@ -302,6 +302,9 @@ int check_irq_vectors_for_cpu_disable(void) | |||
302 | irq = __this_cpu_read(vector_irq[vector]); | 302 | irq = __this_cpu_read(vector_irq[vector]); |
303 | if (irq >= 0) { | 303 | if (irq >= 0) { |
304 | desc = irq_to_desc(irq); | 304 | desc = irq_to_desc(irq); |
305 | if (!desc) | ||
306 | continue; | ||
307 | |||
305 | data = irq_desc_get_irq_data(desc); | 308 | data = irq_desc_get_irq_data(desc); |
306 | cpumask_copy(&affinity_new, data->affinity); | 309 | cpumask_copy(&affinity_new, data->affinity); |
307 | cpu_clear(this_cpu, affinity_new); | 310 | cpu_clear(this_cpu, affinity_new); |
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 63ce838e5a54..28d28f5eb8f4 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c | |||
@@ -69,16 +69,9 @@ static void call_on_stack(void *func, void *stack) | |||
69 | : "memory", "cc", "edx", "ecx", "eax"); | 69 | : "memory", "cc", "edx", "ecx", "eax"); |
70 | } | 70 | } |
71 | 71 | ||
72 | /* how to get the current stack pointer from C */ | ||
73 | #define current_stack_pointer ({ \ | ||
74 | unsigned long sp; \ | ||
75 | asm("mov %%esp,%0" : "=g" (sp)); \ | ||
76 | sp; \ | ||
77 | }) | ||
78 | |||
79 | static inline void *current_stack(void) | 72 | static inline void *current_stack(void) |
80 | { | 73 | { |
81 | return (void *)(current_stack_pointer & ~(THREAD_SIZE - 1)); | 74 | return (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1)); |
82 | } | 75 | } |
83 | 76 | ||
84 | static inline int | 77 | static inline int |
@@ -103,7 +96,7 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) | |||
103 | 96 | ||
104 | /* Save the next esp at the bottom of the stack */ | 97 | /* Save the next esp at the bottom of the stack */ |
105 | prev_esp = (u32 *)irqstk; | 98 | prev_esp = (u32 *)irqstk; |
106 | *prev_esp = current_stack_pointer; | 99 | *prev_esp = current_stack_pointer(); |
107 | 100 | ||
108 | if (unlikely(overflow)) | 101 | if (unlikely(overflow)) |
109 | call_on_stack(print_stack_overflow, isp); | 102 | call_on_stack(print_stack_overflow, isp); |
@@ -156,7 +149,7 @@ void do_softirq_own_stack(void) | |||
156 | 149 | ||
157 | /* Push the previous esp onto the stack */ | 150 | /* Push the previous esp onto the stack */ |
158 | prev_esp = (u32 *)irqstk; | 151 | prev_esp = (u32 *)irqstk; |
159 | *prev_esp = current_stack_pointer; | 152 | *prev_esp = current_stack_pointer(); |
160 | 153 | ||
161 | call_on_stack(__do_softirq, isp); | 154 | call_on_stack(__do_softirq, isp); |
162 | } | 155 | } |
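The removed macro is replaced by a shared current_stack_pointer() helper; masking it with ~(THREAD_SIZE - 1) yields the base of the current kernel stack. A minimal sketch of what such a helper looks like (the real one lives in asm/thread_info.h; the THREAD_SIZE value is assumed):

#define THREAD_SIZE 8192UL	/* assumed 2-page stacks on 32-bit */

static inline unsigned long current_stack_pointer_sketch(void)
{
	unsigned long sp;
	asm("mov %%esp,%0" : "=g" (sp));	/* x86-32 only */
	return sp;
}

static inline void *current_stack_base(void)
{
	/* Kernel stacks are THREAD_SIZE aligned, so clearing the low bits
	 * of the stack pointer gives the start of the stack area. */
	return (void *)(current_stack_pointer_sketch() & ~(THREAD_SIZE - 1));
}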
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 98f654d466e5..4e3d5a9621fe 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c | |||
@@ -84,7 +84,7 @@ static volatile u32 twobyte_is_boostable[256 / 32] = { | |||
84 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | 84 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ |
85 | /* ---------------------------------------------- */ | 85 | /* ---------------------------------------------- */ |
86 | W(0x00, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0) | /* 00 */ | 86 | W(0x00, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0) | /* 00 */ |
87 | W(0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 10 */ | 87 | W(0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1) , /* 10 */ |
88 | W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 20 */ | 88 | W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 20 */ |
89 | W(0x30, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */ | 89 | W(0x30, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */ |
90 | W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ | 90 | W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ |
@@ -223,27 +223,48 @@ static unsigned long | |||
223 | __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr) | 223 | __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr) |
224 | { | 224 | { |
225 | struct kprobe *kp; | 225 | struct kprobe *kp; |
226 | unsigned long faddr; | ||
226 | 227 | ||
227 | kp = get_kprobe((void *)addr); | 228 | kp = get_kprobe((void *)addr); |
228 | /* There is no probe, return original address */ | 229 | faddr = ftrace_location(addr); |
229 | if (!kp) | 230 | /* |
231 | * Addresses inside the ftrace location are refused by | ||
232 | * arch_check_ftrace_location(). Something went terribly wrong | ||
233 | * if such an address is checked here. | ||
234 | */ | ||
235 | if (WARN_ON(faddr && faddr != addr)) | ||
236 | return 0UL; | ||
237 | /* | ||
238 | * Use the current code if it is not modified by Kprobe | ||
239 | * and it cannot be modified by ftrace. | ||
240 | */ | ||
241 | if (!kp && !faddr) | ||
230 | return addr; | 242 | return addr; |
231 | 243 | ||
232 | /* | 244 | /* |
233 | * Basically, kp->ainsn.insn has an original instruction. | 245 | * Basically, kp->ainsn.insn has an original instruction. |
234 | * However, RIP-relative instruction can not do single-stepping | 246 | * However, RIP-relative instruction can not do single-stepping |
235 | * at different place, __copy_instruction() tweaks the displacement of | 247 | * at different place, __copy_instruction() tweaks the displacement of |
236 | * that instruction. In that case, we can't recover the instruction | 248 | * that instruction. In that case, we can't recover the instruction |
237 | * from the kp->ainsn.insn. | 249 | * from the kp->ainsn.insn. |
238 | * | 250 | * |
239 | * On the other hand, kp->opcode has a copy of the first byte of | 251 | * On the other hand, in case of a normal Kprobe, kp->opcode has a copy |
240 | * the probed instruction, which is overwritten by int3. And | 252 | * of the first byte of the probed instruction, which is overwritten |
241 | * the instruction at kp->addr is not modified by kprobes except | 253 | * by int3. And the instruction at kp->addr is not modified by kprobes |
242 | * for the first byte, we can recover the original instruction | 254 | * except for the first byte, we can recover the original instruction |
243 | * from it and kp->opcode. | 255 | * from it and kp->opcode. |
256 | * | ||
257 | * In case of Kprobes using ftrace, we do not have a copy of | ||
258 | * the original instruction. In fact, the ftrace location might | ||
259 | * be modified at any time and could even be in an inconsistent state. | ||
260 | * Fortunately, we know that the original code is the ideal 5-byte | ||
261 | * long NOP. | ||
244 | */ | 262 | */ |
245 | memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); | 263 | memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); |
246 | buf[0] = kp->opcode; | 264 | if (faddr) |
265 | memcpy(buf, ideal_nops[NOP_ATOMIC5], 5); | ||
266 | else | ||
267 | buf[0] = kp->opcode; | ||
247 | return (unsigned long)buf; | 268 | return (unsigned long)buf; |
248 | } | 269 | } |
249 | 270 | ||
@@ -251,6 +272,7 @@ __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr) | |||
251 | * Recover the probed instruction at addr for further analysis. | 272 | * Recover the probed instruction at addr for further analysis. |
252 | * Caller must lock kprobes by kprobe_mutex, or disable preemption | 273 | * Caller must lock kprobes by kprobe_mutex, or disable preemption |
253 | * for preventing to release referencing kprobes. | 274 | * for preventing to release referencing kprobes. |
275 | * Returns zero if the instruction cannot be recovered. | ||
254 | */ | 276 | */ |
255 | unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) | 277 | unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) |
256 | { | 278 | { |
@@ -285,6 +307,8 @@ static int can_probe(unsigned long paddr) | |||
285 | * normally used, we just go through if there is no kprobe. | 307 | * normally used, we just go through if there is no kprobe. |
286 | */ | 308 | */ |
287 | __addr = recover_probed_instruction(buf, addr); | 309 | __addr = recover_probed_instruction(buf, addr); |
310 | if (!__addr) | ||
311 | return 0; | ||
288 | kernel_insn_init(&insn, (void *)__addr, MAX_INSN_SIZE); | 312 | kernel_insn_init(&insn, (void *)__addr, MAX_INSN_SIZE); |
289 | insn_get_length(&insn); | 313 | insn_get_length(&insn); |
290 | 314 | ||
@@ -333,6 +357,8 @@ int __copy_instruction(u8 *dest, u8 *src) | |||
333 | unsigned long recovered_insn = | 357 | unsigned long recovered_insn = |
334 | recover_probed_instruction(buf, (unsigned long)src); | 358 | recover_probed_instruction(buf, (unsigned long)src); |
335 | 359 | ||
360 | if (!recovered_insn) | ||
361 | return 0; | ||
336 | kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE); | 362 | kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE); |
337 | insn_get_length(&insn); | 363 | insn_get_length(&insn); |
338 | /* Another subsystem puts a breakpoint, failed to recover */ | 364 | /* Another subsystem puts a breakpoint, failed to recover */ |
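recover_probed_instruction() can now return 0, so every decode path must bail out instead of handing a NULL pointer to the instruction decoder. A minimal sketch of the expected calling convention, mirroring can_probe()/__copy_instruction() above (decode_at() is a hypothetical helper):

static int decode_at(unsigned long addr)
{
	kprobe_opcode_t buf[MAX_INSN_SIZE];
	struct insn insn;
	unsigned long recovered = recover_probed_instruction(buf, addr);

	if (!recovered)		/* breakpoint/ftrace state was unrecoverable */
		return -EINVAL;

	kernel_insn_init(&insn, (void *)recovered, MAX_INSN_SIZE);
	insn_get_length(&insn);
	return insn.length;
}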
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 7c523bbf3dc8..7b3b9d15c47a 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c | |||
@@ -259,6 +259,8 @@ static int can_optimize(unsigned long paddr) | |||
259 | */ | 259 | */ |
260 | return 0; | 260 | return 0; |
261 | recovered_insn = recover_probed_instruction(buf, addr); | 261 | recovered_insn = recover_probed_instruction(buf, addr); |
262 | if (!recovered_insn) | ||
263 | return 0; | ||
262 | kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE); | 264 | kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE); |
263 | insn_get_length(&insn); | 265 | insn_get_length(&insn); |
264 | /* Another subsystem puts a breakpoint */ | 266 | /* Another subsystem puts a breakpoint */ |
@@ -322,7 +324,8 @@ void arch_remove_optimized_kprobe(struct optimized_kprobe *op) | |||
322 | * Target instructions MUST be relocatable (checked inside) | 324 | * Target instructions MUST be relocatable (checked inside) |
323 | * This is called when new aggr(opt)probe is allocated or reused. | 325 | * This is called when new aggr(opt)probe is allocated or reused. |
324 | */ | 326 | */ |
325 | int arch_prepare_optimized_kprobe(struct optimized_kprobe *op) | 327 | int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, |
328 | struct kprobe *__unused) | ||
326 | { | 329 | { |
327 | u8 *buf; | 330 | u8 *buf; |
328 | int ret; | 331 | int ret; |
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 94f643484300..e354cc6446ab 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
@@ -609,7 +609,7 @@ static inline void check_zero(void) | |||
609 | u8 ret; | 609 | u8 ret; |
610 | u8 old; | 610 | u8 old; |
611 | 611 | ||
612 | old = ACCESS_ONCE(zero_stats); | 612 | old = READ_ONCE(zero_stats); |
613 | if (unlikely(old)) { | 613 | if (unlikely(old)) { |
614 | ret = cmpxchg(&zero_stats, old, 0); | 614 | ret = cmpxchg(&zero_stats, old, 0); |
615 | /* This ensures only one fellow resets the stat */ | 615 | /* This ensures only one fellow resets the stat */ |
@@ -727,6 +727,7 @@ __visible void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want) | |||
727 | int cpu; | 727 | int cpu; |
728 | u64 start; | 728 | u64 start; |
729 | unsigned long flags; | 729 | unsigned long flags; |
730 | __ticket_t head; | ||
730 | 731 | ||
731 | if (in_nmi()) | 732 | if (in_nmi()) |
732 | return; | 733 | return; |
@@ -768,11 +769,15 @@ __visible void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want) | |||
768 | */ | 769 | */ |
769 | __ticket_enter_slowpath(lock); | 770 | __ticket_enter_slowpath(lock); |
770 | 771 | ||
772 | /* make sure enter_slowpath, which is atomic, does not cross the read */ | ||
773 | smp_mb__after_atomic(); | ||
774 | |||
771 | /* | 775 | /* |
772 | * check again make sure it didn't become free while | 776 | * check again make sure it didn't become free while |
773 | * we weren't looking. | 777 | * we weren't looking. |
774 | */ | 778 | */ |
775 | if (ACCESS_ONCE(lock->tickets.head) == want) { | 779 | head = READ_ONCE(lock->tickets.head); |
780 | if (__tickets_equal(head, want)) { | ||
776 | add_stats(TAKEN_SLOW_PICKUP, 1); | 781 | add_stats(TAKEN_SLOW_PICKUP, 1); |
777 | goto out; | 782 | goto out; |
778 | } | 783 | } |
@@ -803,8 +808,8 @@ static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket) | |||
803 | add_stats(RELEASED_SLOW, 1); | 808 | add_stats(RELEASED_SLOW, 1); |
804 | for_each_cpu(cpu, &waiting_cpus) { | 809 | for_each_cpu(cpu, &waiting_cpus) { |
805 | const struct kvm_lock_waiting *w = &per_cpu(klock_waiting, cpu); | 810 | const struct kvm_lock_waiting *w = &per_cpu(klock_waiting, cpu); |
806 | if (ACCESS_ONCE(w->lock) == lock && | 811 | if (READ_ONCE(w->lock) == lock && |
807 | ACCESS_ONCE(w->want) == ticket) { | 812 | READ_ONCE(w->want) == ticket) { |
808 | add_stats(RELEASED_SLOW_KICKED, 1); | 813 | add_stats(RELEASED_SLOW_KICKED, 1); |
809 | kvm_kick_cpu(cpu); | 814 | kvm_kick_cpu(cpu); |
810 | break; | 815 | break; |
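kvm_lock_spinning() must set the slowpath flag and only then re-read the ticket head, which is why the smp_mb__after_atomic() barrier and READ_ONCE() are added. __tickets_equal() is assumed to compare heads while ignoring the slowpath flag bit; a minimal sketch of that comparison (the real helper lives in asm/spinlock.h):

/* Hedged sketch: the lock holder may set TICKET_SLOWPATH_FLAG concurrently,
 * so the flag bit is masked out before comparing ticket values. */
static inline bool tickets_equal_sketch(__ticket_t one, __ticket_t two)
{
	return !((one ^ two) & ~TICKET_SLOWPATH_FLAG);
}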
diff --git a/arch/x86/kernel/livepatch.c b/arch/x86/kernel/livepatch.c new file mode 100644 index 000000000000..ff3c3101d003 --- /dev/null +++ b/arch/x86/kernel/livepatch.c | |||
@@ -0,0 +1,90 @@ | |||
1 | /* | ||
2 | * livepatch.c - x86-specific Kernel Live Patching Core | ||
3 | * | ||
4 | * Copyright (C) 2014 Seth Jennings <sjenning@redhat.com> | ||
5 | * Copyright (C) 2014 SUSE | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; either version 2 | ||
10 | * of the License, or (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, see <http://www.gnu.org/licenses/>. | ||
19 | */ | ||
20 | |||
21 | #include <linux/module.h> | ||
22 | #include <linux/uaccess.h> | ||
23 | #include <asm/cacheflush.h> | ||
24 | #include <asm/page_types.h> | ||
25 | #include <asm/elf.h> | ||
26 | #include <asm/livepatch.h> | ||
27 | |||
28 | /** | ||
29 | * klp_write_module_reloc() - write a relocation in a module | ||
30 | * @mod: module in which the section to be modified is found | ||
31 | * @type: ELF relocation type (see asm/elf.h) | ||
32 | * @loc: address that the relocation should be written to | ||
33 | * @value: relocation value (sym address + addend) | ||
34 | * | ||
35 | * This function writes a relocation to the specified location for | ||
36 | * a particular module. | ||
37 | */ | ||
38 | int klp_write_module_reloc(struct module *mod, unsigned long type, | ||
39 | unsigned long loc, unsigned long value) | ||
40 | { | ||
41 | int ret, numpages, size = 4; | ||
42 | bool readonly; | ||
43 | unsigned long val; | ||
44 | unsigned long core = (unsigned long)mod->module_core; | ||
45 | unsigned long core_ro_size = mod->core_ro_size; | ||
46 | unsigned long core_size = mod->core_size; | ||
47 | |||
48 | switch (type) { | ||
49 | case R_X86_64_NONE: | ||
50 | return 0; | ||
51 | case R_X86_64_64: | ||
52 | val = value; | ||
53 | size = 8; | ||
54 | break; | ||
55 | case R_X86_64_32: | ||
56 | val = (u32)value; | ||
57 | break; | ||
58 | case R_X86_64_32S: | ||
59 | val = (s32)value; | ||
60 | break; | ||
61 | case R_X86_64_PC32: | ||
62 | val = (u32)(value - loc); | ||
63 | break; | ||
64 | default: | ||
65 | /* unsupported relocation type */ | ||
66 | return -EINVAL; | ||
67 | } | ||
68 | |||
69 | if (loc < core || loc >= core + core_size) | ||
70 | /* loc does not point to any symbol inside the module */ | ||
71 | return -EINVAL; | ||
72 | |||
73 | if (loc < core + core_ro_size) | ||
74 | readonly = true; | ||
75 | else | ||
76 | readonly = false; | ||
77 | |||
78 | /* determine if the relocation spans a page boundary */ | ||
79 | numpages = ((loc & PAGE_MASK) == ((loc + size) & PAGE_MASK)) ? 1 : 2; | ||
80 | |||
81 | if (readonly) | ||
82 | set_memory_rw(loc & PAGE_MASK, numpages); | ||
83 | |||
84 | ret = probe_kernel_write((void *)loc, &val, size); | ||
85 | |||
86 | if (readonly) | ||
87 | set_memory_ro(loc & PAGE_MASK, numpages); | ||
88 | |||
89 | return ret; | ||
90 | } | ||
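The read-only handling in klp_write_module_reloc() is the subtle part: a relocation in the module's RO region is temporarily flipped to RW, and the page count must cover a value that straddles a page boundary. A small self-contained example of that arithmetic, assuming 4 KiB pages:

#include <stdio.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))

int main(void)
{
	unsigned long loc = 0xffffffffa0001ffcUL;	/* 4 bytes before a page end */
	int size = 8;					/* an R_X86_64_64 relocation */
	int numpages = ((loc & PAGE_MASK) == ((loc + size) & PAGE_MASK)) ? 1 : 2;

	/* The 8-byte value crosses into the next page, so two pages have to be
	 * set RW before probe_kernel_write() and restored to RO afterwards. */
	printf("numpages = %d\n", numpages);		/* prints 2 */
	return 0;
}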
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index e69f9882bf95..9bbb9b35c144 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/fs.h> | 24 | #include <linux/fs.h> |
25 | #include <linux/string.h> | 25 | #include <linux/string.h> |
26 | #include <linux/kernel.h> | 26 | #include <linux/kernel.h> |
27 | #include <linux/kasan.h> | ||
27 | #include <linux/bug.h> | 28 | #include <linux/bug.h> |
28 | #include <linux/mm.h> | 29 | #include <linux/mm.h> |
29 | #include <linux/gfp.h> | 30 | #include <linux/gfp.h> |
@@ -46,21 +47,13 @@ do { \ | |||
46 | 47 | ||
47 | #ifdef CONFIG_RANDOMIZE_BASE | 48 | #ifdef CONFIG_RANDOMIZE_BASE |
48 | static unsigned long module_load_offset; | 49 | static unsigned long module_load_offset; |
49 | static int randomize_modules = 1; | ||
50 | 50 | ||
51 | /* Mutex protects the module_load_offset. */ | 51 | /* Mutex protects the module_load_offset. */ |
52 | static DEFINE_MUTEX(module_kaslr_mutex); | 52 | static DEFINE_MUTEX(module_kaslr_mutex); |
53 | 53 | ||
54 | static int __init parse_nokaslr(char *p) | ||
55 | { | ||
56 | randomize_modules = 0; | ||
57 | return 0; | ||
58 | } | ||
59 | early_param("nokaslr", parse_nokaslr); | ||
60 | |||
61 | static unsigned long int get_module_load_offset(void) | 54 | static unsigned long int get_module_load_offset(void) |
62 | { | 55 | { |
63 | if (randomize_modules) { | 56 | if (kaslr_enabled) { |
64 | mutex_lock(&module_kaslr_mutex); | 57 | mutex_lock(&module_kaslr_mutex); |
65 | /* | 58 | /* |
66 | * Calculate the module_load_offset the first time this | 59 | * Calculate the module_load_offset the first time this |
@@ -83,13 +76,22 @@ static unsigned long int get_module_load_offset(void) | |||
83 | 76 | ||
84 | void *module_alloc(unsigned long size) | 77 | void *module_alloc(unsigned long size) |
85 | { | 78 | { |
79 | void *p; | ||
80 | |||
86 | if (PAGE_ALIGN(size) > MODULES_LEN) | 81 | if (PAGE_ALIGN(size) > MODULES_LEN) |
87 | return NULL; | 82 | return NULL; |
88 | return __vmalloc_node_range(size, 1, | 83 | |
84 | p = __vmalloc_node_range(size, MODULE_ALIGN, | ||
89 | MODULES_VADDR + get_module_load_offset(), | 85 | MODULES_VADDR + get_module_load_offset(), |
90 | MODULES_END, GFP_KERNEL | __GFP_HIGHMEM, | 86 | MODULES_END, GFP_KERNEL | __GFP_HIGHMEM, |
91 | PAGE_KERNEL_EXEC, NUMA_NO_NODE, | 87 | PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, |
92 | __builtin_return_address(0)); | 88 | __builtin_return_address(0)); |
89 | if (p && (kasan_module_alloc(p, size) < 0)) { | ||
90 | vfree(p); | ||
91 | return NULL; | ||
92 | } | ||
93 | |||
94 | return p; | ||
93 | } | 95 | } |
94 | 96 | ||
95 | #ifdef CONFIG_X86_32 | 97 | #ifdef CONFIG_X86_32 |
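module_alloc() now requests MODULE_ALIGN alignment and pairs each mapping with kasan_module_alloc(), which provides real shadow memory for the module. The alignment matters because the shadow is kept at 1/8 scale, so only a sufficiently aligned module base gets page-aligned shadow. A small check of that relationship, with the MODULE_ALIGN definition assumed:

#include <stdio.h>

#define PAGE_SIZE                4096UL
#define KASAN_SHADOW_SCALE_SHIFT 3
#define MODULE_ALIGN             (PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT) /* assumed */

int main(void)
{
	unsigned long base = 32 * PAGE_SIZE;	/* any MODULE_ALIGN-aligned base */

	/* Eight pages of module space map to exactly one page of shadow, so a
	 * MODULE_ALIGN-aligned allocation has shadow starting on a page boundary. */
	printf("base aligned: %d, shadow bytes per MODULE_ALIGN chunk: %lu\n",
	       (base % MODULE_ALIGN) == 0, MODULE_ALIGN >> KASAN_SHADOW_SCALE_SHIFT);
	return 0;
}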
diff --git a/arch/x86/kernel/pmc_atom.c b/arch/x86/kernel/pmc_atom.c index 0ee5025e0fa4..d66a4fe6caee 100644 --- a/arch/x86/kernel/pmc_atom.c +++ b/arch/x86/kernel/pmc_atom.c | |||
@@ -25,8 +25,6 @@ | |||
25 | 25 | ||
26 | #include <asm/pmc_atom.h> | 26 | #include <asm/pmc_atom.h> |
27 | 27 | ||
28 | #define DRIVER_NAME KBUILD_MODNAME | ||
29 | |||
30 | struct pmc_dev { | 28 | struct pmc_dev { |
31 | u32 base_addr; | 29 | u32 base_addr; |
32 | void __iomem *regmap; | 30 | void __iomem *regmap; |
@@ -38,12 +36,12 @@ struct pmc_dev { | |||
38 | static struct pmc_dev pmc_device; | 36 | static struct pmc_dev pmc_device; |
39 | static u32 acpi_base_addr; | 37 | static u32 acpi_base_addr; |
40 | 38 | ||
41 | struct pmc_dev_map { | 39 | struct pmc_bit_map { |
42 | const char *name; | 40 | const char *name; |
43 | u32 bit_mask; | 41 | u32 bit_mask; |
44 | }; | 42 | }; |
45 | 43 | ||
46 | static const struct pmc_dev_map dev_map[] = { | 44 | static const struct pmc_bit_map dev_map[] = { |
47 | {"0 - LPSS1_F0_DMA", BIT_LPSS1_F0_DMA}, | 45 | {"0 - LPSS1_F0_DMA", BIT_LPSS1_F0_DMA}, |
48 | {"1 - LPSS1_F1_PWM1", BIT_LPSS1_F1_PWM1}, | 46 | {"1 - LPSS1_F1_PWM1", BIT_LPSS1_F1_PWM1}, |
49 | {"2 - LPSS1_F2_PWM2", BIT_LPSS1_F2_PWM2}, | 47 | {"2 - LPSS1_F2_PWM2", BIT_LPSS1_F2_PWM2}, |
@@ -82,6 +80,27 @@ static const struct pmc_dev_map dev_map[] = { | |||
82 | {"35 - DFX", BIT_DFX}, | 80 | {"35 - DFX", BIT_DFX}, |
83 | }; | 81 | }; |
84 | 82 | ||
83 | static const struct pmc_bit_map pss_map[] = { | ||
84 | {"0 - GBE", PMC_PSS_BIT_GBE}, | ||
85 | {"1 - SATA", PMC_PSS_BIT_SATA}, | ||
86 | {"2 - HDA", PMC_PSS_BIT_HDA}, | ||
87 | {"3 - SEC", PMC_PSS_BIT_SEC}, | ||
88 | {"4 - PCIE", PMC_PSS_BIT_PCIE}, | ||
89 | {"5 - LPSS", PMC_PSS_BIT_LPSS}, | ||
90 | {"6 - LPE", PMC_PSS_BIT_LPE}, | ||
91 | {"7 - DFX", PMC_PSS_BIT_DFX}, | ||
92 | {"8 - USH_CTRL", PMC_PSS_BIT_USH_CTRL}, | ||
93 | {"9 - USH_SUS", PMC_PSS_BIT_USH_SUS}, | ||
94 | {"10 - USH_VCCS", PMC_PSS_BIT_USH_VCCS}, | ||
95 | {"11 - USH_VCCA", PMC_PSS_BIT_USH_VCCA}, | ||
96 | {"12 - OTG_CTRL", PMC_PSS_BIT_OTG_CTRL}, | ||
97 | {"13 - OTG_VCCS", PMC_PSS_BIT_OTG_VCCS}, | ||
98 | {"14 - OTG_VCCA_CLK", PMC_PSS_BIT_OTG_VCCA_CLK}, | ||
99 | {"15 - OTG_VCCA", PMC_PSS_BIT_OTG_VCCA}, | ||
100 | {"16 - USB", PMC_PSS_BIT_USB}, | ||
101 | {"17 - USB_SUS", PMC_PSS_BIT_USB_SUS}, | ||
102 | }; | ||
103 | |||
85 | static inline u32 pmc_reg_read(struct pmc_dev *pmc, int reg_offset) | 104 | static inline u32 pmc_reg_read(struct pmc_dev *pmc, int reg_offset) |
86 | { | 105 | { |
87 | return readl(pmc->regmap + reg_offset); | 106 | return readl(pmc->regmap + reg_offset); |
@@ -169,6 +188,32 @@ static const struct file_operations pmc_dev_state_ops = { | |||
169 | .release = single_release, | 188 | .release = single_release, |
170 | }; | 189 | }; |
171 | 190 | ||
191 | static int pmc_pss_state_show(struct seq_file *s, void *unused) | ||
192 | { | ||
193 | struct pmc_dev *pmc = s->private; | ||
194 | u32 pss = pmc_reg_read(pmc, PMC_PSS); | ||
195 | int pss_index; | ||
196 | |||
197 | for (pss_index = 0; pss_index < ARRAY_SIZE(pss_map); pss_index++) { | ||
198 | seq_printf(s, "Island: %-32s\tState: %s\n", | ||
199 | pss_map[pss_index].name, | ||
200 | pss_map[pss_index].bit_mask & pss ? "Off" : "On"); | ||
201 | } | ||
202 | return 0; | ||
203 | } | ||
204 | |||
205 | static int pmc_pss_state_open(struct inode *inode, struct file *file) | ||
206 | { | ||
207 | return single_open(file, pmc_pss_state_show, inode->i_private); | ||
208 | } | ||
209 | |||
210 | static const struct file_operations pmc_pss_state_ops = { | ||
211 | .open = pmc_pss_state_open, | ||
212 | .read = seq_read, | ||
213 | .llseek = seq_lseek, | ||
214 | .release = single_release, | ||
215 | }; | ||
216 | |||
172 | static int pmc_sleep_tmr_show(struct seq_file *s, void *unused) | 217 | static int pmc_sleep_tmr_show(struct seq_file *s, void *unused) |
173 | { | 218 | { |
174 | struct pmc_dev *pmc = s->private; | 219 | struct pmc_dev *pmc = s->private; |
@@ -202,11 +247,7 @@ static const struct file_operations pmc_sleep_tmr_ops = { | |||
202 | 247 | ||
203 | static void pmc_dbgfs_unregister(struct pmc_dev *pmc) | 248 | static void pmc_dbgfs_unregister(struct pmc_dev *pmc) |
204 | { | 249 | { |
205 | if (!pmc->dbgfs_dir) | ||
206 | return; | ||
207 | |||
208 | debugfs_remove_recursive(pmc->dbgfs_dir); | 250 | debugfs_remove_recursive(pmc->dbgfs_dir); |
209 | pmc->dbgfs_dir = NULL; | ||
210 | } | 251 | } |
211 | 252 | ||
212 | static int pmc_dbgfs_register(struct pmc_dev *pmc, struct pci_dev *pdev) | 253 | static int pmc_dbgfs_register(struct pmc_dev *pmc, struct pci_dev *pdev) |
@@ -217,19 +258,29 @@ static int pmc_dbgfs_register(struct pmc_dev *pmc, struct pci_dev *pdev) | |||
217 | if (!dir) | 258 | if (!dir) |
218 | return -ENOMEM; | 259 | return -ENOMEM; |
219 | 260 | ||
261 | pmc->dbgfs_dir = dir; | ||
262 | |||
220 | f = debugfs_create_file("dev_state", S_IFREG | S_IRUGO, | 263 | f = debugfs_create_file("dev_state", S_IFREG | S_IRUGO, |
221 | dir, pmc, &pmc_dev_state_ops); | 264 | dir, pmc, &pmc_dev_state_ops); |
222 | if (!f) { | 265 | if (!f) { |
223 | dev_err(&pdev->dev, "dev_states register failed\n"); | 266 | dev_err(&pdev->dev, "dev_state register failed\n"); |
224 | goto err; | 267 | goto err; |
225 | } | 268 | } |
269 | |||
270 | f = debugfs_create_file("pss_state", S_IFREG | S_IRUGO, | ||
271 | dir, pmc, &pmc_pss_state_ops); | ||
272 | if (!f) { | ||
273 | dev_err(&pdev->dev, "pss_state register failed\n"); | ||
274 | goto err; | ||
275 | } | ||
276 | |||
226 | f = debugfs_create_file("sleep_state", S_IFREG | S_IRUGO, | 277 | f = debugfs_create_file("sleep_state", S_IFREG | S_IRUGO, |
227 | dir, pmc, &pmc_sleep_tmr_ops); | 278 | dir, pmc, &pmc_sleep_tmr_ops); |
228 | if (!f) { | 279 | if (!f) { |
229 | dev_err(&pdev->dev, "sleep_state register failed\n"); | 280 | dev_err(&pdev->dev, "sleep_state register failed\n"); |
230 | goto err; | 281 | goto err; |
231 | } | 282 | } |
232 | pmc->dbgfs_dir = dir; | 283 | |
233 | return 0; | 284 | return 0; |
234 | err: | 285 | err: |
235 | pmc_dbgfs_unregister(pmc); | 286 | pmc_dbgfs_unregister(pmc); |
@@ -292,7 +343,6 @@ MODULE_DEVICE_TABLE(pci, pmc_pci_ids); | |||
292 | 343 | ||
293 | static int __init pmc_atom_init(void) | 344 | static int __init pmc_atom_init(void) |
294 | { | 345 | { |
295 | int err = -ENODEV; | ||
296 | struct pci_dev *pdev = NULL; | 346 | struct pci_dev *pdev = NULL; |
297 | const struct pci_device_id *ent; | 347 | const struct pci_device_id *ent; |
298 | 348 | ||
@@ -306,14 +356,11 @@ static int __init pmc_atom_init(void) | |||
306 | */ | 356 | */ |
307 | for_each_pci_dev(pdev) { | 357 | for_each_pci_dev(pdev) { |
308 | ent = pci_match_id(pmc_pci_ids, pdev); | 358 | ent = pci_match_id(pmc_pci_ids, pdev); |
309 | if (ent) { | 359 | if (ent) |
310 | err = pmc_setup_dev(pdev); | 360 | return pmc_setup_dev(pdev); |
311 | goto out; | ||
312 | } | ||
313 | } | 361 | } |
314 | /* Device not found. */ | 362 | /* Device not found. */ |
315 | out: | 363 | return -ENODEV; |
316 | return err; | ||
317 | } | 364 | } |
318 | 365 | ||
319 | module_init(pmc_atom_init); | 366 | module_init(pmc_atom_init); |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index e127ddaa2d5a..046e2d620bbe 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <asm/fpu-internal.h> | 28 | #include <asm/fpu-internal.h> |
29 | #include <asm/debugreg.h> | 29 | #include <asm/debugreg.h> |
30 | #include <asm/nmi.h> | 30 | #include <asm/nmi.h> |
31 | #include <asm/tlbflush.h> | ||
31 | 32 | ||
32 | /* | 33 | /* |
33 | * per-CPU TSS segments. Threads are completely 'soft' on Linux, | 34 | * per-CPU TSS segments. Threads are completely 'soft' on Linux, |
@@ -141,7 +142,7 @@ void flush_thread(void) | |||
141 | 142 | ||
142 | static void hard_disable_TSC(void) | 143 | static void hard_disable_TSC(void) |
143 | { | 144 | { |
144 | write_cr4(read_cr4() | X86_CR4_TSD); | 145 | cr4_set_bits(X86_CR4_TSD); |
145 | } | 146 | } |
146 | 147 | ||
147 | void disable_TSC(void) | 148 | void disable_TSC(void) |
@@ -158,7 +159,7 @@ void disable_TSC(void) | |||
158 | 159 | ||
159 | static void hard_enable_TSC(void) | 160 | static void hard_enable_TSC(void) |
160 | { | 161 | { |
161 | write_cr4(read_cr4() & ~X86_CR4_TSD); | 162 | cr4_clear_bits(X86_CR4_TSD); |
162 | } | 163 | } |
163 | 164 | ||
164 | static void enable_TSC(void) | 165 | static void enable_TSC(void) |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 8f3ebfe710d0..603c4f99cb5a 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -101,7 +101,7 @@ void __show_regs(struct pt_regs *regs, int all) | |||
101 | cr0 = read_cr0(); | 101 | cr0 = read_cr0(); |
102 | cr2 = read_cr2(); | 102 | cr2 = read_cr2(); |
103 | cr3 = read_cr3(); | 103 | cr3 = read_cr3(); |
104 | cr4 = read_cr4_safe(); | 104 | cr4 = __read_cr4_safe(); |
105 | printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", | 105 | printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", |
106 | cr0, cr2, cr3, cr4); | 106 | cr0, cr2, cr3, cr4); |
107 | 107 | ||
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 5a2c02913af3..67fcc43577d2 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -93,7 +93,7 @@ void __show_regs(struct pt_regs *regs, int all) | |||
93 | cr0 = read_cr0(); | 93 | cr0 = read_cr0(); |
94 | cr2 = read_cr2(); | 94 | cr2 = read_cr2(); |
95 | cr3 = read_cr3(); | 95 | cr3 = read_cr3(); |
96 | cr4 = read_cr4(); | 96 | cr4 = __read_cr4(); |
97 | 97 | ||
98 | printk(KERN_DEFAULT "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", | 98 | printk(KERN_DEFAULT "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", |
99 | fs, fsindex, gs, gsindex, shadowgs); | 99 | fs, fsindex, gs, gsindex, shadowgs); |
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index ca9622a25e95..cd9685235df9 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c | |||
@@ -49,11 +49,11 @@ int mach_set_rtc_mmss(const struct timespec *now) | |||
49 | retval = set_rtc_time(&tm); | 49 | retval = set_rtc_time(&tm); |
50 | if (retval) | 50 | if (retval) |
51 | printk(KERN_ERR "%s: RTC write failed with error %d\n", | 51 | printk(KERN_ERR "%s: RTC write failed with error %d\n", |
52 | __FUNCTION__, retval); | 52 | __func__, retval); |
53 | } else { | 53 | } else { |
54 | printk(KERN_ERR | 54 | printk(KERN_ERR |
55 | "%s: Invalid RTC value: write of %lx to RTC failed\n", | 55 | "%s: Invalid RTC value: write of %lx to RTC failed\n", |
56 | __FUNCTION__, nowtime); | 56 | __func__, nowtime); |
57 | retval = -EINVAL; | 57 | retval = -EINVAL; |
58 | } | 58 | } |
59 | return retval; | 59 | return retval; |
@@ -170,7 +170,7 @@ static struct platform_device rtc_device = { | |||
170 | static __init int add_rtc_cmos(void) | 170 | static __init int add_rtc_cmos(void) |
171 | { | 171 | { |
172 | #ifdef CONFIG_PNP | 172 | #ifdef CONFIG_PNP |
173 | static const char * const const ids[] __initconst = | 173 | static const char * const ids[] __initconst = |
174 | { "PNP0b00", "PNP0b01", "PNP0b02", }; | 174 | { "PNP0b00", "PNP0b01", "PNP0b02", }; |
175 | struct pnp_dev *dev; | 175 | struct pnp_dev *dev; |
176 | struct pnp_id *id; | 176 | struct pnp_id *id; |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index ab4734e5411d..98dc9317286e 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -89,6 +89,7 @@ | |||
89 | #include <asm/cacheflush.h> | 89 | #include <asm/cacheflush.h> |
90 | #include <asm/processor.h> | 90 | #include <asm/processor.h> |
91 | #include <asm/bugs.h> | 91 | #include <asm/bugs.h> |
92 | #include <asm/kasan.h> | ||
92 | 93 | ||
93 | #include <asm/vsyscall.h> | 94 | #include <asm/vsyscall.h> |
94 | #include <asm/cpu.h> | 95 | #include <asm/cpu.h> |
@@ -121,6 +122,8 @@ | |||
121 | unsigned long max_low_pfn_mapped; | 122 | unsigned long max_low_pfn_mapped; |
122 | unsigned long max_pfn_mapped; | 123 | unsigned long max_pfn_mapped; |
123 | 124 | ||
125 | bool __read_mostly kaslr_enabled = false; | ||
126 | |||
124 | #ifdef CONFIG_DMI | 127 | #ifdef CONFIG_DMI |
125 | RESERVE_BRK(dmi_alloc, 65536); | 128 | RESERVE_BRK(dmi_alloc, 65536); |
126 | #endif | 129 | #endif |
@@ -424,6 +427,11 @@ static void __init reserve_initrd(void) | |||
424 | } | 427 | } |
425 | #endif /* CONFIG_BLK_DEV_INITRD */ | 428 | #endif /* CONFIG_BLK_DEV_INITRD */ |
426 | 429 | ||
430 | static void __init parse_kaslr_setup(u64 pa_data, u32 data_len) | ||
431 | { | ||
432 | kaslr_enabled = (bool)(pa_data + sizeof(struct setup_data)); | ||
433 | } | ||
434 | |||
427 | static void __init parse_setup_data(void) | 435 | static void __init parse_setup_data(void) |
428 | { | 436 | { |
429 | struct setup_data *data; | 437 | struct setup_data *data; |
@@ -431,15 +439,13 @@ static void __init parse_setup_data(void) | |||
431 | 439 | ||
432 | pa_data = boot_params.hdr.setup_data; | 440 | pa_data = boot_params.hdr.setup_data; |
433 | while (pa_data) { | 441 | while (pa_data) { |
434 | u32 data_len, map_len, data_type; | 442 | u32 data_len, data_type; |
435 | 443 | ||
436 | map_len = max(PAGE_SIZE - (pa_data & ~PAGE_MASK), | 444 | data = early_memremap(pa_data, sizeof(*data)); |
437 | (u64)sizeof(struct setup_data)); | ||
438 | data = early_memremap(pa_data, map_len); | ||
439 | data_len = data->len + sizeof(struct setup_data); | 445 | data_len = data->len + sizeof(struct setup_data); |
440 | data_type = data->type; | 446 | data_type = data->type; |
441 | pa_next = data->next; | 447 | pa_next = data->next; |
442 | early_iounmap(data, map_len); | 448 | early_iounmap(data, sizeof(*data)); |
443 | 449 | ||
444 | switch (data_type) { | 450 | switch (data_type) { |
445 | case SETUP_E820_EXT: | 451 | case SETUP_E820_EXT: |
@@ -451,6 +457,9 @@ static void __init parse_setup_data(void) | |||
451 | case SETUP_EFI: | 457 | case SETUP_EFI: |
452 | parse_efi_setup(pa_data, data_len); | 458 | parse_efi_setup(pa_data, data_len); |
453 | break; | 459 | break; |
460 | case SETUP_KASLR: | ||
461 | parse_kaslr_setup(pa_data, data_len); | ||
462 | break; | ||
454 | default: | 463 | default: |
455 | break; | 464 | break; |
456 | } | 465 | } |
@@ -833,10 +842,14 @@ static void __init trim_low_memory_range(void) | |||
833 | static int | 842 | static int |
834 | dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p) | 843 | dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p) |
835 | { | 844 | { |
836 | pr_emerg("Kernel Offset: 0x%lx from 0x%lx " | 845 | if (kaslr_enabled) |
837 | "(relocation range: 0x%lx-0x%lx)\n", | 846 | pr_emerg("Kernel Offset: 0x%lx from 0x%lx (relocation range: 0x%lx-0x%lx)\n", |
838 | (unsigned long)&_text - __START_KERNEL, __START_KERNEL, | 847 | (unsigned long)&_text - __START_KERNEL, |
839 | __START_KERNEL_map, MODULES_VADDR-1); | 848 | __START_KERNEL, |
849 | __START_KERNEL_map, | ||
850 | MODULES_VADDR-1); | ||
851 | else | ||
852 | pr_emerg("Kernel Offset: disabled\n"); | ||
840 | 853 | ||
841 | return 0; | 854 | return 0; |
842 | } | 855 | } |
@@ -1176,9 +1189,11 @@ void __init setup_arch(char **cmdline_p) | |||
1176 | 1189 | ||
1177 | x86_init.paging.pagetable_init(); | 1190 | x86_init.paging.pagetable_init(); |
1178 | 1191 | ||
1192 | kasan_init(); | ||
1193 | |||
1179 | if (boot_cpu_data.cpuid_level >= 0) { | 1194 | if (boot_cpu_data.cpuid_level >= 0) { |
1180 | /* A CPU has %cr4 if and only if it has CPUID */ | 1195 | /* A CPU has %cr4 if and only if it has CPUID */ |
1181 | mmu_cr4_features = read_cr4(); | 1196 | mmu_cr4_features = __read_cr4(); |
1182 | if (trampoline_cr4_features) | 1197 | if (trampoline_cr4_features) |
1183 | *trampoline_cr4_features = mmu_cr4_features; | 1198 | *trampoline_cr4_features = mmu_cr4_features; |
1184 | } | 1199 | } |
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index ed37a768d0fc..e5042463c1bc 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
@@ -69,7 +69,7 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, | |||
69 | unsigned int err = 0; | 69 | unsigned int err = 0; |
70 | 70 | ||
71 | /* Always make any pending restarted system calls return -EINTR */ | 71 | /* Always make any pending restarted system calls return -EINTR */ |
72 | current_thread_info()->restart_block.fn = do_no_restart_syscall; | 72 | current->restart_block.fn = do_no_restart_syscall; |
73 | 73 | ||
74 | get_user_try { | 74 | get_user_try { |
75 | 75 | ||
@@ -740,12 +740,6 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | |||
740 | { | 740 | { |
741 | user_exit(); | 741 | user_exit(); |
742 | 742 | ||
743 | #ifdef CONFIG_X86_MCE | ||
744 | /* notify userspace of pending MCEs */ | ||
745 | if (thread_info_flags & _TIF_MCE_NOTIFY) | ||
746 | mce_notify_process(); | ||
747 | #endif /* CONFIG_X86_64 && CONFIG_X86_MCE */ | ||
748 | |||
749 | if (thread_info_flags & _TIF_UPROBE) | 743 | if (thread_info_flags & _TIF_UPROBE) |
750 | uprobe_notify_resume(regs); | 744 | uprobe_notify_resume(regs); |
751 | 745 | ||
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 6d7022c683e3..febc6aabc72e 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -73,7 +73,6 @@ | |||
73 | #include <asm/setup.h> | 73 | #include <asm/setup.h> |
74 | #include <asm/uv/uv.h> | 74 | #include <asm/uv/uv.h> |
75 | #include <linux/mc146818rtc.h> | 75 | #include <linux/mc146818rtc.h> |
76 | #include <asm/smpboot_hooks.h> | ||
77 | #include <asm/i8259.h> | 76 | #include <asm/i8259.h> |
78 | #include <asm/realmode.h> | 77 | #include <asm/realmode.h> |
79 | #include <asm/misc.h> | 78 | #include <asm/misc.h> |
@@ -104,6 +103,43 @@ EXPORT_PER_CPU_SYMBOL(cpu_info); | |||
104 | 103 | ||
105 | atomic_t init_deasserted; | 104 | atomic_t init_deasserted; |
106 | 105 | ||
106 | static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) | ||
107 | { | ||
108 | unsigned long flags; | ||
109 | |||
110 | spin_lock_irqsave(&rtc_lock, flags); | ||
111 | CMOS_WRITE(0xa, 0xf); | ||
112 | spin_unlock_irqrestore(&rtc_lock, flags); | ||
113 | local_flush_tlb(); | ||
114 | pr_debug("1.\n"); | ||
115 | *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) = | ||
116 | start_eip >> 4; | ||
117 | pr_debug("2.\n"); | ||
118 | *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = | ||
119 | start_eip & 0xf; | ||
120 | pr_debug("3.\n"); | ||
121 | } | ||
122 | |||
123 | static inline void smpboot_restore_warm_reset_vector(void) | ||
124 | { | ||
125 | unsigned long flags; | ||
126 | |||
127 | /* | ||
128 | * Install writable page 0 entry to set BIOS data area. | ||
129 | */ | ||
130 | local_flush_tlb(); | ||
131 | |||
132 | /* | ||
133 | * Paranoid: Set warm reset code and vector here back | ||
134 | * to default values. | ||
135 | */ | ||
136 | spin_lock_irqsave(&rtc_lock, flags); | ||
137 | CMOS_WRITE(0, 0xf); | ||
138 | spin_unlock_irqrestore(&rtc_lock, flags); | ||
139 | |||
140 | *((volatile u32 *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0; | ||
141 | } | ||
142 | |||
107 | /* | 143 | /* |
108 | * Report back to the Boot Processor during boot time or to the caller processor | 144 | * Report back to the Boot Processor during boot time or to the caller processor |
109 | * during CPU online. | 145 | * during CPU online. |
@@ -136,8 +172,7 @@ static void smp_callin(void) | |||
136 | * CPU, first the APIC. (this is probably redundant on most | 172 | * CPU, first the APIC. (this is probably redundant on most |
137 | * boards) | 173 | * boards) |
138 | */ | 174 | */ |
139 | setup_local_APIC(); | 175 | apic_ap_setup(); |
140 | end_local_APIC_setup(); | ||
141 | 176 | ||
142 | /* | 177 | /* |
143 | * Need to setup vector mappings before we enable interrupts. | 178 | * Need to setup vector mappings before we enable interrupts. |
@@ -955,9 +990,12 @@ void arch_disable_smp_support(void) | |||
955 | */ | 990 | */ |
956 | static __init void disable_smp(void) | 991 | static __init void disable_smp(void) |
957 | { | 992 | { |
993 | pr_info("SMP disabled\n"); | ||
994 | |||
995 | disable_ioapic_support(); | ||
996 | |||
958 | init_cpu_present(cpumask_of(0)); | 997 | init_cpu_present(cpumask_of(0)); |
959 | init_cpu_possible(cpumask_of(0)); | 998 | init_cpu_possible(cpumask_of(0)); |
960 | smpboot_clear_io_apic_irqs(); | ||
961 | 999 | ||
962 | if (smp_found_config) | 1000 | if (smp_found_config) |
963 | physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); | 1001 | physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); |
@@ -967,6 +1005,13 @@ static __init void disable_smp(void) | |||
967 | cpumask_set_cpu(0, cpu_core_mask(0)); | 1005 | cpumask_set_cpu(0, cpu_core_mask(0)); |
968 | } | 1006 | } |
969 | 1007 | ||
1008 | enum { | ||
1009 | SMP_OK, | ||
1010 | SMP_NO_CONFIG, | ||
1011 | SMP_NO_APIC, | ||
1012 | SMP_FORCE_UP, | ||
1013 | }; | ||
1014 | |||
970 | /* | 1015 | /* |
971 | * Various sanity checks. | 1016 | * Various sanity checks. |
972 | */ | 1017 | */ |
@@ -1014,10 +1059,7 @@ static int __init smp_sanity_check(unsigned max_cpus) | |||
1014 | if (!smp_found_config && !acpi_lapic) { | 1059 | if (!smp_found_config && !acpi_lapic) { |
1015 | preempt_enable(); | 1060 | preempt_enable(); |
1016 | pr_notice("SMP motherboard not detected\n"); | 1061 | pr_notice("SMP motherboard not detected\n"); |
1017 | disable_smp(); | 1062 | return SMP_NO_CONFIG; |
1018 | if (APIC_init_uniprocessor()) | ||
1019 | pr_notice("Local APIC not detected. Using dummy APIC emulation.\n"); | ||
1020 | return -1; | ||
1021 | } | 1063 | } |
1022 | 1064 | ||
1023 | /* | 1065 | /* |
@@ -1041,9 +1083,7 @@ static int __init smp_sanity_check(unsigned max_cpus) | |||
1041 | boot_cpu_physical_apicid); | 1083 | boot_cpu_physical_apicid); |
1042 | pr_err("... forcing use of dummy APIC emulation (tell your hw vendor)\n"); | 1084 | pr_err("... forcing use of dummy APIC emulation (tell your hw vendor)\n"); |
1043 | } | 1085 | } |
1044 | smpboot_clear_io_apic(); | 1086 | return SMP_NO_APIC; |
1045 | disable_ioapic_support(); | ||
1046 | return -1; | ||
1047 | } | 1087 | } |
1048 | 1088 | ||
1049 | verify_local_APIC(); | 1089 | verify_local_APIC(); |
@@ -1053,15 +1093,10 @@ static int __init smp_sanity_check(unsigned max_cpus) | |||
1053 | */ | 1093 | */ |
1054 | if (!max_cpus) { | 1094 | if (!max_cpus) { |
1055 | pr_info("SMP mode deactivated\n"); | 1095 | pr_info("SMP mode deactivated\n"); |
1056 | smpboot_clear_io_apic(); | 1096 | return SMP_FORCE_UP; |
1057 | |||
1058 | connect_bsp_APIC(); | ||
1059 | setup_local_APIC(); | ||
1060 | bsp_end_local_APIC_setup(); | ||
1061 | return -1; | ||
1062 | } | 1097 | } |
1063 | 1098 | ||
1064 | return 0; | 1099 | return SMP_OK; |
1065 | } | 1100 | } |
1066 | 1101 | ||
1067 | static void __init smp_cpu_index_default(void) | 1102 | static void __init smp_cpu_index_default(void) |
@@ -1101,10 +1136,21 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
1101 | } | 1136 | } |
1102 | set_cpu_sibling_map(0); | 1137 | set_cpu_sibling_map(0); |
1103 | 1138 | ||
1104 | if (smp_sanity_check(max_cpus) < 0) { | 1139 | switch (smp_sanity_check(max_cpus)) { |
1105 | pr_info("SMP disabled\n"); | 1140 | case SMP_NO_CONFIG: |
1106 | disable_smp(); | 1141 | disable_smp(); |
1142 | if (APIC_init_uniprocessor()) | ||
1143 | pr_notice("Local APIC not detected. Using dummy APIC emulation.\n"); | ||
1107 | return; | 1144 | return; |
1145 | case SMP_NO_APIC: | ||
1146 | disable_smp(); | ||
1147 | return; | ||
1148 | case SMP_FORCE_UP: | ||
1149 | disable_smp(); | ||
1150 | apic_bsp_setup(false); | ||
1151 | return; | ||
1152 | case SMP_OK: | ||
1153 | break; | ||
1108 | } | 1154 | } |
1109 | 1155 | ||
1110 | default_setup_apic_routing(); | 1156 | default_setup_apic_routing(); |
@@ -1115,33 +1161,10 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
1115 | /* Or can we switch back to PIC here? */ | 1161 | /* Or can we switch back to PIC here? */ |
1116 | } | 1162 | } |
1117 | 1163 | ||
1118 | connect_bsp_APIC(); | 1164 | cpu0_logical_apicid = apic_bsp_setup(false); |
1119 | |||
1120 | /* | ||
1121 | * Switch from PIC to APIC mode. | ||
1122 | */ | ||
1123 | setup_local_APIC(); | ||
1124 | |||
1125 | if (x2apic_mode) | ||
1126 | cpu0_logical_apicid = apic_read(APIC_LDR); | ||
1127 | else | ||
1128 | cpu0_logical_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); | ||
1129 | |||
1130 | /* | ||
1131 | * Enable IO APIC before setting up error vector | ||
1132 | */ | ||
1133 | if (!skip_ioapic_setup && nr_ioapics) | ||
1134 | enable_IO_APIC(); | ||
1135 | |||
1136 | bsp_end_local_APIC_setup(); | ||
1137 | smpboot_setup_io_apic(); | ||
1138 | /* | ||
1139 | * Set up local APIC timer on boot CPU. | ||
1140 | */ | ||
1141 | 1165 | ||
1142 | pr_info("CPU%d: ", 0); | 1166 | pr_info("CPU%d: ", 0); |
1143 | print_cpu_info(&cpu_data(0)); | 1167 | print_cpu_info(&cpu_data(0)); |
1144 | x86_init.timers.setup_percpu_clockev(); | ||
1145 | 1168 | ||
1146 | if (is_uv_system()) | 1169 | if (is_uv_system()) |
1147 | uv_system_init(); | 1170 | uv_system_init(); |
@@ -1177,9 +1200,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus) | |||
1177 | 1200 | ||
1178 | nmi_selftest(); | 1201 | nmi_selftest(); |
1179 | impress_friends(); | 1202 | impress_friends(); |
1180 | #ifdef CONFIG_X86_IO_APIC | ||
1181 | setup_ioapic_dest(); | 1203 | setup_ioapic_dest(); |
1182 | #endif | ||
1183 | mtrr_aps_init(); | 1204 | mtrr_aps_init(); |
1184 | } | 1205 | } |
1185 | 1206 | ||
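smpboot_setup_warm_reset_vector(), now moved into this file, stores the trampoline entry point as a real-mode segment:offset pair in the BIOS data area, which is why start_eip is split with ">> 4" and "& 0xf". A small worked example of that split (the address is an assumed sub-1 MiB trampoline location):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t start_eip = 0x0009a000;	/* assumed trampoline address */
	uint16_t segment = start_eip >> 4;	/* written to TRAMPOLINE_PHYS_HIGH */
	uint16_t offset  = start_eip & 0xf;	/* written to TRAMPOLINE_PHYS_LOW  */

	/* Real mode resolves segment * 16 + offset, which recovers start_eip. */
	printf("%#x:%#x -> %#x\n", segment, offset, segment * 16 + offset);
	return 0;
}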
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 88900e288021..9d2073e2ecc9 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -108,6 +108,88 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) | |||
108 | preempt_count_dec(); | 108 | preempt_count_dec(); |
109 | } | 109 | } |
110 | 110 | ||
111 | enum ctx_state ist_enter(struct pt_regs *regs) | ||
112 | { | ||
113 | enum ctx_state prev_state; | ||
114 | |||
115 | if (user_mode_vm(regs)) { | ||
116 | /* Other than that, we're just an exception. */ | ||
117 | prev_state = exception_enter(); | ||
118 | } else { | ||
119 | /* | ||
120 | * We might have interrupted pretty much anything. In | ||
121 | * fact, if we're a machine check, we can even interrupt | ||
122 | * NMI processing. We don't want in_nmi() to return true, | ||
123 | * but we need to notify RCU. | ||
124 | */ | ||
125 | rcu_nmi_enter(); | ||
126 | prev_state = IN_KERNEL; /* the value is irrelevant. */ | ||
127 | } | ||
128 | |||
129 | /* | ||
130 | * We are atomic because we're on the IST stack (or we're on x86_32, | ||
131 | * in which case we still shouldn't schedule). | ||
132 | * | ||
133 | * This must be after exception_enter(), because exception_enter() | ||
134 | * won't do anything if in_interrupt() returns true. | ||
135 | */ | ||
136 | preempt_count_add(HARDIRQ_OFFSET); | ||
137 | |||
138 | /* This code is a bit fragile. Test it. */ | ||
139 | rcu_lockdep_assert(rcu_is_watching(), "ist_enter didn't work"); | ||
140 | |||
141 | return prev_state; | ||
142 | } | ||
143 | |||
144 | void ist_exit(struct pt_regs *regs, enum ctx_state prev_state) | ||
145 | { | ||
146 | /* Must be before exception_exit. */ | ||
147 | preempt_count_sub(HARDIRQ_OFFSET); | ||
148 | |||
149 | if (user_mode_vm(regs)) | ||
150 | return exception_exit(prev_state); | ||
151 | else | ||
152 | rcu_nmi_exit(); | ||
153 | } | ||
154 | |||
155 | /** | ||
156 | * ist_begin_non_atomic() - begin a non-atomic section in an IST exception | ||
157 | * @regs: regs passed to the IST exception handler | ||
158 | * | ||
159 | * IST exception handlers normally cannot schedule. As a special | ||
160 | * exception, if the exception interrupted userspace code (i.e. | ||
161 | * user_mode_vm(regs) would return true) and the exception was not | ||
162 | * a double fault, it can be safe to schedule. ist_begin_non_atomic() | ||
163 | * begins a non-atomic section within an ist_enter()/ist_exit() region. | ||
164 | * Callers are responsible for enabling interrupts themselves inside | ||
165 | * the non-atomic section, and callers must call ist_end_non_atomic() | ||
166 | * before ist_exit(). | ||
167 | */ | ||
168 | void ist_begin_non_atomic(struct pt_regs *regs) | ||
169 | { | ||
170 | BUG_ON(!user_mode_vm(regs)); | ||
171 | |||
172 | /* | ||
173 | * Sanity check: we need to be on the normal thread stack. This | ||
174 | * will catch asm bugs and any attempt to use ist_preempt_enable | ||
175 | * from double_fault. | ||
176 | */ | ||
177 | BUG_ON(((current_stack_pointer() ^ this_cpu_read_stable(kernel_stack)) | ||
178 | & ~(THREAD_SIZE - 1)) != 0); | ||
179 | |||
180 | preempt_count_sub(HARDIRQ_OFFSET); | ||
181 | } | ||
182 | |||
183 | /** | ||
184 | * ist_end_non_atomic() - end a non-atomic section in an IST exception | ||
185 | * | ||
186 | * Ends a non-atomic section started with ist_begin_non_atomic(). | ||
187 | */ | ||
188 | void ist_end_non_atomic(void) | ||
189 | { | ||
190 | preempt_count_add(HARDIRQ_OFFSET); | ||
191 | } | ||
192 | |||
111 | static nokprobe_inline int | 193 | static nokprobe_inline int |
112 | do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, | 194 | do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, |
113 | struct pt_regs *regs, long error_code) | 195 | struct pt_regs *regs, long error_code) |
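A minimal sketch of how an IST exception handler is expected to use this API, modeled on the do_int3() conversion below (the handler name and body are illustrative, not part of this diff):

	dotraplinkage void do_example_ist(struct pt_regs *regs, long error_code)
	{
		enum ctx_state prev_state;

		prev_state = ist_enter(regs);	/* tell RCU/context tracking we're here */

		/* ... atomic part of the handler: must not sleep or schedule ... */

		if (user_mode_vm(regs)) {
			ist_begin_non_atomic(regs);	/* scheduling allowed from here */
			local_irq_enable();
			/* ... e.g. deliver a signal to current ... */
			local_irq_disable();
			ist_end_non_atomic();
		}

		ist_exit(regs, prev_state);
	}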
@@ -251,6 +333,8 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) | |||
251 | * end up promoting it to a doublefault. In that case, modify | 333 | * end up promoting it to a doublefault. In that case, modify |
252 | * the stack to make it look like we just entered the #GP | 334 | * the stack to make it look like we just entered the #GP |
253 | * handler from user space, similar to bad_iret. | 335 | * handler from user space, similar to bad_iret. |
336 | * | ||
337 | * No need for ist_enter here because we don't use RCU. | ||
254 | */ | 338 | */ |
255 | if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY && | 339 | if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY && |
256 | regs->cs == __KERNEL_CS && | 340 | regs->cs == __KERNEL_CS && |
@@ -263,12 +347,12 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) | |||
263 | normal_regs->orig_ax = 0; /* Missing (lost) #GP error code */ | 347 | normal_regs->orig_ax = 0; /* Missing (lost) #GP error code */ |
264 | regs->ip = (unsigned long)general_protection; | 348 | regs->ip = (unsigned long)general_protection; |
265 | regs->sp = (unsigned long)&normal_regs->orig_ax; | 349 | regs->sp = (unsigned long)&normal_regs->orig_ax; |
350 | |||
266 | return; | 351 | return; |
267 | } | 352 | } |
268 | #endif | 353 | #endif |
269 | 354 | ||
270 | exception_enter(); | 355 | ist_enter(regs); /* Discard prev_state because we won't return. */ |
271 | /* Return not checked because double check cannot be ignored */ | ||
272 | notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); | 356 | notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); |
273 | 357 | ||
274 | tsk->thread.error_code = error_code; | 358 | tsk->thread.error_code = error_code; |
@@ -434,7 +518,7 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) | |||
434 | if (poke_int3_handler(regs)) | 518 | if (poke_int3_handler(regs)) |
435 | return; | 519 | return; |
436 | 520 | ||
437 | prev_state = exception_enter(); | 521 | prev_state = ist_enter(regs); |
438 | #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP | 522 | #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP |
439 | if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, | 523 | if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, |
440 | SIGTRAP) == NOTIFY_STOP) | 524 | SIGTRAP) == NOTIFY_STOP) |
@@ -460,33 +544,20 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) | |||
460 | preempt_conditional_cli(regs); | 544 | preempt_conditional_cli(regs); |
461 | debug_stack_usage_dec(); | 545 | debug_stack_usage_dec(); |
462 | exit: | 546 | exit: |
463 | exception_exit(prev_state); | 547 | ist_exit(regs, prev_state); |
464 | } | 548 | } |
465 | NOKPROBE_SYMBOL(do_int3); | 549 | NOKPROBE_SYMBOL(do_int3); |
466 | 550 | ||
467 | #ifdef CONFIG_X86_64 | 551 | #ifdef CONFIG_X86_64 |
468 | /* | 552 | /* |
469 | * Help handler running on IST stack to switch back to user stack | 553 | * Help handler running on IST stack to switch off the IST stack if the |
470 | * for scheduling or signal handling. The actual stack switch is done in | 554 | * interrupted code was in user mode. The actual stack switch is done in |
471 | * entry.S | 555 | * entry_64.S |
472 | */ | 556 | */ |
473 | asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs) | 557 | asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs) |
474 | { | 558 | { |
475 | struct pt_regs *regs = eregs; | 559 | struct pt_regs *regs = task_pt_regs(current); |
476 | /* Did already sync */ | 560 | *regs = *eregs; |
477 | if (eregs == (struct pt_regs *)eregs->sp) | ||
478 | ; | ||
479 | /* Exception from user space */ | ||
480 | else if (user_mode(eregs)) | ||
481 | regs = task_pt_regs(current); | ||
482 | /* | ||
483 | * Exception from kernel and interrupts are enabled. Move to | ||
484 | * kernel process stack. | ||
485 | */ | ||
486 | else if (eregs->flags & X86_EFLAGS_IF) | ||
487 | regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs)); | ||
488 | if (eregs != regs) | ||
489 | *regs = *eregs; | ||
490 | return regs; | 561 | return regs; |
491 | } | 562 | } |
492 | NOKPROBE_SYMBOL(sync_regs); | 563 | NOKPROBE_SYMBOL(sync_regs); |
@@ -554,7 +625,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) | |||
554 | unsigned long dr6; | 625 | unsigned long dr6; |
555 | int si_code; | 626 | int si_code; |
556 | 627 | ||
557 | prev_state = exception_enter(); | 628 | prev_state = ist_enter(regs); |
558 | 629 | ||
559 | get_debugreg(dr6, 6); | 630 | get_debugreg(dr6, 6); |
560 | 631 | ||
@@ -629,7 +700,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) | |||
629 | debug_stack_usage_dec(); | 700 | debug_stack_usage_dec(); |
630 | 701 | ||
631 | exit: | 702 | exit: |
632 | exception_exit(prev_state); | 703 | ist_exit(regs, prev_state); |
633 | } | 704 | } |
634 | NOKPROBE_SYMBOL(do_debug); | 705 | NOKPROBE_SYMBOL(do_debug); |
635 | 706 | ||
@@ -788,18 +859,16 @@ void math_state_restore(void) | |||
788 | local_irq_disable(); | 859 | local_irq_disable(); |
789 | } | 860 | } |
790 | 861 | ||
862 | /* Avoid __kernel_fpu_begin() right after __thread_fpu_begin() */ | ||
863 | kernel_fpu_disable(); | ||
791 | __thread_fpu_begin(tsk); | 864 | __thread_fpu_begin(tsk); |
792 | |||
793 | /* | ||
794 | * Paranoid restore. send a SIGSEGV if we fail to restore the state. | ||
795 | */ | ||
796 | if (unlikely(restore_fpu_checking(tsk))) { | 865 | if (unlikely(restore_fpu_checking(tsk))) { |
797 | drop_init_fpu(tsk); | 866 | drop_init_fpu(tsk); |
798 | force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk); | 867 | force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk); |
799 | return; | 868 | } else { |
869 | tsk->thread.fpu_counter++; | ||
800 | } | 870 | } |
801 | 871 | kernel_fpu_enable(); | |
802 | tsk->thread.fpu_counter++; | ||
803 | } | 872 | } |
804 | EXPORT_SYMBOL_GPL(math_state_restore); | 873 | EXPORT_SYMBOL_GPL(math_state_restore); |
805 | 874 | ||
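kernel_fpu_disable()/kernel_fpu_enable() are defined elsewhere in this series; a plausible sketch of the guard they provide (the per-cpu flag and exact checks are assumptions):

	/* Sketch: a per-cpu flag that irq_fpu_usable() can consult. */
	static DEFINE_PER_CPU(bool, in_kernel_fpu);

	void kernel_fpu_disable(void)
	{
		WARN_ON_ONCE(this_cpu_read(in_kernel_fpu));
		this_cpu_write(in_kernel_fpu, true);	/* block __kernel_fpu_begin() */
	}

	void kernel_fpu_enable(void)
	{
		this_cpu_write(in_kernel_fpu, false);
	}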
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 8b96a947021f..81f8adb0679e 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c | |||
@@ -66,27 +66,54 @@ | |||
66 | * Good-instruction tables for 32-bit apps. This is non-const and volatile | 66 | * Good-instruction tables for 32-bit apps. This is non-const and volatile |
67 | * to keep gcc from statically optimizing it out, as variable_test_bit makes | 67 | * to keep gcc from statically optimizing it out, as variable_test_bit makes |
68 | * some versions of gcc think only *(unsigned long*) is used. | 68 | * some versions of gcc think only *(unsigned long*) is used. |
69 | * | ||
70 | * Opcodes we'll probably never support: | ||
71 | * 6c-6f - ins,outs. SEGVs if used in userspace | ||
72 | * e4-e7 - in,out imm. SEGVs if used in userspace | ||
73 | * ec-ef - in,out acc. SEGVs if used in userspace | ||
74 | * cc - int3. SIGTRAP if used in userspace | ||
75 | * ce - into. Not used in userspace - no kernel support to make it useful. SEGVs | ||
76 | * (why do we support bound (62) then? it's similar, and similarly unused...) | ||
77 | * f1 - int1. SIGTRAP if used in userspace | ||
78 | * f4 - hlt. SEGVs if used in userspace | ||
79 | * fa - cli. SEGVs if used in userspace | ||
80 | * fb - sti. SEGVs if used in userspace | ||
81 | * | ||
82 | * Opcodes which need some work to be supported: | ||
83 | * 07,17,1f - pop es/ss/ds | ||
84 | * Normally not used in userspace, but would execute if used. | ||
85 | * Can cause a GP or stack exception if they try to load a wrong segment descriptor. | ||
86 | * We hesitate to run them under single step since kernel's handling | ||
87 | * of userspace single-stepping (TF flag) is fragile. | ||
88 | * We can easily refuse to support push es/cs/ss/ds (06/0e/16/1e) | ||
89 | * on the same grounds that they are never used. | ||
90 | * cd - int N. | ||
91 | * Used by userspace for "int 80" syscall entry. (Other "int N" | ||
92 | * cause GP -> SEGV since their IDT gates don't allow calls from CPL 3). | ||
93 | * Not supported since kernel's handling of userspace single-stepping | ||
94 | * (TF flag) is fragile. | ||
95 | * cf - iret. Normally not used in userspace. Doesn't SEGV unless arguments are bad | ||
69 | */ | 96 | */ |
70 | #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) | 97 | #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) |
71 | static volatile u32 good_insns_32[256 / 32] = { | 98 | static volatile u32 good_insns_32[256 / 32] = { |
72 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | 99 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ |
73 | /* ---------------------------------------------- */ | 100 | /* ---------------------------------------------- */ |
74 | W(0x00, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) | /* 00 */ | 101 | W(0x00, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 00 */ |
75 | W(0x10, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) , /* 10 */ | 102 | W(0x10, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) , /* 10 */ |
76 | W(0x20, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) | /* 20 */ | 103 | W(0x20, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */ |
77 | W(0x30, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) , /* 30 */ | 104 | W(0x30, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 30 */ |
78 | W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ | 105 | W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ |
79 | W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ | 106 | W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ |
80 | W(0x60, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */ | 107 | W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */ |
81 | W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */ | 108 | W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */ |
82 | W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ | 109 | W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ |
83 | W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ | 110 | W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ |
84 | W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */ | 111 | W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */ |
85 | W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ | 112 | W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ |
86 | W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */ | 113 | W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */ |
87 | W(0xd0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ | 114 | W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ |
88 | W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */ | 115 | W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */ |
89 | W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */ | 116 | W(0xf0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */ |
90 | /* ---------------------------------------------- */ | 117 | /* ---------------------------------------------- */ |
91 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | 118 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ |
92 | }; | 119 | }; |
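Each W() row packs sixteen per-opcode flags into the bitmap words above, so a validity check is a plain bit test; a small sketch (the helper name is illustrative):

	/* Sketch: true if the first opcode byte is marked good in the given table. */
	static bool opcode_is_good(u8 opcode, volatile u32 *good_insns)
	{
		return test_bit(opcode, (unsigned long *)good_insns);
	}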
@@ -94,27 +121,61 @@ static volatile u32 good_insns_32[256 / 32] = { | |||
94 | #define good_insns_32 NULL | 121 | #define good_insns_32 NULL |
95 | #endif | 122 | #endif |
96 | 123 | ||
97 | /* Good-instruction tables for 64-bit apps */ | 124 | /* Good-instruction tables for 64-bit apps. |
125 | * | ||
126 | * Genuinely invalid opcodes: | ||
127 | * 06,07 - formerly push/pop es | ||
128 | * 0e - formerly push cs | ||
129 | * 16,17 - formerly push/pop ss | ||
130 | * 1e,1f - formerly push/pop ds | ||
131 | * 27,2f,37,3f - formerly daa/das/aaa/aas | ||
132 | * 60,61 - formerly pusha/popa | ||
133 | * 62 - formerly bound. EVEX prefix for AVX512 (not yet supported) | ||
134 | * 82 - formerly redundant encoding of Group1 | ||
135 | * 9a - formerly call seg:ofs | ||
136 | * ce - formerly into | ||
137 | * d4,d5 - formerly aam/aad | ||
138 | * d6 - formerly undocumented salc | ||
139 | * ea - formerly jmp seg:ofs | ||
140 | * | ||
141 | * Opcodes we'll probably never support: | ||
142 | * 6c-6f - ins,outs. SEGVs if used in userspace | ||
143 | * e4-e7 - in,out imm. SEGVs if used in userspace | ||
144 | * ec-ef - in,out acc. SEGVs if used in userspace | ||
145 | * cc - int3. SIGTRAP if used in userspace | ||
146 | * f1 - int1. SIGTRAP if used in userspace | ||
147 | * f4 - hlt. SEGVs if used in userspace | ||
148 | * fa - cli. SEGVs if used in userspace | ||
149 | * fb - sti. SEGVs if used in userspace | ||
150 | * | ||
151 | * Opcodes which need some work to be supported: | ||
152 | * cd - int N. | ||
153 | * Used by userspace for "int 80" syscall entry. (Other "int N" | ||
154 | * cause GP -> SEGV since their IDT gates don't allow calls from CPL 3). | ||
155 | * Not supported since kernel's handling of userspace single-stepping | ||
156 | * (TF flag) is fragile. | ||
157 | * cf - iret. Normally not used in userspace. Doesn't SEGV unless arguments are bad | ||
158 | */ | ||
98 | #if defined(CONFIG_X86_64) | 159 | #if defined(CONFIG_X86_64) |
99 | static volatile u32 good_insns_64[256 / 32] = { | 160 | static volatile u32 good_insns_64[256 / 32] = { |
100 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | 161 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ |
101 | /* ---------------------------------------------- */ | 162 | /* ---------------------------------------------- */ |
102 | W(0x00, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 00 */ | 163 | W(0x00, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* 00 */ |
103 | W(0x10, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 10 */ | 164 | W(0x10, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 10 */ |
104 | W(0x20, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 20 */ | 165 | W(0x20, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) | /* 20 */ |
105 | W(0x30, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 30 */ | 166 | W(0x30, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) , /* 30 */ |
106 | W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */ | 167 | W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ |
107 | W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ | 168 | W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ |
108 | W(0x60, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */ | 169 | W(0x60, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */ |
109 | W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */ | 170 | W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */ |
110 | W(0x80, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ | 171 | W(0x80, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ |
111 | W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ | 172 | W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1) , /* 90 */ |
112 | W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */ | 173 | W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */ |
113 | W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ | 174 | W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ |
114 | W(0xc0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */ | 175 | W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */ |
115 | W(0xd0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ | 176 | W(0xd0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ |
116 | W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */ | 177 | W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0) | /* e0 */ |
117 | W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */ | 178 | W(0xf0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */ |
118 | /* ---------------------------------------------- */ | 179 | /* ---------------------------------------------- */ |
119 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | 180 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ |
120 | }; | 181 | }; |
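Which one-byte table applies depends on the bitness of the probed task; a dispatch sketch building on opcode_is_good() above (names are illustrative):

	/* Sketch: 64-bit tasks use good_insns_64, 32-bit tasks use good_insns_32. */
	static bool opcode1_allowed(u8 opcode, bool task_is_64bit)
	{
		return opcode_is_good(opcode,
				      task_is_64bit ? good_insns_64 : good_insns_32);
	}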
@@ -122,49 +183,55 @@ static volatile u32 good_insns_64[256 / 32] = { | |||
122 | #define good_insns_64 NULL | 183 | #define good_insns_64 NULL |
123 | #endif | 184 | #endif |
124 | 185 | ||
125 | /* Using this for both 64-bit and 32-bit apps */ | 186 | /* Using this for both 64-bit and 32-bit apps. |
187 | * Opcodes we don't support: | ||
188 | * 0f 00 - SLDT/STR/LLDT/LTR/VERR/VERW/-/- group. System insns | ||
189 | * 0f 01 - SGDT/SIDT/LGDT/LIDT/SMSW/-/LMSW/INVLPG group. | ||
190 | * Also encodes tons of other system insns if mod=11. | ||
191 | * Some are in fact non-system: xend, xtest, rdtscp, maybe more | ||
192 | * 0f 05 - syscall | ||
193 | * 0f 06 - clts (CPL0 insn) | ||
194 | * 0f 07 - sysret | ||
195 | * 0f 08 - invd (CPL0 insn) | ||
196 | * 0f 09 - wbinvd (CPL0 insn) | ||
197 | * 0f 0b - ud2 | ||
198 | * 0f 30 - wrmsr (CPL0 insn) (then why is rdmsr allowed? it's also a CPL0 insn) | ||
199 | * 0f 34 - sysenter | ||
200 | * 0f 35 - sysexit | ||
201 | * 0f 37 - getsec | ||
202 | * 0f 78 - vmread (Intel VMX. CPL0 insn) | ||
203 | * 0f 79 - vmwrite (Intel VMX. CPL0 insn) | ||
204 | * Note: with prefixes, these two opcodes are | ||
205 | * extrq/insertq/AVX512 convert vector ops. | ||
206 | * 0f ae - group15: [f]xsave,[f]xrstor,[v]{ld,st}mxcsr,clflush[opt], | ||
207 | * {rd,wr}{fs,gs}base,{s,l,m}fence. | ||
208 | * Why? They are all user-executable. | ||
209 | */ | ||
126 | static volatile u32 good_2byte_insns[256 / 32] = { | 210 | static volatile u32 good_2byte_insns[256 / 32] = { |
127 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | 211 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ |
128 | /* ---------------------------------------------- */ | 212 | /* ---------------------------------------------- */ |
129 | W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */ | 213 | W(0x00, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1) | /* 00 */ |
130 | W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */ | 214 | W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 10 */ |
131 | W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */ | 215 | W(0x20, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */ |
132 | W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */ | 216 | W(0x30, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) , /* 30 */ |
133 | W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ | 217 | W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ |
134 | W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ | 218 | W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ |
135 | W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */ | 219 | W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */ |
136 | W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */ | 220 | W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1) , /* 70 */ |
137 | W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ | 221 | W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ |
138 | W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ | 222 | W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ |
139 | W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */ | 223 | W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */ |
140 | W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ | 224 | W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ |
141 | W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */ | 225 | W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */ |
142 | W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ | 226 | W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ |
143 | W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */ | 227 | W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */ |
144 | W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* f0 */ | 228 | W(0xf0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) /* f0 */ |
145 | /* ---------------------------------------------- */ | 229 | /* ---------------------------------------------- */ |
146 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | 230 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ |
147 | }; | 231 | }; |
148 | #undef W | 232 | #undef W |
149 | 233 | ||
150 | /* | 234 | /* |
151 | * opcodes we'll probably never support: | ||
152 | * | ||
153 | * 6c-6d, e4-e5, ec-ed - in | ||
154 | * 6e-6f, e6-e7, ee-ef - out | ||
155 | * cc, cd - int3, int | ||
156 | * cf - iret | ||
157 | * d6 - illegal instruction | ||
158 | * f1 - int1/icebp | ||
159 | * f4 - hlt | ||
160 | * fa, fb - cli, sti | ||
161 | * 0f - lar, lsl, syscall, clts, sysret, sysenter, sysexit, invd, wbinvd, ud2 | ||
162 | * | ||
163 | * invalid opcodes in 64-bit mode: | ||
164 | * | ||
165 | * 06, 0e, 16, 1e, 27, 2f, 37, 3f, 60-62, 82, c4-c5, d4-d5 | ||
166 | * 63 - we support this opcode in x86_64 but not in i386. | ||
167 | * | ||
168 | * opcodes we may need to refine support for: | 235 | * opcodes we may need to refine support for: |
169 | * | 236 | * |
170 | * 0f - 2-byte instructions: For many of these instructions, the validity | 237 | * 0f - 2-byte instructions: For many of these instructions, the validity |
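Opcodes starting with the 0x0f escape byte are checked against the shared two-byte table instead; a sketch of that fallback (the struct insn field names mirror the kernel's instruction decoder, the function itself is illustrative):

	/* Sketch: after "0f", the second opcode byte decides validity. */
	static bool opcode2_allowed(struct insn *insn)
	{
		return insn->opcode.nbytes == 2 &&
		       insn->opcode.bytes[0] == 0x0f &&
		       test_bit(insn->opcode.bytes[1],
				(unsigned long *)good_2byte_insns);
	}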
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 040681928e9d..37d8fa4438f0 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c | |||
@@ -50,13 +50,19 @@ EXPORT_SYMBOL(csum_partial); | |||
50 | #undef memset | 50 | #undef memset |
51 | #undef memmove | 51 | #undef memmove |
52 | 52 | ||
53 | extern void *__memset(void *, int, __kernel_size_t); | ||
54 | extern void *__memcpy(void *, const void *, __kernel_size_t); | ||
55 | extern void *__memmove(void *, const void *, __kernel_size_t); | ||
53 | extern void *memset(void *, int, __kernel_size_t); | 56 | extern void *memset(void *, int, __kernel_size_t); |
54 | extern void *memcpy(void *, const void *, __kernel_size_t); | 57 | extern void *memcpy(void *, const void *, __kernel_size_t); |
55 | extern void *__memcpy(void *, const void *, __kernel_size_t); | 58 | extern void *memmove(void *, const void *, __kernel_size_t); |
59 | |||
60 | EXPORT_SYMBOL(__memset); | ||
61 | EXPORT_SYMBOL(__memcpy); | ||
62 | EXPORT_SYMBOL(__memmove); | ||
56 | 63 | ||
57 | EXPORT_SYMBOL(memset); | 64 | EXPORT_SYMBOL(memset); |
58 | EXPORT_SYMBOL(memcpy); | 65 | EXPORT_SYMBOL(memcpy); |
59 | EXPORT_SYMBOL(__memcpy); | ||
60 | EXPORT_SYMBOL(memmove); | 66 | EXPORT_SYMBOL(memmove); |
61 | 67 | ||
62 | #ifndef CONFIG_DEBUG_VIRTUAL | 68 | #ifndef CONFIG_DEBUG_VIRTUAL |
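Exporting both the plain and the double-underscore string routines lets code that must stay uninstrumented call the raw variants directly; one plausible wiring (the exact macro location is an assumption, not part of this diff):

	/* Sketch: non-instrumented files fall back to the raw implementations. */
	#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
	#define memcpy(dst, src, len)	__memcpy(dst, src, len)
	#define memmove(dst, src, len)	__memmove(dst, src, len)
	#define memset(s, c, n)		__memset(s, c, n)
	#endif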
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 0de1fae2bdf0..34f66e58a896 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <asm/i387.h> | 12 | #include <asm/i387.h> |
13 | #include <asm/fpu-internal.h> | 13 | #include <asm/fpu-internal.h> |
14 | #include <asm/sigframe.h> | 14 | #include <asm/sigframe.h> |
15 | #include <asm/tlbflush.h> | ||
15 | #include <asm/xcr.h> | 16 | #include <asm/xcr.h> |
16 | 17 | ||
17 | /* | 18 | /* |
@@ -453,7 +454,7 @@ static void prepare_fx_sw_frame(void) | |||
453 | */ | 454 | */ |
454 | static inline void xstate_enable(void) | 455 | static inline void xstate_enable(void) |
455 | { | 456 | { |
456 | set_in_cr4(X86_CR4_OSXSAVE); | 457 | cr4_set_bits(X86_CR4_OSXSAVE); |
457 | xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); | 458 | xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); |
458 | } | 459 | } |
459 | 460 | ||
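cr4_set_bits() replaces the old set_in_cr4() and maintains a per-cpu shadow of CR4 so reads avoid the register; a rough sketch of the idea (the shadow field and write helper names are assumptions):

	/* Sketch: update the shadow first, touch the real CR4 only when bits change. */
	static inline void cr4_set_bits(unsigned long mask)
	{
		unsigned long cr4 = this_cpu_read(cpu_tlbstate.cr4);

		if ((cr4 | mask) != cr4) {
			cr4 |= mask;
			this_cpu_write(cpu_tlbstate.cr4, cr4);
			__write_cr4(cr4);
		}
	}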