author	Dmitry Torokhov <dmitry.torokhov@gmail.com>	2012-03-19 20:02:01 -0400
committer	Dmitry Torokhov <dmitry.torokhov@gmail.com>	2012-03-19 20:02:01 -0400
commit	10ce3cc919f50c2043b41ca968b43c26a3672600 (patch)
tree	ea409366a5208aced495bc0516a08b81fd43222e /arch/x86/kernel
parent	24e3e5ae1e4c2a3a32f5b1f96b4e3fd721806acd (diff)
parent	5c6a7a62c130afef3d61c1dee153012231ff5cd9 (diff)
Merge branch 'next' into for-linus
Diffstat (limited to 'arch/x86/kernel')
86 files changed, 2462 insertions, 1448 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 8baca3c4871c..5369059c07a9 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -25,7 +25,8 @@ obj-$(CONFIG_IRQ_WORK) += irq_work.o
 obj-y			+= probe_roms.o
 obj-$(CONFIG_X86_32)	+= sys_i386_32.o i386_ksyms_32.o
 obj-$(CONFIG_X86_64)	+= sys_x86_64.o x8664_ksyms_64.o
-obj-$(CONFIG_X86_64)	+= syscall_64.o vsyscall_64.o
+obj-y			+= syscall_$(BITS).o
+obj-$(CONFIG_X86_64)	+= vsyscall_64.o
 obj-$(CONFIG_X86_64)	+= vsyscall_emu_64.o
 obj-y			+= bootflag.o e820.o
 obj-y			+= pci-dma.o quirks.o topology.o kdebugfs.o
@@ -80,6 +81,7 @@ obj-$(CONFIG_APB_TIMER) += apb_timer.o
 obj-$(CONFIG_AMD_NB)		+= amd_nb.o
 obj-$(CONFIG_DEBUG_RODATA_TEST)	+= test_rodata.o
 obj-$(CONFIG_DEBUG_NX_TEST)	+= test_nx.o
+obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o
 
 obj-$(CONFIG_KVM_GUEST)		+= kvm.o
 obj-$(CONFIG_KVM_CLOCK)		+= kvmclock.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 4558f0d0822d..ce664f33ea8e 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -219,6 +219,8 @@ static int __init
 acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
 {
 	struct acpi_madt_local_x2apic *processor = NULL;
+	int apic_id;
+	u8 enabled;
 
 	processor = (struct acpi_madt_local_x2apic *)header;
 
@@ -227,6 +229,8 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
 
 	acpi_table_print_madt_entry(header);
 
+	apic_id = processor->local_apic_id;
+	enabled = processor->lapic_flags & ACPI_MADT_ENABLED;
 #ifdef CONFIG_X86_X2APIC
 	/*
 	 * We need to register disabled CPU as well to permit
@@ -235,8 +239,10 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
 	 * to not preallocating memory for all NR_CPUS
 	 * when we use CPU hotplug.
 	 */
-	acpi_register_lapic(processor->local_apic_id,	/* APIC ID */
-			    processor->lapic_flags & ACPI_MADT_ENABLED);
+	if (!cpu_has_x2apic && (apic_id >= 0xff) && enabled)
+		printk(KERN_WARNING PREFIX "x2apic entry ignored\n");
+	else
+		acpi_register_lapic(apic_id, enabled);
 #else
 	printk(KERN_WARNING PREFIX "x2apic entry ignored\n");
 #endif
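The new check above encodes an addressability rule: in xAPIC mode the destination APIC ID is an 8-bit field, so an enabled CPU with an ID of 0xff or above is unreachable unless the processor itself supports x2APIC. A minimal stand-alone sketch of that predicate (illustrative only, not kernel source):

    #include <stdbool.h>
    #include <stdint.h>

    /* Mirrors the hunk's test: ignore enabled entries whose ID cannot fit
     * the 8-bit xAPIC destination field on CPUs without x2APIC support. */
    static bool x2apic_entry_usable(uint32_t apic_id, bool enabled,
				    bool cpu_has_x2apic)
    {
	    if (!cpu_has_x2apic && apic_id >= 0xff && enabled)
		    return false;	/* would log "x2apic entry ignored" */
	    return true;
    }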
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 4c39baa8facc..be16854591cc 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -119,20 +119,49 @@ bool __init early_is_amd_nb(u32 device)
 	return false;
 }
 
+struct resource *amd_get_mmconfig_range(struct resource *res)
+{
+	u32 address;
+	u64 base, msr;
+	unsigned segn_busn_bits;
+
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
+		return NULL;
+
+	/* assume all cpus from fam10h have mmconfig */
+	if (boot_cpu_data.x86 < 0x10)
+		return NULL;
+
+	address = MSR_FAM10H_MMIO_CONF_BASE;
+	rdmsrl(address, msr);
+
+	/* mmconfig is not enabled */
+	if (!(msr & FAM10H_MMIO_CONF_ENABLE))
+		return NULL;
+
+	base = msr & (FAM10H_MMIO_CONF_BASE_MASK<<FAM10H_MMIO_CONF_BASE_SHIFT);
+
+	segn_busn_bits = (msr >> FAM10H_MMIO_CONF_BUSRANGE_SHIFT) &
+			 FAM10H_MMIO_CONF_BUSRANGE_MASK;
+
+	res->flags = IORESOURCE_MEM;
+	res->start = base;
+	res->end = base + (1ULL<<(segn_busn_bits + 20)) - 1;
+	return res;
+}
+
 int amd_get_subcaches(int cpu)
 {
 	struct pci_dev *link = node_to_amd_nb(amd_get_nb_id(cpu))->link;
 	unsigned int mask;
-	int cuid = 0;
+	int cuid;
 
 	if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
 		return 0;
 
 	pci_read_config_dword(link, 0x1d4, &mask);
 
-#ifdef CONFIG_SMP
 	cuid = cpu_data(cpu).compute_unit_id;
-#endif
 	return (mask >> (4 * cuid)) & 0xf;
 }
 
@@ -141,7 +170,7 @@ int amd_set_subcaches(int cpu, int mask)
 	static unsigned int reset, ban;
 	struct amd_northbridge *nb = node_to_amd_nb(amd_get_nb_id(cpu));
 	unsigned int reg;
-	int cuid = 0;
+	int cuid;
 
 	if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING) || mask > 0xf)
 		return -EINVAL;
@@ -159,9 +188,7 @@ int amd_set_subcaches(int cpu, int mask)
 		pci_write_config_dword(nb->misc, 0x1b8, reg & ~0x180000);
 	}
 
-#ifdef CONFIG_SMP
 	cuid = cpu_data(cpu).compute_unit_id;
-#endif
 	mask <<= 4 * cuid;
 	mask |= (0xf ^ (1 << cuid)) << 26;
 
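For reference, the size arithmetic in amd_get_mmconfig_range() can be checked in isolation: the BusRange field is log2 of the bus count, and each bus takes 1 MiB (20 bits) of MMCONFIG space. A user-space sketch, with the field layout assumed from the FAM10H_MMIO_CONF_* definitions and a made-up MSR value:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
	    uint64_t msr = 0xe0000009;	/* hypothetical: enabled, busrange=2, base 0xe0000000 */
	    unsigned busrange = (msr >> 2) & 0xf;		/* log2(number of buses) */
	    uint64_t base = msr & (0xfffffffULL << 20);
	    uint64_t end = base + (1ULL << (busrange + 20)) - 1;

	    printf("MMCONFIG %#llx-%#llx (%u buses)\n",
		   (unsigned long long)base, (unsigned long long)end,
		   1u << busrange);	/* 0xe0000000-0xe03fffff (4 buses) */
	    return 0;
    }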
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 3d2661ca6542..6e76c191a835 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -88,13 +88,13 @@ static u32 __init allocate_aperture(void)
 	 */
 	addr = memblock_find_in_range(GART_MIN_ADDR, GART_MAX_ADDR,
				      aper_size, aper_size);
-	if (addr == MEMBLOCK_ERROR || addr + aper_size > GART_MAX_ADDR) {
+	if (!addr || addr + aper_size > GART_MAX_ADDR) {
 		printk(KERN_ERR
			"Cannot allocate aperture memory hole (%lx,%uK)\n",
			addr, aper_size>>10);
 		return 0;
 	}
-	memblock_x86_reserve_range(addr, addr + aper_size, "aperture64");
+	memblock_reserve(addr, aper_size);
 	/*
 	 * Kmemleak should not scan this block as it may not be mapped via the
 	 * kernel direct mapping.
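Both changes here track the memblock API cleanup in this merge window: memblock_find_in_range() now returns 0 on failure instead of MEMBLOCK_ERROR, and reservations take a (base, size) pair rather than (start, end, name). A minimal sketch of the resulting allocate-and-reserve pattern (hypothetical helper, same calls as above):

    #include <linux/memblock.h>

    static phys_addr_t __init reserve_hole(phys_addr_t min, phys_addr_t max,
					   phys_addr_t size)
    {
	    phys_addr_t addr;

	    addr = memblock_find_in_range(min, max, size, size);
	    if (!addr)			/* 0 now signals failure */
		    return 0;
	    memblock_reserve(addr, size);	/* (base, size) pair */
	    return addr;
    }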
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 767fd04f2843..0ae0323b1f9c 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_SMP) += ipi.o
 
 ifeq ($(CONFIG_X86_64),y)
 # APIC probe will depend on the listing order here
+obj-$(CONFIG_X86_NUMACHIP)	+= apic_numachip.o
 obj-$(CONFIG_X86_UV)		+= x2apic_uv_x.o
 obj-$(CONFIG_X86_X2APIC)	+= x2apic_phys.o
 obj-$(CONFIG_X86_X2APIC)	+= x2apic_cluster.o
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index f98d84caf94c..2eec05b6d1b8 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -146,16 +146,26 @@ __setup("apicpmtimer", setup_apicpmtimer);
 int x2apic_mode;
 #ifdef CONFIG_X86_X2APIC
 /* x2apic enabled before OS handover */
-static int x2apic_preenabled;
+int x2apic_preenabled;
+static int x2apic_disabled;
+static int nox2apic;
 static __init int setup_nox2apic(char *str)
 {
 	if (x2apic_enabled()) {
-		pr_warning("Bios already enabled x2apic, "
-			"can't enforce nox2apic");
-		return 0;
-	}
+		int apicid = native_apic_msr_read(APIC_ID);
+
+		if (apicid >= 255) {
+			pr_warning("Apicid: %08x, cannot enforce nox2apic\n",
+				   apicid);
+			return 0;
+		}
+
+		pr_warning("x2apic already enabled. will disable it\n");
+	} else
+		setup_clear_cpu_cap(X86_FEATURE_X2APIC);
+
+	nox2apic = 1;
 
-	setup_clear_cpu_cap(X86_FEATURE_X2APIC);
 	return 0;
 }
 early_param("nox2apic", setup_nox2apic);
@@ -250,6 +260,7 @@ u32 native_safe_apic_wait_icr_idle(void)
 		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
 		if (!send_status)
 			break;
+		inc_irq_stat(icr_read_retry_count);
 		udelay(100);
 	} while (timeout++ < 1000);
 
@@ -876,8 +887,8 @@ void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
 	 * Besides, if we don't timer interrupts ignore the global
 	 * interrupt lock, which is the WrongThing (tm) to do.
 	 */
-	exit_idle();
 	irq_enter();
+	exit_idle();
 	local_apic_timer_interrupt();
 	irq_exit();
 
@@ -1431,6 +1442,45 @@ void __init bsp_end_local_APIC_setup(void)
 }
 
 #ifdef CONFIG_X86_X2APIC
+/*
+ * Need to disable xapic and x2apic at the same time and then enable xapic mode
+ */
+static inline void __disable_x2apic(u64 msr)
+{
+	wrmsrl(MSR_IA32_APICBASE,
+	       msr & ~(X2APIC_ENABLE | XAPIC_ENABLE));
+	wrmsrl(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE);
+}
+
+static __init void disable_x2apic(void)
+{
+	u64 msr;
+
+	if (!cpu_has_x2apic)
+		return;
+
+	rdmsrl(MSR_IA32_APICBASE, msr);
+	if (msr & X2APIC_ENABLE) {
+		u32 x2apic_id = read_apic_id();
+
+		if (x2apic_id >= 255)
+			panic("Cannot disable x2apic, id: %08x\n", x2apic_id);
+
+		pr_info("Disabling x2apic\n");
+		__disable_x2apic(msr);
+
+		if (nox2apic) {
+			clear_cpu_cap(&cpu_data(0), X86_FEATURE_X2APIC);
+			setup_clear_cpu_cap(X86_FEATURE_X2APIC);
+		}
+
+		x2apic_disabled = 1;
+		x2apic_mode = 0;
+
+		register_lapic_address(mp_lapic_addr);
+	}
+}
+
 void check_x2apic(void)
 {
 	if (x2apic_enabled()) {
@@ -1441,15 +1491,20 @@ void check_x2apic(void)
 
 void enable_x2apic(void)
 {
-	int msr, msr2;
+	u64 msr;
+
+	rdmsrl(MSR_IA32_APICBASE, msr);
+	if (x2apic_disabled) {
+		__disable_x2apic(msr);
+		return;
+	}
 
 	if (!x2apic_mode)
 		return;
 
-	rdmsr(MSR_IA32_APICBASE, msr, msr2);
 	if (!(msr & X2APIC_ENABLE)) {
 		printk_once(KERN_INFO "Enabling x2apic\n");
-		wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, msr2);
+		wrmsrl(MSR_IA32_APICBASE, msr | X2APIC_ENABLE);
 	}
 }
 #endif /* CONFIG_X86_X2APIC */
@@ -1486,25 +1541,34 @@ void __init enable_IR_x2apic(void)
 	ret = save_ioapic_entries();
 	if (ret) {
 		pr_info("Saving IO-APIC state failed: %d\n", ret);
-		goto out;
+		return;
 	}
 
 	local_irq_save(flags);
 	legacy_pic->mask_all();
 	mask_ioapic_entries();
 
+	if (x2apic_preenabled && nox2apic)
+		disable_x2apic();
+
 	if (dmar_table_init_ret)
 		ret = -1;
 	else
 		ret = enable_IR();
 
+	if (!x2apic_supported())
+		goto skip_x2apic;
+
 	if (ret < 0) {
 		/* IR is required if there is APIC ID > 255 even when running
 		 * under KVM
 		 */
 		if (max_physical_apicid > 255 ||
-		    !hypervisor_x2apic_available())
-			goto nox2apic;
+		    !hypervisor_x2apic_available()) {
+			if (x2apic_preenabled)
+				disable_x2apic();
+			goto skip_x2apic;
+		}
 		/*
 		 * without IR all CPUs can be addressed by IOAPIC/MSI
 		 * only in physical mode
@@ -1512,8 +1576,10 @@ void __init enable_IR_x2apic(void)
 		x2apic_force_phys();
 	}
 
-	if (ret == IRQ_REMAP_XAPIC_MODE)
-		goto nox2apic;
+	if (ret == IRQ_REMAP_XAPIC_MODE) {
+		pr_info("x2apic not enabled, IRQ remapping is in xapic mode\n");
+		goto skip_x2apic;
+	}
 
 	x2apic_enabled = 1;
 
@@ -1523,22 +1589,11 @@ void __init enable_IR_x2apic(void)
 		pr_info("Enabled x2apic\n");
 	}
 
-nox2apic:
+skip_x2apic:
 	if (ret < 0) /* IR enabling failed */
 		restore_ioapic_entries();
 	legacy_pic->restore_mask();
 	local_irq_restore(flags);
-
-out:
-	if (x2apic_enabled || !x2apic_supported())
-		return;
-
-	if (x2apic_preenabled)
-		panic("x2apic: enabled by BIOS but kernel init failed.");
-	else if (ret == IRQ_REMAP_XAPIC_MODE)
-		pr_info("x2apic not enabled, IRQ remapping is in xapic mode\n");
-	else if (ret < 0)
-		pr_info("x2apic not enabled, IRQ remapping init failed\n");
 }
 
 #ifdef CONFIG_X86_64
@@ -1809,8 +1864,8 @@ void smp_spurious_interrupt(struct pt_regs *regs)
 {
 	u32 v;
 
-	exit_idle();
 	irq_enter();
+	exit_idle();
 	/*
 	 * Check if this really is a spurious interrupt and ACK it
 	 * if it is a vectored one. Just in case...
@@ -1846,8 +1901,8 @@
 		"Illegal register address",	/* APIC Error Bit 7 */
 	};
 
-	exit_idle();
 	irq_enter();
+	exit_idle();
 	/* First tickle the hardware, only then report what went on. -- REW */
 	v0 = apic_read(APIC_ESR);
 	apic_write(APIC_ESR, 0);
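Worth noting in the enable_x2apic() hunk: rdmsr()/wrmsr() split an MSR into two u32 halves, while the rdmsrl()/wrmsrl() forms used now keep one u64, which suits single-bit updates such as X2APIC_ENABLE. A short sketch of the pattern (hypothetical helper, real accessors):

    #include <asm/msr.h>
    #include <asm/apic.h>

    static void set_x2apic_enable(void)
    {
	    u64 msr;

	    rdmsrl(MSR_IA32_APICBASE, msr);		/* one u64, no lo/hi pair */
	    if (!(msr & X2APIC_ENABLE))
		    wrmsrl(MSR_IA32_APICBASE, msr | X2APIC_ENABLE);
    }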
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index f7a41e4cae47..8c3cdded6f2b 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -62,7 +62,7 @@ static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask)
  * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
  * document number 292116).  So here it goes...
  */
-static void flat_init_apic_ldr(void)
+void flat_init_apic_ldr(void)
 {
 	unsigned long val;
 	unsigned long num, id;
@@ -171,9 +171,14 @@ static int flat_phys_pkg_id(int initial_apic_id, int index_msb)
 	return initial_apic_id >> index_msb;
 }
 
+static int flat_probe(void)
+{
+	return 1;
+}
+
 static struct apic apic_flat =  {
 	.name				= "flat",
-	.probe				= NULL,
+	.probe				= flat_probe,
 	.acpi_madt_oem_check		= flat_acpi_madt_oem_check,
 	.apic_id_registered		= flat_apic_id_registered,
 
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
new file mode 100644
index 000000000000..09d3d8c1cd99
--- /dev/null
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -0,0 +1,294 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Numascale NumaConnect-Specific APIC Code
+ *
+ * Copyright (C) 2011 Numascale AS. All rights reserved.
+ *
+ * Send feedback to <support@numascale.com>
+ *
+ */
+
+#include <linux/errno.h>
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/hardirq.h>
+#include <linux/delay.h>
+
+#include <asm/numachip/numachip_csr.h>
+#include <asm/smp.h>
+#include <asm/apic.h>
+#include <asm/ipi.h>
+#include <asm/apic_flat_64.h>
+
+static int numachip_system __read_mostly;
+
+static struct apic apic_numachip __read_mostly;
+
+static unsigned int get_apic_id(unsigned long x)
+{
+	unsigned long value;
+	unsigned int id;
+
+	rdmsrl(MSR_FAM10H_NODE_ID, value);
+	id = ((x >> 24) & 0xffU) | ((value << 2) & 0x3f00U);
+
+	return id;
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+	unsigned long x;
+
+	x = ((id & 0xffU) << 24);
+	return x;
+}
+
+static unsigned int read_xapic_id(void)
+{
+	return get_apic_id(apic_read(APIC_ID));
+}
+
+static int numachip_apic_id_registered(void)
+{
+	return physid_isset(read_xapic_id(), phys_cpu_present_map);
+}
+
+static int numachip_phys_pkg_id(int initial_apic_id, int index_msb)
+{
+	return initial_apic_id >> index_msb;
+}
+
+static const struct cpumask *numachip_target_cpus(void)
+{
+	return cpu_online_mask;
+}
+
+static void numachip_vector_allocation_domain(int cpu, struct cpumask *retmask)
+{
+	cpumask_clear(retmask);
+	cpumask_set_cpu(cpu, retmask);
+}
+
+static int __cpuinit numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip)
+{
+	union numachip_csr_g3_ext_irq_gen int_gen;
+
+	int_gen.s._destination_apic_id = phys_apicid;
+	int_gen.s._vector = 0;
+	int_gen.s._msgtype = APIC_DM_INIT >> 8;
+	int_gen.s._index = 0;
+
+	write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);
+
+	int_gen.s._msgtype = APIC_DM_STARTUP >> 8;
+	int_gen.s._vector = start_rip >> 12;
+
+	write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);
+
+	atomic_set(&init_deasserted, 1);
+	return 0;
+}
+
+static void numachip_send_IPI_one(int cpu, int vector)
+{
+	union numachip_csr_g3_ext_irq_gen int_gen;
+	int apicid = per_cpu(x86_cpu_to_apicid, cpu);
+
+	int_gen.s._destination_apic_id = apicid;
+	int_gen.s._vector = vector;
+	int_gen.s._msgtype = (vector == NMI_VECTOR ? APIC_DM_NMI : APIC_DM_FIXED) >> 8;
+	int_gen.s._index = 0;
+
+	write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);
+}
+
+static void numachip_send_IPI_mask(const struct cpumask *mask, int vector)
+{
+	unsigned int cpu;
+
+	for_each_cpu(cpu, mask)
+		numachip_send_IPI_one(cpu, vector);
+}
+
+static void numachip_send_IPI_mask_allbutself(const struct cpumask *mask,
+						int vector)
+{
+	unsigned int this_cpu = smp_processor_id();
+	unsigned int cpu;
+
+	for_each_cpu(cpu, mask) {
+		if (cpu != this_cpu)
+			numachip_send_IPI_one(cpu, vector);
+	}
+}
+
+static void numachip_send_IPI_allbutself(int vector)
+{
+	unsigned int this_cpu = smp_processor_id();
+	unsigned int cpu;
+
+	for_each_online_cpu(cpu) {
+		if (cpu != this_cpu)
+			numachip_send_IPI_one(cpu, vector);
+	}
+}
+
+static void numachip_send_IPI_all(int vector)
+{
+	numachip_send_IPI_mask(cpu_online_mask, vector);
+}
+
+static void numachip_send_IPI_self(int vector)
+{
+	__default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
+}
+
+static unsigned int numachip_cpu_mask_to_apicid(const struct cpumask *cpumask)
+{
+	int cpu;
+
+	/*
+	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
+	 * May as well be the first.
+	 */
+	cpu = cpumask_first(cpumask);
+	if (likely((unsigned)cpu < nr_cpu_ids))
+		return per_cpu(x86_cpu_to_apicid, cpu);
+
+	return BAD_APICID;
+}
+
+static unsigned int
+numachip_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+				const struct cpumask *andmask)
+{
+	int cpu;
+
+	/*
+	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
+	 * May as well be the first.
+	 */
+	for_each_cpu_and(cpu, cpumask, andmask) {
+		if (cpumask_test_cpu(cpu, cpu_online_mask))
+			break;
+	}
+	return per_cpu(x86_cpu_to_apicid, cpu);
+}
+
+static int __init numachip_probe(void)
+{
+	return apic == &apic_numachip;
+}
+
+static void __init map_csrs(void)
+{
+	printk(KERN_INFO "NumaChip: Mapping local CSR space (%016llx - %016llx)\n",
+		NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_BASE + NUMACHIP_LCSR_SIZE - 1);
+	init_extra_mapping_uc(NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_SIZE);
+
+	printk(KERN_INFO "NumaChip: Mapping global CSR space (%016llx - %016llx)\n",
+		NUMACHIP_GCSR_BASE, NUMACHIP_GCSR_BASE + NUMACHIP_GCSR_SIZE - 1);
+	init_extra_mapping_uc(NUMACHIP_GCSR_BASE, NUMACHIP_GCSR_SIZE);
+}
+
+static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
+{
+	c->phys_proc_id = node;
+	per_cpu(cpu_llc_id, smp_processor_id()) = node;
+}
+
+static int __init numachip_system_init(void)
+{
+	unsigned int val;
+
+	if (!numachip_system)
+		return 0;
+
+	x86_cpuinit.fixup_cpu_id = fixup_cpu_id;
+
+	map_csrs();
+
+	val = read_lcsr(CSR_G0_NODE_IDS);
+	printk(KERN_INFO "NumaChip: Local NodeID = %08x\n", val);
+
+	return 0;
+}
+early_initcall(numachip_system_init);
+
+static int numachip_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+	if (!strncmp(oem_id, "NUMASC", 6)) {
+		numachip_system = 1;
+		return 1;
+	}
+
+	return 0;
+}
+
+static struct apic apic_numachip __refconst = {
+
+	.name				= "NumaConnect system",
+	.probe				= numachip_probe,
+	.acpi_madt_oem_check		= numachip_acpi_madt_oem_check,
+	.apic_id_registered		= numachip_apic_id_registered,
+
+	.irq_delivery_mode		= dest_Fixed,
+	.irq_dest_mode			= 0, /* physical */
+
+	.target_cpus			= numachip_target_cpus,
+	.disable_esr			= 0,
+	.dest_logical			= 0,
+	.check_apicid_used		= NULL,
+	.check_apicid_present		= NULL,
+
+	.vector_allocation_domain	= numachip_vector_allocation_domain,
+	.init_apic_ldr			= flat_init_apic_ldr,
+
+	.ioapic_phys_id_map		= NULL,
+	.setup_apic_routing		= NULL,
+	.multi_timer_check		= NULL,
+	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
+	.apicid_to_cpu_present		= NULL,
+	.setup_portio_remap		= NULL,
+	.check_phys_apicid_present	= default_check_phys_apicid_present,
+	.enable_apic_mode		= NULL,
+	.phys_pkg_id			= numachip_phys_pkg_id,
+	.mps_oem_check			= NULL,
+
+	.get_apic_id			= get_apic_id,
+	.set_apic_id			= set_apic_id,
+	.apic_id_mask			= 0xffU << 24,
+
+	.cpu_mask_to_apicid		= numachip_cpu_mask_to_apicid,
+	.cpu_mask_to_apicid_and		= numachip_cpu_mask_to_apicid_and,
+
+	.send_IPI_mask			= numachip_send_IPI_mask,
+	.send_IPI_mask_allbutself	= numachip_send_IPI_mask_allbutself,
+	.send_IPI_allbutself		= numachip_send_IPI_allbutself,
+	.send_IPI_all			= numachip_send_IPI_all,
+	.send_IPI_self			= numachip_send_IPI_self,
+
+	.wakeup_secondary_cpu		= numachip_wakeup_secondary,
+	.trampoline_phys_low		= DEFAULT_TRAMPOLINE_PHYS_LOW,
+	.trampoline_phys_high		= DEFAULT_TRAMPOLINE_PHYS_HIGH,
+	.wait_for_init_deassert		= NULL,
+	.smp_callin_clear_local_apic	= NULL,
+	.inquire_remote_apic		= NULL, /* REMRD not supported */
+
+	.read				= native_apic_mem_read,
+	.write				= native_apic_mem_write,
+	.icr_read			= native_apic_icr_read,
+	.icr_write			= native_apic_icr_write,
+	.wait_icr_idle			= native_apic_wait_icr_idle,
+	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
+};
+apic_driver(apic_numachip);
+
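The interesting part of the new driver is get_apic_id(): NumaChip widens the 8-bit xAPIC ID with fabric node bits taken from MSR_FAM10H_NODE_ID, shifted left by 2 and masked into ID bits 8-13. A stand-alone sketch of that composition with made-up register values:

    #include <stdint.h>
    #include <stdio.h>

    static unsigned numachip_apic_id(uint32_t apic_id_reg, uint64_t node_id_msr)
    {
	    /* same expression as get_apic_id() above */
	    return ((apic_id_reg >> 24) & 0xffu) | ((node_id_msr << 2) & 0x3f00u);
    }

    int main(void)
    {
	    /* hypothetical: local APIC id 5, node MSR contributing bit 6 */
	    printf("%#x\n", numachip_apic_id(5u << 24, 0x40));	/* prints 0x105 */
	    return 0;
    }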
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 6d939d7847e2..fb072754bc1d 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2421,8 +2421,8 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
 	unsigned vector, me;
 
 	ack_APIC_irq();
-	exit_idle();
 	irq_enter();
+	exit_idle();
 
 	me = smp_processor_id();
 	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
@@ -2948,6 +2948,10 @@ static inline void __init check_timer(void)
 	}
 	local_irq_disable();
 	apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
+	if (x2apic_preenabled)
+		apic_printk(APIC_QUIET, KERN_INFO
+			    "Perhaps problem with the pre-enabled x2apic mode\n"
+			    "Try booting with x2apic and interrupt-remapping disabled in the bios.\n");
 	panic("IO-APIC + timer doesn't work!  Boot with apic=debug and send a "
 		"report.  Then try booting with the 'noapic' option.\n");
 out:
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 62ae3001ae02..79b05b88aa19 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -93,6 +93,8 @@ static int __init early_get_pnodeid(void)
 
 	if (node_id.s.part_number == UV2_HUB_PART_NUMBER)
 		uv_min_hub_revision_id += UV2_HUB_REVISION_BASE - 1;
+	if (node_id.s.part_number == UV2_HUB_PART_NUMBER_X)
+		uv_min_hub_revision_id += UV2_HUB_REVISION_BASE - 1;
 
 	uv_hub_info->hub_revision = uv_min_hub_revision_id;
 	pnode = (node_id.s.node_id >> 1) & ((1 << m_n_config.s.n_skt) - 1);
@@ -767,7 +769,12 @@ void __init uv_system_init(void)
 	for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++)
 		uv_possible_blades +=
 		  hweight64(uv_read_local_mmr( UVH_NODE_PRESENT_TABLE + i * 8));
-	printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades());
+
+	/* uv_num_possible_blades() is really the hub count */
+	printk(KERN_INFO "UV: Found %d blades, %d hubs\n",
+			is_uv1_hub() ? uv_num_possible_blades() :
+			(uv_num_possible_blades() + 1) / 2,
+			uv_num_possible_blades());
 
 	bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
 	uv_blade_info = kzalloc(bytes, GFP_KERNEL);
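The reworked message reflects that the node-present bitmaps really count hubs: per the ternary above, UV1 reports one blade per hub while UV2 pairs two hubs per blade, hence the (hubs + 1) / 2 rounding. A worked example of the arithmetic (values invented):

    #include <stdio.h>

    int main(void)
    {
	    int hubs = 7;	/* hypothetical count from UVH_NODE_PRESENT_TABLE */

	    printf("UV1: %d blades, %d hubs\n", hubs, hubs);
	    printf("UV2: %d blades, %d hubs\n", (hubs + 1) / 2, hubs);	/* 4 blades */
	    return 0;
    }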
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index a46bd383953c..f76623cbe263 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -383,21 +383,21 @@ static int ignore_sys_suspend;
 static int ignore_normal_resume;
 static int bounce_interval __read_mostly = DEFAULT_BOUNCE_INTERVAL;
 
-static int debug __read_mostly;
-static int smp __read_mostly;
+static bool debug __read_mostly;
+static bool smp __read_mostly;
 static int apm_disabled = -1;
 #ifdef CONFIG_SMP
-static int power_off;
+static bool power_off;
 #else
-static int power_off = 1;
+static bool power_off = 1;
 #endif
-static int realmode_power_off;
+static bool realmode_power_off;
 #ifdef CONFIG_APM_ALLOW_INTS
-static int allow_ints = 1;
+static bool allow_ints = 1;
 #else
-static int allow_ints;
+static bool allow_ints;
 #endif
-static int broken_psr;
+static bool broken_psr;
 
 static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue);
 static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue);
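These int to bool flips appear to follow the module_param() type tightening elsewhere in this merge window: a parameter declared with type bool should now be backed by a real bool variable. A minimal sketch of the resulting declaration pattern (illustrative, not the full apm_32.c parameter list):

    #include <linux/module.h>

    static bool debug __read_mostly;	/* was: static int debug */
    module_param(debug, bool, 0644);
    MODULE_PARM_DESC(debug, "enable debug output");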
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 4f13fafc5264..68de2dc962ec 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -67,4 +67,6 @@ void common(void) {
 	OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
 	OFFSET(BP_version, boot_params, hdr.version);
 	OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
+	OFFSET(BP_pref_address, boot_params, hdr.pref_address);
+	OFFSET(BP_code32_start, boot_params, hdr.code32_start);
 }
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 395a10e68067..85d98ab15cdc 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -3,6 +3,11 @@
 #include <linux/lguest.h>
 #include "../../../drivers/lguest/lg.h"
 
+#define __SYSCALL_I386(nr, sym, compat) [nr] = 1,
+static char syscalls[] = {
+#include <asm/syscalls_32.h>
+};
+
 /* workaround for a warning with -Wmissing-prototypes */
 void foo(void);
 
@@ -76,4 +81,7 @@ void foo(void)
 	OFFSET(LGUEST_PAGES_regs_errcode, lguest_pages, regs.errcode);
 	OFFSET(LGUEST_PAGES_regs, lguest_pages, regs);
 #endif
+	BLANK();
+	DEFINE(__NR_syscall_max, sizeof(syscalls) - 1);
+	DEFINE(NR_syscalls, sizeof(syscalls));
 }
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index e72a1194af22..834e897b1e25 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -1,11 +1,12 @@
 #include <asm/ia32.h>
 
-#define __NO_STUBS 1
-#undef __SYSCALL
-#undef _ASM_X86_UNISTD_64_H
-#define __SYSCALL(nr, sym) [nr] = 1,
-static char syscalls[] = {
-#include <asm/unistd.h>
+#define __SYSCALL_64(nr, sym, compat) [nr] = 1,
+static char syscalls_64[] = {
+#include <asm/syscalls_64.h>
+};
+#define __SYSCALL_I386(nr, sym, compat) [nr] = 1,
+static char syscalls_ia32[] = {
+#include <asm/syscalls_32.h>
 };
 
 int main(void)
@@ -72,7 +73,11 @@ int main(void)
 	OFFSET(TSS_ist, tss_struct, x86_tss.ist);
 	BLANK();
 
-	DEFINE(__NR_syscall_max, sizeof(syscalls) - 1);
+	DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1);
+	DEFINE(NR_syscalls, sizeof(syscalls_64));
+
+	DEFINE(__NR_ia32_syscall_max, sizeof(syscalls_ia32) - 1);
+	DEFINE(IA32_NR_syscalls, sizeof(syscalls_ia32));
 
 	return 0;
 }
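The `[nr] = 1,` trick above sizes the syscall table at compile time: every table entry becomes a designated initializer in a char array, so sizeof() yields the highest syscall number plus one. A self-contained demo of the idiom (stand-in macros, not the kernel headers):

    #include <stdio.h>

    #define SYSCALL_TABLE(X) X(0) X(1) X(5) X(17)	/* stand-in for <asm/syscalls_64.h> */
    #define MARK(nr) [nr] = 1,

    static char syscalls[] = {
	    SYSCALL_TABLE(MARK)
    };

    int main(void)
    {
	    printf("__NR_syscall_max = %zu\n", sizeof(syscalls) - 1);	/* 17 */
	    printf("NR_syscalls = %zu\n", sizeof(syscalls));		/* 18 */
	    return 0;
    }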
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c
index 452932d34730..5da1269e8ddc 100644
--- a/arch/x86/kernel/check.c
+++ b/arch/x86/kernel/check.c
@@ -62,7 +62,8 @@ early_param("memory_corruption_check_size", set_corruption_check_size);
 
 void __init setup_bios_corruption_check(void)
 {
-	u64 addr = PAGE_SIZE;	/* assume first page is reserved anyway */
+	phys_addr_t start, end;
+	u64 i;
 
 	if (memory_corruption_check == -1) {
 		memory_corruption_check =
@@ -82,28 +83,23 @@ void __init setup_bios_corruption_check(void)
 
 	corruption_check_size = round_up(corruption_check_size, PAGE_SIZE);
 
-	while (addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) {
-		u64 size;
-		addr = memblock_x86_find_in_range_size(addr, &size, PAGE_SIZE);
+	for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL) {
+		start = clamp_t(phys_addr_t, round_up(start, PAGE_SIZE),
+				PAGE_SIZE, corruption_check_size);
+		end = clamp_t(phys_addr_t, round_down(end, PAGE_SIZE),
+			      PAGE_SIZE, corruption_check_size);
+		if (start >= end)
+			continue;
 
-		if (addr == MEMBLOCK_ERROR)
-			break;
-
-		if (addr >= corruption_check_size)
-			break;
-
-		if ((addr + size) > corruption_check_size)
-			size = corruption_check_size - addr;
-
-		memblock_x86_reserve_range(addr, addr + size, "SCAN RAM");
-		scan_areas[num_scan_areas].addr = addr;
-		scan_areas[num_scan_areas].size = size;
-		num_scan_areas++;
+		memblock_reserve(start, end - start);
+		scan_areas[num_scan_areas].addr = start;
+		scan_areas[num_scan_areas].size = end - start;
 
 		/* Assume we've already mapped this early memory */
-		memset(__va(addr), 0, size);
+		memset(__va(start), 0, end - start);
 
-		addr += size;
+		if (++num_scan_areas >= MAX_SCAN_AREAS)
+			break;
 	}
 
 	if (num_scan_areas)
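Each free range above is shrunk to page granularity and clipped to the scan window before being reserved; ranges that collapse are skipped. The clamping can be reproduced in plain C (stand-ins for clamp_t/round_up/round_down):

    #include <stdint.h>

    #define PAGE 4096ULL

    static uint64_t clamp64(uint64_t v, uint64_t lo, uint64_t hi)
    {
	    return v < lo ? lo : (v > hi ? hi : v);
    }

    /* returns nonzero when [start, end) still covers something after clipping */
    static int clip_to_window(uint64_t *start, uint64_t *end, uint64_t limit)
    {
	    *start = clamp64((*start + PAGE - 1) & ~(PAGE - 1), PAGE, limit);
	    *end   = clamp64(*end & ~(PAGE - 1), PAGE, limit);
	    return *start < *end;
    }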
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index c7e46cb35327..f4773f4aae35 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -148,7 +148,6 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c)
 
 static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c)
 {
-#ifdef CONFIG_SMP
 	/* calling is from identify_secondary_cpu() ? */
 	if (!c->cpu_index)
 		return;
@@ -192,7 +191,6 @@ static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c)
 
 valid_k7:
 	;
-#endif
 }
 
 static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
@@ -353,6 +351,13 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
 	if (node == NUMA_NO_NODE)
 		node = per_cpu(cpu_llc_id, cpu);
 
+	/*
+	 * If core numbers are inconsistent, it's likely a multi-fabric platform,
+	 * so invoke platform-specific handler
+	 */
+	if (c->phys_proc_id != node)
+		x86_cpuinit.fixup_cpu_id(c, node);
+
 	if (!node_online(node)) {
 		/*
 		 * Two possibilities here:
@@ -442,8 +447,6 @@ static void __cpuinit bsp_init_amd(struct cpuinfo_x86 *c)
 
 static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
 {
-	u32 dummy;
-
 	early_init_amd_mc(c);
 
 	/*
@@ -473,12 +476,12 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
 		set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
 	}
 #endif
-
-	rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
 }
 
 static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 {
+	u32 dummy;
+
 #ifdef CONFIG_SMP
 	unsigned long long value;
 
@@ -657,6 +660,8 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 			checking_wrmsrl(MSR_AMD64_MCx_MASK(4), mask);
 		}
 	}
+
+	rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
 }
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index e58d978e0758..159103c0b1f4 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -278,7 +278,7 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c)
 	}
 #ifdef CONFIG_X86_32
 	/* Cyrix III family needs CX8 & PGE explicitly enabled. */
-	if (c->x86_model >= 6 && c->x86_model <= 9) {
+	if (c->x86_model >= 6 && c->x86_model <= 13) {
 		rdmsr(MSR_VIA_FCR, lo, hi);
 		lo |= (1<<1 | 1<<7);
 		wrmsr(MSR_VIA_FCR, lo, hi);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index aa003b13a831..c0f7d68d318f 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -676,9 +676,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 	if (this_cpu->c_early_init)
 		this_cpu->c_early_init(c);
 
-#ifdef CONFIG_SMP
 	c->cpu_index = 0;
-#endif
 	filter_cpuid_features(c, false);
 
 	setup_smep(c);
@@ -764,10 +762,7 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
 		c->apicid = c->initial_apicid;
 # endif
 #endif
-
-#ifdef CONFIG_X86_HT
 		c->phys_proc_id = c->initial_apicid;
-#endif
 	}
 
 	setup_smep(c);
@@ -1026,6 +1021,8 @@ __setup("clearcpuid=", setup_disablecpuid);
 
 #ifdef CONFIG_X86_64
 struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
+struct desc_ptr nmi_idt_descr = { NR_VECTORS * 16 - 1,
+				    (unsigned long) nmi_idt_table };
 
 DEFINE_PER_CPU_FIRST(union irq_stack_union,
 		     irq_stack_union) __aligned(PAGE_SIZE);
@@ -1047,6 +1044,9 @@ DEFINE_PER_CPU(char *, irq_stack_ptr) =
 
 DEFINE_PER_CPU(unsigned int, irq_count) = -1;
 
+DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
+EXPORT_PER_CPU_SYMBOL(fpu_owner_task);
+
 /*
  * Special IST stacks which the CPU switches to when it calls
  * an IST-marked descriptor entry. Up to 7 stacks (hardware
@@ -1090,10 +1090,32 @@ unsigned long kernel_eflags;
  */
 DEFINE_PER_CPU(struct orig_ist, orig_ist);
 
+static DEFINE_PER_CPU(unsigned long, debug_stack_addr);
+DEFINE_PER_CPU(int, debug_stack_usage);
+
+int is_debug_stack(unsigned long addr)
+{
+	return __get_cpu_var(debug_stack_usage) ||
+		(addr <= __get_cpu_var(debug_stack_addr) &&
+		 addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ));
+}
+
+void debug_stack_set_zero(void)
+{
+	load_idt((const struct desc_ptr *)&nmi_idt_descr);
+}
+
+void debug_stack_reset(void)
+{
+	load_idt((const struct desc_ptr *)&idt_descr);
+}
+
 #else	/* CONFIG_X86_64 */
 
 DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
 EXPORT_PER_CPU_SYMBOL(current_task);
+DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
+EXPORT_PER_CPU_SYMBOL(fpu_owner_task);
 
 #ifdef CONFIG_CC_STACKPROTECTOR
 DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
@@ -1141,6 +1163,15 @@ static void dbg_restore_debug_regs(void)
 #endif /* ! CONFIG_KGDB */
 
 /*
+ * Prints an error where the NUMA and configured core-number mismatch and the
+ * platform didn't override this to fix it up
+ */
+void __cpuinit x86_default_fixup_cpu_id(struct cpuinfo_x86 *c, int node)
+{
+	pr_err("NUMA core number %d differs from configured core number %d\n", node, c->phys_proc_id);
+}
+
+/*
  * cpu_init() initializes state that is per-CPU. Some data is already
  * initialized (naturally) in the bootstrap process, such as the GDT
  * and IDT. We reload them nevertheless, this function acts as a
@@ -1208,6 +1239,8 @@ void __cpuinit cpu_init(void)
 			estacks += exception_stack_sizes[v];
 			oist->ist[v] = t->x86_tss.ist[v] =
 					(unsigned long)estacks;
+			if (v == DEBUG_STACK-1)
+				per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks;
 		}
 	}
 
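The debug-stack helpers added above let NMI-path code ask whether an address sits on the per-CPU DEBUG_STACK IST stack, which grows down from debug_stack_addr. The test reduces to a range check plus the in-use flag, sketched here with plain parameters:

    #include <stdbool.h>

    /* same shape as is_debug_stack(): flagged in use, or inside
     * (stack_top - stack_size, stack_top] */
    static bool on_debug_stack(unsigned long addr, unsigned long stack_top,
			       unsigned long stack_size, int usage_flag)
    {
	    return usage_flag || (addr <= stack_top &&
				  addr > stack_top - stack_size);
    }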
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 1b22dcc51af4..8bacc7826fb3 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -1,5 +1,4 @@
 #ifndef ARCH_X86_CPU_H
-
 #define ARCH_X86_CPU_H
 
 struct cpu_model_info {
@@ -35,6 +34,4 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[],
 
 extern void get_cpu_cap(struct cpuinfo_x86 *c);
 extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
-extern void get_cpu_cap(struct cpuinfo_x86 *c);
-
-#endif
+#endif /* ARCH_X86_CPU_H */
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 523131213f08..3e6ff6cbf42a 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -181,7 +181,6 @@ static void __cpuinit trap_init_f00f_bug(void)
 
 static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c)
 {
-#ifdef CONFIG_SMP
 	/* calling is from identify_secondary_cpu() ? */
 	if (!c->cpu_index)
 		return;
@@ -198,7 +197,6 @@ static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c)
 		WARN_ONCE(1, "WARNING: SMP operation may be unreliable"
 				    "with B stepping processors.\n");
 	}
-#endif
 }
 
 static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index a3b0811693c9..73d08ed98a64 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -326,8 +326,7 @@ static void __cpuinit amd_calc_l3_indices(struct amd_northbridge *nb)
 	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
 }
 
-static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf,
-					int index)
+static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
 {
 	int node;
 
@@ -725,14 +724,16 @@ static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
 #define CPUID4_INFO_IDX(x, y)	(&((per_cpu(ici_cpuid4_info, x))[y]))
 
 #ifdef CONFIG_SMP
-static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
+
+static int __cpuinit cache_shared_amd_cpu_map_setup(unsigned int cpu, int index)
 {
-	struct _cpuid4_info *this_leaf, *sibling_leaf;
-	unsigned long num_threads_sharing;
-	int index_msb, i, sibling;
+	struct _cpuid4_info *this_leaf;
+	int ret, i, sibling;
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
-	if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) {
+	ret = 0;
+	if (index == 3) {
+		ret = 1;
 		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
 			if (!per_cpu(ici_cpuid4_info, i))
 				continue;
@@ -743,8 +744,35 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
 				set_bit(sibling, this_leaf->shared_cpu_map);
 			}
 		}
-		return;
+	} else if ((c->x86 == 0x15) && ((index == 1) || (index == 2))) {
+		ret = 1;
+		for_each_cpu(i, cpu_sibling_mask(cpu)) {
+			if (!per_cpu(ici_cpuid4_info, i))
+				continue;
+			this_leaf = CPUID4_INFO_IDX(i, index);
+			for_each_cpu(sibling, cpu_sibling_mask(cpu)) {
+				if (!cpu_online(sibling))
+					continue;
+				set_bit(sibling, this_leaf->shared_cpu_map);
+			}
+		}
 	}
+
+	return ret;
+}
+
+static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
+{
+	struct _cpuid4_info *this_leaf, *sibling_leaf;
+	unsigned long num_threads_sharing;
+	int index_msb, i;
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+	if (c->x86_vendor == X86_VENDOR_AMD) {
+		if (cache_shared_amd_cpu_map_setup(cpu, index))
+			return;
+	}
+
 	this_leaf = CPUID4_INFO_IDX(cpu, index);
 	num_threads_sharing = 1 + this_leaf->base.eax.split.num_threads_sharing;
 
@@ -844,8 +872,7 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
 
 #include <linux/kobject.h>
 #include <linux/sysfs.h>
-
-extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */
+#include <linux/cpu.h>
 
 /* pointer to kobject for cpuX/cache */
 static DEFINE_PER_CPU(struct kobject *, ici_cache_kobject);
@@ -1073,9 +1100,9 @@ err_out:
 static DECLARE_BITMAP(cache_dev_map, NR_CPUS);
 
 /* Add/Remove cache interface for CPU device */
-static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
+static int __cpuinit cache_add_dev(struct device *dev)
 {
-	unsigned int cpu = sys_dev->id;
+	unsigned int cpu = dev->id;
 	unsigned long i, j;
 	struct _index_kobject *this_object;
 	struct _cpuid4_info   *this_leaf;
@@ -1087,7 +1114,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
 
 	retval = kobject_init_and_add(per_cpu(ici_cache_kobject, cpu),
 				      &ktype_percpu_entry,
-				      &sys_dev->kobj, "%s", "cache");
+				      &dev->kobj, "%s", "cache");
 	if (retval < 0) {
 		cpuid4_cache_sysfs_exit(cpu);
 		return retval;
@@ -1124,9 +1151,9 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
 	return 0;
 }
 
-static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
+static void __cpuinit cache_remove_dev(struct device *dev)
 {
-	unsigned int cpu = sys_dev->id;
+	unsigned int cpu = dev->id;
 	unsigned long i;
 
 	if (per_cpu(ici_cpuid4_info, cpu) == NULL)
@@ -1145,17 +1172,17 @@ static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb,
 				  unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (unsigned long)hcpu;
-	struct sys_device *sys_dev;
+	struct device *dev;
 
-	sys_dev = get_cpu_sysdev(cpu);
+	dev = get_cpu_device(cpu);
 	switch (action) {
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
-		cache_add_dev(sys_dev);
+		cache_add_dev(dev);
 		break;
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
-		cache_remove_dev(sys_dev);
+		cache_remove_dev(dev);
 		break;
 	}
 	return NOTIFY_OK;
@@ -1174,9 +1201,9 @@ static int __cpuinit cache_sysfs_init(void)
 
 	for_each_online_cpu(i) {
 		int err;
-		struct sys_device *sys_dev = get_cpu_sysdev(i);
+		struct device *dev = get_cpu_device(i);
 
-		err = cache_add_dev(sys_dev);
+		err = cache_add_dev(dev);
1180 | if (err) | 1207 | if (err) |
1181 | return err; | 1208 | return err; |
1182 | } | 1209 | } |
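These intel_cacheinfo.c changes are part of the tree-wide sysdev removal: struct sys_device and get_cpu_sysdev() are replaced by struct device and get_cpu_device(), and the old extern declaration of cpu_sysdev_class gives way to a plain #include <linux/cpu.h>. A minimal kernel-style sketch of the converted hotplug callback shape, with hypothetical my_add_dev()/my_remove_dev() helpers standing in for cache_add_dev()/cache_remove_dev():

#include <linux/cpu.h>
#include <linux/device.h>
#include <linux/notifier.h>

static int my_add_dev(struct device *dev) { return 0; }	/* stand-in */
static void my_remove_dev(struct device *dev) { }		/* stand-in */

static int my_cpu_callback(struct notifier_block *nfb,
			   unsigned long action, void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;
	struct device *dev = get_cpu_device(cpu);	/* was get_cpu_sysdev() */

	switch (action) {
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		my_add_dev(dev);
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		my_remove_dev(dev);
		break;
	}
	return NOTIFY_OK;
}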
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index 319882ef848d..fc4beb393577 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <linux/kernel.h> | 17 | #include <linux/kernel.h> |
18 | #include <linux/string.h> | 18 | #include <linux/string.h> |
19 | #include <linux/fs.h> | 19 | #include <linux/fs.h> |
20 | #include <linux/preempt.h> | ||
20 | #include <linux/smp.h> | 21 | #include <linux/smp.h> |
21 | #include <linux/notifier.h> | 22 | #include <linux/notifier.h> |
22 | #include <linux/kdebug.h> | 23 | #include <linux/kdebug.h> |
@@ -92,6 +93,18 @@ static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs) | |||
92 | return NMI_HANDLED; | 93 | return NMI_HANDLED; |
93 | } | 94 | } |
94 | 95 | ||
96 | static void mce_irq_ipi(void *info) | ||
97 | { | ||
98 | int cpu = smp_processor_id(); | ||
99 | struct mce *m = &__get_cpu_var(injectm); | ||
100 | |||
101 | if (cpumask_test_cpu(cpu, mce_inject_cpumask) && | ||
102 | m->inject_flags & MCJ_EXCEPTION) { | ||
103 | cpumask_clear_cpu(cpu, mce_inject_cpumask); | ||
104 | raise_exception(m, NULL); | ||
105 | } | ||
106 | } | ||
107 | |||
95 | /* Inject mce on current CPU */ | 108 | /* Inject mce on current CPU */ |
96 | static int raise_local(void) | 109 | static int raise_local(void) |
97 | { | 110 | { |
@@ -139,9 +152,10 @@ static void raise_mce(struct mce *m) | |||
139 | return; | 152 | return; |
140 | 153 | ||
141 | #ifdef CONFIG_X86_LOCAL_APIC | 154 | #ifdef CONFIG_X86_LOCAL_APIC |
142 | if (m->inject_flags & MCJ_NMI_BROADCAST) { | 155 | if (m->inject_flags & (MCJ_IRQ_BRAODCAST | MCJ_NMI_BROADCAST)) { |
143 | unsigned long start; | 156 | unsigned long start; |
144 | int cpu; | 157 | int cpu; |
158 | |||
145 | get_online_cpus(); | 159 | get_online_cpus(); |
146 | cpumask_copy(mce_inject_cpumask, cpu_online_mask); | 160 | cpumask_copy(mce_inject_cpumask, cpu_online_mask); |
147 | cpumask_clear_cpu(get_cpu(), mce_inject_cpumask); | 161 | cpumask_clear_cpu(get_cpu(), mce_inject_cpumask); |
@@ -151,13 +165,25 @@ static void raise_mce(struct mce *m) | |||
151 | MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM) | 165 | MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM) |
152 | cpumask_clear_cpu(cpu, mce_inject_cpumask); | 166 | cpumask_clear_cpu(cpu, mce_inject_cpumask); |
153 | } | 167 | } |
154 | if (!cpumask_empty(mce_inject_cpumask)) | 168 | if (!cpumask_empty(mce_inject_cpumask)) { |
155 | apic->send_IPI_mask(mce_inject_cpumask, NMI_VECTOR); | 169 | if (m->inject_flags & MCJ_IRQ_BRAODCAST) { |
170 | /* | ||
171 | * don't wait because mce_irq_ipi is necessary | ||
172 | * to be sync with following raise_local | ||
173 | */ | ||
174 | preempt_disable(); | ||
175 | smp_call_function_many(mce_inject_cpumask, | ||
176 | mce_irq_ipi, NULL, 0); | ||
177 | preempt_enable(); | ||
178 | } else if (m->inject_flags & MCJ_NMI_BROADCAST) | ||
179 | apic->send_IPI_mask(mce_inject_cpumask, | ||
180 | NMI_VECTOR); | ||
181 | } | ||
156 | start = jiffies; | 182 | start = jiffies; |
157 | while (!cpumask_empty(mce_inject_cpumask)) { | 183 | while (!cpumask_empty(mce_inject_cpumask)) { |
158 | if (!time_before(jiffies, start + 2*HZ)) { | 184 | if (!time_before(jiffies, start + 2*HZ)) { |
159 | printk(KERN_ERR | 185 | printk(KERN_ERR |
160 | "Timeout waiting for mce inject NMI %lx\n", | 186 | "Timeout waiting for mce inject %lx\n", |
161 | *cpumask_bits(mce_inject_cpumask)); | 187 | *cpumask_bits(mce_inject_cpumask)); |
162 | break; | 188 | break; |
163 | } | 189 | } |
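The new MCJ_IRQ_BRAODCAST path (note the pre-existing misspelling of the flag, kept as-is here) delivers the injection via a regular cross-CPU function call instead of an NMI. Two details in the hunk are worth calling out: smp_call_function_many() with wait == 0 must run with preemption disabled, and each target CPU acknowledges by clearing itself from the cpumask so the initiator can poll for completion against a jiffies deadline. A kernel-style sketch of that pattern, with a hypothetical my_mask (assumed to have been allocated with zalloc_cpumask_var() and populated) and handler:

#include <linux/cpumask.h>
#include <linux/jiffies.h>
#include <linux/preempt.h>
#include <linux/smp.h>
#include <asm/processor.h>

static cpumask_var_t my_mask;	/* allocation and setup not shown */

static void my_ipi_handler(void *info)
{
	/* ack: drop ourselves so the initiator's poll loop can finish */
	cpumask_clear_cpu(smp_processor_id(), my_mask);
}

static void my_broadcast_and_wait(void)
{
	unsigned long start = jiffies;

	preempt_disable();		/* required when wait == 0 */
	smp_call_function_many(my_mask, my_ipi_handler, NULL, 0);
	preempt_enable();

	while (!cpumask_empty(my_mask)) {	/* poll, 2 second timeout */
		if (!time_before(jiffies, start + 2 * HZ))
			break;
		cpu_relax();
	}
}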
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index fefcc69ee8b5..ed44c8a65858 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h | |||
@@ -1,4 +1,4 @@ | |||
1 | #include <linux/sysdev.h> | 1 | #include <linux/device.h> |
2 | #include <asm/mce.h> | 2 | #include <asm/mce.h> |
3 | 3 | ||
4 | enum severity_level { | 4 | enum severity_level { |
@@ -17,7 +17,7 @@ enum severity_level { | |||
17 | struct mce_bank { | 17 | struct mce_bank { |
18 | u64 ctl; /* subevents to enable */ | 18 | u64 ctl; /* subevents to enable */ |
19 | unsigned char init; /* initialise bank? */ | 19 | unsigned char init; /* initialise bank? */ |
20 | struct sysdev_attribute attr; /* sysdev attribute */ | 20 | struct device_attribute attr; /* device attribute */ |
21 | char attrname[ATTR_LEN]; /* attribute name */ | 21 | char attrname[ATTR_LEN]; /* attribute name */ |
22 | }; | 22 | }; |
23 | 23 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 2af127d4c3d1..5a11ae2e9e91 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -19,7 +19,7 @@ | |||
19 | #include <linux/kernel.h> | 19 | #include <linux/kernel.h> |
20 | #include <linux/percpu.h> | 20 | #include <linux/percpu.h> |
21 | #include <linux/string.h> | 21 | #include <linux/string.h> |
22 | #include <linux/sysdev.h> | 22 | #include <linux/device.h> |
23 | #include <linux/syscore_ops.h> | 23 | #include <linux/syscore_ops.h> |
24 | #include <linux/delay.h> | 24 | #include <linux/delay.h> |
25 | #include <linux/ctype.h> | 25 | #include <linux/ctype.h> |
@@ -95,13 +95,6 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait); | |||
95 | static DEFINE_PER_CPU(struct mce, mces_seen); | 95 | static DEFINE_PER_CPU(struct mce, mces_seen); |
96 | static int cpu_missing; | 96 | static int cpu_missing; |
97 | 97 | ||
98 | /* | ||
99 | * CPU/chipset specific EDAC code can register a notifier call here to print | ||
100 | * MCE errors in a human-readable form. | ||
101 | */ | ||
102 | ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); | ||
103 | EXPORT_SYMBOL_GPL(x86_mce_decoder_chain); | ||
104 | |||
105 | /* MCA banks polled by the period polling timer for corrected events */ | 98 | /* MCA banks polled by the period polling timer for corrected events */ |
106 | DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { | 99 | DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { |
107 | [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL | 100 | [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL |
@@ -109,6 +102,12 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { | |||
109 | 102 | ||
110 | static DEFINE_PER_CPU(struct work_struct, mce_work); | 103 | static DEFINE_PER_CPU(struct work_struct, mce_work); |
111 | 104 | ||
105 | /* | ||
106 | * CPU/chipset specific EDAC code can register a notifier call here to print | ||
107 | * MCE errors in a human-readable form. | ||
108 | */ | ||
109 | ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); | ||
110 | |||
112 | /* Do initial initialization of a struct mce */ | 111 | /* Do initial initialization of a struct mce */ |
113 | void mce_setup(struct mce *m) | 112 | void mce_setup(struct mce *m) |
114 | { | 113 | { |
@@ -119,9 +118,7 @@ void mce_setup(struct mce *m) | |||
119 | m->time = get_seconds(); | 118 | m->time = get_seconds(); |
120 | m->cpuvendor = boot_cpu_data.x86_vendor; | 119 | m->cpuvendor = boot_cpu_data.x86_vendor; |
121 | m->cpuid = cpuid_eax(1); | 120 | m->cpuid = cpuid_eax(1); |
122 | #ifdef CONFIG_SMP | ||
123 | m->socketid = cpu_data(m->extcpu).phys_proc_id; | 121 | m->socketid = cpu_data(m->extcpu).phys_proc_id; |
124 | #endif | ||
125 | m->apicid = cpu_data(m->extcpu).initial_apicid; | 122 | m->apicid = cpu_data(m->extcpu).initial_apicid; |
126 | rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap); | 123 | rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap); |
127 | } | 124 | } |
@@ -190,6 +187,57 @@ void mce_log(struct mce *mce) | |||
190 | set_bit(0, &mce_need_notify); | 187 | set_bit(0, &mce_need_notify); |
191 | } | 188 | } |
192 | 189 | ||
190 | static void drain_mcelog_buffer(void) | ||
191 | { | ||
192 | unsigned int next, i, prev = 0; | ||
193 | |||
194 | next = rcu_dereference_check_mce(mcelog.next); | ||
195 | |||
196 | do { | ||
197 | struct mce *m; | ||
198 | |||
199 | /* drain what was logged during boot */ | ||
200 | for (i = prev; i < next; i++) { | ||
201 | unsigned long start = jiffies; | ||
202 | unsigned retries = 1; | ||
203 | |||
204 | m = &mcelog.entry[i]; | ||
205 | |||
206 | while (!m->finished) { | ||
207 | if (time_after_eq(jiffies, start + 2*retries)) | ||
208 | retries++; | ||
209 | |||
210 | cpu_relax(); | ||
211 | |||
212 | if (!m->finished && retries >= 4) { | ||
213 | pr_err("MCE: skipping error being logged currently!\n"); | ||
214 | break; | ||
215 | } | ||
216 | } | ||
217 | smp_rmb(); | ||
218 | atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); | ||
219 | } | ||
220 | |||
221 | memset(mcelog.entry + prev, 0, (next - prev) * sizeof(*m)); | ||
222 | prev = next; | ||
223 | next = cmpxchg(&mcelog.next, prev, 0); | ||
224 | } while (next != prev); | ||
225 | } | ||
226 | |||
227 | |||
228 | void mce_register_decode_chain(struct notifier_block *nb) | ||
229 | { | ||
230 | atomic_notifier_chain_register(&x86_mce_decoder_chain, nb); | ||
231 | drain_mcelog_buffer(); | ||
232 | } | ||
233 | EXPORT_SYMBOL_GPL(mce_register_decode_chain); | ||
234 | |||
235 | void mce_unregister_decode_chain(struct notifier_block *nb) | ||
236 | { | ||
237 | atomic_notifier_chain_unregister(&x86_mce_decoder_chain, nb); | ||
238 | } | ||
239 | EXPORT_SYMBOL_GPL(mce_unregister_decode_chain); | ||
240 | |||
193 | static void print_mce(struct mce *m) | 241 | static void print_mce(struct mce *m) |
194 | { | 242 | { |
195 | int ret = 0; | 243 | int ret = 0; |
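With this hunk the decoder notifier chain is no longer exported directly; consumers go through mce_register_decode_chain()/mce_unregister_decode_chain(), and registration first replays any records logged before the decoder loaded (drain_mcelog_buffer()). A sketch of a hypothetical consumer module; in practice the intended users are EDAC drivers, and the declarations are assumed to live in <asm/mce.h> after this series:

#include <linux/module.h>
#include <linux/notifier.h>
#include <asm/mce.h>

static int my_decode_mce(struct notifier_block *nb, unsigned long val,
			 void *data)
{
	struct mce *m = data;

	pr_info("decoded MCE: bank %d, status 0x%llx\n", m->bank, m->status);
	return NOTIFY_OK;
}

static struct notifier_block my_mce_dec = {
	.notifier_call	= my_decode_mce,
};

static int __init my_init(void)
{
	/* also replays MCEs logged before this module was loaded */
	mce_register_decode_chain(&my_mce_dec);
	return 0;
}

static void __exit my_exit(void)
{
	mce_unregister_decode_chain(&my_mce_dec);
}

module_init(my_init);
module_exit(my_exit);
MODULE_LICENSE("GPL");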
@@ -1770,7 +1818,7 @@ static struct syscore_ops mce_syscore_ops = { | |||
1770 | }; | 1818 | }; |
1771 | 1819 | ||
1772 | /* | 1820 | /* |
1773 | * mce_sysdev: Sysfs support | 1821 | * mce_device: Sysfs support |
1774 | */ | 1822 | */ |
1775 | 1823 | ||
1776 | static void mce_cpu_restart(void *data) | 1824 | static void mce_cpu_restart(void *data) |
@@ -1806,27 +1854,28 @@ static void mce_enable_ce(void *all) | |||
1806 | __mcheck_cpu_init_timer(); | 1854 | __mcheck_cpu_init_timer(); |
1807 | } | 1855 | } |
1808 | 1856 | ||
1809 | static struct sysdev_class mce_sysdev_class = { | 1857 | static struct bus_type mce_subsys = { |
1810 | .name = "machinecheck", | 1858 | .name = "machinecheck", |
1859 | .dev_name = "machinecheck", | ||
1811 | }; | 1860 | }; |
1812 | 1861 | ||
1813 | DEFINE_PER_CPU(struct sys_device, mce_sysdev); | 1862 | struct device *mce_device[CONFIG_NR_CPUS]; |
1814 | 1863 | ||
1815 | __cpuinitdata | 1864 | __cpuinitdata |
1816 | void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); | 1865 | void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); |
1817 | 1866 | ||
1818 | static inline struct mce_bank *attr_to_bank(struct sysdev_attribute *attr) | 1867 | static inline struct mce_bank *attr_to_bank(struct device_attribute *attr) |
1819 | { | 1868 | { |
1820 | return container_of(attr, struct mce_bank, attr); | 1869 | return container_of(attr, struct mce_bank, attr); |
1821 | } | 1870 | } |
1822 | 1871 | ||
1823 | static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr, | 1872 | static ssize_t show_bank(struct device *s, struct device_attribute *attr, |
1824 | char *buf) | 1873 | char *buf) |
1825 | { | 1874 | { |
1826 | return sprintf(buf, "%llx\n", attr_to_bank(attr)->ctl); | 1875 | return sprintf(buf, "%llx\n", attr_to_bank(attr)->ctl); |
1827 | } | 1876 | } |
1828 | 1877 | ||
1829 | static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, | 1878 | static ssize_t set_bank(struct device *s, struct device_attribute *attr, |
1830 | const char *buf, size_t size) | 1879 | const char *buf, size_t size) |
1831 | { | 1880 | { |
1832 | u64 new; | 1881 | u64 new; |
@@ -1841,14 +1890,14 @@ static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, | |||
1841 | } | 1890 | } |
1842 | 1891 | ||
1843 | static ssize_t | 1892 | static ssize_t |
1844 | show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf) | 1893 | show_trigger(struct device *s, struct device_attribute *attr, char *buf) |
1845 | { | 1894 | { |
1846 | strcpy(buf, mce_helper); | 1895 | strcpy(buf, mce_helper); |
1847 | strcat(buf, "\n"); | 1896 | strcat(buf, "\n"); |
1848 | return strlen(mce_helper) + 1; | 1897 | return strlen(mce_helper) + 1; |
1849 | } | 1898 | } |
1850 | 1899 | ||
1851 | static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, | 1900 | static ssize_t set_trigger(struct device *s, struct device_attribute *attr, |
1852 | const char *buf, size_t siz) | 1901 | const char *buf, size_t siz) |
1853 | { | 1902 | { |
1854 | char *p; | 1903 | char *p; |
@@ -1863,8 +1912,8 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, | |||
1863 | return strlen(mce_helper) + !!p; | 1912 | return strlen(mce_helper) + !!p; |
1864 | } | 1913 | } |
1865 | 1914 | ||
1866 | static ssize_t set_ignore_ce(struct sys_device *s, | 1915 | static ssize_t set_ignore_ce(struct device *s, |
1867 | struct sysdev_attribute *attr, | 1916 | struct device_attribute *attr, |
1868 | const char *buf, size_t size) | 1917 | const char *buf, size_t size) |
1869 | { | 1918 | { |
1870 | u64 new; | 1919 | u64 new; |
@@ -1887,8 +1936,8 @@ static ssize_t set_ignore_ce(struct sys_device *s, | |||
1887 | return size; | 1936 | return size; |
1888 | } | 1937 | } |
1889 | 1938 | ||
1890 | static ssize_t set_cmci_disabled(struct sys_device *s, | 1939 | static ssize_t set_cmci_disabled(struct device *s, |
1891 | struct sysdev_attribute *attr, | 1940 | struct device_attribute *attr, |
1892 | const char *buf, size_t size) | 1941 | const char *buf, size_t size) |
1893 | { | 1942 | { |
1894 | u64 new; | 1943 | u64 new; |
@@ -1910,108 +1959,117 @@ static ssize_t set_cmci_disabled(struct sys_device *s, | |||
1910 | return size; | 1959 | return size; |
1911 | } | 1960 | } |
1912 | 1961 | ||
1913 | static ssize_t store_int_with_restart(struct sys_device *s, | 1962 | static ssize_t store_int_with_restart(struct device *s, |
1914 | struct sysdev_attribute *attr, | 1963 | struct device_attribute *attr, |
1915 | const char *buf, size_t size) | 1964 | const char *buf, size_t size) |
1916 | { | 1965 | { |
1917 | ssize_t ret = sysdev_store_int(s, attr, buf, size); | 1966 | ssize_t ret = device_store_int(s, attr, buf, size); |
1918 | mce_restart(); | 1967 | mce_restart(); |
1919 | return ret; | 1968 | return ret; |
1920 | } | 1969 | } |
1921 | 1970 | ||
1922 | static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); | 1971 | static DEVICE_ATTR(trigger, 0644, show_trigger, set_trigger); |
1923 | static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); | 1972 | static DEVICE_INT_ATTR(tolerant, 0644, tolerant); |
1924 | static SYSDEV_INT_ATTR(monarch_timeout, 0644, monarch_timeout); | 1973 | static DEVICE_INT_ATTR(monarch_timeout, 0644, monarch_timeout); |
1925 | static SYSDEV_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce); | 1974 | static DEVICE_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce); |
1926 | 1975 | ||
1927 | static struct sysdev_ext_attribute attr_check_interval = { | 1976 | static struct dev_ext_attribute dev_attr_check_interval = { |
1928 | _SYSDEV_ATTR(check_interval, 0644, sysdev_show_int, | 1977 | __ATTR(check_interval, 0644, device_show_int, store_int_with_restart), |
1929 | store_int_with_restart), | ||
1930 | &check_interval | 1978 | &check_interval |
1931 | }; | 1979 | }; |
1932 | 1980 | ||
1933 | static struct sysdev_ext_attribute attr_ignore_ce = { | 1981 | static struct dev_ext_attribute dev_attr_ignore_ce = { |
1934 | _SYSDEV_ATTR(ignore_ce, 0644, sysdev_show_int, set_ignore_ce), | 1982 | __ATTR(ignore_ce, 0644, device_show_int, set_ignore_ce), |
1935 | &mce_ignore_ce | 1983 | &mce_ignore_ce |
1936 | }; | 1984 | }; |
1937 | 1985 | ||
1938 | static struct sysdev_ext_attribute attr_cmci_disabled = { | 1986 | static struct dev_ext_attribute dev_attr_cmci_disabled = { |
1939 | _SYSDEV_ATTR(cmci_disabled, 0644, sysdev_show_int, set_cmci_disabled), | 1987 | __ATTR(cmci_disabled, 0644, device_show_int, set_cmci_disabled), |
1940 | &mce_cmci_disabled | 1988 | &mce_cmci_disabled |
1941 | }; | 1989 | }; |
1942 | 1990 | ||
1943 | static struct sysdev_attribute *mce_sysdev_attrs[] = { | 1991 | static struct device_attribute *mce_device_attrs[] = { |
1944 | &attr_tolerant.attr, | 1992 | &dev_attr_tolerant.attr, |
1945 | &attr_check_interval.attr, | 1993 | &dev_attr_check_interval.attr, |
1946 | &attr_trigger, | 1994 | &dev_attr_trigger, |
1947 | &attr_monarch_timeout.attr, | 1995 | &dev_attr_monarch_timeout.attr, |
1948 | &attr_dont_log_ce.attr, | 1996 | &dev_attr_dont_log_ce.attr, |
1949 | &attr_ignore_ce.attr, | 1997 | &dev_attr_ignore_ce.attr, |
1950 | &attr_cmci_disabled.attr, | 1998 | &dev_attr_cmci_disabled.attr, |
1951 | NULL | 1999 | NULL |
1952 | }; | 2000 | }; |
1953 | 2001 | ||
1954 | static cpumask_var_t mce_sysdev_initialized; | 2002 | static cpumask_var_t mce_device_initialized; |
2003 | |||
2004 | static void mce_device_release(struct device *dev) | ||
2005 | { | ||
2006 | kfree(dev); | ||
2007 | } | ||
1955 | 2008 | ||
1956 | /* Per cpu sysdev init. All of the cpus still share the same ctrl bank: */ | 2009 | /* Per cpu device init. All of the cpus still share the same ctrl bank: */ |
1957 | static __cpuinit int mce_sysdev_create(unsigned int cpu) | 2010 | static __cpuinit int mce_device_create(unsigned int cpu) |
1958 | { | 2011 | { |
1959 | struct sys_device *sysdev = &per_cpu(mce_sysdev, cpu); | 2012 | struct device *dev; |
1960 | int err; | 2013 | int err; |
1961 | int i, j; | 2014 | int i, j; |
1962 | 2015 | ||
1963 | if (!mce_available(&boot_cpu_data)) | 2016 | if (!mce_available(&boot_cpu_data)) |
1964 | return -EIO; | 2017 | return -EIO; |
1965 | 2018 | ||
1966 | memset(&sysdev->kobj, 0, sizeof(struct kobject)); | 2019 | dev = kzalloc(sizeof *dev, GFP_KERNEL); |
1967 | sysdev->id = cpu; | 2020 | if (!dev) |
1968 | sysdev->cls = &mce_sysdev_class; | 2021 | return -ENOMEM; |
2022 | dev->id = cpu; | ||
2023 | dev->bus = &mce_subsys; | ||
2024 | dev->release = &mce_device_release; | ||
1969 | 2025 | ||
1970 | err = sysdev_register(sysdev); | 2026 | err = device_register(dev); |
1971 | if (err) | 2027 | if (err) |
1972 | return err; | 2028 | return err; |
1973 | 2029 | ||
1974 | for (i = 0; mce_sysdev_attrs[i]; i++) { | 2030 | for (i = 0; mce_device_attrs[i]; i++) { |
1975 | err = sysdev_create_file(sysdev, mce_sysdev_attrs[i]); | 2031 | err = device_create_file(dev, mce_device_attrs[i]); |
1976 | if (err) | 2032 | if (err) |
1977 | goto error; | 2033 | goto error; |
1978 | } | 2034 | } |
1979 | for (j = 0; j < banks; j++) { | 2035 | for (j = 0; j < banks; j++) { |
1980 | err = sysdev_create_file(sysdev, &mce_banks[j].attr); | 2036 | err = device_create_file(dev, &mce_banks[j].attr); |
1981 | if (err) | 2037 | if (err) |
1982 | goto error2; | 2038 | goto error2; |
1983 | } | 2039 | } |
1984 | cpumask_set_cpu(cpu, mce_sysdev_initialized); | 2040 | cpumask_set_cpu(cpu, mce_device_initialized); |
2041 | mce_device[cpu] = dev; | ||
1985 | 2042 | ||
1986 | return 0; | 2043 | return 0; |
1987 | error2: | 2044 | error2: |
1988 | while (--j >= 0) | 2045 | while (--j >= 0) |
1989 | sysdev_remove_file(sysdev, &mce_banks[j].attr); | 2046 | device_remove_file(dev, &mce_banks[j].attr); |
1990 | error: | 2047 | error: |
1991 | while (--i >= 0) | 2048 | while (--i >= 0) |
1992 | sysdev_remove_file(sysdev, mce_sysdev_attrs[i]); | 2049 | device_remove_file(dev, mce_device_attrs[i]); |
1993 | 2050 | ||
1994 | sysdev_unregister(sysdev); | 2051 | device_unregister(dev); |
1995 | 2052 | ||
1996 | return err; | 2053 | return err; |
1997 | } | 2054 | } |
1998 | 2055 | ||
1999 | static __cpuinit void mce_sysdev_remove(unsigned int cpu) | 2056 | static __cpuinit void mce_device_remove(unsigned int cpu) |
2000 | { | 2057 | { |
2001 | struct sys_device *sysdev = &per_cpu(mce_sysdev, cpu); | 2058 | struct device *dev = mce_device[cpu]; |
2002 | int i; | 2059 | int i; |
2003 | 2060 | ||
2004 | if (!cpumask_test_cpu(cpu, mce_sysdev_initialized)) | 2061 | if (!cpumask_test_cpu(cpu, mce_device_initialized)) |
2005 | return; | 2062 | return; |
2006 | 2063 | ||
2007 | for (i = 0; mce_sysdev_attrs[i]; i++) | 2064 | for (i = 0; mce_device_attrs[i]; i++) |
2008 | sysdev_remove_file(sysdev, mce_sysdev_attrs[i]); | 2065 | device_remove_file(dev, mce_device_attrs[i]); |
2009 | 2066 | ||
2010 | for (i = 0; i < banks; i++) | 2067 | for (i = 0; i < banks; i++) |
2011 | sysdev_remove_file(sysdev, &mce_banks[i].attr); | 2068 | device_remove_file(dev, &mce_banks[i].attr); |
2012 | 2069 | ||
2013 | sysdev_unregister(sysdev); | 2070 | device_unregister(dev); |
2014 | cpumask_clear_cpu(cpu, mce_sysdev_initialized); | 2071 | cpumask_clear_cpu(cpu, mce_device_initialized); |
2072 | mce_device[cpu] = NULL; | ||
2015 | } | 2073 | } |
2016 | 2074 | ||
2017 | /* Make sure there are no machine checks on offlined CPUs. */ | 2075 | /* Make sure there are no machine checks on offlined CPUs. */ |
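The conversion also changes the lifetime model: the old per-CPU struct sys_device was static, but struct device is reference counted, so each device is now kzalloc()ed and freed in its ->release() callback once device_unregister() drops the last reference. A sketch of the general pattern (not a copy of the patch; my_subsys is a hypothetical bus, and its registration via subsys_system_register() is not shown):

#include <linux/device.h>
#include <linux/slab.h>

static struct bus_type my_subsys = {
	.name		= "my_subsys",
	.dev_name	= "my_subsys",
};

static void my_device_release(struct device *dev)
{
	kfree(dev);			/* called on the final put */
}

static struct device *my_device_create(unsigned int id)
{
	struct device *dev = kzalloc(sizeof(*dev), GFP_KERNEL);

	if (!dev)
		return NULL;
	dev->id = id;
	dev->bus = &my_subsys;
	dev->release = my_device_release;

	if (device_register(dev)) {
		put_device(dev);	/* ->release() does the kfree() */
		return NULL;
	}
	return dev;
}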
@@ -2061,7 +2119,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
2061 | switch (action) { | 2119 | switch (action) { |
2062 | case CPU_ONLINE: | 2120 | case CPU_ONLINE: |
2063 | case CPU_ONLINE_FROZEN: | 2121 | case CPU_ONLINE_FROZEN: |
2064 | mce_sysdev_create(cpu); | 2122 | mce_device_create(cpu); |
2065 | if (threshold_cpu_callback) | 2123 | if (threshold_cpu_callback) |
2066 | threshold_cpu_callback(action, cpu); | 2124 | threshold_cpu_callback(action, cpu); |
2067 | break; | 2125 | break; |
@@ -2069,7 +2127,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
2069 | case CPU_DEAD_FROZEN: | 2127 | case CPU_DEAD_FROZEN: |
2070 | if (threshold_cpu_callback) | 2128 | if (threshold_cpu_callback) |
2071 | threshold_cpu_callback(action, cpu); | 2129 | threshold_cpu_callback(action, cpu); |
2072 | mce_sysdev_remove(cpu); | 2130 | mce_device_remove(cpu); |
2073 | break; | 2131 | break; |
2074 | case CPU_DOWN_PREPARE: | 2132 | case CPU_DOWN_PREPARE: |
2075 | case CPU_DOWN_PREPARE_FROZEN: | 2133 | case CPU_DOWN_PREPARE_FROZEN: |
@@ -2103,7 +2161,7 @@ static __init void mce_init_banks(void) | |||
2103 | 2161 | ||
2104 | for (i = 0; i < banks; i++) { | 2162 | for (i = 0; i < banks; i++) { |
2105 | struct mce_bank *b = &mce_banks[i]; | 2163 | struct mce_bank *b = &mce_banks[i]; |
2106 | struct sysdev_attribute *a = &b->attr; | 2164 | struct device_attribute *a = &b->attr; |
2107 | 2165 | ||
2108 | sysfs_attr_init(&a->attr); | 2166 | sysfs_attr_init(&a->attr); |
2109 | a->attr.name = b->attrname; | 2167 | a->attr.name = b->attrname; |
@@ -2123,16 +2181,16 @@ static __init int mcheck_init_device(void) | |||
2123 | if (!mce_available(&boot_cpu_data)) | 2181 | if (!mce_available(&boot_cpu_data)) |
2124 | return -EIO; | 2182 | return -EIO; |
2125 | 2183 | ||
2126 | zalloc_cpumask_var(&mce_sysdev_initialized, GFP_KERNEL); | 2184 | zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL); |
2127 | 2185 | ||
2128 | mce_init_banks(); | 2186 | mce_init_banks(); |
2129 | 2187 | ||
2130 | err = sysdev_class_register(&mce_sysdev_class); | 2188 | err = subsys_system_register(&mce_subsys, NULL); |
2131 | if (err) | 2189 | if (err) |
2132 | return err; | 2190 | return err; |
2133 | 2191 | ||
2134 | for_each_online_cpu(i) { | 2192 | for_each_online_cpu(i) { |
2135 | err = mce_sysdev_create(i); | 2193 | err = mce_device_create(i); |
2136 | if (err) | 2194 | if (err) |
2137 | return err; | 2195 | return err; |
2138 | } | 2196 | } |
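The SYSDEV_INT_ATTR conversions above rely on the generic device_show_int()/device_store_int() helpers, which read and write the integer pointed to by the ->var member of struct dev_ext_attribute. A tunable therefore needs only an __ATTR() wrapper plus a pointer; a sketch with a hypothetical my_tunable:

#include <linux/device.h>

static int my_tunable;

static struct dev_ext_attribute dev_attr_my_tunable = {
	__ATTR(my_tunable, 0644, device_show_int, device_store_int),
	&my_tunable
};

/* exposed with: device_create_file(dev, &dev_attr_my_tunable.attr); */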
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index f5474218cffe..e4eeaaf58a47 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c | |||
@@ -17,7 +17,6 @@ | |||
17 | #include <linux/notifier.h> | 17 | #include <linux/notifier.h> |
18 | #include <linux/kobject.h> | 18 | #include <linux/kobject.h> |
19 | #include <linux/percpu.h> | 19 | #include <linux/percpu.h> |
20 | #include <linux/sysdev.h> | ||
21 | #include <linux/errno.h> | 20 | #include <linux/errno.h> |
22 | #include <linux/sched.h> | 21 | #include <linux/sched.h> |
23 | #include <linux/sysfs.h> | 22 | #include <linux/sysfs.h> |
@@ -64,11 +63,9 @@ struct threshold_bank { | |||
64 | }; | 63 | }; |
65 | static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks); | 64 | static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks); |
66 | 65 | ||
67 | #ifdef CONFIG_SMP | ||
68 | static unsigned char shared_bank[NR_BANKS] = { | 66 | static unsigned char shared_bank[NR_BANKS] = { |
69 | 0, 0, 0, 0, 1 | 67 | 0, 0, 0, 0, 1 |
70 | }; | 68 | }; |
71 | #endif | ||
72 | 69 | ||
73 | static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ | 70 | static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ |
74 | 71 | ||
@@ -202,10 +199,9 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) | |||
202 | 199 | ||
203 | if (!block) | 200 | if (!block) |
204 | per_cpu(bank_map, cpu) |= (1 << bank); | 201 | per_cpu(bank_map, cpu) |= (1 << bank); |
205 | #ifdef CONFIG_SMP | ||
206 | if (shared_bank[bank] && c->cpu_core_id) | 202 | if (shared_bank[bank] && c->cpu_core_id) |
207 | break; | 203 | break; |
208 | #endif | 204 | |
209 | offset = setup_APIC_mce(offset, | 205 | offset = setup_APIC_mce(offset, |
210 | (high & MASK_LVTOFF_HI) >> 20); | 206 | (high & MASK_LVTOFF_HI) >> 20); |
211 | 207 | ||
@@ -527,6 +523,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
527 | { | 523 | { |
528 | int i, err = 0; | 524 | int i, err = 0; |
529 | struct threshold_bank *b = NULL; | 525 | struct threshold_bank *b = NULL; |
526 | struct device *dev = mce_device[cpu]; | ||
530 | char name[32]; | 527 | char name[32]; |
531 | 528 | ||
532 | sprintf(name, "threshold_bank%i", bank); | 529 | sprintf(name, "threshold_bank%i", bank); |
@@ -548,8 +545,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
548 | if (!b) | 545 | if (!b) |
549 | goto out; | 546 | goto out; |
550 | 547 | ||
551 | err = sysfs_create_link(&per_cpu(mce_sysdev, cpu).kobj, | 548 | err = sysfs_create_link(&dev->kobj, b->kobj, name); |
552 | b->kobj, name); | ||
553 | if (err) | 549 | if (err) |
554 | goto out; | 550 | goto out; |
555 | 551 | ||
@@ -571,7 +567,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
571 | goto out; | 567 | goto out; |
572 | } | 568 | } |
573 | 569 | ||
574 | b->kobj = kobject_create_and_add(name, &per_cpu(mce_sysdev, cpu).kobj); | 570 | b->kobj = kobject_create_and_add(name, &dev->kobj); |
575 | if (!b->kobj) | 571 | if (!b->kobj) |
576 | goto out_free; | 572 | goto out_free; |
577 | 573 | ||
@@ -591,8 +587,9 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
591 | if (i == cpu) | 587 | if (i == cpu) |
592 | continue; | 588 | continue; |
593 | 589 | ||
594 | err = sysfs_create_link(&per_cpu(mce_sysdev, i).kobj, | 590 | dev = mce_device[i]; |
595 | b->kobj, name); | 591 | if (dev) |
592 | err = sysfs_create_link(&dev->kobj, b->kobj, name); | ||
596 | if (err) | 593 | if (err) |
597 | goto out; | 594 | goto out; |
598 | 595 | ||
@@ -655,6 +652,7 @@ static void deallocate_threshold_block(unsigned int cpu, | |||
655 | static void threshold_remove_bank(unsigned int cpu, int bank) | 652 | static void threshold_remove_bank(unsigned int cpu, int bank) |
656 | { | 653 | { |
657 | struct threshold_bank *b; | 654 | struct threshold_bank *b; |
655 | struct device *dev; | ||
658 | char name[32]; | 656 | char name[32]; |
659 | int i = 0; | 657 | int i = 0; |
660 | 658 | ||
@@ -669,7 +667,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) | |||
669 | #ifdef CONFIG_SMP | 667 | #ifdef CONFIG_SMP |
670 | /* sibling symlink */ | 668 | /* sibling symlink */ |
671 | if (shared_bank[bank] && b->blocks->cpu != cpu) { | 669 | if (shared_bank[bank] && b->blocks->cpu != cpu) { |
672 | sysfs_remove_link(&per_cpu(mce_sysdev, cpu).kobj, name); | 670 | sysfs_remove_link(&mce_device[cpu]->kobj, name); |
673 | per_cpu(threshold_banks, cpu)[bank] = NULL; | 671 | per_cpu(threshold_banks, cpu)[bank] = NULL; |
674 | 672 | ||
675 | return; | 673 | return; |
@@ -681,7 +679,9 @@ static void threshold_remove_bank(unsigned int cpu, int bank) | |||
681 | if (i == cpu) | 679 | if (i == cpu) |
682 | continue; | 680 | continue; |
683 | 681 | ||
684 | sysfs_remove_link(&per_cpu(mce_sysdev, i).kobj, name); | 682 | dev = mce_device[i]; |
683 | if (dev) | ||
684 | sysfs_remove_link(&dev->kobj, name); | ||
685 | per_cpu(threshold_banks, i)[bank] = NULL; | 685 | per_cpu(threshold_banks, i)[bank] = NULL; |
686 | } | 686 | } |
687 | 687 | ||
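Because bank 4 is shared, only the first CPU owns the real threshold_bank kobject; every sibling's device directory just gets a symlink to it. After the conversion the siblings are reached through the global mce_device[] array, which may contain NULL for CPUs whose device has not been created yet, hence the new checks. A sketch of the symlink step with hypothetical names and trimmed error handling:

#include <linux/device.h>
#include <linux/kobject.h>
#include <linux/sysfs.h>

extern struct device *mce_device[];	/* per-CPU devices, may be NULL */

static int link_shared_bank(unsigned int sibling, struct kobject *bank_kobj,
			    const char *name)
{
	struct device *dev = mce_device[sibling];

	if (!dev)			/* sibling not instantiated yet */
		return 0;
	return sysfs_create_link(&dev->kobj, bank_kobj, name);
}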
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 787e06c84ea6..67bb17a37a0a 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
@@ -19,7 +19,6 @@ | |||
19 | #include <linux/kernel.h> | 19 | #include <linux/kernel.h> |
20 | #include <linux/percpu.h> | 20 | #include <linux/percpu.h> |
21 | #include <linux/export.h> | 21 | #include <linux/export.h> |
22 | #include <linux/sysdev.h> | ||
23 | #include <linux/types.h> | 22 | #include <linux/types.h> |
24 | #include <linux/init.h> | 23 | #include <linux/init.h> |
25 | #include <linux/smp.h> | 24 | #include <linux/smp.h> |
@@ -69,16 +68,16 @@ static atomic_t therm_throt_en = ATOMIC_INIT(0); | |||
69 | static u32 lvtthmr_init __read_mostly; | 68 | static u32 lvtthmr_init __read_mostly; |
70 | 69 | ||
71 | #ifdef CONFIG_SYSFS | 70 | #ifdef CONFIG_SYSFS |
72 | #define define_therm_throt_sysdev_one_ro(_name) \ | 71 | #define define_therm_throt_device_one_ro(_name) \ |
73 | static SYSDEV_ATTR(_name, 0444, \ | 72 | static DEVICE_ATTR(_name, 0444, \ |
74 | therm_throt_sysdev_show_##_name, \ | 73 | therm_throt_device_show_##_name, \ |
75 | NULL) \ | 74 | NULL) \ |
76 | 75 | ||
77 | #define define_therm_throt_sysdev_show_func(event, name) \ | 76 | #define define_therm_throt_device_show_func(event, name) \ |
78 | \ | 77 | \ |
79 | static ssize_t therm_throt_sysdev_show_##event##_##name( \ | 78 | static ssize_t therm_throt_device_show_##event##_##name( \ |
80 | struct sys_device *dev, \ | 79 | struct device *dev, \ |
81 | struct sysdev_attribute *attr, \ | 80 | struct device_attribute *attr, \ |
82 | char *buf) \ | 81 | char *buf) \ |
83 | { \ | 82 | { \ |
84 | unsigned int cpu = dev->id; \ | 83 | unsigned int cpu = dev->id; \ |
@@ -95,20 +94,20 @@ static ssize_t therm_throt_sysdev_show_##event##_##name( \ | |||
95 | return ret; \ | 94 | return ret; \ |
96 | } | 95 | } |
97 | 96 | ||
98 | define_therm_throt_sysdev_show_func(core_throttle, count); | 97 | define_therm_throt_device_show_func(core_throttle, count); |
99 | define_therm_throt_sysdev_one_ro(core_throttle_count); | 98 | define_therm_throt_device_one_ro(core_throttle_count); |
100 | 99 | ||
101 | define_therm_throt_sysdev_show_func(core_power_limit, count); | 100 | define_therm_throt_device_show_func(core_power_limit, count); |
102 | define_therm_throt_sysdev_one_ro(core_power_limit_count); | 101 | define_therm_throt_device_one_ro(core_power_limit_count); |
103 | 102 | ||
104 | define_therm_throt_sysdev_show_func(package_throttle, count); | 103 | define_therm_throt_device_show_func(package_throttle, count); |
105 | define_therm_throt_sysdev_one_ro(package_throttle_count); | 104 | define_therm_throt_device_one_ro(package_throttle_count); |
106 | 105 | ||
107 | define_therm_throt_sysdev_show_func(package_power_limit, count); | 106 | define_therm_throt_device_show_func(package_power_limit, count); |
108 | define_therm_throt_sysdev_one_ro(package_power_limit_count); | 107 | define_therm_throt_device_one_ro(package_power_limit_count); |
109 | 108 | ||
110 | static struct attribute *thermal_throttle_attrs[] = { | 109 | static struct attribute *thermal_throttle_attrs[] = { |
111 | &attr_core_throttle_count.attr, | 110 | &dev_attr_core_throttle_count.attr, |
112 | NULL | 111 | NULL |
113 | }; | 112 | }; |
114 | 113 | ||
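The renamed macros matter for the attribute array above: DEVICE_ATTR(name, ...) defines a variable called dev_attr_<name>, which is why attr_core_throttle_count became dev_attr_core_throttle_count. A hand-written illustration (not generated output) of what define_therm_throt_device_show_func(core_throttle, count) plus define_therm_throt_device_one_ro(core_throttle_count) expand to, with the body trimmed:

static ssize_t therm_throt_device_show_core_throttle_count(
			struct device *dev,
			struct device_attribute *attr, char *buf)
{
	unsigned int cpu = dev->id;

	/* ... sum this CPU's core_throttle event counts into buf ... */
	return 0;
}

static DEVICE_ATTR(core_throttle_count, 0444,
		   therm_throt_device_show_core_throttle_count, NULL);
/* DEVICE_ATTR() defines dev_attr_core_throttle_count */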
@@ -223,36 +222,36 @@ static int thresh_event_valid(int event) | |||
223 | 222 | ||
224 | #ifdef CONFIG_SYSFS | 223 | #ifdef CONFIG_SYSFS |
225 | /* Add/Remove thermal_throttle interface for CPU device: */ | 224 | /* Add/Remove thermal_throttle interface for CPU device: */ |
226 | static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev, | 225 | static __cpuinit int thermal_throttle_add_dev(struct device *dev, |
227 | unsigned int cpu) | 226 | unsigned int cpu) |
228 | { | 227 | { |
229 | int err; | 228 | int err; |
230 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 229 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
231 | 230 | ||
232 | err = sysfs_create_group(&sys_dev->kobj, &thermal_attr_group); | 231 | err = sysfs_create_group(&dev->kobj, &thermal_attr_group); |
233 | if (err) | 232 | if (err) |
234 | return err; | 233 | return err; |
235 | 234 | ||
236 | if (cpu_has(c, X86_FEATURE_PLN)) | 235 | if (cpu_has(c, X86_FEATURE_PLN)) |
237 | err = sysfs_add_file_to_group(&sys_dev->kobj, | 236 | err = sysfs_add_file_to_group(&dev->kobj, |
238 | &attr_core_power_limit_count.attr, | 237 | &dev_attr_core_power_limit_count.attr, |
239 | thermal_attr_group.name); | 238 | thermal_attr_group.name); |
240 | if (cpu_has(c, X86_FEATURE_PTS)) { | 239 | if (cpu_has(c, X86_FEATURE_PTS)) { |
241 | err = sysfs_add_file_to_group(&sys_dev->kobj, | 240 | err = sysfs_add_file_to_group(&dev->kobj, |
242 | &attr_package_throttle_count.attr, | 241 | &dev_attr_package_throttle_count.attr, |
243 | thermal_attr_group.name); | 242 | thermal_attr_group.name); |
244 | if (cpu_has(c, X86_FEATURE_PLN)) | 243 | if (cpu_has(c, X86_FEATURE_PLN)) |
245 | err = sysfs_add_file_to_group(&sys_dev->kobj, | 244 | err = sysfs_add_file_to_group(&dev->kobj, |
246 | &attr_package_power_limit_count.attr, | 245 | &dev_attr_package_power_limit_count.attr, |
247 | thermal_attr_group.name); | 246 | thermal_attr_group.name); |
248 | } | 247 | } |
249 | 248 | ||
250 | return err; | 249 | return err; |
251 | } | 250 | } |
252 | 251 | ||
253 | static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) | 252 | static __cpuinit void thermal_throttle_remove_dev(struct device *dev) |
254 | { | 253 | { |
255 | sysfs_remove_group(&sys_dev->kobj, &thermal_attr_group); | 254 | sysfs_remove_group(&dev->kobj, &thermal_attr_group); |
256 | } | 255 | } |
257 | 256 | ||
258 | /* Mutex protecting device creation against CPU hotplug: */ | 257 | /* Mutex protecting device creation against CPU hotplug: */ |
@@ -265,16 +264,16 @@ thermal_throttle_cpu_callback(struct notifier_block *nfb, | |||
265 | void *hcpu) | 264 | void *hcpu) |
266 | { | 265 | { |
267 | unsigned int cpu = (unsigned long)hcpu; | 266 | unsigned int cpu = (unsigned long)hcpu; |
268 | struct sys_device *sys_dev; | 267 | struct device *dev; |
269 | int err = 0; | 268 | int err = 0; |
270 | 269 | ||
271 | sys_dev = get_cpu_sysdev(cpu); | 270 | dev = get_cpu_device(cpu); |
272 | 271 | ||
273 | switch (action) { | 272 | switch (action) { |
274 | case CPU_UP_PREPARE: | 273 | case CPU_UP_PREPARE: |
275 | case CPU_UP_PREPARE_FROZEN: | 274 | case CPU_UP_PREPARE_FROZEN: |
276 | mutex_lock(&therm_cpu_lock); | 275 | mutex_lock(&therm_cpu_lock); |
277 | err = thermal_throttle_add_dev(sys_dev, cpu); | 276 | err = thermal_throttle_add_dev(dev, cpu); |
278 | mutex_unlock(&therm_cpu_lock); | 277 | mutex_unlock(&therm_cpu_lock); |
279 | WARN_ON(err); | 278 | WARN_ON(err); |
280 | break; | 279 | break; |
@@ -283,7 +282,7 @@ thermal_throttle_cpu_callback(struct notifier_block *nfb, | |||
283 | case CPU_DEAD: | 282 | case CPU_DEAD: |
284 | case CPU_DEAD_FROZEN: | 283 | case CPU_DEAD_FROZEN: |
285 | mutex_lock(&therm_cpu_lock); | 284 | mutex_lock(&therm_cpu_lock); |
286 | thermal_throttle_remove_dev(sys_dev); | 285 | thermal_throttle_remove_dev(dev); |
287 | mutex_unlock(&therm_cpu_lock); | 286 | mutex_unlock(&therm_cpu_lock); |
288 | break; | 287 | break; |
289 | } | 288 | } |
@@ -310,7 +309,7 @@ static __init int thermal_throttle_init_device(void) | |||
310 | #endif | 309 | #endif |
311 | /* connect live CPUs to sysfs */ | 310 | /* connect live CPUs to sysfs */ |
312 | for_each_online_cpu(cpu) { | 311 | for_each_online_cpu(cpu) { |
313 | err = thermal_throttle_add_dev(get_cpu_sysdev(cpu), cpu); | 312 | err = thermal_throttle_add_dev(get_cpu_device(cpu), cpu); |
314 | WARN_ON(err); | 313 | WARN_ON(err); |
315 | } | 314 | } |
316 | #ifdef CONFIG_HOTPLUG_CPU | 315 | #ifdef CONFIG_HOTPLUG_CPU |
@@ -323,17 +322,6 @@ device_initcall(thermal_throttle_init_device); | |||
323 | 322 | ||
324 | #endif /* CONFIG_SYSFS */ | 323 | #endif /* CONFIG_SYSFS */ |
325 | 324 | ||
326 | /* | ||
327 | * Set up the most two significant bit to notify mce log that this thermal | ||
328 | * event type. | ||
329 | * This is a temp solution. May be changed in the future with mce log | ||
330 | * infrasture. | ||
331 | */ | ||
332 | #define CORE_THROTTLED (0) | ||
333 | #define CORE_POWER_LIMIT ((__u64)1 << 62) | ||
334 | #define PACKAGE_THROTTLED ((__u64)2 << 62) | ||
335 | #define PACKAGE_POWER_LIMIT ((__u64)3 << 62) | ||
336 | |||
337 | static void notify_thresholds(__u64 msr_val) | 325 | static void notify_thresholds(__u64 msr_val) |
338 | { | 326 | { |
339 | /* check whether the interrupt handler is defined; | 327 | /* check whether the interrupt handler is defined; |
@@ -363,27 +351,23 @@ static void intel_thermal_interrupt(void) | |||
363 | if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT, | 351 | if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT, |
364 | THERMAL_THROTTLING_EVENT, | 352 | THERMAL_THROTTLING_EVENT, |
365 | CORE_LEVEL) != 0) | 353 | CORE_LEVEL) != 0) |
366 | mce_log_therm_throt_event(CORE_THROTTLED | msr_val); | 354 | mce_log_therm_throt_event(msr_val); |
367 | 355 | ||
368 | if (this_cpu_has(X86_FEATURE_PLN)) | 356 | if (this_cpu_has(X86_FEATURE_PLN)) |
369 | if (therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT, | 357 | therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT, |
370 | POWER_LIMIT_EVENT, | 358 | POWER_LIMIT_EVENT, |
371 | CORE_LEVEL) != 0) | 359 | CORE_LEVEL); |
372 | mce_log_therm_throt_event(CORE_POWER_LIMIT | msr_val); | ||
373 | 360 | ||
374 | if (this_cpu_has(X86_FEATURE_PTS)) { | 361 | if (this_cpu_has(X86_FEATURE_PTS)) { |
375 | rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val); | 362 | rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val); |
376 | if (therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT, | 363 | therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT, |
377 | THERMAL_THROTTLING_EVENT, | 364 | THERMAL_THROTTLING_EVENT, |
378 | PACKAGE_LEVEL) != 0) | 365 | PACKAGE_LEVEL); |
379 | mce_log_therm_throt_event(PACKAGE_THROTTLED | msr_val); | ||
380 | if (this_cpu_has(X86_FEATURE_PLN)) | 366 | if (this_cpu_has(X86_FEATURE_PLN)) |
381 | if (therm_throt_process(msr_val & | 367 | therm_throt_process(msr_val & |
382 | PACKAGE_THERM_STATUS_POWER_LIMIT, | 368 | PACKAGE_THERM_STATUS_POWER_LIMIT, |
383 | POWER_LIMIT_EVENT, | 369 | POWER_LIMIT_EVENT, |
384 | PACKAGE_LEVEL) != 0) | 370 | PACKAGE_LEVEL); |
385 | mce_log_therm_throt_event(PACKAGE_POWER_LIMIT | ||
386 | | msr_val); | ||
387 | } | 371 | } |
388 | } | 372 | } |
389 | 373 | ||
@@ -397,8 +381,8 @@ static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt; | |||
397 | 381 | ||
398 | asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) | 382 | asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) |
399 | { | 383 | { |
400 | exit_idle(); | ||
401 | irq_enter(); | 384 | irq_enter(); |
385 | exit_idle(); | ||
402 | inc_irq_stat(irq_thermal_count); | 386 | inc_irq_stat(irq_thermal_count); |
403 | smp_thermal_vector(); | 387 | smp_thermal_vector(); |
404 | irq_exit(); | 388 | irq_exit(); |
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c index d746df2909c9..aa578cadb940 100644 --- a/arch/x86/kernel/cpu/mcheck/threshold.c +++ b/arch/x86/kernel/cpu/mcheck/threshold.c | |||
@@ -19,8 +19,8 @@ void (*mce_threshold_vector)(void) = default_threshold_interrupt; | |||
19 | 19 | ||
20 | asmlinkage void smp_threshold_interrupt(void) | 20 | asmlinkage void smp_threshold_interrupt(void) |
21 | { | 21 | { |
22 | exit_idle(); | ||
23 | irq_enter(); | 22 | irq_enter(); |
23 | exit_idle(); | ||
24 | inc_irq_stat(irq_threshold_count); | 24 | inc_irq_stat(irq_threshold_count); |
25 | mce_threshold_vector(); | 25 | mce_threshold_vector(); |
26 | irq_exit(); | 26 | irq_exit(); |
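Both therm_throt.c and threshold.c get the same two-line reorder: irq_enter() now runs before exit_idle(). The usual rationale for this ordering is that irq_enter() tells RCU and the nohz code that the CPU has left its idle (extended quiescent) state, so anything exit_idle() triggers, such as idle notifiers, runs with RCU watching. A sketch of the resulting canonical x86 vector-handler shape, with a hypothetical my_vector_work() payload:

#include <linux/hardirq.h>
#include <linux/linkage.h>
#include <linux/ptrace.h>
#include <asm/idle.h>

static void my_vector_work(void) { }

asmlinkage void smp_my_interrupt(struct pt_regs *regs)
{
	irq_enter();		/* mark IRQ context first (RCU/nohz) */
	exit_idle();		/* idle notifiers run with RCU watching */
	my_vector_work();
	irq_exit();
}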
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index a71efcdbb092..97b26356e9ee 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c | |||
@@ -547,6 +547,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, | |||
547 | 547 | ||
548 | if (tmp != mask_lo) { | 548 | if (tmp != mask_lo) { |
549 | printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n"); | 549 | printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n"); |
550 | add_taint(TAINT_FIRMWARE_WORKAROUND); | ||
550 | mask_lo = tmp; | 551 | mask_lo = tmp; |
551 | } | 552 | } |
552 | } | 553 | } |
@@ -693,6 +694,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock) | |||
693 | 694 | ||
694 | /* Disable MTRRs, and set the default type to uncached */ | 695 | /* Disable MTRRs, and set the default type to uncached */ |
695 | mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi); | 696 | mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi); |
697 | wbinvd(); | ||
696 | } | 698 | } |
697 | 699 | ||
698 | static void post_set(void) __releases(set_atomicity_lock) | 700 | static void post_set(void) __releases(set_atomicity_lock) |
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 640891014b2a..5adce1040b11 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -312,12 +312,8 @@ int x86_setup_perfctr(struct perf_event *event) | |||
312 | return -EOPNOTSUPP; | 312 | return -EOPNOTSUPP; |
313 | } | 313 | } |
314 | 314 | ||
315 | /* | ||
316 | * Do not allow config1 (extended registers) to propagate, | ||
317 | * there's no sane user-space generalization yet: | ||
318 | */ | ||
319 | if (attr->type == PERF_TYPE_RAW) | 315 | if (attr->type == PERF_TYPE_RAW) |
320 | return 0; | 316 | return x86_pmu_extra_regs(event->attr.config, event); |
321 | 317 | ||
322 | if (attr->type == PERF_TYPE_HW_CACHE) | 318 | if (attr->type == PERF_TYPE_HW_CACHE) |
323 | return set_ext_hw_attr(hwc, event); | 319 | return set_ext_hw_attr(hwc, event); |
@@ -488,18 +484,195 @@ static inline int is_x86_event(struct perf_event *event) | |||
488 | return event->pmu == &pmu; | 484 | return event->pmu == &pmu; |
489 | } | 485 | } |
490 | 486 | ||
487 | /* | ||
488 | * Event scheduler state: | ||
489 | * | ||
490 | * Assign events iterating over all events and counters, starting | ||
491 | * with the events of least weight. Keep the current iterator | ||
492 | * state in struct sched_state. | ||
493 | */ | ||
494 | struct sched_state { | ||
495 | int weight; | ||
496 | int event; /* event index */ | ||
497 | int counter; /* counter index */ | ||
498 | int unassigned; /* number of events left to assign */ | ||
499 | unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | ||
500 | }; | ||
501 | |||
502 | /* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */ | ||
503 | #define SCHED_STATES_MAX 2 | ||
504 | |||
505 | struct perf_sched { | ||
506 | int max_weight; | ||
507 | int max_events; | ||
508 | struct event_constraint **constraints; | ||
509 | struct sched_state state; | ||
510 | int saved_states; | ||
511 | struct sched_state saved[SCHED_STATES_MAX]; | ||
512 | }; | ||
513 | |||
514 | /* | ||
515 | * Initialize interator that runs through all events and counters. | ||
516 | */ | ||
517 | static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c, | ||
518 | int num, int wmin, int wmax) | ||
519 | { | ||
520 | int idx; | ||
521 | |||
522 | memset(sched, 0, sizeof(*sched)); | ||
523 | sched->max_events = num; | ||
524 | sched->max_weight = wmax; | ||
525 | sched->constraints = c; | ||
526 | |||
527 | for (idx = 0; idx < num; idx++) { | ||
528 | if (c[idx]->weight == wmin) | ||
529 | break; | ||
530 | } | ||
531 | |||
532 | sched->state.event = idx; /* start with min weight */ | ||
533 | sched->state.weight = wmin; | ||
534 | sched->state.unassigned = num; | ||
535 | } | ||
536 | |||
537 | static void perf_sched_save_state(struct perf_sched *sched) | ||
538 | { | ||
539 | if (WARN_ON_ONCE(sched->saved_states >= SCHED_STATES_MAX)) | ||
540 | return; | ||
541 | |||
542 | sched->saved[sched->saved_states] = sched->state; | ||
543 | sched->saved_states++; | ||
544 | } | ||
545 | |||
546 | static bool perf_sched_restore_state(struct perf_sched *sched) | ||
547 | { | ||
548 | if (!sched->saved_states) | ||
549 | return false; | ||
550 | |||
551 | sched->saved_states--; | ||
552 | sched->state = sched->saved[sched->saved_states]; | ||
553 | |||
554 | /* continue with next counter: */ | ||
555 | clear_bit(sched->state.counter++, sched->state.used); | ||
556 | |||
557 | return true; | ||
558 | } | ||
559 | |||
560 | /* | ||
561 | * Select a counter for the current event to schedule. Return true on | ||
562 | * success. | ||
563 | */ | ||
564 | static bool __perf_sched_find_counter(struct perf_sched *sched) | ||
565 | { | ||
566 | struct event_constraint *c; | ||
567 | int idx; | ||
568 | |||
569 | if (!sched->state.unassigned) | ||
570 | return false; | ||
571 | |||
572 | if (sched->state.event >= sched->max_events) | ||
573 | return false; | ||
574 | |||
575 | c = sched->constraints[sched->state.event]; | ||
576 | |||
577 | /* Prefer fixed purpose counters */ | ||
578 | if (x86_pmu.num_counters_fixed) { | ||
579 | idx = X86_PMC_IDX_FIXED; | ||
580 | for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_MAX) { | ||
581 | if (!__test_and_set_bit(idx, sched->state.used)) | ||
582 | goto done; | ||
583 | } | ||
584 | } | ||
585 | /* Grab the first unused counter starting with idx */ | ||
586 | idx = sched->state.counter; | ||
587 | for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_FIXED) { | ||
588 | if (!__test_and_set_bit(idx, sched->state.used)) | ||
589 | goto done; | ||
590 | } | ||
591 | |||
592 | return false; | ||
593 | |||
594 | done: | ||
595 | sched->state.counter = idx; | ||
596 | |||
597 | if (c->overlap) | ||
598 | perf_sched_save_state(sched); | ||
599 | |||
600 | return true; | ||
601 | } | ||
602 | |||
603 | static bool perf_sched_find_counter(struct perf_sched *sched) | ||
604 | { | ||
605 | while (!__perf_sched_find_counter(sched)) { | ||
606 | if (!perf_sched_restore_state(sched)) | ||
607 | return false; | ||
608 | } | ||
609 | |||
610 | return true; | ||
611 | } | ||
612 | |||
613 | /* | ||
614 | * Go through all unassigned events and find the next one to schedule. | ||
615 | * Take events with the least weight first. Return true on success. | ||
616 | */ | ||
617 | static bool perf_sched_next_event(struct perf_sched *sched) | ||
618 | { | ||
619 | struct event_constraint *c; | ||
620 | |||
621 | if (!sched->state.unassigned || !--sched->state.unassigned) | ||
622 | return false; | ||
623 | |||
624 | do { | ||
625 | /* next event */ | ||
626 | sched->state.event++; | ||
627 | if (sched->state.event >= sched->max_events) { | ||
628 | /* next weight */ | ||
629 | sched->state.event = 0; | ||
630 | sched->state.weight++; | ||
631 | if (sched->state.weight > sched->max_weight) | ||
632 | return false; | ||
633 | } | ||
634 | c = sched->constraints[sched->state.event]; | ||
635 | } while (c->weight != sched->state.weight); | ||
636 | |||
637 | sched->state.counter = 0; /* start with first counter */ | ||
638 | |||
639 | return true; | ||
640 | } | ||
641 | |||
642 | /* | ||
643 | * Assign a counter for each event. | ||
644 | */ | ||
645 | static int perf_assign_events(struct event_constraint **constraints, int n, | ||
646 | int wmin, int wmax, int *assign) | ||
647 | { | ||
648 | struct perf_sched sched; | ||
649 | |||
650 | perf_sched_init(&sched, constraints, n, wmin, wmax); | ||
651 | |||
652 | do { | ||
653 | if (!perf_sched_find_counter(&sched)) | ||
654 | break; /* failed */ | ||
655 | if (assign) | ||
656 | assign[sched.state.event] = sched.state.counter; | ||
657 | } while (perf_sched_next_event(&sched)); | ||
658 | |||
659 | return sched.state.unassigned; | ||
660 | } | ||
661 | |||
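The replacement scheduler above is a constrained assignment pass: events are visited in order of increasing constraint weight (weight = how many counters an event may use), each takes the first free counter in its idxmsk, fixed-purpose counters are preferred, and perf_sched_save_state()/perf_sched_restore_state() backtrack over counters already tried when constraint sets overlap. A stand-alone user-space simulation of just the greedy weight-ordered core (made-up masks, no backtracking, not kernel code):

#include <stdio.h>

#define NCTRS 4

struct event { unsigned int idxmsk; int weight; };

static int popcount(unsigned int m)
{
	int n = 0;

	for (; m; m &= m - 1)
		n++;
	return n;
}

int main(void)
{
	struct event ev[3] = {
		{ 0x1, 0 },	/* may only use counter 0 */
		{ 0xf, 0 },	/* may use any counter */
		{ 0x3, 0 },	/* may use counters 0-1 */
	};
	int assign[3] = { -1, -1, -1 };
	unsigned int used = 0;
	int w, i, c;

	for (i = 0; i < 3; i++)
		ev[i].weight = popcount(ev[i].idxmsk);

	/* most constrained (least weight) events grab counters first */
	for (w = 1; w <= NCTRS; w++)
		for (i = 0; i < 3; i++) {
			if (ev[i].weight != w || assign[i] >= 0)
				continue;
			for (c = 0; c < NCTRS; c++)
				if ((ev[i].idxmsk >> c & 1) &&
				    !(used >> c & 1)) {
					used |= 1u << c;
					assign[i] = c;
					break;
				}
		}

	for (i = 0; i < 3; i++)
		printf("event %d -> counter %d\n", i, assign[i]);
	return 0;	/* prints 0->0, 1->2, 2->1 */
}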
491 | int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) | 662 | int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) |
492 | { | 663 | { |
493 | struct event_constraint *c, *constraints[X86_PMC_IDX_MAX]; | 664 | struct event_constraint *c, *constraints[X86_PMC_IDX_MAX]; |
494 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | 665 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
495 | int i, j, w, wmax, num = 0; | 666 | int i, wmin, wmax, num = 0; |
496 | struct hw_perf_event *hwc; | 667 | struct hw_perf_event *hwc; |
497 | 668 | ||
498 | bitmap_zero(used_mask, X86_PMC_IDX_MAX); | 669 | bitmap_zero(used_mask, X86_PMC_IDX_MAX); |
499 | 670 | ||
500 | for (i = 0; i < n; i++) { | 671 | for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) { |
501 | c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]); | 672 | c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]); |
502 | constraints[i] = c; | 673 | constraints[i] = c; |
674 | wmin = min(wmin, c->weight); | ||
675 | wmax = max(wmax, c->weight); | ||
503 | } | 676 | } |
504 | 677 | ||
505 | /* | 678 | /* |
@@ -525,59 +698,11 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) | |||
525 | if (assign) | 698 | if (assign) |
526 | assign[i] = hwc->idx; | 699 | assign[i] = hwc->idx; |
527 | } | 700 | } |
528 | if (i == n) | ||
529 | goto done; | ||
530 | |||
531 | /* | ||
532 | * begin slow path | ||
533 | */ | ||
534 | |||
535 | bitmap_zero(used_mask, X86_PMC_IDX_MAX); | ||
536 | |||
537 | /* | ||
538 | * weight = number of possible counters | ||
539 | * | ||
540 | * 1 = most constrained, only works on one counter | ||
541 | * wmax = least constrained, works on any counter | ||
542 | * | ||
543 | * assign events to counters starting with most | ||
544 | * constrained events. | ||
545 | */ | ||
546 | wmax = x86_pmu.num_counters; | ||
547 | |||
548 | /* | ||
549 | * when fixed event counters are present, | ||
550 | * wmax is incremented by 1 to account | ||
551 | * for one more choice | ||
552 | */ | ||
553 | if (x86_pmu.num_counters_fixed) | ||
554 | wmax++; | ||
555 | |||
556 | for (w = 1, num = n; num && w <= wmax; w++) { | ||
557 | /* for each event */ | ||
558 | for (i = 0; num && i < n; i++) { | ||
559 | c = constraints[i]; | ||
560 | hwc = &cpuc->event_list[i]->hw; | ||
561 | |||
562 | if (c->weight != w) | ||
563 | continue; | ||
564 | 701 | ||
565 | for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) { | 702 | /* slow path */ |
566 | if (!test_bit(j, used_mask)) | 703 | if (i != n) |
567 | break; | 704 | num = perf_assign_events(constraints, n, wmin, wmax, assign); |
568 | } | ||
569 | |||
570 | if (j == X86_PMC_IDX_MAX) | ||
571 | break; | ||
572 | 705 | ||
573 | __set_bit(j, used_mask); | ||
574 | |||
575 | if (assign) | ||
576 | assign[i] = j; | ||
577 | num--; | ||
578 | } | ||
579 | } | ||
580 | done: | ||
581 | /* | 706 | /* |
582 | * scheduling failed or is just a simulation, | 707 | * scheduling failed or is just a simulation, |
583 | * free resources if necessary | 708 | * free resources if necessary |
@@ -588,7 +713,7 @@ done: | |||
588 | x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]); | 713 | x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]); |
589 | } | 714 | } |
590 | } | 715 | } |
591 | return num ? -ENOSPC : 0; | 716 | return num ? -EINVAL : 0; |
592 | } | 717 | } |
593 | 718 | ||
594 | /* | 719 | /* |
@@ -607,7 +732,7 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, | |||
607 | 732 | ||
608 | if (is_x86_event(leader)) { | 733 | if (is_x86_event(leader)) { |
609 | if (n >= max_count) | 734 | if (n >= max_count) |
610 | return -ENOSPC; | 735 | return -EINVAL; |
611 | cpuc->event_list[n] = leader; | 736 | cpuc->event_list[n] = leader; |
612 | n++; | 737 | n++; |
613 | } | 738 | } |
@@ -620,7 +745,7 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, | |||
620 | continue; | 745 | continue; |
621 | 746 | ||
622 | if (n >= max_count) | 747 | if (n >= max_count) |
623 | return -ENOSPC; | 748 | return -EINVAL; |
624 | 749 | ||
625 | cpuc->event_list[n] = event; | 750 | cpuc->event_list[n] = event; |
626 | n++; | 751 | n++; |
@@ -1123,6 +1248,7 @@ static void __init pmu_check_apic(void) | |||
1123 | 1248 | ||
1124 | static int __init init_hw_perf_events(void) | 1249 | static int __init init_hw_perf_events(void) |
1125 | { | 1250 | { |
1251 | struct x86_pmu_quirk *quirk; | ||
1126 | struct event_constraint *c; | 1252 | struct event_constraint *c; |
1127 | int err; | 1253 | int err; |
1128 | 1254 | ||
@@ -1151,8 +1277,8 @@ static int __init init_hw_perf_events(void) | |||
1151 | 1277 | ||
1152 | pr_cont("%s PMU driver.\n", x86_pmu.name); | 1278 | pr_cont("%s PMU driver.\n", x86_pmu.name); |
1153 | 1279 | ||
1154 | if (x86_pmu.quirks) | 1280 | for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next) |
1155 | x86_pmu.quirks(); | 1281 | quirk->func(); |
1156 | 1282 | ||
1157 | if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { | 1283 | if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { |
1158 | WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", | 1284 | WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", |
@@ -1175,12 +1301,18 @@ static int __init init_hw_perf_events(void) | |||
1175 | 1301 | ||
1176 | unconstrained = (struct event_constraint) | 1302 | unconstrained = (struct event_constraint) |
1177 | __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, | 1303 | __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, |
1178 | 0, x86_pmu.num_counters); | 1304 | 0, x86_pmu.num_counters, 0); |
1179 | 1305 | ||
1180 | if (x86_pmu.event_constraints) { | 1306 | if (x86_pmu.event_constraints) { |
1307 | /* | ||
1308 | * the event on fixed counter2 (REF_CYCLES) only works on this | ||
1309 | * counter, so do not extend mask to generic counters | ||
1310 | */ | ||
1181 | for_each_event_constraint(c, x86_pmu.event_constraints) { | 1311 | for_each_event_constraint(c, x86_pmu.event_constraints) { |
1182 | if (c->cmask != X86_RAW_EVENT_MASK) | 1312 | if (c->cmask != X86_RAW_EVENT_MASK |
1313 | || c->idxmsk64 == X86_PMC_MSK_FIXED_REF_CYCLES) { | ||
1183 | continue; | 1314 | continue; |
1315 | } | ||
1184 | 1316 | ||
1185 | c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; | 1317 | c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; |
1186 | c->weight += x86_pmu.num_counters; | 1318 | c->weight += x86_pmu.num_counters; |
@@ -1316,7 +1448,7 @@ static int validate_event(struct perf_event *event) | |||
1316 | c = x86_pmu.get_event_constraints(fake_cpuc, event); | 1448 | c = x86_pmu.get_event_constraints(fake_cpuc, event); |
1317 | 1449 | ||
1318 | if (!c || !c->weight) | 1450 | if (!c || !c->weight) |
1319 | ret = -ENOSPC; | 1451 | ret = -EINVAL; |
1320 | 1452 | ||
1321 | if (x86_pmu.put_event_constraints) | 1453 | if (x86_pmu.put_event_constraints) |
1322 | x86_pmu.put_event_constraints(fake_cpuc, event); | 1454 | x86_pmu.put_event_constraints(fake_cpuc, event); |
@@ -1341,7 +1473,7 @@ static int validate_group(struct perf_event *event) | |||
1341 | { | 1473 | { |
1342 | struct perf_event *leader = event->group_leader; | 1474 | struct perf_event *leader = event->group_leader; |
1343 | struct cpu_hw_events *fake_cpuc; | 1475 | struct cpu_hw_events *fake_cpuc; |
1344 | int ret = -ENOSPC, n; | 1476 | int ret = -EINVAL, n; |
1345 | 1477 | ||
1346 | fake_cpuc = allocate_fake_cpuc(); | 1478 | fake_cpuc = allocate_fake_cpuc(); |
1347 | if (IS_ERR(fake_cpuc)) | 1479 | if (IS_ERR(fake_cpuc)) |
@@ -1570,3 +1702,15 @@ unsigned long perf_misc_flags(struct pt_regs *regs) | |||
1570 | 1702 | ||
1571 | return misc; | 1703 | return misc; |
1572 | } | 1704 | } |
1705 | |||
1706 | void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap) | ||
1707 | { | ||
1708 | cap->version = x86_pmu.version; | ||
1709 | cap->num_counters_gp = x86_pmu.num_counters; | ||
1710 | cap->num_counters_fixed = x86_pmu.num_counters_fixed; | ||
1711 | cap->bit_width_gp = x86_pmu.cntval_bits; | ||
1712 | cap->bit_width_fixed = x86_pmu.cntval_bits; | ||
1713 | cap->events_mask = (unsigned int)x86_pmu.events_maskl; | ||
1714 | cap->events_mask_len = x86_pmu.events_mask_len; | ||
1715 | } | ||
1716 | EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability); | ||
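The new export gives other subsystems a stable way to query counter geometry without peeking at x86_pmu. A hypothetical consumer (the caller and message are invented for illustration; only the struct fields filled in above are assumed):

	struct x86_pmu_capability cap;

	perf_get_x86_pmu_capability(&cap);
	pr_info("PMU v%d: %d GP counters (%d bits wide), %d fixed counters\n",
		cap.version, cap.num_counters_gp, cap.bit_width_gp,
		cap.num_counters_fixed);

Note that bit_width_fixed is reported with the same cntval_bits value as the general-purpose width.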
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index b9698d40ac4b..c30c807ddc72 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h | |||
@@ -45,6 +45,7 @@ struct event_constraint { | |||
45 | u64 code; | 45 | u64 code; |
46 | u64 cmask; | 46 | u64 cmask; |
47 | int weight; | 47 | int weight; |
48 | int overlap; | ||
48 | }; | 49 | }; |
49 | 50 | ||
50 | struct amd_nb { | 51 | struct amd_nb { |
@@ -146,20 +147,47 @@ struct cpu_hw_events { | |||
146 | /* | 147 | /* |
147 | * AMD specific bits | 148 | * AMD specific bits |
148 | */ | 149 | */ |
149 | struct amd_nb *amd_nb; | 150 | struct amd_nb *amd_nb; |
151 | /* Inverted mask of bits to clear in the perf_ctr ctrl registers */ | ||
152 | u64 perf_ctr_virt_mask; | ||
150 | 153 | ||
151 | void *kfree_on_online; | 154 | void *kfree_on_online; |
152 | }; | 155 | }; |
153 | 156 | ||
154 | #define __EVENT_CONSTRAINT(c, n, m, w) {\ | 157 | #define __EVENT_CONSTRAINT(c, n, m, w, o) {\ |
155 | { .idxmsk64 = (n) }, \ | 158 | { .idxmsk64 = (n) }, \ |
156 | .code = (c), \ | 159 | .code = (c), \ |
157 | .cmask = (m), \ | 160 | .cmask = (m), \ |
158 | .weight = (w), \ | 161 | .weight = (w), \ |
162 | .overlap = (o), \ | ||
159 | } | 163 | } |
160 | 164 | ||
161 | #define EVENT_CONSTRAINT(c, n, m) \ | 165 | #define EVENT_CONSTRAINT(c, n, m) \ |
162 | __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) | 166 | __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0) |
167 | |||
168 | /* | ||
169 | * The overlap flag marks event constraints with overlapping counter | ||
170 | * masks. This is the case if the counter mask of such an event is not | ||
171 | * a subset of any other counter mask of a constraint with an equal or | ||
172 | * higher weight, e.g.: | ||
173 | * | ||
174 | * c_overlaps = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0); | ||
175 | * c_another1 = EVENT_CONSTRAINT(0, 0x07, 0); | ||
176 | * c_another2 = EVENT_CONSTRAINT(0, 0x38, 0); | ||
177 | * | ||
178 | * The event scheduler may not select the correct counter in the first | ||
179 | * cycle because it needs to know which subsequent events will be | ||
180 | * scheduled. It may fail to schedule the events then. So we set the | ||
181 | * overlap flag for such constraints to give the scheduler a hint which | ||
182 | * events to select for counter rescheduling. | ||
183 | * | ||
184 | * Care must be taken as the rescheduling algorithm is O(n!) which | ||
185 | * will increase scheduling cycles for an over-committed system | ||
186 | * dramatically. The number of such EVENT_CONSTRAINT_OVERLAP() macros | ||
187 | * and their counter masks must be kept to a minimum. | ||
188 | */ | ||
189 | #define EVENT_CONSTRAINT_OVERLAP(c, n, m) \ | ||
190 | __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1) | ||
163 | 191 | ||
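To make the comment's example concrete, here is one failing schedule and its repair (illustrative counter numbers only):

	c_overlaps (mask 0x09, weight 2): may use counters {0, 3}
	c_another2 (mask 0x38, weight 3): may use counters {3, 4, 5}

	greedy pass: c_overlaps is placed first (lower weight) -> counter 3
	three c_another2 events then need {3, 4, 5}; counter 3 is taken -> FAIL
	reschedule: move c_overlaps to counter 0, the three events fit -> OK

Without the overlap flag the scheduler has no hint that c_overlaps is the assignment worth revisiting.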
164 | /* | 192 | /* |
165 | * Constraint on the Event code. | 193 | * Constraint on the Event code. |
@@ -235,6 +263,11 @@ union perf_capabilities { | |||
235 | u64 capabilities; | 263 | u64 capabilities; |
236 | }; | 264 | }; |
237 | 265 | ||
266 | struct x86_pmu_quirk { | ||
267 | struct x86_pmu_quirk *next; | ||
268 | void (*func)(void); | ||
269 | }; | ||
270 | |||
238 | /* | 271 | /* |
239 | * struct x86_pmu - generic x86 pmu | 272 | * struct x86_pmu - generic x86 pmu |
240 | */ | 273 | */ |
@@ -259,6 +292,11 @@ struct x86_pmu { | |||
259 | int num_counters_fixed; | 292 | int num_counters_fixed; |
260 | int cntval_bits; | 293 | int cntval_bits; |
261 | u64 cntval_mask; | 294 | u64 cntval_mask; |
295 | union { | ||
296 | unsigned long events_maskl; | ||
297 | unsigned long events_mask[BITS_TO_LONGS(ARCH_PERFMON_EVENTS_COUNT)]; | ||
298 | }; | ||
299 | int events_mask_len; | ||
262 | int apic; | 300 | int apic; |
263 | u64 max_period; | 301 | u64 max_period; |
264 | struct event_constraint * | 302 | struct event_constraint * |
@@ -268,7 +306,7 @@ struct x86_pmu { | |||
268 | void (*put_event_constraints)(struct cpu_hw_events *cpuc, | 306 | void (*put_event_constraints)(struct cpu_hw_events *cpuc, |
269 | struct perf_event *event); | 307 | struct perf_event *event); |
270 | struct event_constraint *event_constraints; | 308 | struct event_constraint *event_constraints; |
271 | void (*quirks)(void); | 309 | struct x86_pmu_quirk *quirks; |
272 | int perfctr_second_write; | 310 | int perfctr_second_write; |
273 | 311 | ||
274 | int (*cpu_prepare)(int cpu); | 312 | int (*cpu_prepare)(int cpu); |
@@ -309,6 +347,15 @@ struct x86_pmu { | |||
309 | struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr); | 347 | struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr); |
310 | }; | 348 | }; |
311 | 349 | ||
350 | #define x86_add_quirk(func_) \ | ||
351 | do { \ | ||
352 | static struct x86_pmu_quirk __quirk __initdata = { \ | ||
353 | .func = func_, \ | ||
354 | }; \ | ||
355 | __quirk.next = x86_pmu.quirks; \ | ||
356 | x86_pmu.quirks = &__quirk; \ | ||
357 | } while (0) | ||
358 | |||
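Usage is a one-liner from a vendor init path. A hypothetical registration (the callback name is invented):

	static __init void my_model_quirk(void)	/* hypothetical */
	{
		pr_info("applying model-specific PMU quirk\n");
	}

	/* in the vendor's *_pmu_init() code: */
	x86_add_quirk(my_model_quirk);

Because the macro prepends to the list, quirks run in reverse registration order; that is why intel_pmu_init() installs intel_arch_events_quirk first, so it runs last.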
312 | #define ERF_NO_HT_SHARING 1 | 359 | #define ERF_NO_HT_SHARING 1 |
313 | #define ERF_HAS_RSP_1 2 | 360 | #define ERF_HAS_RSP_1 2 |
314 | 361 | ||
@@ -372,9 +419,11 @@ void x86_pmu_disable_all(void); | |||
372 | static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, | 419 | static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, |
373 | u64 enable_mask) | 420 | u64 enable_mask) |
374 | { | 421 | { |
422 | u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask); | ||
423 | |||
375 | if (hwc->extra_reg.reg) | 424 | if (hwc->extra_reg.reg) |
376 | wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config); | 425 | wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config); |
377 | wrmsrl(hwc->config_base, hwc->config | enable_mask); | 426 | wrmsrl(hwc->config_base, (hwc->config | enable_mask) & ~disable_mask); |
378 | } | 427 | } |
379 | 428 | ||
380 | void x86_pmu_enable_all(int added); | 429 | void x86_pmu_enable_all(int added); |
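A minimal illustration of the new gating (values assumed for the sketch): on AMD, cpu_starting initializes perf_ctr_virt_mask to AMD_PERFMON_EVENTSEL_HOSTONLY, so the host-only bit is stripped from every event write until amd_pmu_enable_virt() clears the mask:

	/* sketch only; 'event_config' stands in for hwc->config */
	u64 config       = event_config | AMD_PERFMON_EVENTSEL_HOSTONLY;
	u64 disable_mask = AMD_PERFMON_EVENTSEL_HOSTONLY;	/* SVM off */

	wrmsrl(hwc->config_base, (config | enable_mask) & ~disable_mask);
	/* host-only bit is cleared, so the counter counts with SVM disabled */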
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index aeefd45697a2..67250a52430b 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c | |||
@@ -1,4 +1,5 @@ | |||
1 | #include <linux/perf_event.h> | 1 | #include <linux/perf_event.h> |
2 | #include <linux/export.h> | ||
2 | #include <linux/types.h> | 3 | #include <linux/types.h> |
3 | #include <linux/init.h> | 4 | #include <linux/init.h> |
4 | #include <linux/slab.h> | 5 | #include <linux/slab.h> |
@@ -357,7 +358,9 @@ static void amd_pmu_cpu_starting(int cpu) | |||
357 | struct amd_nb *nb; | 358 | struct amd_nb *nb; |
358 | int i, nb_id; | 359 | int i, nb_id; |
359 | 360 | ||
360 | if (boot_cpu_data.x86_max_cores < 2) | 361 | cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY; |
362 | |||
363 | if (boot_cpu_data.x86_max_cores < 2 || boot_cpu_data.x86 == 0x15) | ||
361 | return; | 364 | return; |
362 | 365 | ||
363 | nb_id = amd_get_nb_id(cpu); | 366 | nb_id = amd_get_nb_id(cpu); |
@@ -492,7 +495,7 @@ static __initconst const struct x86_pmu amd_pmu = { | |||
492 | static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0); | 495 | static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0); |
493 | static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0); | 496 | static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0); |
494 | static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0); | 497 | static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0); |
495 | static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT(0, 0x09, 0); | 498 | static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0); |
496 | static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0); | 499 | static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0); |
497 | static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0); | 500 | static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0); |
498 | 501 | ||
@@ -587,9 +590,9 @@ static __initconst const struct x86_pmu amd_pmu_f15h = { | |||
587 | .put_event_constraints = amd_put_event_constraints, | 590 | .put_event_constraints = amd_put_event_constraints, |
588 | 591 | ||
589 | .cpu_prepare = amd_pmu_cpu_prepare, | 592 | .cpu_prepare = amd_pmu_cpu_prepare, |
590 | .cpu_starting = amd_pmu_cpu_starting, | ||
591 | .cpu_dead = amd_pmu_cpu_dead, | 593 | .cpu_dead = amd_pmu_cpu_dead, |
592 | #endif | 594 | #endif |
595 | .cpu_starting = amd_pmu_cpu_starting, | ||
593 | }; | 596 | }; |
594 | 597 | ||
595 | __init int amd_pmu_init(void) | 598 | __init int amd_pmu_init(void) |
@@ -621,3 +624,33 @@ __init int amd_pmu_init(void) | |||
621 | 624 | ||
622 | return 0; | 625 | return 0; |
623 | } | 626 | } |
627 | |||
628 | void amd_pmu_enable_virt(void) | ||
629 | { | ||
630 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
631 | |||
632 | cpuc->perf_ctr_virt_mask = 0; | ||
633 | |||
634 | /* Reload all events */ | ||
635 | x86_pmu_disable_all(); | ||
636 | x86_pmu_enable_all(0); | ||
637 | } | ||
638 | EXPORT_SYMBOL_GPL(amd_pmu_enable_virt); | ||
639 | |||
640 | void amd_pmu_disable_virt(void) | ||
641 | { | ||
642 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
643 | |||
644 | /* | ||
645 | * We only mask out the Host-only bit so that host-only counting works | ||
646 | * when SVM is disabled. If someone sets up a guest-only counter when | ||
647 | * SVM is disabled, the Guest-only bit still gets set and the counter | ||
648 | * will not count anything. | ||
649 | */ | ||
650 | cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY; | ||
651 | |||
652 | /* Reload all events */ | ||
653 | x86_pmu_disable_all(); | ||
654 | x86_pmu_enable_all(0); | ||
655 | } | ||
656 | EXPORT_SYMBOL_GPL(amd_pmu_disable_virt); | ||
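The two exported toggles are presumably called from the host's SVM enable/disable path (an assumption; the call sites are outside this diff). The intended pairing looks like:

	/* hypothetical caller; the functions around the calls are invented */
	enable_svm_on_this_cpu();
	amd_pmu_enable_virt();	/* drop the host-only filter, reload events */
	...
	amd_pmu_disable_virt();	/* restore host-only filtering */
	disable_svm_on_this_cpu();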
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c index ab6343d21825..3b8a2d30d14e 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c +++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c | |||
@@ -199,8 +199,7 @@ static int force_ibs_eilvt_setup(void) | |||
199 | goto out; | 199 | goto out; |
200 | } | 200 | } |
201 | 201 | ||
202 | pr_err(FW_BUG "using offset %d for IBS interrupts\n", offset); | 202 | pr_info("IBS: LVT offset %d assigned\n", offset); |
203 | pr_err(FW_BUG "workaround enabled for IBS LVT offset\n"); | ||
204 | 203 | ||
205 | return 0; | 204 | return 0; |
206 | out: | 205 | out: |
@@ -265,19 +264,23 @@ perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *h | |||
265 | static __init int amd_ibs_init(void) | 264 | static __init int amd_ibs_init(void) |
266 | { | 265 | { |
267 | u32 caps; | 266 | u32 caps; |
268 | int ret; | 267 | int ret = -EINVAL; |
269 | 268 | ||
270 | caps = __get_ibs_caps(); | 269 | caps = __get_ibs_caps(); |
271 | if (!caps) | 270 | if (!caps) |
272 | return -ENODEV; /* ibs not supported by the cpu */ | 271 | return -ENODEV; /* ibs not supported by the cpu */ |
273 | 272 | ||
274 | if (!ibs_eilvt_valid()) { | 273 | /* |
275 | ret = force_ibs_eilvt_setup(); | 274 | * Force LVT offset assignment for family 10h: The offsets are |
276 | if (ret) { | 275 | * not assigned by the BIOS for this family, so the OS is |
277 | pr_err("Failed to setup IBS, %d\n", ret); | 276 | * responsible for doing it. If the OS assignment fails, fall |
278 | return ret; | 277 | * back to BIOS settings and try to setup this. |
279 | } | 278 | */ |
280 | } | 279 | if (boot_cpu_data.x86 == 0x10) |
280 | force_ibs_eilvt_setup(); | ||
281 | |||
282 | if (!ibs_eilvt_valid()) | ||
283 | goto out; | ||
281 | 284 | ||
282 | get_online_cpus(); | 285 | get_online_cpus(); |
283 | ibs_caps = caps; | 286 | ibs_caps = caps; |
@@ -287,7 +290,11 @@ static __init int amd_ibs_init(void) | |||
287 | smp_call_function(setup_APIC_ibs, NULL, 1); | 290 | smp_call_function(setup_APIC_ibs, NULL, 1); |
288 | put_online_cpus(); | 291 | put_online_cpus(); |
289 | 292 | ||
290 | return perf_event_ibs_init(); | 293 | ret = perf_event_ibs_init(); |
294 | out: | ||
295 | if (ret) | ||
296 | pr_err("Failed to setup IBS, %d\n", ret); | ||
297 | return ret; | ||
291 | } | 298 | } |
292 | 299 | ||
293 | /* Since we need the pci subsystem to init ibs we can't do this earlier: */ | 300 | /* Since we need the pci subsystem to init ibs we can't do this earlier: */ |
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 2be5ebe99872..3bd37bdf1b8e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -28,6 +28,7 @@ static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly = | |||
28 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | 28 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, |
29 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | 29 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, |
30 | [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, | 30 | [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, |
31 | [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x0300, /* pseudo-encoding */ | ||
31 | }; | 32 | }; |
32 | 33 | ||
33 | static struct event_constraint intel_core_event_constraints[] __read_mostly = | 34 | static struct event_constraint intel_core_event_constraints[] __read_mostly = |
@@ -45,12 +46,7 @@ static struct event_constraint intel_core2_event_constraints[] __read_mostly = | |||
45 | { | 46 | { |
46 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | 47 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
47 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | 48 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
48 | /* | 49 | FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ |
49 | * Core2 has Fixed Counter 2 listed as CPU_CLK_UNHALTED.REF and event | ||
50 | * 0x013c as CPU_CLK_UNHALTED.BUS and specifies there is a fixed | ||
51 | * ratio between these counters. | ||
52 | */ | ||
53 | /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ | ||
54 | INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ | 50 | INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ |
55 | INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ | 51 | INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ |
56 | INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ | 52 | INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ |
@@ -68,7 +64,7 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly = | |||
68 | { | 64 | { |
69 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | 65 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
70 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | 66 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
71 | /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ | 67 | FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ |
72 | INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ | 68 | INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ |
73 | INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ | 69 | INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ |
74 | INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ | 70 | INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ |
@@ -90,7 +86,7 @@ static struct event_constraint intel_westmere_event_constraints[] __read_mostly | |||
90 | { | 86 | { |
91 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | 87 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
92 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | 88 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
93 | /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ | 89 | FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ |
94 | INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ | 90 | INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ |
95 | INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */ | 91 | INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */ |
96 | INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ | 92 | INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ |
@@ -102,7 +98,7 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly = | |||
102 | { | 98 | { |
103 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | 99 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
104 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | 100 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
105 | /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ | 101 | FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ |
106 | INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */ | 102 | INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */ |
107 | INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ | 103 | INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ |
108 | INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ | 104 | INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ |
@@ -125,7 +121,7 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly = | |||
125 | { | 121 | { |
126 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | 122 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
127 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | 123 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
128 | /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ | 124 | FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ |
129 | EVENT_CONSTRAINT_END | 125 | EVENT_CONSTRAINT_END |
130 | }; | 126 | }; |
131 | 127 | ||
@@ -1169,7 +1165,7 @@ again: | |||
1169 | */ | 1165 | */ |
1170 | c = &unconstrained; | 1166 | c = &unconstrained; |
1171 | } else if (intel_try_alt_er(event, orig_idx)) { | 1167 | } else if (intel_try_alt_er(event, orig_idx)) { |
1172 | raw_spin_unlock(&era->lock); | 1168 | raw_spin_unlock_irqrestore(&era->lock, flags); |
1173 | goto again; | 1169 | goto again; |
1174 | } | 1170 | } |
1175 | raw_spin_unlock_irqrestore(&era->lock, flags); | 1171 | raw_spin_unlock_irqrestore(&era->lock, flags); |
@@ -1519,7 +1515,7 @@ static __initconst const struct x86_pmu intel_pmu = { | |||
1519 | .guest_get_msrs = intel_guest_get_msrs, | 1515 | .guest_get_msrs = intel_guest_get_msrs, |
1520 | }; | 1516 | }; |
1521 | 1517 | ||
1522 | static void intel_clovertown_quirks(void) | 1518 | static __init void intel_clovertown_quirk(void) |
1523 | { | 1519 | { |
1524 | /* | 1520 | /* |
1525 | * PEBS is unreliable due to: | 1521 | * PEBS is unreliable due to: |
@@ -1545,12 +1541,60 @@ static void intel_clovertown_quirks(void) | |||
1545 | x86_pmu.pebs_constraints = NULL; | 1541 | x86_pmu.pebs_constraints = NULL; |
1546 | } | 1542 | } |
1547 | 1543 | ||
1544 | static __init void intel_sandybridge_quirk(void) | ||
1545 | { | ||
1546 | printk(KERN_WARNING "PEBS disabled due to CPU errata.\n"); | ||
1547 | x86_pmu.pebs = 0; | ||
1548 | x86_pmu.pebs_constraints = NULL; | ||
1549 | } | ||
1550 | |||
1551 | static const struct { int id; char *name; } intel_arch_events_map[] __initconst = { | ||
1552 | { PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" }, | ||
1553 | { PERF_COUNT_HW_INSTRUCTIONS, "instructions" }, | ||
1554 | { PERF_COUNT_HW_BUS_CYCLES, "bus cycles" }, | ||
1555 | { PERF_COUNT_HW_CACHE_REFERENCES, "cache references" }, | ||
1556 | { PERF_COUNT_HW_CACHE_MISSES, "cache misses" }, | ||
1557 | { PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" }, | ||
1558 | { PERF_COUNT_HW_BRANCH_MISSES, "branch misses" }, | ||
1559 | }; | ||
1560 | |||
1561 | static __init void intel_arch_events_quirk(void) | ||
1562 | { | ||
1563 | int bit; | ||
1564 | |||
1565 | /* disable events reported as not present by cpuid */ | ||
1566 | for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) { | ||
1567 | intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0; | ||
1568 | printk(KERN_WARNING "CPUID marked event: \'%s\' unavailable\n", | ||
1569 | intel_arch_events_map[bit].name); | ||
1570 | } | ||
1571 | } | ||
1572 | |||
1573 | static __init void intel_nehalem_quirk(void) | ||
1574 | { | ||
1575 | union cpuid10_ebx ebx; | ||
1576 | |||
1577 | ebx.full = x86_pmu.events_maskl; | ||
1578 | if (ebx.split.no_branch_misses_retired) { | ||
1579 | /* | ||
1580 | * Erratum AAJ80 detected, we work around it by using | ||
1581 | * the BR_MISP_EXEC.ANY event. This will over-count | ||
1582 | * branch-misses, but it's still much better than the | ||
1583 | * architectural event which is often completely bogus: | ||
1584 | */ | ||
1585 | intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89; | ||
1586 | ebx.split.no_branch_misses_retired = 0; | ||
1587 | x86_pmu.events_maskl = ebx.full; | ||
1588 | printk(KERN_INFO "CPU erratum AAJ80 worked around\n"); | ||
1589 | } | ||
1590 | } | ||
1591 | |||
1548 | __init int intel_pmu_init(void) | 1592 | __init int intel_pmu_init(void) |
1549 | { | 1593 | { |
1550 | union cpuid10_edx edx; | 1594 | union cpuid10_edx edx; |
1551 | union cpuid10_eax eax; | 1595 | union cpuid10_eax eax; |
1596 | union cpuid10_ebx ebx; | ||
1552 | unsigned int unused; | 1597 | unsigned int unused; |
1553 | unsigned int ebx; | ||
1554 | int version; | 1598 | int version; |
1555 | 1599 | ||
1556 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | 1600 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { |
@@ -1567,8 +1611,8 @@ __init int intel_pmu_init(void) | |||
1567 | * Check whether the Architectural PerfMon supports | 1611 | * Check whether the Architectural PerfMon supports |
1568 | * Branch Misses Retired hw_event or not. | 1612 | * Branch Misses Retired hw_event or not. |
1569 | */ | 1613 | */ |
1570 | cpuid(10, &eax.full, &ebx, &unused, &edx.full); | 1614 | cpuid(10, &eax.full, &ebx.full, &unused, &edx.full); |
1571 | if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) | 1615 | if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT) |
1572 | return -ENODEV; | 1616 | return -ENODEV; |
1573 | 1617 | ||
1574 | version = eax.split.version_id; | 1618 | version = eax.split.version_id; |
@@ -1582,6 +1626,9 @@ __init int intel_pmu_init(void) | |||
1582 | x86_pmu.cntval_bits = eax.split.bit_width; | 1626 | x86_pmu.cntval_bits = eax.split.bit_width; |
1583 | x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1; | 1627 | x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1; |
1584 | 1628 | ||
1629 | x86_pmu.events_maskl = ebx.full; | ||
1630 | x86_pmu.events_mask_len = eax.split.mask_length; | ||
1631 | |||
1585 | /* | 1632 | /* |
1586 | * Quirk: v2 perfmon does not report fixed-purpose events, so | 1633 | * Quirk: v2 perfmon does not report fixed-purpose events, so |
1587 | * assume at least 3 events: | 1634 | * assume at least 3 events: |
@@ -1601,6 +1648,8 @@ __init int intel_pmu_init(void) | |||
1601 | 1648 | ||
1602 | intel_ds_init(); | 1649 | intel_ds_init(); |
1603 | 1650 | ||
1651 | x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */ | ||
1652 | |||
1604 | /* | 1653 | /* |
1605 | * Install the hw-cache-events table: | 1654 | * Install the hw-cache-events table: |
1606 | */ | 1655 | */ |
@@ -1610,7 +1659,7 @@ __init int intel_pmu_init(void) | |||
1610 | break; | 1659 | break; |
1611 | 1660 | ||
1612 | case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ | 1661 | case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ |
1613 | x86_pmu.quirks = intel_clovertown_quirks; | 1662 | x86_add_quirk(intel_clovertown_quirk); |
1614 | case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ | 1663 | case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ |
1615 | case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ | 1664 | case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ |
1616 | case 29: /* six-core 45 nm xeon "Dunnington" */ | 1665 | case 29: /* six-core 45 nm xeon "Dunnington" */ |
@@ -1644,17 +1693,8 @@ __init int intel_pmu_init(void) | |||
1644 | /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ | 1693 | /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ |
1645 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; | 1694 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; |
1646 | 1695 | ||
1647 | if (ebx & 0x40) { | 1696 | x86_add_quirk(intel_nehalem_quirk); |
1648 | /* | ||
1649 | * Erratum AAJ80 detected, we work it around by using | ||
1650 | * the BR_MISP_EXEC.ANY event. This will over-count | ||
1651 | * branch-misses, but it's still much better than the | ||
1652 | * architectural event which is often completely bogus: | ||
1653 | */ | ||
1654 | intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89; | ||
1655 | 1697 | ||
1656 | pr_cont("erratum AAJ80 worked around, "); | ||
1657 | } | ||
1658 | pr_cont("Nehalem events, "); | 1698 | pr_cont("Nehalem events, "); |
1659 | break; | 1699 | break; |
1660 | 1700 | ||
@@ -1694,6 +1734,7 @@ __init int intel_pmu_init(void) | |||
1694 | break; | 1734 | break; |
1695 | 1735 | ||
1696 | case 42: /* SandyBridge */ | 1736 | case 42: /* SandyBridge */ |
1737 | x86_add_quirk(intel_sandybridge_quirk); | ||
1697 | case 45: /* SandyBridge, "Romley-EP" */ | 1738 | case 45: /* SandyBridge, "Romley-EP" */
1698 | memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, | 1739 | memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, |
1699 | sizeof(hw_cache_event_ids)); | 1740 | sizeof(hw_cache_event_ids)); |
@@ -1730,5 +1771,6 @@ __init int intel_pmu_init(void) | |||
1730 | break; | 1771 | break; |
1731 | } | 1772 | } |
1732 | } | 1773 | } |
1774 | |||
1733 | return 0; | 1775 | return 0; |
1734 | } | 1776 | } |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index c0d238f49db8..d6bd49faa40c 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c | |||
@@ -439,7 +439,6 @@ void intel_pmu_pebs_enable(struct perf_event *event) | |||
439 | hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; | 439 | hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; |
440 | 440 | ||
441 | cpuc->pebs_enabled |= 1ULL << hwc->idx; | 441 | cpuc->pebs_enabled |= 1ULL << hwc->idx; |
442 | WARN_ON_ONCE(cpuc->enabled); | ||
443 | 442 | ||
444 | if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1) | 443 | if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1) |
445 | intel_pmu_lbr_enable(event); | 444 | intel_pmu_lbr_enable(event); |
@@ -493,6 +492,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) | |||
493 | unsigned long from = cpuc->lbr_entries[0].from; | 492 | unsigned long from = cpuc->lbr_entries[0].from; |
494 | unsigned long old_to, to = cpuc->lbr_entries[0].to; | 493 | unsigned long old_to, to = cpuc->lbr_entries[0].to; |
495 | unsigned long ip = regs->ip; | 494 | unsigned long ip = regs->ip; |
495 | int is_64bit = 0; | ||
496 | 496 | ||
497 | /* | 497 | /* |
498 | * We don't need to fixup if the PEBS assist is fault like | 498 | * We don't need to fixup if the PEBS assist is fault like |
@@ -544,7 +544,10 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) | |||
544 | } else | 544 | } else |
545 | kaddr = (void *)to; | 545 | kaddr = (void *)to; |
546 | 546 | ||
547 | kernel_insn_init(&insn, kaddr); | 547 | #ifdef CONFIG_X86_64 |
548 | is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32); | ||
549 | #endif | ||
550 | insn_init(&insn, kaddr, is_64bit); | ||
548 | insn_get_length(&insn); | 551 | insn_get_length(&insn); |
549 | to += insn.length; | 552 | to += insn.length; |
550 | } while (to < ip); | 553 | } while (to < ip); |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index 3fab3de3ce96..47a7e63bfe54 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c | |||
@@ -72,8 +72,6 @@ void intel_pmu_lbr_enable(struct perf_event *event) | |||
72 | if (!x86_pmu.lbr_nr) | 72 | if (!x86_pmu.lbr_nr) |
73 | return; | 73 | return; |
74 | 74 | ||
75 | WARN_ON_ONCE(cpuc->enabled); | ||
76 | |||
77 | /* | 75 | /* |
78 | * Reset the LBR stack if we changed task context to | 76 | * Reset the LBR stack if we changed task context to |
79 | * avoid data leaks. | 77 | * avoid data leaks. |
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index 492bf1358a7c..ef484d9d0a25 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c | |||
@@ -1268,7 +1268,7 @@ reserve: | |||
1268 | } | 1268 | } |
1269 | 1269 | ||
1270 | done: | 1270 | done: |
1271 | return num ? -ENOSPC : 0; | 1271 | return num ? -EINVAL : 0; |
1272 | } | 1272 | } |
1273 | 1273 | ||
1274 | static __initconst const struct x86_pmu p4_pmu = { | 1274 | static __initconst const struct x86_pmu p4_pmu = { |
diff --git a/arch/x86/kernel/cpu/powerflags.c b/arch/x86/kernel/cpu/powerflags.c index 5abbea297e0c..7b3fe56b1c21 100644 --- a/arch/x86/kernel/cpu/powerflags.c +++ b/arch/x86/kernel/cpu/powerflags.c | |||
@@ -16,5 +16,6 @@ const char *const x86_power_flags[32] = { | |||
16 | "100mhzsteps", | 16 | "100mhzsteps", |
17 | "hwpstate", | 17 | "hwpstate", |
18 | "", /* tsc invariant mapped to constant_tsc */ | 18 | "", /* tsc invariant mapped to constant_tsc */ |
19 | /* nothing */ | 19 | "cpb", /* core performance boost */ |
20 | "eff_freq_ro", /* Readonly aperf/mperf */ | ||
20 | }; | 21 | }; |
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 14b23140e81f..8022c6681485 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c | |||
@@ -64,12 +64,10 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c) | |||
64 | static int show_cpuinfo(struct seq_file *m, void *v) | 64 | static int show_cpuinfo(struct seq_file *m, void *v) |
65 | { | 65 | { |
66 | struct cpuinfo_x86 *c = v; | 66 | struct cpuinfo_x86 *c = v; |
67 | unsigned int cpu = 0; | 67 | unsigned int cpu; |
68 | int i; | 68 | int i; |
69 | 69 | ||
70 | #ifdef CONFIG_SMP | ||
71 | cpu = c->cpu_index; | 70 | cpu = c->cpu_index; |
72 | #endif | ||
73 | seq_printf(m, "processor\t: %u\n" | 71 | seq_printf(m, "processor\t: %u\n" |
74 | "vendor_id\t: %s\n" | 72 | "vendor_id\t: %s\n" |
75 | "cpu family\t: %d\n" | 73 | "cpu family\t: %d\n" |
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 212a6a42527c..a524353d93f2 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c | |||
@@ -177,7 +177,7 @@ static struct notifier_block __refdata cpuid_class_cpu_notifier = | |||
177 | .notifier_call = cpuid_class_cpu_callback, | 177 | .notifier_call = cpuid_class_cpu_callback, |
178 | }; | 178 | }; |
179 | 179 | ||
180 | static char *cpuid_devnode(struct device *dev, mode_t *mode) | 180 | static char *cpuid_devnode(struct device *dev, umode_t *mode) |
181 | { | 181 | { |
182 | return kasprintf(GFP_KERNEL, "cpu/%u/cpuid", MINOR(dev->devt)); | 182 | return kasprintf(GFP_KERNEL, "cpu/%u/cpuid", MINOR(dev->devt)); |
183 | } | 183 | } |
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 1aae78f775fc..4025fe4f928f 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c | |||
@@ -252,7 +252,8 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err) | |||
252 | unsigned short ss; | 252 | unsigned short ss; |
253 | unsigned long sp; | 253 | unsigned long sp; |
254 | #endif | 254 | #endif |
255 | printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); | 255 | printk(KERN_DEFAULT |
256 | "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); | ||
256 | #ifdef CONFIG_PREEMPT | 257 | #ifdef CONFIG_PREEMPT |
257 | printk("PREEMPT "); | 258 | printk("PREEMPT "); |
258 | #endif | 259 | #endif |
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 3b97a80ce329..c99f9ed013d5 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c | |||
@@ -116,16 +116,16 @@ void show_registers(struct pt_regs *regs) | |||
116 | for (i = 0; i < code_len; i++, ip++) { | 116 | for (i = 0; i < code_len; i++, ip++) { |
117 | if (ip < (u8 *)PAGE_OFFSET || | 117 | if (ip < (u8 *)PAGE_OFFSET || |
118 | probe_kernel_address(ip, c)) { | 118 | probe_kernel_address(ip, c)) { |
119 | printk(" Bad EIP value."); | 119 | printk(KERN_CONT " Bad EIP value."); |
120 | break; | 120 | break; |
121 | } | 121 | } |
122 | if (ip == (u8 *)regs->ip) | 122 | if (ip == (u8 *)regs->ip) |
123 | printk("<%02x> ", c); | 123 | printk(KERN_CONT "<%02x> ", c); |
124 | else | 124 | else |
125 | printk("%02x ", c); | 125 | printk(KERN_CONT "%02x ", c); |
126 | } | 126 | } |
127 | } | 127 | } |
128 | printk("\n"); | 128 | printk(KERN_CONT "\n"); |
129 | } | 129 | } |
130 | 130 | ||
131 | int is_valid_bugaddr(unsigned long ip) | 131 | int is_valid_bugaddr(unsigned long ip) |
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 19853ad8afc5..17107bd6e1f0 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c | |||
@@ -129,7 +129,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
129 | if (!stack) { | 129 | if (!stack) { |
130 | if (regs) | 130 | if (regs) |
131 | stack = (unsigned long *)regs->sp; | 131 | stack = (unsigned long *)regs->sp; |
132 | else if (task && task != current) | 132 | else if (task != current) |
133 | stack = (unsigned long *)task->thread.sp; | 133 | stack = (unsigned long *)task->thread.sp; |
134 | else | 134 | else |
135 | stack = &dummy; | 135 | stack = &dummy; |
@@ -269,11 +269,11 @@ void show_registers(struct pt_regs *regs) | |||
269 | unsigned char c; | 269 | unsigned char c; |
270 | u8 *ip; | 270 | u8 *ip; |
271 | 271 | ||
272 | printk(KERN_EMERG "Stack:\n"); | 272 | printk(KERN_DEFAULT "Stack:\n"); |
273 | show_stack_log_lvl(NULL, regs, (unsigned long *)sp, | 273 | show_stack_log_lvl(NULL, regs, (unsigned long *)sp, |
274 | 0, KERN_EMERG); | 274 | 0, KERN_DEFAULT); |
275 | 275 | ||
276 | printk(KERN_EMERG "Code: "); | 276 | printk(KERN_DEFAULT "Code: "); |
277 | 277 | ||
278 | ip = (u8 *)regs->ip - code_prologue; | 278 | ip = (u8 *)regs->ip - code_prologue; |
279 | if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { | 279 | if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { |
@@ -284,16 +284,16 @@ void show_registers(struct pt_regs *regs) | |||
284 | for (i = 0; i < code_len; i++, ip++) { | 284 | for (i = 0; i < code_len; i++, ip++) { |
285 | if (ip < (u8 *)PAGE_OFFSET || | 285 | if (ip < (u8 *)PAGE_OFFSET || |
286 | probe_kernel_address(ip, c)) { | 286 | probe_kernel_address(ip, c)) { |
287 | printk(" Bad RIP value."); | 287 | printk(KERN_CONT " Bad RIP value."); |
288 | break; | 288 | break; |
289 | } | 289 | } |
290 | if (ip == (u8 *)regs->ip) | 290 | if (ip == (u8 *)regs->ip) |
291 | printk("<%02x> ", c); | 291 | printk(KERN_CONT "<%02x> ", c); |
292 | else | 292 | else |
293 | printk("%02x ", c); | 293 | printk(KERN_CONT "%02x ", c); |
294 | } | 294 | } |
295 | } | 295 | } |
296 | printk("\n"); | 296 | printk(KERN_CONT "\n"); |
297 | } | 297 | } |
298 | 298 | ||
299 | int is_valid_bugaddr(unsigned long ip) | 299 | int is_valid_bugaddr(unsigned long ip) |
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 303a0e48f076..62d61e9976eb 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/acpi.h> | 19 | #include <linux/acpi.h> |
20 | #include <linux/firmware-map.h> | 20 | #include <linux/firmware-map.h> |
21 | #include <linux/memblock.h> | 21 | #include <linux/memblock.h> |
22 | #include <linux/sort.h> | ||
22 | 23 | ||
23 | #include <asm/e820.h> | 24 | #include <asm/e820.h> |
24 | #include <asm/proto.h> | 25 | #include <asm/proto.h> |
@@ -227,22 +228,38 @@ void __init e820_print_map(char *who) | |||
227 | * ____________________33__ | 228 | * ____________________33__ |
228 | * ______________________4_ | 229 | * ______________________4_ |
229 | */ | 230 | */ |
231 | struct change_member { | ||
232 | struct e820entry *pbios; /* pointer to original bios entry */ | ||
233 | unsigned long long addr; /* address for this change point */ | ||
234 | }; | ||
235 | |||
236 | static int __init cpcompare(const void *a, const void *b) | ||
237 | { | ||
238 | struct change_member * const *app = a, * const *bpp = b; | ||
239 | const struct change_member *ap = *app, *bp = *bpp; | ||
240 | |||
241 | /* | ||
242 | * Inputs are pointers to two elements of change_point[]. If their | ||
243 | * addresses are unequal, their difference dominates. If the addresses | ||
244 | * are equal, then consider one that represents the end of its region | ||
245 | * to be greater than one that does not. | ||
246 | */ | ||
247 | if (ap->addr != bp->addr) | ||
248 | return ap->addr > bp->addr ? 1 : -1; | ||
249 | |||
250 | return (ap->addr != ap->pbios->addr) - (bp->addr != bp->pbios->addr); | ||
251 | } | ||
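A worked example of the tie-break (illustrative addresses): two adjacent regions yield four change points, and at the shared boundary the start of the second region must sort before the end of the first:

	region A [0x1000, 0x2000) -> points 0x1000 (start), 0x2000 (end)
	region B [0x2000, 0x3000) -> points 0x2000 (start), 0x3000 (end)

	cpcompare(B-start, A-end):
	    addresses are equal; (addr != pbios->addr) is 0 for the start
	    and 1 for the end, so the result is 0 - 1 = -1: start sorts first

This reproduces exactly the swap condition of the hand-rolled bubble sort deleted below.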
230 | 252 | ||
231 | int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, | 253 | int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, |
232 | u32 *pnr_map) | 254 | u32 *pnr_map) |
233 | { | 255 | { |
234 | struct change_member { | ||
235 | struct e820entry *pbios; /* pointer to original bios entry */ | ||
236 | unsigned long long addr; /* address for this change point */ | ||
237 | }; | ||
238 | static struct change_member change_point_list[2*E820_X_MAX] __initdata; | 256 | static struct change_member change_point_list[2*E820_X_MAX] __initdata; |
239 | static struct change_member *change_point[2*E820_X_MAX] __initdata; | 257 | static struct change_member *change_point[2*E820_X_MAX] __initdata; |
240 | static struct e820entry *overlap_list[E820_X_MAX] __initdata; | 258 | static struct e820entry *overlap_list[E820_X_MAX] __initdata; |
241 | static struct e820entry new_bios[E820_X_MAX] __initdata; | 259 | static struct e820entry new_bios[E820_X_MAX] __initdata; |
242 | struct change_member *change_tmp; | ||
243 | unsigned long current_type, last_type; | 260 | unsigned long current_type, last_type; |
244 | unsigned long long last_addr; | 261 | unsigned long long last_addr; |
245 | int chgidx, still_changing; | 262 | int chgidx; |
246 | int overlap_entries; | 263 | int overlap_entries; |
247 | int new_bios_entry; | 264 | int new_bios_entry; |
248 | int old_nr, new_nr, chg_nr; | 265 | int old_nr, new_nr, chg_nr; |
@@ -279,35 +296,7 @@ int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, | |||
279 | chg_nr = chgidx; | 296 | chg_nr = chgidx; |
280 | 297 | ||
281 | /* sort change-point list by memory addresses (low -> high) */ | 298 | /* sort change-point list by memory addresses (low -> high) */ |
282 | still_changing = 1; | 299 | sort(change_point, chg_nr, sizeof *change_point, cpcompare, NULL); |
283 | while (still_changing) { | ||
284 | still_changing = 0; | ||
285 | for (i = 1; i < chg_nr; i++) { | ||
286 | unsigned long long curaddr, lastaddr; | ||
287 | unsigned long long curpbaddr, lastpbaddr; | ||
288 | |||
289 | curaddr = change_point[i]->addr; | ||
290 | lastaddr = change_point[i - 1]->addr; | ||
291 | curpbaddr = change_point[i]->pbios->addr; | ||
292 | lastpbaddr = change_point[i - 1]->pbios->addr; | ||
293 | |||
294 | /* | ||
295 | * swap entries, when: | ||
296 | * | ||
297 | * curaddr > lastaddr or | ||
298 | * curaddr == lastaddr and curaddr == curpbaddr and | ||
299 | * lastaddr != lastpbaddr | ||
300 | */ | ||
301 | if (curaddr < lastaddr || | ||
302 | (curaddr == lastaddr && curaddr == curpbaddr && | ||
303 | lastaddr != lastpbaddr)) { | ||
304 | change_tmp = change_point[i]; | ||
305 | change_point[i] = change_point[i-1]; | ||
306 | change_point[i-1] = change_tmp; | ||
307 | still_changing = 1; | ||
308 | } | ||
309 | } | ||
310 | } | ||
311 | 300 | ||
312 | /* create a new bios memory map, removing overlaps */ | 301 | /* create a new bios memory map, removing overlaps */ |
313 | overlap_entries = 0; /* number of entries in the overlap table */ | 302 | overlap_entries = 0; /* number of entries in the overlap table */ |
@@ -714,7 +703,7 @@ void __init e820_mark_nosave_regions(unsigned long limit_pfn) | |||
714 | } | 703 | } |
715 | #endif | 704 | #endif |
716 | 705 | ||
717 | #ifdef CONFIG_HIBERNATION | 706 | #ifdef CONFIG_ACPI |
718 | /** | 707 | /** |
719 | * Mark ACPI NVS memory region, so that we can save/restore it during | 708 | * Mark ACPI NVS memory region, so that we can save/restore it during |
720 | * hibernation and the subsequent resume. | 709 | * hibernation and the subsequent resume. |
@@ -727,7 +716,7 @@ static int __init e820_mark_nvs_memory(void) | |||
727 | struct e820entry *ei = &e820.map[i]; | 716 | struct e820entry *ei = &e820.map[i]; |
728 | 717 | ||
729 | if (ei->type == E820_NVS) | 718 | if (ei->type == E820_NVS) |
730 | suspend_nvs_register(ei->addr, ei->size); | 719 | acpi_nvs_register(ei->addr, ei->size); |
731 | } | 720 | } |
732 | 721 | ||
733 | return 0; | 722 | return 0; |
@@ -738,35 +727,17 @@ core_initcall(e820_mark_nvs_memory); | |||
738 | /* | 727 | /* |
739 | * pre allocated 4k and reserved it in memblock and e820_saved | 728 | * pre allocated 4k and reserved it in memblock and e820_saved |
740 | */ | 729 | */ |
741 | u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align) | 730 | u64 __init early_reserve_e820(u64 size, u64 align) |
742 | { | 731 | { |
743 | u64 size = 0; | ||
744 | u64 addr; | 732 | u64 addr; |
745 | u64 start; | ||
746 | 733 | ||
747 | for (start = startt; ; start += size) { | 734 | addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); |
748 | start = memblock_x86_find_in_range_size(start, &size, align); | 735 | if (addr) { |
749 | if (start == MEMBLOCK_ERROR) | 736 | e820_update_range_saved(addr, size, E820_RAM, E820_RESERVED); |
750 | return 0; | 737 | printk(KERN_INFO "update e820_saved for early_reserve_e820\n"); |
751 | if (size >= sizet) | 738 | update_e820_saved(); |
752 | break; | ||
753 | } | 739 | } |
754 | 740 | ||
755 | #ifdef CONFIG_X86_32 | ||
756 | if (start >= MAXMEM) | ||
757 | return 0; | ||
758 | if (start + size > MAXMEM) | ||
759 | size = MAXMEM - start; | ||
760 | #endif | ||
761 | |||
762 | addr = round_down(start + size - sizet, align); | ||
763 | if (addr < start) | ||
764 | return 0; | ||
765 | memblock_x86_reserve_range(addr, addr + sizet, "new next"); | ||
766 | e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED); | ||
767 | printk(KERN_INFO "update e820_saved for early_reserve_e820\n"); | ||
768 | update_e820_saved(); | ||
769 | |||
770 | return addr; | 741 | return addr; |
771 | } | 742 | } |
772 | 743 | ||
@@ -1090,7 +1061,7 @@ void __init memblock_x86_fill(void) | |||
1090 | * We are safe to enable resizing, because memblock_x86_fill() | 1061 | * We are safe to enable resizing, because memblock_x86_fill()
1091 | * is rather late for x86 | 1062 | * is rather late for x86
1092 | */ | 1063 | */ |
1093 | memblock_can_resize = 1; | 1064 | memblock_allow_resize(); |
1094 | 1065 | ||
1095 | for (i = 0; i < e820.nr_map; i++) { | 1066 | for (i = 0; i < e820.nr_map; i++) { |
1096 | struct e820entry *ei = &e820.map[i]; | 1067 | struct e820entry *ei = &e820.map[i]; |
@@ -1105,22 +1076,36 @@ void __init memblock_x86_fill(void) | |||
1105 | memblock_add(ei->addr, ei->size); | 1076 | memblock_add(ei->addr, ei->size); |
1106 | } | 1077 | } |
1107 | 1078 | ||
1108 | memblock_analyze(); | ||
1109 | memblock_dump_all(); | 1079 | memblock_dump_all(); |
1110 | } | 1080 | } |
1111 | 1081 | ||
1112 | void __init memblock_find_dma_reserve(void) | 1082 | void __init memblock_find_dma_reserve(void) |
1113 | { | 1083 | { |
1114 | #ifdef CONFIG_X86_64 | 1084 | #ifdef CONFIG_X86_64 |
1115 | u64 free_size_pfn; | 1085 | u64 nr_pages = 0, nr_free_pages = 0; |
1116 | u64 mem_size_pfn; | 1086 | unsigned long start_pfn, end_pfn; |
1087 | phys_addr_t start, end; | ||
1088 | int i; | ||
1089 | u64 u; | ||
1090 | |||
1117 | /* | 1091 | /* |
1118 | * need to find out used area below MAX_DMA_PFN | 1092 | * need to find out used area below MAX_DMA_PFN |
1119 | * need to use memblock to get free size in [0, MAX_DMA_PFN] | 1093 | * need to use memblock to get free size in [0, MAX_DMA_PFN] |
1120 | * at first, and assume boot_mem will not take below MAX_DMA_PFN | 1094 | * at first, and assume boot_mem will not take below MAX_DMA_PFN |
1121 | */ | 1095 | */ |
1122 | mem_size_pfn = memblock_x86_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT; | 1096 | for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) { |
1123 | free_size_pfn = memblock_x86_free_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT; | 1097 | start_pfn = min_t(unsigned long, start_pfn, MAX_DMA_PFN); |
1124 | set_dma_reserve(mem_size_pfn - free_size_pfn); | 1098 | end_pfn = min_t(unsigned long, end_pfn, MAX_DMA_PFN); |
1099 | nr_pages += end_pfn - start_pfn; | ||
1100 | } | ||
1101 | |||
1102 | for_each_free_mem_range(u, MAX_NUMNODES, &start, &end, NULL) { | ||
1103 | start_pfn = min_t(unsigned long, PFN_UP(start), MAX_DMA_PFN); | ||
1104 | end_pfn = min_t(unsigned long, PFN_DOWN(end), MAX_DMA_PFN); | ||
1105 | if (start_pfn < end_pfn) | ||
1106 | nr_free_pages += end_pfn - start_pfn; | ||
1107 | } | ||
1108 | |||
1109 | set_dma_reserve(nr_pages - nr_free_pages); | ||
1125 | #endif | 1110 | #endif |
1126 | } | 1111 | } |
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index cd28a350f7f9..9b9f18b49918 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c | |||
@@ -240,14 +240,14 @@ static int __init setup_early_printk(char *buf) | |||
240 | if (!strncmp(buf, "xen", 3)) | 240 | if (!strncmp(buf, "xen", 3)) |
241 | early_console_register(&xenboot_console, keep); | 241 | early_console_register(&xenboot_console, keep); |
242 | #endif | 242 | #endif |
243 | #ifdef CONFIG_EARLY_PRINTK_MRST | 243 | #ifdef CONFIG_EARLY_PRINTK_INTEL_MID |
244 | if (!strncmp(buf, "mrst", 4)) { | 244 | if (!strncmp(buf, "mrst", 4)) { |
245 | mrst_early_console_init(); | 245 | mrst_early_console_init(); |
246 | early_console_register(&early_mrst_console, keep); | 246 | early_console_register(&early_mrst_console, keep); |
247 | } | 247 | } |
248 | 248 | ||
249 | if (!strncmp(buf, "hsu", 3)) { | 249 | if (!strncmp(buf, "hsu", 3)) { |
250 | hsu_early_console_init(); | 250 | hsu_early_console_init(buf + 3); |
251 | early_console_register(&early_hsu_console, keep); | 251 | early_console_register(&early_hsu_console, keep); |
252 | } | 252 | } |
253 | #endif | 253 | #endif |
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index f3f6f5344001..79d97e68f042 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -42,6 +42,7 @@ | |||
42 | */ | 42 | */ |
43 | 43 | ||
44 | #include <linux/linkage.h> | 44 | #include <linux/linkage.h> |
45 | #include <linux/err.h> | ||
45 | #include <asm/thread_info.h> | 46 | #include <asm/thread_info.h> |
46 | #include <asm/irqflags.h> | 47 | #include <asm/irqflags.h> |
47 | #include <asm/errno.h> | 48 | #include <asm/errno.h> |
@@ -81,8 +82,6 @@ | |||
81 | * enough to patch inline, increasing performance. | 82 | * enough to patch inline, increasing performance. |
82 | */ | 83 | */ |
83 | 84 | ||
84 | #define nr_syscalls ((syscall_table_size)/4) | ||
85 | |||
86 | #ifdef CONFIG_PREEMPT | 85 | #ifdef CONFIG_PREEMPT |
87 | #define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF | 86 | #define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF |
88 | #else | 87 | #else |
@@ -423,7 +422,7 @@ sysenter_past_esp: | |||
423 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) | 422 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) |
424 | jnz sysenter_audit | 423 | jnz sysenter_audit |
425 | sysenter_do_call: | 424 | sysenter_do_call: |
426 | cmpl $(nr_syscalls), %eax | 425 | cmpl $(NR_syscalls), %eax |
427 | jae syscall_badsys | 426 | jae syscall_badsys |
428 | call *sys_call_table(,%eax,4) | 427 | call *sys_call_table(,%eax,4) |
429 | movl %eax,PT_EAX(%esp) | 428 | movl %eax,PT_EAX(%esp) |
@@ -455,7 +454,7 @@ sysenter_audit: | |||
455 | movl %ebx,%ecx /* 3rd arg: 1st syscall arg */ | 454 | movl %ebx,%ecx /* 3rd arg: 1st syscall arg */ |
456 | movl %eax,%edx /* 2nd arg: syscall number */ | 455 | movl %eax,%edx /* 2nd arg: syscall number */ |
457 | movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */ | 456 | movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */ |
458 | call audit_syscall_entry | 457 | call __audit_syscall_entry |
459 | pushl_cfi %ebx | 458 | pushl_cfi %ebx |
460 | movl PT_EAX(%esp),%eax /* reload syscall number */ | 459 | movl PT_EAX(%esp),%eax /* reload syscall number */ |
461 | jmp sysenter_do_call | 460 | jmp sysenter_do_call |
@@ -466,11 +465,10 @@ sysexit_audit: | |||
466 | TRACE_IRQS_ON | 465 | TRACE_IRQS_ON |
467 | ENABLE_INTERRUPTS(CLBR_ANY) | 466 | ENABLE_INTERRUPTS(CLBR_ANY) |
468 | movl %eax,%edx /* second arg, syscall return value */ | 467 | movl %eax,%edx /* second arg, syscall return value */ |
469 | cmpl $0,%eax /* is it < 0? */ | 468 | cmpl $-MAX_ERRNO,%eax /* is it an error ? */ |
470 | setl %al /* 1 if so, 0 if not */ | 469 | setbe %al /* 1 if so, 0 if not */ |
471 | movzbl %al,%eax /* zero-extend that */ | 470 | movzbl %al,%eax /* zero-extend that */ |
472 | inc %eax /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */ | 471 | call __audit_syscall_exit |
473 | call audit_syscall_exit | ||
474 | DISABLE_INTERRUPTS(CLBR_ANY) | 472 | DISABLE_INTERRUPTS(CLBR_ANY) |
475 | TRACE_IRQS_OFF | 473 | TRACE_IRQS_OFF |
476 | movl TI_flags(%ebp), %ecx | 474 | movl TI_flags(%ebp), %ecx |
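The new comparison adopts the C-side convention from <linux/err.h>, which both entry files now include: a value in the top MAX_ERRNO (4095) slots of the address space is an error return, and everything below that (including pointers returned by mmap()) counts as success:

	/* from include/linux/err.h */
	#define MAX_ERRNO	4095
	#define IS_ERR_VALUE(x) unlikely((x) >= (unsigned long)-MAX_ERRNO)

The old test (cmpl $0 / setl) misclassified large successful returns, such as mmap() addresses with the sign bit set.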
@@ -504,7 +502,7 @@ ENTRY(system_call) | |||
504 | # system call tracing in operation / emulation | 502 | # system call tracing in operation / emulation |
505 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) | 503 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) |
506 | jnz syscall_trace_entry | 504 | jnz syscall_trace_entry |
507 | cmpl $(nr_syscalls), %eax | 505 | cmpl $(NR_syscalls), %eax |
508 | jae syscall_badsys | 506 | jae syscall_badsys |
509 | syscall_call: | 507 | syscall_call: |
510 | call *sys_call_table(,%eax,4) | 508 | call *sys_call_table(,%eax,4) |
@@ -625,6 +623,8 @@ work_notifysig: # deal with pending signals and | |||
625 | movl %esp, %eax | 623 | movl %esp, %eax |
626 | jne work_notifysig_v86 # returning to kernel-space or | 624 | jne work_notifysig_v86 # returning to kernel-space or |
627 | # vm86-space | 625 | # vm86-space |
626 | TRACE_IRQS_ON | ||
627 | ENABLE_INTERRUPTS(CLBR_NONE) | ||
628 | xorl %edx, %edx | 628 | xorl %edx, %edx |
629 | call do_notify_resume | 629 | call do_notify_resume |
630 | jmp resume_userspace_sig | 630 | jmp resume_userspace_sig |
@@ -638,6 +638,8 @@ work_notifysig_v86: | |||
638 | #else | 638 | #else |
639 | movl %esp, %eax | 639 | movl %esp, %eax |
640 | #endif | 640 | #endif |
641 | TRACE_IRQS_ON | ||
642 | ENABLE_INTERRUPTS(CLBR_NONE) | ||
641 | xorl %edx, %edx | 643 | xorl %edx, %edx |
642 | call do_notify_resume | 644 | call do_notify_resume |
643 | jmp resume_userspace_sig | 645 | jmp resume_userspace_sig |
@@ -650,7 +652,7 @@ syscall_trace_entry: | |||
650 | movl %esp, %eax | 652 | movl %esp, %eax |
651 | call syscall_trace_enter | 653 | call syscall_trace_enter |
652 | /* What it returned is what we'll actually use. */ | 654 | /* What it returned is what we'll actually use. */ |
653 | cmpl $(nr_syscalls), %eax | 655 | cmpl $(NR_syscalls), %eax |
654 | jnae syscall_call | 656 | jnae syscall_call |
655 | jmp syscall_exit | 657 | jmp syscall_exit |
656 | END(syscall_trace_entry) | 658 | END(syscall_trace_entry) |
@@ -690,29 +692,28 @@ END(syscall_badsys) | |||
690 | * System calls that need a pt_regs pointer. | 692 | * System calls that need a pt_regs pointer. |
691 | */ | 693 | */ |
692 | #define PTREGSCALL0(name) \ | 694 | #define PTREGSCALL0(name) \ |
693 | ALIGN; \ | 695 | ENTRY(ptregs_##name) ; \ |
694 | ptregs_##name: \ | ||
695 | leal 4(%esp),%eax; \ | 696 | leal 4(%esp),%eax; \ |
696 | jmp sys_##name; | 697 | jmp sys_##name; \ |
698 | ENDPROC(ptregs_##name) | ||
697 | 699 | ||
698 | #define PTREGSCALL1(name) \ | 700 | #define PTREGSCALL1(name) \ |
699 | ALIGN; \ | 701 | ENTRY(ptregs_##name) ; \ |
700 | ptregs_##name: \ | ||
701 | leal 4(%esp),%edx; \ | 702 | leal 4(%esp),%edx; \ |
702 | movl (PT_EBX+4)(%esp),%eax; \ | 703 | movl (PT_EBX+4)(%esp),%eax; \ |
703 | jmp sys_##name; | 704 | jmp sys_##name; \ |
705 | ENDPROC(ptregs_##name) | ||
704 | 706 | ||
705 | #define PTREGSCALL2(name) \ | 707 | #define PTREGSCALL2(name) \ |
706 | ALIGN; \ | 708 | ENTRY(ptregs_##name) ; \ |
707 | ptregs_##name: \ | ||
708 | leal 4(%esp),%ecx; \ | 709 | leal 4(%esp),%ecx; \ |
709 | movl (PT_ECX+4)(%esp),%edx; \ | 710 | movl (PT_ECX+4)(%esp),%edx; \ |
710 | movl (PT_EBX+4)(%esp),%eax; \ | 711 | movl (PT_EBX+4)(%esp),%eax; \ |
711 | jmp sys_##name; | 712 | jmp sys_##name; \ |
713 | ENDPROC(ptregs_##name) | ||
712 | 714 | ||
713 | #define PTREGSCALL3(name) \ | 715 | #define PTREGSCALL3(name) \ |
714 | ALIGN; \ | 716 | ENTRY(ptregs_##name) ; \ |
715 | ptregs_##name: \ | ||
716 | CFI_STARTPROC; \ | 717 | CFI_STARTPROC; \ |
717 | leal 4(%esp),%eax; \ | 718 | leal 4(%esp),%eax; \ |
718 | pushl_cfi %eax; \ | 719 | pushl_cfi %eax; \ |
@@ -737,8 +738,7 @@ PTREGSCALL2(vm86) | |||
737 | PTREGSCALL1(vm86old) | 738 | PTREGSCALL1(vm86old) |
738 | 739 | ||
739 | /* Clone is an oddball. The 4th arg is in %edi */ | 740 | /* Clone is an oddball. The 4th arg is in %edi */ |
740 | ALIGN; | 741 | ENTRY(ptregs_clone) |
741 | ptregs_clone: | ||
742 | CFI_STARTPROC | 742 | CFI_STARTPROC |
743 | leal 4(%esp),%eax | 743 | leal 4(%esp),%eax |
744 | pushl_cfi %eax | 744 | pushl_cfi %eax |
@@ -1209,11 +1209,6 @@ return_to_handler: | |||
1209 | jmp *%ecx | 1209 | jmp *%ecx |
1210 | #endif | 1210 | #endif |
1211 | 1211 | ||
1212 | .section .rodata,"a" | ||
1213 | #include "syscall_table_32.S" | ||
1214 | |||
1215 | syscall_table_size=(.-sys_call_table) | ||
1216 | |||
1217 | /* | 1212 | /* |
1218 | * Some functions should be protected against kprobes | 1213 | * Some functions should be protected against kprobes |
1219 | */ | 1214 | */ |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index faf8d5e74b0b..1333d9851778 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -55,6 +55,7 @@ | |||
55 | #include <asm/paravirt.h> | 55 | #include <asm/paravirt.h> |
56 | #include <asm/ftrace.h> | 56 | #include <asm/ftrace.h> |
57 | #include <asm/percpu.h> | 57 | #include <asm/percpu.h> |
58 | #include <linux/err.h> | ||
58 | 59 | ||
59 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ | 60 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ |
60 | #include <linux/elf-em.h> | 61 | #include <linux/elf-em.h> |
@@ -221,7 +222,7 @@ ENDPROC(native_usergs_sysret64) | |||
221 | /*CFI_REL_OFFSET ss,0*/ | 222 | /*CFI_REL_OFFSET ss,0*/ |
222 | pushq_cfi %rax /* rsp */ | 223 | pushq_cfi %rax /* rsp */ |
223 | CFI_REL_OFFSET rsp,0 | 224 | CFI_REL_OFFSET rsp,0 |
224 | pushq_cfi $X86_EFLAGS_IF /* eflags - interrupts on */ | 225 | pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_BIT1) /* eflags - interrupts on */ |
225 | /*CFI_REL_OFFSET rflags,0*/ | 226 | /*CFI_REL_OFFSET rflags,0*/ |
226 | pushq_cfi $__KERNEL_CS /* cs */ | 227 | pushq_cfi $__KERNEL_CS /* cs */ |
227 | /*CFI_REL_OFFSET cs,0*/ | 228 | /*CFI_REL_OFFSET cs,0*/ |
@@ -411,7 +412,7 @@ ENTRY(ret_from_fork) | |||
411 | RESTORE_REST | 412 | RESTORE_REST |
412 | 413 | ||
413 | testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread? | 414 | testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread? |
414 | je int_ret_from_sys_call | 415 | jz retint_restore_args |
415 | 416 | ||
416 | testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET | 417 | testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET |
417 | jnz int_ret_from_sys_call | 418 | jnz int_ret_from_sys_call |
@@ -465,7 +466,7 @@ ENTRY(system_call) | |||
465 | * after the swapgs, so that it can do the swapgs | 466 | * after the swapgs, so that it can do the swapgs |
466 | * for the guest and jump here on syscall. | 467 | * for the guest and jump here on syscall. |
467 | */ | 468 | */ |
468 | ENTRY(system_call_after_swapgs) | 469 | GLOBAL(system_call_after_swapgs) |
469 | 470 | ||
470 | movq %rsp,PER_CPU_VAR(old_rsp) | 471 | movq %rsp,PER_CPU_VAR(old_rsp) |
471 | movq PER_CPU_VAR(kernel_stack),%rsp | 472 | movq PER_CPU_VAR(kernel_stack),%rsp |
@@ -478,8 +479,7 @@ ENTRY(system_call_after_swapgs) | |||
478 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) | 479 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) |
479 | movq %rcx,RIP-ARGOFFSET(%rsp) | 480 | movq %rcx,RIP-ARGOFFSET(%rsp) |
480 | CFI_REL_OFFSET rip,RIP-ARGOFFSET | 481 | CFI_REL_OFFSET rip,RIP-ARGOFFSET |
481 | GET_THREAD_INFO(%rcx) | 482 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
482 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx) | ||
483 | jnz tracesys | 483 | jnz tracesys |
484 | system_call_fastpath: | 484 | system_call_fastpath: |
485 | cmpq $__NR_syscall_max,%rax | 485 | cmpq $__NR_syscall_max,%rax |
@@ -496,10 +496,9 @@ ret_from_sys_call: | |||
496 | /* edi: flagmask */ | 496 | /* edi: flagmask */ |
497 | sysret_check: | 497 | sysret_check: |
498 | LOCKDEP_SYS_EXIT | 498 | LOCKDEP_SYS_EXIT |
499 | GET_THREAD_INFO(%rcx) | ||
500 | DISABLE_INTERRUPTS(CLBR_NONE) | 499 | DISABLE_INTERRUPTS(CLBR_NONE) |
501 | TRACE_IRQS_OFF | 500 | TRACE_IRQS_OFF |
502 | movl TI_flags(%rcx),%edx | 501 | movl TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET),%edx |
503 | andl %edi,%edx | 502 | andl %edi,%edx |
504 | jnz sysret_careful | 503 | jnz sysret_careful |
505 | CFI_REMEMBER_STATE | 504 | CFI_REMEMBER_STATE |
@@ -550,7 +549,7 @@ badsys: | |||
550 | #ifdef CONFIG_AUDITSYSCALL | 549 | #ifdef CONFIG_AUDITSYSCALL |
551 | /* | 550 | /* |
552 | * Fast path for syscall audit without full syscall trace. | 551 | * Fast path for syscall audit without full syscall trace. |
553 | * We just call audit_syscall_entry() directly, and then | 552 | * We just call __audit_syscall_entry() directly, and then |
554 | * jump back to the normal fast path. | 553 | * jump back to the normal fast path. |
555 | */ | 554 | */ |
556 | auditsys: | 555 | auditsys: |
@@ -560,22 +559,21 @@ auditsys: | |||
560 | movq %rdi,%rdx /* 3rd arg: 1st syscall arg */ | 559 | movq %rdi,%rdx /* 3rd arg: 1st syscall arg */ |
561 | movq %rax,%rsi /* 2nd arg: syscall number */ | 560 | movq %rax,%rsi /* 2nd arg: syscall number */ |
562 | movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */ | 561 | movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */ |
563 | call audit_syscall_entry | 562 | call __audit_syscall_entry |
564 | LOAD_ARGS 0 /* reload call-clobbered registers */ | 563 | LOAD_ARGS 0 /* reload call-clobbered registers */ |
565 | jmp system_call_fastpath | 564 | jmp system_call_fastpath |
566 | 565 | ||
567 | /* | 566 | /* |
568 | * Return fast path for syscall audit. Call audit_syscall_exit() | 567 | * Return fast path for syscall audit. Call __audit_syscall_exit() |
569 | * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT | 568 | * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT |
570 | * masked off. | 569 | * masked off. |
571 | */ | 570 | */ |
572 | sysret_audit: | 571 | sysret_audit: |
573 | movq RAX-ARGOFFSET(%rsp),%rsi /* second arg, syscall return value */ | 572 | movq RAX-ARGOFFSET(%rsp),%rsi /* second arg, syscall return value */ |
574 | cmpq $0,%rsi /* is it < 0? */ | 573 | cmpq $-MAX_ERRNO,%rsi /* is it < -MAX_ERRNO? */ |
575 | setl %al /* 1 if so, 0 if not */ | 574 | setbe %al /* 1 if so, 0 if not */ |
576 | movzbl %al,%edi /* zero-extend that into %edi */ | 575 | movzbl %al,%edi /* zero-extend that into %edi */ |
577 | inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */ | 576 | call __audit_syscall_exit |
578 | call audit_syscall_exit | ||
579 | movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi | 577 | movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi |
580 | jmp sysret_check | 578 | jmp sysret_check |
581 | #endif /* CONFIG_AUDITSYSCALL */ | 579 | #endif /* CONFIG_AUDITSYSCALL */ |
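The new exit-path test works because the kernel encodes syscall failures as return values in the range [-MAX_ERRNO, -1]; a single unsigned comparison against -MAX_ERRNO therefore separates success from failure. A minimal C sketch of the idiom, in the spirit of the kernel's IS_ERR_VALUE() (the standalone function name here is illustrative):

    #include <stdbool.h>

    #define MAX_ERRNO 4095UL    /* the kernel's limit for errno values */

    /* True iff ret encodes an error, i.e. ret lies in [-MAX_ERRNO, -1]. */
    static bool syscall_failed(long ret)
    {
            return (unsigned long)ret >= -MAX_ERRNO;
    }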
@@ -583,7 +581,7 @@ sysret_audit: | |||
583 | /* Do syscall tracing */ | 581 | /* Do syscall tracing */ |
584 | tracesys: | 582 | tracesys: |
585 | #ifdef CONFIG_AUDITSYSCALL | 583 | #ifdef CONFIG_AUDITSYSCALL |
586 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx) | 584 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
587 | jz auditsys | 585 | jz auditsys |
588 | #endif | 586 | #endif |
589 | SAVE_REST | 587 | SAVE_REST |
@@ -612,8 +610,6 @@ tracesys: | |||
612 | GLOBAL(int_ret_from_sys_call) | 610 | GLOBAL(int_ret_from_sys_call) |
613 | DISABLE_INTERRUPTS(CLBR_NONE) | 611 | DISABLE_INTERRUPTS(CLBR_NONE) |
614 | TRACE_IRQS_OFF | 612 | TRACE_IRQS_OFF |
615 | testl $3,CS-ARGOFFSET(%rsp) | ||
616 | je retint_restore_args | ||
617 | movl $_TIF_ALLWORK_MASK,%edi | 613 | movl $_TIF_ALLWORK_MASK,%edi |
618 | /* edi: mask to check */ | 614 | /* edi: mask to check */ |
619 | GLOBAL(int_with_check) | 615 | GLOBAL(int_with_check) |
@@ -953,6 +949,7 @@ END(common_interrupt) | |||
953 | ENTRY(\sym) | 949 | ENTRY(\sym) |
954 | INTR_FRAME | 950 | INTR_FRAME |
955 | pushq_cfi $~(\num) | 951 | pushq_cfi $~(\num) |
952 | .Lcommon_\sym: | ||
956 | interrupt \do_sym | 953 | interrupt \do_sym |
957 | jmp ret_from_intr | 954 | jmp ret_from_intr |
958 | CFI_ENDPROC | 955 | CFI_ENDPROC |
@@ -976,13 +973,21 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \ | |||
976 | x86_platform_ipi smp_x86_platform_ipi | 973 | x86_platform_ipi smp_x86_platform_ipi |
977 | 974 | ||
978 | #ifdef CONFIG_SMP | 975 | #ifdef CONFIG_SMP |
979 | .irp idx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \ | 976 | ALIGN |
977 | INTR_FRAME | ||
978 | .irp idx,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \ | ||
980 | 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 | 979 | 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 |
981 | .if NUM_INVALIDATE_TLB_VECTORS > \idx | 980 | .if NUM_INVALIDATE_TLB_VECTORS > \idx |
982 | apicinterrupt (INVALIDATE_TLB_VECTOR_START)+\idx \ | 981 | ENTRY(invalidate_interrupt\idx) |
983 | invalidate_interrupt\idx smp_invalidate_interrupt | 982 | pushq_cfi $~(INVALIDATE_TLB_VECTOR_START+\idx) |
983 | jmp .Lcommon_invalidate_interrupt0 | ||
984 | CFI_ADJUST_CFA_OFFSET -8 | ||
985 | END(invalidate_interrupt\idx) | ||
984 | .endif | 986 | .endif |
985 | .endr | 987 | .endr |
988 | CFI_ENDPROC | ||
989 | apicinterrupt INVALIDATE_TLB_VECTOR_START, \ | ||
990 | invalidate_interrupt0, smp_invalidate_interrupt | ||
986 | #endif | 991 | #endif |
987 | 992 | ||
988 | apicinterrupt THRESHOLD_APIC_VECTOR \ | 993 | apicinterrupt THRESHOLD_APIC_VECTOR \ |
@@ -1475,62 +1480,221 @@ ENTRY(error_exit) | |||
1475 | CFI_ENDPROC | 1480 | CFI_ENDPROC |
1476 | END(error_exit) | 1481 | END(error_exit) |
1477 | 1482 | ||
1483 | /* | ||
1484 | * Test if a given stack is an NMI stack or not. | ||
1485 | */ | ||
1486 | .macro test_in_nmi reg stack nmi_ret normal_ret | ||
1487 | cmpq %\reg, \stack | ||
1488 | ja \normal_ret | ||
1489 | subq $EXCEPTION_STKSZ, %\reg | ||
1490 | cmpq %\reg, \stack | ||
1491 | jb \normal_ret | ||
1492 | jmp \nmi_ret | ||
1493 | .endm | ||
1478 | 1494 | ||
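The test_in_nmi macro above reduces to an interval check: a stack pointer belongs to the NMI stack iff it lies within EXCEPTION_STKSZ bytes below that stack's end. A minimal C rendering, assuming a 4 KB exception stack as on typical x86-64 configurations of this era:

    #include <stdbool.h>
    #include <stdint.h>

    #define EXCEPTION_STKSZ 4096   /* assumed: one page per exception stack */

    /* nmi_stack_end is the highest address of the NMI stack; stacks grow
     * down, so sp is on that stack iff it lies in the EXCEPTION_STKSZ
     * bytes below the end. Mirrors the macro's two cmpq/branch tests. */
    static bool stack_is_nmi_stack(uint64_t nmi_stack_end, uint64_t sp)
    {
            return sp <= nmi_stack_end &&
                   sp >= nmi_stack_end - EXCEPTION_STKSZ;
    }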
1479 | /* runs on exception stack */ | 1495 | /* runs on exception stack */ |
1480 | ENTRY(nmi) | 1496 | ENTRY(nmi) |
1481 | INTR_FRAME | 1497 | INTR_FRAME |
1482 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1498 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
1483 | pushq_cfi $-1 | 1499 | /* |
1500 | * We allow breakpoints in NMIs. If a breakpoint occurs, then | ||
1501 | * the iretq it performs will take us out of NMI context. | ||
1502 | * This means that we can have nested NMIs where the next | ||
1503 | * NMI is using the top of the stack of the previous NMI. We | ||
1504 | * can't let it execute because the nested NMI will corrupt the | ||
1505 | * stack of the previous NMI. NMI handlers are not re-entrant | ||
1506 | * anyway. | ||
1507 | * | ||
1508 | * To handle this case we do the following: | ||
1509 | * Check a special location on the stack that contains | ||
1510 | * a variable that is set when NMIs are executing. | ||
1511 | * The interrupted task's stack is also checked to see if it | ||
1512 | * is an NMI stack. | ||
1513 | * If the variable is not set and the stack is not the NMI | ||
1514 | * stack then: | ||
1515 | * o Set the special variable on the stack | ||
1516 | * o Copy the interrupt frame into a "saved" location on the stack | ||
1517 | * o Copy the interrupt frame into a "copy" location on the stack | ||
1518 | * o Continue processing the NMI | ||
1519 | * If the variable is set or the previous stack is the NMI stack: | ||
1520 | * o Modify the "copy" location to jump to the repeat_nmi | ||
1521 | * o Return to the first NMI | ||
1522 | * | ||
1523 | * Now on exit of the first NMI, we first clear the stack variable. | ||
1524 | * The NMI stack will still tell any nested NMI at that point that it is | ||
1525 | * nested. Then we pop the stack normally with iret, and if there was | ||
1526 | * a nested NMI that updated the copy interrupt stack frame, a | ||
1527 | * jump will be made to the repeat_nmi code that will handle the second | ||
1528 | * NMI. | ||
1529 | */ | ||
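Condensed into C, the entry decision just described is a function of three facts the code reads off the stack. A hedged pseudocode sketch (names invented; the real code branches directly rather than returning a value):

    #include <stdbool.h>

    enum nmi_path { FIRST_NMI, NESTED_NMI };

    static enum nmi_path classify_nmi(bool interrupted_kernel,
                                      bool nmi_executing_set,
                                      bool prev_stack_is_nmi)
    {
            if (!interrupted_kernel)    /* %cs != __KERNEL_CS: from user space */
                    return FIRST_NMI;   /* definitely not nested */
            if (nmi_executing_set)      /* the special stack variable is 1 */
                    return NESTED_NMI;
            if (prev_stack_is_nmi)      /* covers the clear-before-iret race */
                    return NESTED_NMI;
            return FIRST_NMI;
    }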
1530 | |||
1531 | /* Use %rdx as our temp variable throughout */ | ||
1532 | pushq_cfi %rdx | ||
1533 | |||
1534 | /* | ||
1535 | * If %cs was not the kernel segment, then the NMI triggered in user | ||
1536 | * space, which means it is definitely not nested. | ||
1537 | */ | ||
1538 | cmpl $__KERNEL_CS, 16(%rsp) | ||
1539 | jne first_nmi | ||
1540 | |||
1541 | /* | ||
1542 | * Check the special variable on the stack to see if NMIs are | ||
1543 | * executing. | ||
1544 | */ | ||
1545 | cmpl $1, -8(%rsp) | ||
1546 | je nested_nmi | ||
1547 | |||
1548 | /* | ||
1549 | * Now test if the previous stack was an NMI stack. | ||
1550 | * We need the double check. We check the NMI stack to cover the | ||
1551 | * race when the first NMI clears the variable before returning. | ||
1552 | * We check the variable because the first NMI could be in a | ||
1553 | * breakpoint routine using a breakpoint stack. | ||
1554 | */ | ||
1555 | lea 6*8(%rsp), %rdx | ||
1556 | test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi | ||
1557 | |||
1558 | nested_nmi: | ||
1559 | /* | ||
1560 | * Do nothing if we interrupted the fixup in repeat_nmi. | ||
1561 | * It's about to repeat the NMI handler, so we are fine | ||
1562 | * with ignoring this one. | ||
1563 | */ | ||
1564 | movq $repeat_nmi, %rdx | ||
1565 | cmpq 8(%rsp), %rdx | ||
1566 | ja 1f | ||
1567 | movq $end_repeat_nmi, %rdx | ||
1568 | cmpq 8(%rsp), %rdx | ||
1569 | ja nested_nmi_out | ||
1570 | |||
1571 | 1: | ||
1572 | /* Set up the interrupted NMI's stack to jump to repeat_nmi */ | ||
1573 | leaq -6*8(%rsp), %rdx | ||
1574 | movq %rdx, %rsp | ||
1575 | CFI_ADJUST_CFA_OFFSET 6*8 | ||
1576 | pushq_cfi $__KERNEL_DS | ||
1577 | pushq_cfi %rdx | ||
1578 | pushfq_cfi | ||
1579 | pushq_cfi $__KERNEL_CS | ||
1580 | pushq_cfi $repeat_nmi | ||
1581 | |||
1582 | /* Put stack back */ | ||
1583 | addq $(11*8), %rsp | ||
1584 | CFI_ADJUST_CFA_OFFSET -11*8 | ||
1585 | |||
1586 | nested_nmi_out: | ||
1587 | popq_cfi %rdx | ||
1588 | |||
1589 | /* No need to check faults here */ | ||
1590 | INTERRUPT_RETURN | ||
1591 | |||
1592 | first_nmi: | ||
1593 | /* | ||
1594 | * Because nested NMIs will use the pushed location that we | ||
1595 | * stored in rdx, we must keep that space available. | ||
1596 | * Here's what our stack frame will look like: | ||
1597 | * +-------------------------+ | ||
1598 | * | original SS | | ||
1599 | * | original Return RSP | | ||
1600 | * | original RFLAGS | | ||
1601 | * | original CS | | ||
1602 | * | original RIP | | ||
1603 | * +-------------------------+ | ||
1604 | * | temp storage for rdx | | ||
1605 | * +-------------------------+ | ||
1606 | * | NMI executing variable | | ||
1607 | * +-------------------------+ | ||
1608 | * | Saved SS | | ||
1609 | * | Saved Return RSP | | ||
1610 | * | Saved RFLAGS | | ||
1611 | * | Saved CS | | ||
1612 | * | Saved RIP | | ||
1613 | * +-------------------------+ | ||
1614 | * | copied SS | | ||
1615 | * | copied Return RSP | | ||
1616 | * | copied RFLAGS | | ||
1617 | * | copied CS | | ||
1618 | * | copied RIP | | ||
1619 | * +-------------------------+ | ||
1620 | * | pt_regs | | ||
1621 | * +-------------------------+ | ||
1622 | * | ||
1623 | * The saved RIP is used to fix up the copied RIP that a nested | ||
1624 | * NMI may zero out. The original stack frame and the temp storage | ||
1625 | * are also used by nested NMIs and cannot be trusted on exit. | ||
1626 | */ | ||
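An illustrative C mirror of the full frame, lowest address first (field names are invented, not the kernel's), which also matches the literal offsets used later, e.g. the 'movq $0, 10*8(%rsp)' in nmi_restore landing on the NMI-executing variable ten quadwords above the copied RIP:

    #include <stdint.h>

    struct nmi_frame {                 /* pt_regs sits just below in memory */
            uint64_t copied[5];        /* RIP, CS, RFLAGS, RSP, SS; a nested
                                          NMI may rewrite these */
            uint64_t saved[5];         /* pristine copy used for repair */
            uint64_t nmi_executing;    /* 1 while an NMI handler runs */
            uint64_t rdx_save;         /* temp storage for %rdx */
            uint64_t hw_frame[5];      /* original RIP, CS, RFLAGS, RSP, SS */
    };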
1627 | /* Set the NMI executing variable on the stack. */ | ||
1628 | pushq_cfi $1 | ||
1629 | |||
1630 | /* Copy the stack frame to the Saved frame */ | ||
1631 | .rept 5 | ||
1632 | pushq_cfi 6*8(%rsp) | ||
1633 | .endr | ||
1634 | |||
1635 | /* Make another copy, this one may be modified by nested NMIs */ | ||
1636 | .rept 5 | ||
1637 | pushq_cfi 4*8(%rsp) | ||
1638 | .endr | ||
1639 | |||
1640 | /* Do not pop rdx, nested NMIs will corrupt it */ | ||
1641 | movq 11*8(%rsp), %rdx | ||
1642 | |||
1643 | /* | ||
1644 | * Everything below this point can be preempted by a nested | ||
1645 | * NMI if the first NMI took an exception. Repeated NMIs | ||
1646 | * caused by an exception plus a nested NMI will start here, and | ||
1647 | * can still be preempted by another NMI. | ||
1648 | */ | ||
1649 | restart_nmi: | ||
1650 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ | ||
1484 | subq $ORIG_RAX-R15, %rsp | 1651 | subq $ORIG_RAX-R15, %rsp |
1485 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 | 1652 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 |
1653 | /* | ||
1654 | * Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit | ||
1655 | * as we should not be calling schedule in NMI context, | ||
1656 | * even with normal interrupts enabled. An NMI should not be | ||
1657 | * setting NEED_RESCHED or anything that normal interrupts and | ||
1658 | * exceptions might do. | ||
1659 | */ | ||
1486 | call save_paranoid | 1660 | call save_paranoid |
1487 | DEFAULT_FRAME 0 | 1661 | DEFAULT_FRAME 0 |
1488 | /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ | 1662 | /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ |
1489 | movq %rsp,%rdi | 1663 | movq %rsp,%rdi |
1490 | movq $-1,%rsi | 1664 | movq $-1,%rsi |
1491 | call do_nmi | 1665 | call do_nmi |
1492 | #ifdef CONFIG_TRACE_IRQFLAGS | ||
1493 | /* paranoidexit; without TRACE_IRQS_OFF */ | ||
1494 | /* ebx: no swapgs flag */ | ||
1495 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
1496 | testl %ebx,%ebx /* swapgs needed? */ | 1666 | testl %ebx,%ebx /* swapgs needed? */ |
1497 | jnz nmi_restore | 1667 | jnz nmi_restore |
1498 | testl $3,CS(%rsp) | ||
1499 | jnz nmi_userspace | ||
1500 | nmi_swapgs: | 1668 | nmi_swapgs: |
1501 | SWAPGS_UNSAFE_STACK | 1669 | SWAPGS_UNSAFE_STACK |
1502 | nmi_restore: | 1670 | nmi_restore: |
1503 | RESTORE_ALL 8 | 1671 | RESTORE_ALL 8 |
1672 | /* Clear the NMI executing stack variable */ | ||
1673 | movq $0, 10*8(%rsp) | ||
1504 | jmp irq_return | 1674 | jmp irq_return |
1505 | nmi_userspace: | ||
1506 | GET_THREAD_INFO(%rcx) | ||
1507 | movl TI_flags(%rcx),%ebx | ||
1508 | andl $_TIF_WORK_MASK,%ebx | ||
1509 | jz nmi_swapgs | ||
1510 | movq %rsp,%rdi /* &pt_regs */ | ||
1511 | call sync_regs | ||
1512 | movq %rax,%rsp /* switch stack for scheduling */ | ||
1513 | testl $_TIF_NEED_RESCHED,%ebx | ||
1514 | jnz nmi_schedule | ||
1515 | movl %ebx,%edx /* arg3: thread flags */ | ||
1516 | ENABLE_INTERRUPTS(CLBR_NONE) | ||
1517 | xorl %esi,%esi /* arg2: oldset */ | ||
1518 | movq %rsp,%rdi /* arg1: &pt_regs */ | ||
1519 | call do_notify_resume | ||
1520 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
1521 | jmp nmi_userspace | ||
1522 | nmi_schedule: | ||
1523 | ENABLE_INTERRUPTS(CLBR_ANY) | ||
1524 | call schedule | ||
1525 | DISABLE_INTERRUPTS(CLBR_ANY) | ||
1526 | jmp nmi_userspace | ||
1527 | CFI_ENDPROC | 1675 | CFI_ENDPROC |
1528 | #else | ||
1529 | jmp paranoid_exit | ||
1530 | CFI_ENDPROC | ||
1531 | #endif | ||
1532 | END(nmi) | 1676 | END(nmi) |
1533 | 1677 | ||
1678 | /* | ||
1679 | * If an NMI hits an iret because of an exception or breakpoint, | ||
1680 | * it can lose its NMI context, and a nested NMI may come in. | ||
1681 | * In that case, the nested NMI will change the preempted NMI's | ||
1682 | * stack to jump to here when it does the final iret. | ||
1683 | */ | ||
1684 | repeat_nmi: | ||
1685 | INTR_FRAME | ||
1686 | /* Update the stack variable to say we are still in NMI */ | ||
1687 | movq $1, 5*8(%rsp) | ||
1688 | |||
1689 | /* Copy the saved frame back to the copy frame */ | ||
1690 | .rept 5 | ||
1691 | pushq_cfi 4*8(%rsp) | ||
1692 | .endr | ||
1693 | |||
1694 | jmp restart_nmi | ||
1695 | CFI_ENDPROC | ||
1696 | end_repeat_nmi: | ||
1697 | |||
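In C terms, repeat_nmi's fixup amounts to re-asserting the in-NMI flag and rebuilding the rewritable copy frame from the pristine saved one. A hedged sketch reusing the illustrative layout given earlier (not kernel code):

    #include <stdint.h>
    #include <string.h>

    struct nmi_frame_view {            /* see the illustrative layout above */
            uint64_t copied[5];
            uint64_t saved[5];
            uint64_t nmi_executing;
    };

    static void repeat_nmi_fixup(struct nmi_frame_view *f)
    {
            f->nmi_executing = 1;                           /* movq $1, 5*8(%rsp) */
            memcpy(f->copied, f->saved, sizeof(f->copied)); /* the .rept pushes */
    }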
1534 | ENTRY(ignore_sysret) | 1698 | ENTRY(ignore_sysret) |
1535 | CFI_STARTPROC | 1699 | CFI_STARTPROC |
1536 | mov $-ENOSYS,%eax | 1700 | mov $-ENOSYS,%eax |
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c index af0699ba48cf..48d9d4ea1020 100644 --- a/arch/x86/kernel/head.c +++ b/arch/x86/kernel/head.c | |||
@@ -52,5 +52,5 @@ void __init reserve_ebda_region(void) | |||
52 | lowmem = 0x9f000; | 52 | lowmem = 0x9f000; |
53 | 53 | ||
54 | /* reserve all memory between lowmem and the 1MB mark */ | 54 | /* reserve all memory between lowmem and the 1MB mark */ |
55 | memblock_x86_reserve_range(lowmem, 0x100000, "* BIOS reserved"); | 55 | memblock_reserve(lowmem, 0x100000 - lowmem); |
56 | } | 56 | } |
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 3bb08509a7a1..51ff18616d50 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c | |||
@@ -31,9 +31,8 @@ static void __init i386_default_early_setup(void) | |||
31 | 31 | ||
32 | void __init i386_start_kernel(void) | 32 | void __init i386_start_kernel(void) |
33 | { | 33 | { |
34 | memblock_init(); | 34 | memblock_reserve(__pa_symbol(&_text), |
35 | 35 | __pa_symbol(&__bss_stop) - __pa_symbol(&_text)); | |
36 | memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); | ||
37 | 36 | ||
38 | #ifdef CONFIG_BLK_DEV_INITRD | 37 | #ifdef CONFIG_BLK_DEV_INITRD |
39 | /* Reserve INITRD */ | 38 | /* Reserve INITRD */ |
@@ -42,7 +41,7 @@ void __init i386_start_kernel(void) | |||
42 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; | 41 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; |
43 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; | 42 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; |
44 | u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); | 43 | u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); |
45 | memblock_x86_reserve_range(ramdisk_image, ramdisk_end, "RAMDISK"); | 44 | memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image); |
46 | } | 45 | } |
47 | #endif | 46 | #endif |
48 | 47 | ||
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 5655c2272adb..3a3b779f41d3 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c | |||
@@ -98,9 +98,8 @@ void __init x86_64_start_reservations(char *real_mode_data) | |||
98 | { | 98 | { |
99 | copy_bootdata(__va(real_mode_data)); | 99 | copy_bootdata(__va(real_mode_data)); |
100 | 100 | ||
101 | memblock_init(); | 101 | memblock_reserve(__pa_symbol(&_text), |
102 | 102 | __pa_symbol(&__bss_stop) - __pa_symbol(&_text)); | |
103 | memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); | ||
104 | 103 | ||
105 | #ifdef CONFIG_BLK_DEV_INITRD | 104 | #ifdef CONFIG_BLK_DEV_INITRD |
106 | /* Reserve INITRD */ | 105 | /* Reserve INITRD */ |
@@ -109,7 +108,7 @@ void __init x86_64_start_reservations(char *real_mode_data) | |||
109 | unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; | 108 | unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; |
110 | unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; | 109 | unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; |
111 | unsigned long ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); | 110 | unsigned long ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); |
112 | memblock_x86_reserve_range(ramdisk_image, ramdisk_end, "RAMDISK"); | 111 | memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image); |
113 | } | 112 | } |
114 | #endif | 113 | #endif |
115 | 114 | ||
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index e11e39478a49..40f4eb3766d1 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -417,6 +417,10 @@ ENTRY(phys_base) | |||
417 | ENTRY(idt_table) | 417 | ENTRY(idt_table) |
418 | .skip IDT_ENTRIES * 16 | 418 | .skip IDT_ENTRIES * 16 |
419 | 419 | ||
420 | .align L1_CACHE_BYTES | ||
421 | ENTRY(nmi_idt_table) | ||
422 | .skip IDT_ENTRIES * 16 | ||
423 | |||
420 | __PAGE_ALIGNED_BSS | 424 | __PAGE_ALIGNED_BSS |
421 | .align PAGE_SIZE | 425 | .align PAGE_SIZE |
422 | ENTRY(empty_zero_page) | 426 | ENTRY(empty_zero_page) |
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index b946a9eac7d9..ad0de0c2714e 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
@@ -2,7 +2,6 @@ | |||
2 | #include <linux/clockchips.h> | 2 | #include <linux/clockchips.h> |
3 | #include <linux/interrupt.h> | 3 | #include <linux/interrupt.h> |
4 | #include <linux/export.h> | 4 | #include <linux/export.h> |
5 | #include <linux/sysdev.h> | ||
6 | #include <linux/delay.h> | 5 | #include <linux/delay.h> |
7 | #include <linux/errno.h> | 6 | #include <linux/errno.h> |
8 | #include <linux/i8253.h> | 7 | #include <linux/i8253.h> |
@@ -32,8 +31,6 @@ | |||
32 | #define HPET_MIN_CYCLES 128 | 31 | #define HPET_MIN_CYCLES 128 |
33 | #define HPET_MIN_PROG_DELTA (HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1)) | 32 | #define HPET_MIN_PROG_DELTA (HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1)) |
34 | 33 | ||
35 | #define EVT_TO_HPET_DEV(evt) container_of(evt, struct hpet_dev, evt) | ||
36 | |||
37 | /* | 34 | /* |
38 | * HPET address is set in acpi/boot.c, when an ACPI entry exists | 35 | * HPET address is set in acpi/boot.c, when an ACPI entry exists |
39 | */ | 36 | */ |
@@ -55,6 +52,11 @@ struct hpet_dev { | |||
55 | char name[10]; | 52 | char name[10]; |
56 | }; | 53 | }; |
57 | 54 | ||
55 | inline struct hpet_dev *EVT_TO_HPET_DEV(struct clock_event_device *evtdev) | ||
56 | { | ||
57 | return container_of(evtdev, struct hpet_dev, evt); | ||
58 | } | ||
59 | |||
58 | inline unsigned int hpet_readl(unsigned int a) | 60 | inline unsigned int hpet_readl(unsigned int a) |
59 | { | 61 | { |
60 | return readl(hpet_virt_address + a); | 62 | return readl(hpet_virt_address + a); |
@@ -1049,6 +1051,14 @@ int hpet_rtc_timer_init(void) | |||
1049 | } | 1051 | } |
1050 | EXPORT_SYMBOL_GPL(hpet_rtc_timer_init); | 1052 | EXPORT_SYMBOL_GPL(hpet_rtc_timer_init); |
1051 | 1053 | ||
1054 | static void hpet_disable_rtc_channel(void) | ||
1055 | { | ||
1056 | unsigned long cfg; | ||
1057 | cfg = hpet_readl(HPET_T1_CFG); | ||
1058 | cfg &= ~HPET_TN_ENABLE; | ||
1059 | hpet_writel(cfg, HPET_T1_CFG); | ||
1060 | } | ||
1061 | |||
1052 | /* | 1062 | /* |
1053 | * The functions below are called from rtc driver. | 1063 | * The functions below are called from rtc driver. |
1054 | * Return 0 if HPET is not being used. | 1064 | * Return 0 if HPET is not being used. |
@@ -1060,6 +1070,9 @@ int hpet_mask_rtc_irq_bit(unsigned long bit_mask) | |||
1060 | return 0; | 1070 | return 0; |
1061 | 1071 | ||
1062 | hpet_rtc_flags &= ~bit_mask; | 1072 | hpet_rtc_flags &= ~bit_mask; |
1073 | if (unlikely(!hpet_rtc_flags)) | ||
1074 | hpet_disable_rtc_channel(); | ||
1075 | |||
1063 | return 1; | 1076 | return 1; |
1064 | } | 1077 | } |
1065 | EXPORT_SYMBOL_GPL(hpet_mask_rtc_irq_bit); | 1078 | EXPORT_SYMBOL_GPL(hpet_mask_rtc_irq_bit); |
@@ -1125,15 +1138,11 @@ EXPORT_SYMBOL_GPL(hpet_rtc_dropped_irq); | |||
1125 | 1138 | ||
1126 | static void hpet_rtc_timer_reinit(void) | 1139 | static void hpet_rtc_timer_reinit(void) |
1127 | { | 1140 | { |
1128 | unsigned int cfg, delta; | 1141 | unsigned int delta; |
1129 | int lost_ints = -1; | 1142 | int lost_ints = -1; |
1130 | 1143 | ||
1131 | if (unlikely(!hpet_rtc_flags)) { | 1144 | if (unlikely(!hpet_rtc_flags)) |
1132 | cfg = hpet_readl(HPET_T1_CFG); | 1145 | hpet_disable_rtc_channel(); |
1133 | cfg &= ~HPET_TN_ENABLE; | ||
1134 | hpet_writel(cfg, HPET_T1_CFG); | ||
1135 | return; | ||
1136 | } | ||
1137 | 1146 | ||
1138 | if (!(hpet_rtc_flags & RTC_PIE) || hpet_pie_limit) | 1147 | if (!(hpet_rtc_flags & RTC_PIE) || hpet_pie_limit) |
1139 | delta = hpet_default_delta; | 1148 | delta = hpet_default_delta; |
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 429e0c92924e..7943e0c21bde 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
@@ -74,6 +74,10 @@ int arch_show_interrupts(struct seq_file *p, int prec) | |||
74 | for_each_online_cpu(j) | 74 | for_each_online_cpu(j) |
75 | seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs); | 75 | seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs); |
76 | seq_printf(p, " IRQ work interrupts\n"); | 76 | seq_printf(p, " IRQ work interrupts\n"); |
77 | seq_printf(p, "%*s: ", prec, "RTR"); | ||
78 | for_each_online_cpu(j) | ||
79 | seq_printf(p, "%10u ", irq_stats(j)->icr_read_retry_count); | ||
80 | seq_printf(p, " APIC ICR read retries\n"); | ||
77 | #endif | 81 | #endif |
78 | if (x86_platform_ipi_callback) { | 82 | if (x86_platform_ipi_callback) { |
79 | seq_printf(p, "%*s: ", prec, "PLT"); | 83 | seq_printf(p, "%*s: ", prec, "PLT"); |
@@ -136,6 +140,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu) | |||
136 | sum += irq_stats(cpu)->irq_spurious_count; | 140 | sum += irq_stats(cpu)->irq_spurious_count; |
137 | sum += irq_stats(cpu)->apic_perf_irqs; | 141 | sum += irq_stats(cpu)->apic_perf_irqs; |
138 | sum += irq_stats(cpu)->apic_irq_work_irqs; | 142 | sum += irq_stats(cpu)->apic_irq_work_irqs; |
143 | sum += irq_stats(cpu)->icr_read_retry_count; | ||
139 | #endif | 144 | #endif |
140 | if (x86_platform_ipi_callback) | 145 | if (x86_platform_ipi_callback) |
141 | sum += irq_stats(cpu)->x86_platform_ipis; | 146 | sum += irq_stats(cpu)->x86_platform_ipis; |
@@ -181,8 +186,8 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs) | |||
181 | unsigned vector = ~regs->orig_ax; | 186 | unsigned vector = ~regs->orig_ax; |
182 | unsigned irq; | 187 | unsigned irq; |
183 | 188 | ||
184 | exit_idle(); | ||
185 | irq_enter(); | 189 | irq_enter(); |
190 | exit_idle(); | ||
186 | 191 | ||
187 | irq = __this_cpu_read(vector_irq[vector]); | 192 | irq = __this_cpu_read(vector_irq[vector]); |
188 | 193 | ||
@@ -209,10 +214,10 @@ void smp_x86_platform_ipi(struct pt_regs *regs) | |||
209 | 214 | ||
210 | ack_APIC_irq(); | 215 | ack_APIC_irq(); |
211 | 216 | ||
212 | exit_idle(); | ||
213 | |||
214 | irq_enter(); | 217 | irq_enter(); |
215 | 218 | ||
219 | exit_idle(); | ||
220 | |||
216 | inc_irq_stat(x86_platform_ipis); | 221 | inc_irq_stat(x86_platform_ipis); |
217 | 222 | ||
218 | if (x86_platform_ipi_callback) | 223 | if (x86_platform_ipi_callback) |
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 72090705a656..40fc86161d92 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c | |||
@@ -28,6 +28,9 @@ DEFINE_PER_CPU(struct pt_regs *, irq_regs); | |||
28 | EXPORT_PER_CPU_SYMBOL(irq_regs); | 28 | EXPORT_PER_CPU_SYMBOL(irq_regs); |
29 | 29 | ||
30 | #ifdef CONFIG_DEBUG_STACKOVERFLOW | 30 | #ifdef CONFIG_DEBUG_STACKOVERFLOW |
31 | |||
32 | int sysctl_panic_on_stackoverflow __read_mostly; | ||
33 | |||
31 | /* Debugging check for stack overflow: is there less than 1KB free? */ | 34 | /* Debugging check for stack overflow: is there less than 1KB free? */ |
32 | static int check_stack_overflow(void) | 35 | static int check_stack_overflow(void) |
33 | { | 36 | { |
@@ -43,6 +46,8 @@ static void print_stack_overflow(void) | |||
43 | { | 46 | { |
44 | printk(KERN_WARNING "low stack detected by irq handler\n"); | 47 | printk(KERN_WARNING "low stack detected by irq handler\n"); |
45 | dump_stack(); | 48 | dump_stack(); |
49 | if (sysctl_panic_on_stackoverflow) | ||
50 | panic("low stack detected by irq handler - check messages\n"); | ||
46 | } | 51 | } |
47 | 52 | ||
48 | #else | 53 | #else |
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index acf8fbf8fbda..d04d3ecded62 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c | |||
@@ -26,6 +26,8 @@ EXPORT_PER_CPU_SYMBOL(irq_stat); | |||
26 | DEFINE_PER_CPU(struct pt_regs *, irq_regs); | 26 | DEFINE_PER_CPU(struct pt_regs *, irq_regs); |
27 | EXPORT_PER_CPU_SYMBOL(irq_regs); | 27 | EXPORT_PER_CPU_SYMBOL(irq_regs); |
28 | 28 | ||
29 | int sysctl_panic_on_stackoverflow; | ||
30 | |||
29 | /* | 31 | /* |
30 | * Probabilistic stack overflow check: | 32 | * Probabilistic stack overflow check: |
31 | * | 33 | * |
@@ -36,15 +38,39 @@ EXPORT_PER_CPU_SYMBOL(irq_regs); | |||
36 | static inline void stack_overflow_check(struct pt_regs *regs) | 38 | static inline void stack_overflow_check(struct pt_regs *regs) |
37 | { | 39 | { |
38 | #ifdef CONFIG_DEBUG_STACKOVERFLOW | 40 | #ifdef CONFIG_DEBUG_STACKOVERFLOW |
41 | #define STACK_TOP_MARGIN 128 | ||
42 | struct orig_ist *oist; | ||
43 | u64 irq_stack_top, irq_stack_bottom; | ||
44 | u64 estack_top, estack_bottom; | ||
39 | u64 curbase = (u64)task_stack_page(current); | 45 | u64 curbase = (u64)task_stack_page(current); |
40 | 46 | ||
41 | WARN_ONCE(regs->sp >= curbase && | 47 | if (user_mode_vm(regs)) |
42 | regs->sp <= curbase + THREAD_SIZE && | 48 | return; |
43 | regs->sp < curbase + sizeof(struct thread_info) + | 49 | |
44 | sizeof(struct pt_regs) + 128, | 50 | if (regs->sp >= curbase + sizeof(struct thread_info) + |
51 | sizeof(struct pt_regs) + STACK_TOP_MARGIN && | ||
52 | regs->sp <= curbase + THREAD_SIZE) | ||
53 | return; | ||
54 | |||
55 | irq_stack_top = (u64)__get_cpu_var(irq_stack_union.irq_stack) + | ||
56 | STACK_TOP_MARGIN; | ||
57 | irq_stack_bottom = (u64)__get_cpu_var(irq_stack_ptr); | ||
58 | if (regs->sp >= irq_stack_top && regs->sp <= irq_stack_bottom) | ||
59 | return; | ||
60 | |||
61 | oist = &__get_cpu_var(orig_ist); | ||
62 | estack_top = (u64)oist->ist[0] - EXCEPTION_STKSZ + STACK_TOP_MARGIN; | ||
63 | estack_bottom = (u64)oist->ist[N_EXCEPTION_STACKS - 1]; | ||
64 | if (regs->sp >= estack_top && regs->sp <= estack_bottom) | ||
65 | return; | ||
66 | |||
67 | WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx)\n", | ||
68 | current->comm, curbase, regs->sp, | ||
69 | irq_stack_top, irq_stack_bottom, | ||
70 | estack_top, estack_bottom); | ||
45 | 71 | ||
46 | "do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n", | 72 | if (sysctl_panic_on_stackoverflow) |
47 | current->comm, curbase, regs->sp); | 73 | panic("low stack detected by irq handler - check messages\n"); |
48 | #endif | 74 | #endif |
49 | } | 75 | } |
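The rewritten check applies one interval test to three candidate stacks (task, IRQ, exception) and only warns when %rsp falls outside all of them. The shared predicate, as a small self-contained C sketch with simplified types:

    #include <stdbool.h>
    #include <stdint.h>

    /* Stacks grow down: top is the lowest acceptable address (already
     * including any safety margin), bottom the highest. */
    static bool sp_within(uint64_t sp, uint64_t top, uint64_t bottom)
    {
            return sp >= top && sp <= bottom;
    }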
50 | 76 | ||
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index b3300e6bacef..313fb5cddbce 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c | |||
@@ -9,7 +9,7 @@ | |||
9 | #include <linux/kprobes.h> | 9 | #include <linux/kprobes.h> |
10 | #include <linux/init.h> | 10 | #include <linux/init.h> |
11 | #include <linux/kernel_stat.h> | 11 | #include <linux/kernel_stat.h> |
12 | #include <linux/sysdev.h> | 12 | #include <linux/device.h> |
13 | #include <linux/bitops.h> | 13 | #include <linux/bitops.h> |
14 | #include <linux/acpi.h> | 14 | #include <linux/acpi.h> |
15 | #include <linux/io.h> | 15 | #include <linux/io.h> |
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index ea9d5f2f13ef..2889b3d43882 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c | |||
@@ -50,7 +50,7 @@ void arch_jump_label_transform(struct jump_entry *entry, | |||
50 | put_online_cpus(); | 50 | put_online_cpus(); |
51 | } | 51 | } |
52 | 52 | ||
53 | void arch_jump_label_transform_static(struct jump_entry *entry, | 53 | __init_or_module void arch_jump_label_transform_static(struct jump_entry *entry, |
54 | enum jump_label_type type) | 54 | enum jump_label_type type) |
55 | { | 55 | { |
56 | __jump_label_transform(entry, type, text_poke_early); | 56 | __jump_label_transform(entry, type, text_poke_early); |
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index a9c2116001d6..f0c6fd6f176b 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
@@ -39,8 +39,6 @@ | |||
39 | #include <asm/desc.h> | 39 | #include <asm/desc.h> |
40 | #include <asm/tlbflush.h> | 40 | #include <asm/tlbflush.h> |
41 | 41 | ||
42 | #define MMU_QUEUE_SIZE 1024 | ||
43 | |||
44 | static int kvmapf = 1; | 42 | static int kvmapf = 1; |
45 | 43 | ||
46 | static int parse_no_kvmapf(char *arg) | 44 | static int parse_no_kvmapf(char *arg) |
@@ -60,21 +58,10 @@ static int parse_no_stealacc(char *arg) | |||
60 | 58 | ||
61 | early_param("no-steal-acc", parse_no_stealacc); | 59 | early_param("no-steal-acc", parse_no_stealacc); |
62 | 60 | ||
63 | struct kvm_para_state { | ||
64 | u8 mmu_queue[MMU_QUEUE_SIZE]; | ||
65 | int mmu_queue_len; | ||
66 | }; | ||
67 | |||
68 | static DEFINE_PER_CPU(struct kvm_para_state, para_state); | ||
69 | static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64); | 61 | static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64); |
70 | static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64); | 62 | static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64); |
71 | static int has_steal_clock = 0; | 63 | static int has_steal_clock = 0; |
72 | 64 | ||
73 | static struct kvm_para_state *kvm_para_state(void) | ||
74 | { | ||
75 | return &per_cpu(para_state, raw_smp_processor_id()); | ||
76 | } | ||
77 | |||
78 | /* | 65 | /* |
79 | * No need for any "IO delay" on KVM | 66 | * No need for any "IO delay" on KVM |
80 | */ | 67 | */ |
@@ -271,151 +258,6 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
271 | } | 258 | } |
272 | } | 259 | } |
273 | 260 | ||
274 | static void kvm_mmu_op(void *buffer, unsigned len) | ||
275 | { | ||
276 | int r; | ||
277 | unsigned long a1, a2; | ||
278 | |||
279 | do { | ||
280 | a1 = __pa(buffer); | ||
281 | a2 = 0; /* on i386 __pa() always returns <4G */ | ||
282 | r = kvm_hypercall3(KVM_HC_MMU_OP, len, a1, a2); | ||
283 | buffer += r; | ||
284 | len -= r; | ||
285 | } while (len); | ||
286 | } | ||
287 | |||
288 | static void mmu_queue_flush(struct kvm_para_state *state) | ||
289 | { | ||
290 | if (state->mmu_queue_len) { | ||
291 | kvm_mmu_op(state->mmu_queue, state->mmu_queue_len); | ||
292 | state->mmu_queue_len = 0; | ||
293 | } | ||
294 | } | ||
295 | |||
296 | static void kvm_deferred_mmu_op(void *buffer, int len) | ||
297 | { | ||
298 | struct kvm_para_state *state = kvm_para_state(); | ||
299 | |||
300 | if (paravirt_get_lazy_mode() != PARAVIRT_LAZY_MMU) { | ||
301 | kvm_mmu_op(buffer, len); | ||
302 | return; | ||
303 | } | ||
304 | if (state->mmu_queue_len + len > sizeof state->mmu_queue) | ||
305 | mmu_queue_flush(state); | ||
306 | memcpy(state->mmu_queue + state->mmu_queue_len, buffer, len); | ||
307 | state->mmu_queue_len += len; | ||
308 | } | ||
309 | |||
310 | static void kvm_mmu_write(void *dest, u64 val) | ||
311 | { | ||
312 | __u64 pte_phys; | ||
313 | struct kvm_mmu_op_write_pte wpte; | ||
314 | |||
315 | #ifdef CONFIG_HIGHPTE | ||
316 | struct page *page; | ||
317 | unsigned long dst = (unsigned long) dest; | ||
318 | |||
319 | page = kmap_atomic_to_page(dest); | ||
320 | pte_phys = page_to_pfn(page); | ||
321 | pte_phys <<= PAGE_SHIFT; | ||
322 | pte_phys += (dst & ~(PAGE_MASK)); | ||
323 | #else | ||
324 | pte_phys = (unsigned long)__pa(dest); | ||
325 | #endif | ||
326 | wpte.header.op = KVM_MMU_OP_WRITE_PTE; | ||
327 | wpte.pte_val = val; | ||
328 | wpte.pte_phys = pte_phys; | ||
329 | |||
330 | kvm_deferred_mmu_op(&wpte, sizeof wpte); | ||
331 | } | ||
332 | |||
333 | /* | ||
334 | * We only need to hook operations that are MMU writes. We hook these so that | ||
335 | * we can use lazy MMU mode to batch these operations. We could probably | ||
336 | * improve the performance of the host code if we used some of the information | ||
337 | * here to simplify processing of batched writes. | ||
338 | */ | ||
339 | static void kvm_set_pte(pte_t *ptep, pte_t pte) | ||
340 | { | ||
341 | kvm_mmu_write(ptep, pte_val(pte)); | ||
342 | } | ||
343 | |||
344 | static void kvm_set_pte_at(struct mm_struct *mm, unsigned long addr, | ||
345 | pte_t *ptep, pte_t pte) | ||
346 | { | ||
347 | kvm_mmu_write(ptep, pte_val(pte)); | ||
348 | } | ||
349 | |||
350 | static void kvm_set_pmd(pmd_t *pmdp, pmd_t pmd) | ||
351 | { | ||
352 | kvm_mmu_write(pmdp, pmd_val(pmd)); | ||
353 | } | ||
354 | |||
355 | #if PAGETABLE_LEVELS >= 3 | ||
356 | #ifdef CONFIG_X86_PAE | ||
357 | static void kvm_set_pte_atomic(pte_t *ptep, pte_t pte) | ||
358 | { | ||
359 | kvm_mmu_write(ptep, pte_val(pte)); | ||
360 | } | ||
361 | |||
362 | static void kvm_pte_clear(struct mm_struct *mm, | ||
363 | unsigned long addr, pte_t *ptep) | ||
364 | { | ||
365 | kvm_mmu_write(ptep, 0); | ||
366 | } | ||
367 | |||
368 | static void kvm_pmd_clear(pmd_t *pmdp) | ||
369 | { | ||
370 | kvm_mmu_write(pmdp, 0); | ||
371 | } | ||
372 | #endif | ||
373 | |||
374 | static void kvm_set_pud(pud_t *pudp, pud_t pud) | ||
375 | { | ||
376 | kvm_mmu_write(pudp, pud_val(pud)); | ||
377 | } | ||
378 | |||
379 | #if PAGETABLE_LEVELS == 4 | ||
380 | static void kvm_set_pgd(pgd_t *pgdp, pgd_t pgd) | ||
381 | { | ||
382 | kvm_mmu_write(pgdp, pgd_val(pgd)); | ||
383 | } | ||
384 | #endif | ||
385 | #endif /* PAGETABLE_LEVELS >= 3 */ | ||
386 | |||
387 | static void kvm_flush_tlb(void) | ||
388 | { | ||
389 | struct kvm_mmu_op_flush_tlb ftlb = { | ||
390 | .header.op = KVM_MMU_OP_FLUSH_TLB, | ||
391 | }; | ||
392 | |||
393 | kvm_deferred_mmu_op(&ftlb, sizeof ftlb); | ||
394 | } | ||
395 | |||
396 | static void kvm_release_pt(unsigned long pfn) | ||
397 | { | ||
398 | struct kvm_mmu_op_release_pt rpt = { | ||
399 | .header.op = KVM_MMU_OP_RELEASE_PT, | ||
400 | .pt_phys = (u64)pfn << PAGE_SHIFT, | ||
401 | }; | ||
402 | |||
403 | kvm_mmu_op(&rpt, sizeof rpt); | ||
404 | } | ||
405 | |||
406 | static void kvm_enter_lazy_mmu(void) | ||
407 | { | ||
408 | paravirt_enter_lazy_mmu(); | ||
409 | } | ||
410 | |||
411 | static void kvm_leave_lazy_mmu(void) | ||
412 | { | ||
413 | struct kvm_para_state *state = kvm_para_state(); | ||
414 | |||
415 | mmu_queue_flush(state); | ||
416 | paravirt_leave_lazy_mmu(); | ||
417 | } | ||
418 | |||
419 | static void __init paravirt_ops_setup(void) | 261 | static void __init paravirt_ops_setup(void) |
420 | { | 262 | { |
421 | pv_info.name = "KVM"; | 263 | pv_info.name = "KVM"; |
@@ -424,29 +266,6 @@ static void __init paravirt_ops_setup(void) | |||
424 | if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY)) | 266 | if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY)) |
425 | pv_cpu_ops.io_delay = kvm_io_delay; | 267 | pv_cpu_ops.io_delay = kvm_io_delay; |
426 | 268 | ||
427 | if (kvm_para_has_feature(KVM_FEATURE_MMU_OP)) { | ||
428 | pv_mmu_ops.set_pte = kvm_set_pte; | ||
429 | pv_mmu_ops.set_pte_at = kvm_set_pte_at; | ||
430 | pv_mmu_ops.set_pmd = kvm_set_pmd; | ||
431 | #if PAGETABLE_LEVELS >= 3 | ||
432 | #ifdef CONFIG_X86_PAE | ||
433 | pv_mmu_ops.set_pte_atomic = kvm_set_pte_atomic; | ||
434 | pv_mmu_ops.pte_clear = kvm_pte_clear; | ||
435 | pv_mmu_ops.pmd_clear = kvm_pmd_clear; | ||
436 | #endif | ||
437 | pv_mmu_ops.set_pud = kvm_set_pud; | ||
438 | #if PAGETABLE_LEVELS == 4 | ||
439 | pv_mmu_ops.set_pgd = kvm_set_pgd; | ||
440 | #endif | ||
441 | #endif | ||
442 | pv_mmu_ops.flush_tlb_user = kvm_flush_tlb; | ||
443 | pv_mmu_ops.release_pte = kvm_release_pt; | ||
444 | pv_mmu_ops.release_pmd = kvm_release_pt; | ||
445 | pv_mmu_ops.release_pud = kvm_release_pt; | ||
446 | |||
447 | pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu; | ||
448 | pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu; | ||
449 | } | ||
450 | #ifdef CONFIG_X86_IO_APIC | 269 | #ifdef CONFIG_X86_IO_APIC |
451 | no_timer_check = 1; | 270 | no_timer_check = 1; |
452 | #endif | 271 | #endif |
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index d494799aafcd..73465aab28f8 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c | |||
@@ -1,14 +1,18 @@ | |||
1 | /* | 1 | /* |
2 | * AMD CPU Microcode Update Driver for Linux | 2 | * AMD CPU Microcode Update Driver for Linux |
3 | * Copyright (C) 2008 Advanced Micro Devices Inc. | 3 | * Copyright (C) 2008-2011 Advanced Micro Devices Inc. |
4 | * | 4 | * |
5 | * Author: Peter Oruba <peter.oruba@amd.com> | 5 | * Author: Peter Oruba <peter.oruba@amd.com> |
6 | * | 6 | * |
7 | * Based on work by: | 7 | * Based on work by: |
8 | * Tigran Aivazian <tigran@aivazian.fsnet.co.uk> | 8 | * Tigran Aivazian <tigran@aivazian.fsnet.co.uk> |
9 | * | 9 | * |
10 | * This driver allows to upgrade microcode on AMD | 10 | * Maintainers: |
11 | * family 0x10 and 0x11 processors. | 11 | * Andreas Herrmann <andreas.herrmann3@amd.com> |
12 | * Borislav Petkov <borislav.petkov@amd.com> | ||
13 | * | ||
14 | * This driver allows one to upgrade microcode on F10h AMD
15 | * CPUs and later. | ||
12 | * | 16 | * |
13 | * Licensed under the terms of the GNU General Public | 17 | * Licensed under the terms of the GNU General Public |
14 | * License version 2. See file COPYING for details. | 18 | * License version 2. See file COPYING for details. |
@@ -71,6 +75,9 @@ struct microcode_amd { | |||
71 | 75 | ||
72 | static struct equiv_cpu_entry *equiv_cpu_table; | 76 | static struct equiv_cpu_entry *equiv_cpu_table; |
73 | 77 | ||
78 | /* page-sized ucode patch buffer */ | ||
79 | void *patch; | ||
80 | |||
74 | static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) | 81 | static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) |
75 | { | 82 | { |
76 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 83 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
@@ -86,27 +93,76 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) | |||
86 | return 0; | 93 | return 0; |
87 | } | 94 | } |
88 | 95 | ||
89 | static int get_matching_microcode(int cpu, struct microcode_header_amd *mc_hdr, | 96 | static unsigned int verify_ucode_size(int cpu, u32 patch_size, |
90 | int rev) | 97 | unsigned int size) |
91 | { | 98 | { |
92 | unsigned int current_cpu_id; | 99 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
93 | u16 equiv_cpu_id = 0; | 100 | u32 max_size; |
94 | unsigned int i = 0; | 101 | |
102 | #define F1XH_MPB_MAX_SIZE 2048 | ||
103 | #define F14H_MPB_MAX_SIZE 1824 | ||
104 | #define F15H_MPB_MAX_SIZE 4096 | ||
105 | |||
106 | switch (c->x86) { | ||
107 | case 0x14: | ||
108 | max_size = F14H_MPB_MAX_SIZE; | ||
109 | break; | ||
110 | case 0x15: | ||
111 | max_size = F15H_MPB_MAX_SIZE; | ||
112 | break; | ||
113 | default: | ||
114 | max_size = F1XH_MPB_MAX_SIZE; | ||
115 | break; | ||
116 | } | ||
117 | |||
118 | if (patch_size > min_t(u32, size, max_size)) { | ||
119 | pr_err("patch size mismatch\n"); | ||
120 | return 0; | ||
121 | } | ||
122 | |||
123 | return patch_size; | ||
124 | } | ||
125 | |||
126 | static u16 find_equiv_id(void) | ||
127 | { | ||
128 | unsigned int current_cpu_id, i = 0; | ||
95 | 129 | ||
96 | BUG_ON(equiv_cpu_table == NULL); | 130 | BUG_ON(equiv_cpu_table == NULL); |
131 | |||
97 | current_cpu_id = cpuid_eax(0x00000001); | 132 | current_cpu_id = cpuid_eax(0x00000001); |
98 | 133 | ||
99 | while (equiv_cpu_table[i].installed_cpu != 0) { | 134 | while (equiv_cpu_table[i].installed_cpu != 0) { |
100 | if (current_cpu_id == equiv_cpu_table[i].installed_cpu) { | 135 | if (current_cpu_id == equiv_cpu_table[i].installed_cpu) |
101 | equiv_cpu_id = equiv_cpu_table[i].equiv_cpu; | 136 | return equiv_cpu_table[i].equiv_cpu; |
102 | break; | 137 | |
103 | } | ||
104 | i++; | 138 | i++; |
105 | } | 139 | } |
140 | return 0; | ||
141 | } | ||
142 | |||
143 | /* | ||
144 | * we signal that a good patch was found by returning its size > 0 | ||
145 | */ | ||
146 | static int get_matching_microcode(int cpu, const u8 *ucode_ptr, | ||
147 | unsigned int leftover_size, int rev, | ||
148 | unsigned int *current_size) | ||
149 | { | ||
150 | struct microcode_header_amd *mc_hdr; | ||
151 | unsigned int actual_size; | ||
152 | u16 equiv_cpu_id; | ||
153 | |||
154 | /* size of the current patch we're staring at */ | ||
155 | *current_size = *(u32 *)(ucode_ptr + 4) + SECTION_HDR_SIZE; | ||
106 | 156 | ||
157 | equiv_cpu_id = find_equiv_id(); | ||
107 | if (!equiv_cpu_id) | 158 | if (!equiv_cpu_id) |
108 | return 0; | 159 | return 0; |
109 | 160 | ||
161 | /* | ||
162 | * let's look at the patch header itself now | ||
163 | */ | ||
164 | mc_hdr = (struct microcode_header_amd *)(ucode_ptr + SECTION_HDR_SIZE); | ||
165 | |||
110 | if (mc_hdr->processor_rev_id != equiv_cpu_id) | 166 | if (mc_hdr->processor_rev_id != equiv_cpu_id) |
111 | return 0; | 167 | return 0; |
112 | 168 | ||
@@ -120,7 +176,20 @@ static int get_matching_microcode(int cpu, struct microcode_header_amd *mc_hdr, | |||
120 | if (mc_hdr->patch_id <= rev) | 176 | if (mc_hdr->patch_id <= rev) |
121 | return 0; | 177 | return 0; |
122 | 178 | ||
123 | return 1; | 179 | /* |
180 | * now that the header looks sane, verify its size | ||
181 | */ | ||
182 | actual_size = verify_ucode_size(cpu, *current_size, leftover_size); | ||
183 | if (!actual_size) | ||
184 | return 0; | ||
185 | |||
186 | /* clear the patch buffer */ | ||
187 | memset(patch, 0, PAGE_SIZE); | ||
188 | |||
189 | /* all looks ok, get the binary patch */ | ||
190 | get_ucode_data(patch, ucode_ptr + SECTION_HDR_SIZE, actual_size); | ||
191 | |||
192 | return actual_size; | ||
124 | } | 193 | } |
125 | 194 | ||
126 | static int apply_microcode_amd(int cpu) | 195 | static int apply_microcode_amd(int cpu) |
@@ -155,63 +224,6 @@ static int apply_microcode_amd(int cpu) | |||
155 | return 0; | 224 | return 0; |
156 | } | 225 | } |
157 | 226 | ||
158 | static unsigned int verify_ucode_size(int cpu, const u8 *buf, unsigned int size) | ||
159 | { | ||
160 | struct cpuinfo_x86 *c = &cpu_data(cpu); | ||
161 | u32 max_size, actual_size; | ||
162 | |||
163 | #define F1XH_MPB_MAX_SIZE 2048 | ||
164 | #define F14H_MPB_MAX_SIZE 1824 | ||
165 | #define F15H_MPB_MAX_SIZE 4096 | ||
166 | |||
167 | switch (c->x86) { | ||
168 | case 0x14: | ||
169 | max_size = F14H_MPB_MAX_SIZE; | ||
170 | break; | ||
171 | case 0x15: | ||
172 | max_size = F15H_MPB_MAX_SIZE; | ||
173 | break; | ||
174 | default: | ||
175 | max_size = F1XH_MPB_MAX_SIZE; | ||
176 | break; | ||
177 | } | ||
178 | |||
179 | actual_size = *(u32 *)(buf + 4); | ||
180 | |||
181 | if (actual_size + SECTION_HDR_SIZE > size || actual_size > max_size) { | ||
182 | pr_err("section size mismatch\n"); | ||
183 | return 0; | ||
184 | } | ||
185 | |||
186 | return actual_size; | ||
187 | } | ||
188 | |||
189 | static struct microcode_header_amd * | ||
190 | get_next_ucode(int cpu, const u8 *buf, unsigned int size, unsigned int *mc_size) | ||
191 | { | ||
192 | struct microcode_header_amd *mc = NULL; | ||
193 | unsigned int actual_size = 0; | ||
194 | |||
195 | if (*(u32 *)buf != UCODE_UCODE_TYPE) { | ||
196 | pr_err("invalid type field in container file section header\n"); | ||
197 | goto out; | ||
198 | } | ||
199 | |||
200 | actual_size = verify_ucode_size(cpu, buf, size); | ||
201 | if (!actual_size) | ||
202 | goto out; | ||
203 | |||
204 | mc = vzalloc(actual_size); | ||
205 | if (!mc) | ||
206 | goto out; | ||
207 | |||
208 | get_ucode_data(mc, buf + SECTION_HDR_SIZE, actual_size); | ||
209 | *mc_size = actual_size + SECTION_HDR_SIZE; | ||
210 | |||
211 | out: | ||
212 | return mc; | ||
213 | } | ||
214 | |||
215 | static int install_equiv_cpu_table(const u8 *buf) | 227 | static int install_equiv_cpu_table(const u8 *buf) |
216 | { | 228 | { |
217 | unsigned int *ibuf = (unsigned int *)buf; | 229 | unsigned int *ibuf = (unsigned int *)buf; |
@@ -247,36 +259,38 @@ generic_load_microcode(int cpu, const u8 *data, size_t size) | |||
247 | { | 259 | { |
248 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 260 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
249 | struct microcode_header_amd *mc_hdr = NULL; | 261 | struct microcode_header_amd *mc_hdr = NULL; |
250 | unsigned int mc_size, leftover; | 262 | unsigned int mc_size, leftover, current_size = 0; |
251 | int offset; | 263 | int offset; |
252 | const u8 *ucode_ptr = data; | 264 | const u8 *ucode_ptr = data; |
253 | void *new_mc = NULL; | 265 | void *new_mc = NULL; |
254 | unsigned int new_rev = uci->cpu_sig.rev; | 266 | unsigned int new_rev = uci->cpu_sig.rev; |
255 | enum ucode_state state = UCODE_OK; | 267 | enum ucode_state state = UCODE_ERROR; |
256 | 268 | ||
257 | offset = install_equiv_cpu_table(ucode_ptr); | 269 | offset = install_equiv_cpu_table(ucode_ptr); |
258 | if (offset < 0) { | 270 | if (offset < 0) { |
259 | pr_err("failed to create equivalent cpu table\n"); | 271 | pr_err("failed to create equivalent cpu table\n"); |
260 | return UCODE_ERROR; | 272 | goto out; |
261 | } | 273 | } |
262 | |||
263 | ucode_ptr += offset; | 274 | ucode_ptr += offset; |
264 | leftover = size - offset; | 275 | leftover = size - offset; |
265 | 276 | ||
266 | while (leftover) { | 277 | if (*(u32 *)ucode_ptr != UCODE_UCODE_TYPE) { |
267 | mc_hdr = get_next_ucode(cpu, ucode_ptr, leftover, &mc_size); | 278 | pr_err("invalid type field in container file section header\n"); |
268 | if (!mc_hdr) | 279 | goto free_table; |
269 | break; | 280 | } |
270 | 281 | ||
271 | if (get_matching_microcode(cpu, mc_hdr, new_rev)) { | 282 | while (leftover) { |
272 | vfree(new_mc); | 283 | mc_size = get_matching_microcode(cpu, ucode_ptr, leftover, |
284 | new_rev, &current_size); | ||
285 | if (mc_size) { | ||
286 | mc_hdr = patch; | ||
287 | new_mc = patch; | ||
273 | new_rev = mc_hdr->patch_id; | 288 | new_rev = mc_hdr->patch_id; |
274 | new_mc = mc_hdr; | 289 | goto out_ok; |
275 | } else | 290 | } |
276 | vfree(mc_hdr); | ||
277 | 291 | ||
278 | ucode_ptr += mc_size; | 292 | ucode_ptr += current_size; |
279 | leftover -= mc_size; | 293 | leftover -= current_size; |
280 | } | 294 | } |
281 | 295 | ||
282 | if (!new_mc) { | 296 | if (!new_mc) { |
@@ -284,29 +298,46 @@ generic_load_microcode(int cpu, const u8 *data, size_t size) | |||
284 | goto free_table; | 298 | goto free_table; |
285 | } | 299 | } |
286 | 300 | ||
287 | if (!leftover) { | 301 | out_ok: |
288 | vfree(uci->mc); | 302 | uci->mc = new_mc; |
289 | uci->mc = new_mc; | 303 | state = UCODE_OK; |
290 | pr_debug("CPU%d update ucode (0x%08x -> 0x%08x)\n", | 304 | pr_debug("CPU%d update ucode (0x%08x -> 0x%08x)\n", |
291 | cpu, uci->cpu_sig.rev, new_rev); | 305 | cpu, uci->cpu_sig.rev, new_rev); |
292 | } else { | ||
293 | vfree(new_mc); | ||
294 | state = UCODE_ERROR; | ||
295 | } | ||
296 | 306 | ||
297 | free_table: | 307 | free_table: |
298 | free_equiv_cpu_table(); | 308 | free_equiv_cpu_table(); |
299 | 309 | ||
310 | out: | ||
300 | return state; | 311 | return state; |
301 | } | 312 | } |
302 | 313 | ||
314 | /* | ||
315 | * AMD microcode firmware naming convention: before family 15h, patches are in | ||
316 | * the legacy file: | ||
317 | * | ||
318 | * amd-ucode/microcode_amd.bin | ||
319 | * | ||
320 | * This legacy file is always smaller than 2K in size. | ||
321 | * | ||
322 | * Starting at family 15h they are in family specific firmware files: | ||
323 | * | ||
324 | * amd-ucode/microcode_amd_fam15h.bin | ||
325 | * amd-ucode/microcode_amd_fam16h.bin | ||
326 | * ... | ||
327 | * | ||
328 | * These might be larger than 2K. | ||
329 | */ | ||
303 | static enum ucode_state request_microcode_amd(int cpu, struct device *device) | 330 | static enum ucode_state request_microcode_amd(int cpu, struct device *device) |
304 | { | 331 | { |
305 | const char *fw_name = "amd-ucode/microcode_amd.bin"; | 332 | char fw_name[36] = "amd-ucode/microcode_amd.bin"; |
306 | const struct firmware *fw; | 333 | const struct firmware *fw; |
307 | enum ucode_state ret = UCODE_NFOUND; | 334 | enum ucode_state ret = UCODE_NFOUND; |
335 | struct cpuinfo_x86 *c = &cpu_data(cpu); | ||
336 | |||
337 | if (c->x86 >= 0x15) | ||
338 | snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86); | ||
308 | 339 | ||
309 | if (request_firmware(&fw, fw_name, device)) { | 340 | if (request_firmware(&fw, (const char *)fw_name, device)) { |
310 | pr_err("failed to load file %s\n", fw_name); | 341 | pr_err("failed to load file %s\n", fw_name); |
311 | goto out; | 342 | goto out; |
312 | } | 343 | } |
@@ -329,7 +360,6 @@ out: | |||
329 | static enum ucode_state | 360 | static enum ucode_state |
330 | request_microcode_user(int cpu, const void __user *buf, size_t size) | 361 | request_microcode_user(int cpu, const void __user *buf, size_t size) |
331 | { | 362 | { |
332 | pr_info("AMD microcode update via /dev/cpu/microcode not supported\n"); | ||
333 | return UCODE_ERROR; | 363 | return UCODE_ERROR; |
334 | } | 364 | } |
335 | 365 | ||
@@ -337,7 +367,6 @@ static void microcode_fini_cpu_amd(int cpu) | |||
337 | { | 367 | { |
338 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 368 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
339 | 369 | ||
340 | vfree(uci->mc); | ||
341 | uci->mc = NULL; | 370 | uci->mc = NULL; |
342 | } | 371 | } |
343 | 372 | ||
@@ -351,5 +380,14 @@ static struct microcode_ops microcode_amd_ops = { | |||
351 | 380 | ||
352 | struct microcode_ops * __init init_amd_microcode(void) | 381 | struct microcode_ops * __init init_amd_microcode(void) |
353 | { | 382 | { |
383 | patch = (void *)get_zeroed_page(GFP_KERNEL); | ||
384 | if (!patch) | ||
385 | return NULL; | ||
386 | |||
354 | return &microcode_amd_ops; | 387 | return &microcode_amd_ops; |
355 | } | 388 | } |
389 | |||
390 | void __exit exit_amd_microcode(void) | ||
391 | { | ||
392 | free_page((unsigned long)patch); | ||
393 | } | ||
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index f2d2a664e797..fda91c307104 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c | |||
@@ -256,7 +256,7 @@ static int __init microcode_dev_init(void) | |||
256 | return 0; | 256 | return 0; |
257 | } | 257 | } |
258 | 258 | ||
259 | static void microcode_dev_exit(void) | 259 | static void __exit microcode_dev_exit(void) |
260 | { | 260 | { |
261 | misc_deregister(&microcode_dev); | 261 | misc_deregister(&microcode_dev); |
262 | } | 262 | } |
@@ -292,8 +292,8 @@ static int reload_for_cpu(int cpu) | |||
292 | return err; | 292 | return err; |
293 | } | 293 | } |
294 | 294 | ||
295 | static ssize_t reload_store(struct sys_device *dev, | 295 | static ssize_t reload_store(struct device *dev, |
296 | struct sysdev_attribute *attr, | 296 | struct device_attribute *attr, |
297 | const char *buf, size_t size) | 297 | const char *buf, size_t size) |
298 | { | 298 | { |
299 | unsigned long val; | 299 | unsigned long val; |
@@ -318,30 +318,30 @@ static ssize_t reload_store(struct sys_device *dev, | |||
318 | return ret; | 318 | return ret; |
319 | } | 319 | } |
320 | 320 | ||
321 | static ssize_t version_show(struct sys_device *dev, | 321 | static ssize_t version_show(struct device *dev, |
322 | struct sysdev_attribute *attr, char *buf) | 322 | struct device_attribute *attr, char *buf) |
323 | { | 323 | { |
324 | struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; | 324 | struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; |
325 | 325 | ||
326 | return sprintf(buf, "0x%x\n", uci->cpu_sig.rev); | 326 | return sprintf(buf, "0x%x\n", uci->cpu_sig.rev); |
327 | } | 327 | } |
328 | 328 | ||
329 | static ssize_t pf_show(struct sys_device *dev, | 329 | static ssize_t pf_show(struct device *dev, |
330 | struct sysdev_attribute *attr, char *buf) | 330 | struct device_attribute *attr, char *buf) |
331 | { | 331 | { |
332 | struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; | 332 | struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; |
333 | 333 | ||
334 | return sprintf(buf, "0x%x\n", uci->cpu_sig.pf); | 334 | return sprintf(buf, "0x%x\n", uci->cpu_sig.pf); |
335 | } | 335 | } |
336 | 336 | ||
337 | static SYSDEV_ATTR(reload, 0200, NULL, reload_store); | 337 | static DEVICE_ATTR(reload, 0200, NULL, reload_store); |
338 | static SYSDEV_ATTR(version, 0400, version_show, NULL); | 338 | static DEVICE_ATTR(version, 0400, version_show, NULL); |
339 | static SYSDEV_ATTR(processor_flags, 0400, pf_show, NULL); | 339 | static DEVICE_ATTR(processor_flags, 0400, pf_show, NULL); |
340 | 340 | ||
341 | static struct attribute *mc_default_attrs[] = { | 341 | static struct attribute *mc_default_attrs[] = { |
342 | &attr_reload.attr, | 342 | &dev_attr_reload.attr, |
343 | &attr_version.attr, | 343 | &dev_attr_version.attr, |
344 | &attr_processor_flags.attr, | 344 | &dev_attr_processor_flags.attr, |
345 | NULL | 345 | NULL |
346 | }; | 346 | }; |
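The SYSDEV_ATTR to DEVICE_ATTR switch also renames the generated variables: DEVICE_ATTR(reload, ...) emits a struct device_attribute named dev_attr_reload, which is why the attribute-array entries change too. A minimal sketch of the pattern (kernel context assumed; the show body is illustrative):

#include <linux/device.h>

static ssize_t demo_show(struct device *dev,
			 struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "%d\n", dev->id);
}

/* Expands to a struct device_attribute named dev_attr_demo. */
static DEVICE_ATTR(demo, 0444, demo_show, NULL);

static struct attribute *demo_attrs[] = {
	&dev_attr_demo.attr,
	NULL,
};

static struct attribute_group demo_attr_group = {
	.attrs = demo_attrs,
};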
347 | 347 | ||
@@ -405,43 +405,45 @@ static enum ucode_state microcode_update_cpu(int cpu) | |||
405 | return ustate; | 405 | return ustate; |
406 | } | 406 | } |
407 | 407 | ||
408 | static int mc_sysdev_add(struct sys_device *sys_dev) | 408 | static int mc_device_add(struct device *dev, struct subsys_interface *sif) |
409 | { | 409 | { |
410 | int err, cpu = sys_dev->id; | 410 | int err, cpu = dev->id; |
411 | 411 | ||
412 | if (!cpu_online(cpu)) | 412 | if (!cpu_online(cpu)) |
413 | return 0; | 413 | return 0; |
414 | 414 | ||
415 | pr_debug("CPU%d added\n", cpu); | 415 | pr_debug("CPU%d added\n", cpu); |
416 | 416 | ||
417 | err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); | 417 | err = sysfs_create_group(&dev->kobj, &mc_attr_group); |
418 | if (err) | 418 | if (err) |
419 | return err; | 419 | return err; |
420 | 420 | ||
421 | if (microcode_init_cpu(cpu) == UCODE_ERROR) { | 421 | if (microcode_init_cpu(cpu) == UCODE_ERROR) { |
422 | sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); | 422 | sysfs_remove_group(&dev->kobj, &mc_attr_group); |
423 | return -EINVAL; | 423 | return -EINVAL; |
424 | } | 424 | } |
425 | 425 | ||
426 | return err; | 426 | return err; |
427 | } | 427 | } |
428 | 428 | ||
429 | static int mc_sysdev_remove(struct sys_device *sys_dev) | 429 | static int mc_device_remove(struct device *dev, struct subsys_interface *sif) |
430 | { | 430 | { |
431 | int cpu = sys_dev->id; | 431 | int cpu = dev->id; |
432 | 432 | ||
433 | if (!cpu_online(cpu)) | 433 | if (!cpu_online(cpu)) |
434 | return 0; | 434 | return 0; |
435 | 435 | ||
436 | pr_debug("CPU%d removed\n", cpu); | 436 | pr_debug("CPU%d removed\n", cpu); |
437 | microcode_fini_cpu(cpu); | 437 | microcode_fini_cpu(cpu); |
438 | sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); | 438 | sysfs_remove_group(&dev->kobj, &mc_attr_group); |
439 | return 0; | 439 | return 0; |
440 | } | 440 | } |
441 | 441 | ||
442 | static struct sysdev_driver mc_sysdev_driver = { | 442 | static struct subsys_interface mc_cpu_interface = { |
443 | .add = mc_sysdev_add, | 443 | .name = "microcode", |
444 | .remove = mc_sysdev_remove, | 444 | .subsys = &cpu_subsys, |
445 | .add_dev = mc_device_add, | ||
446 | .remove_dev = mc_device_remove, | ||
445 | }; | 447 | }; |
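The subsys_interface replacing the old sysdev driver has a fixed shape: a name, the target subsystem, and add_dev/remove_dev callbacks that fire for every device already on the bus at registration time and again on later hotplug. A condensed sketch of the pattern (kernel context assumed; names are illustrative):

#include <linux/device.h>
#include <linux/cpu.h>

static int demo_add(struct device *dev, struct subsys_interface *sif)
{
	pr_debug("demo: cpu%d added\n", dev->id);
	return 0;
}

static int demo_remove(struct device *dev, struct subsys_interface *sif)
{
	pr_debug("demo: cpu%d removed\n", dev->id);
	return 0;
}

static struct subsys_interface demo_interface = {
	.name		= "demo",
	.subsys		= &cpu_subsys,
	.add_dev	= demo_add,
	.remove_dev	= demo_remove,
};

/* register with:  subsys_interface_register(&demo_interface);
 * tear down with: subsys_interface_unregister(&demo_interface); */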
446 | 448 | ||
447 | /** | 449 | /** |
@@ -464,9 +466,9 @@ static __cpuinit int | |||
464 | mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) | 466 | mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) |
465 | { | 467 | { |
466 | unsigned int cpu = (unsigned long)hcpu; | 468 | unsigned int cpu = (unsigned long)hcpu; |
467 | struct sys_device *sys_dev; | 469 | struct device *dev; |
468 | 470 | ||
469 | sys_dev = get_cpu_sysdev(cpu); | 471 | dev = get_cpu_device(cpu); |
470 | switch (action) { | 472 | switch (action) { |
471 | case CPU_ONLINE: | 473 | case CPU_ONLINE: |
472 | case CPU_ONLINE_FROZEN: | 474 | case CPU_ONLINE_FROZEN: |
@@ -474,13 +476,13 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) | |||
474 | case CPU_DOWN_FAILED: | 476 | case CPU_DOWN_FAILED: |
475 | case CPU_DOWN_FAILED_FROZEN: | 477 | case CPU_DOWN_FAILED_FROZEN: |
476 | pr_debug("CPU%d added\n", cpu); | 478 | pr_debug("CPU%d added\n", cpu); |
477 | if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group)) | 479 | if (sysfs_create_group(&dev->kobj, &mc_attr_group)) |
478 | pr_err("Failed to create group for CPU%d\n", cpu); | 480 | pr_err("Failed to create group for CPU%d\n", cpu); |
479 | break; | 481 | break; |
480 | case CPU_DOWN_PREPARE: | 482 | case CPU_DOWN_PREPARE: |
481 | case CPU_DOWN_PREPARE_FROZEN: | 483 | case CPU_DOWN_PREPARE_FROZEN: |
482 | /* Suspend is in progress, only remove the interface */ | 484 | /* Suspend is in progress, only remove the interface */ |
483 | sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); | 485 | sysfs_remove_group(&dev->kobj, &mc_attr_group); |
484 | pr_debug("CPU%d removed\n", cpu); | 486 | pr_debug("CPU%d removed\n", cpu); |
485 | break; | 487 | break; |
486 | 488 | ||
@@ -519,27 +521,23 @@ static int __init microcode_init(void) | |||
519 | 521 | ||
520 | microcode_pdev = platform_device_register_simple("microcode", -1, | 522 | microcode_pdev = platform_device_register_simple("microcode", -1, |
521 | NULL, 0); | 523 | NULL, 0); |
522 | if (IS_ERR(microcode_pdev)) { | 524 | if (IS_ERR(microcode_pdev)) |
523 | microcode_dev_exit(); | ||
524 | return PTR_ERR(microcode_pdev); | 525 | return PTR_ERR(microcode_pdev); |
525 | } | ||
526 | 526 | ||
527 | get_online_cpus(); | 527 | get_online_cpus(); |
528 | mutex_lock(&microcode_mutex); | 528 | mutex_lock(&microcode_mutex); |
529 | 529 | ||
530 | error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver); | 530 | error = subsys_interface_register(&mc_cpu_interface); |
531 | 531 | ||
532 | mutex_unlock(&microcode_mutex); | 532 | mutex_unlock(&microcode_mutex); |
533 | put_online_cpus(); | 533 | put_online_cpus(); |
534 | 534 | ||
535 | if (error) { | 535 | if (error) |
536 | platform_device_unregister(microcode_pdev); | 536 | goto out_pdev; |
537 | return error; | ||
538 | } | ||
539 | 537 | ||
540 | error = microcode_dev_init(); | 538 | error = microcode_dev_init(); |
541 | if (error) | 539 | if (error) |
542 | return error; | 540 | goto out_driver; |
543 | 541 | ||
544 | register_syscore_ops(&mc_syscore_ops); | 542 | register_syscore_ops(&mc_syscore_ops); |
545 | register_hotcpu_notifier(&mc_cpu_notifier); | 543 | register_hotcpu_notifier(&mc_cpu_notifier); |
@@ -548,11 +546,27 @@ static int __init microcode_init(void) | |||
548 | " <tigran@aivazian.fsnet.co.uk>, Peter Oruba\n"); | 546 | " <tigran@aivazian.fsnet.co.uk>, Peter Oruba\n"); |
549 | 547 | ||
550 | return 0; | 548 | return 0; |
549 | |||
550 | out_driver: | ||
551 | get_online_cpus(); | ||
552 | mutex_lock(&microcode_mutex); | ||
553 | |||
554 | subsys_interface_unregister(&mc_cpu_interface); | ||
555 | |||
556 | mutex_unlock(&microcode_mutex); | ||
557 | put_online_cpus(); | ||
558 | |||
559 | out_pdev: | ||
560 | platform_device_unregister(microcode_pdev); | ||
561 | return error; | ||
562 | |||
551 | } | 563 | } |
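The reworked error path is the standard kernel unwind idiom: each failing step jumps to a label that undoes only what has already succeeded, in reverse order. A runnable userspace reduction (the stubs stand in for platform_device_register_simple(), subsys_interface_register() and microcode_dev_init()):

#include <stdio.h>

static int step_a(void)  { puts("a: ok");    return 0; }
static int step_b(void)  { puts("b: ok");    return 0; }
static int step_c(void)  { puts("c: fails"); return -1; }
static void undo_b(void) { puts("undo b"); }
static void undo_a(void) { puts("undo a"); }

static int demo_init(void)
{
	int err;

	err = step_a();
	if (err)
		return err;	/* nothing to unwind yet */
	err = step_b();
	if (err)
		goto out_a;
	err = step_c();
	if (err)
		goto out_b;
	return 0;

out_b:
	undo_b();
out_a:
	undo_a();
	return err;
}

int main(void) { return demo_init() ? 1 : 0; }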
552 | module_init(microcode_init); | 564 | module_init(microcode_init); |
553 | 565 | ||
554 | static void __exit microcode_exit(void) | 566 | static void __exit microcode_exit(void) |
555 | { | 567 | { |
568 | struct cpuinfo_x86 *c = &cpu_data(0); | ||
569 | |||
556 | microcode_dev_exit(); | 570 | microcode_dev_exit(); |
557 | 571 | ||
558 | unregister_hotcpu_notifier(&mc_cpu_notifier); | 572 | unregister_hotcpu_notifier(&mc_cpu_notifier); |
@@ -561,7 +575,7 @@ static void __exit microcode_exit(void) | |||
561 | get_online_cpus(); | 575 | get_online_cpus(); |
562 | mutex_lock(&microcode_mutex); | 576 | mutex_lock(&microcode_mutex); |
563 | 577 | ||
564 | sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver); | 578 | subsys_interface_unregister(&mc_cpu_interface); |
565 | 579 | ||
566 | mutex_unlock(&microcode_mutex); | 580 | mutex_unlock(&microcode_mutex); |
567 | put_online_cpus(); | 581 | put_online_cpus(); |
@@ -570,6 +584,9 @@ static void __exit microcode_exit(void) | |||
570 | 584 | ||
571 | microcode_ops = NULL; | 585 | microcode_ops = NULL; |
572 | 586 | ||
587 | if (c->x86_vendor == X86_VENDOR_AMD) | ||
588 | exit_amd_microcode(); | ||
589 | |||
573 | pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n"); | 590 | pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n"); |
574 | } | 591 | } |
575 | module_exit(microcode_exit); | 592 | module_exit(microcode_exit); |
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 9103b89c145a..ca470e4c92dc 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c | |||
@@ -95,8 +95,8 @@ static void __init MP_bus_info(struct mpc_bus *m) | |||
95 | } | 95 | } |
96 | #endif | 96 | #endif |
97 | 97 | ||
98 | set_bit(m->busid, mp_bus_not_pci); | ||
98 | if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) { | 99 | if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) { |
99 | set_bit(m->busid, mp_bus_not_pci); | ||
100 | #if defined(CONFIG_EISA) || defined(CONFIG_MCA) | 100 | #if defined(CONFIG_EISA) || defined(CONFIG_MCA) |
101 | mp_bus_id_to_type[m->busid] = MP_BUS_ISA; | 101 | mp_bus_id_to_type[m->busid] = MP_BUS_ISA; |
102 | #endif | 102 | #endif |
@@ -564,9 +564,7 @@ void __init default_get_smp_config(unsigned int early) | |||
564 | 564 | ||
565 | static void __init smp_reserve_memory(struct mpf_intel *mpf) | 565 | static void __init smp_reserve_memory(struct mpf_intel *mpf) |
566 | { | 566 | { |
567 | unsigned long size = get_mpc_size(mpf->physptr); | 567 | memblock_reserve(mpf->physptr, get_mpc_size(mpf->physptr)); |
568 | |||
569 | memblock_x86_reserve_range(mpf->physptr, mpf->physptr+size, "* MP-table mpc"); | ||
570 | } | 568 | } |
571 | 569 | ||
572 | static int __init smp_scan_config(unsigned long base, unsigned long length) | 570 | static int __init smp_scan_config(unsigned long base, unsigned long length) |
@@ -595,7 +593,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length) | |||
595 | mpf, (u64)virt_to_phys(mpf)); | 593 | mpf, (u64)virt_to_phys(mpf)); |
596 | 594 | ||
597 | mem = virt_to_phys(mpf); | 595 | mem = virt_to_phys(mpf); |
598 | memblock_x86_reserve_range(mem, mem + sizeof(*mpf), "* MP-table mpf"); | 596 | memblock_reserve(mem, sizeof(*mpf)); |
599 | if (mpf->physptr) | 597 | if (mpf->physptr) |
600 | smp_reserve_memory(mpf); | 598 | smp_reserve_memory(mpf); |
601 | 599 | ||
@@ -836,10 +834,8 @@ early_param("alloc_mptable", parse_alloc_mptable_opt); | |||
836 | 834 | ||
837 | void __init early_reserve_e820_mpc_new(void) | 835 | void __init early_reserve_e820_mpc_new(void) |
838 | { | 836 | { |
839 | if (enable_update_mptable && alloc_mptable) { | 837 | if (enable_update_mptable && alloc_mptable) |
840 | u64 startt = 0; | 838 | mpc_new_phys = early_reserve_e820(mpc_new_length, 4); |
841 | mpc_new_phys = early_reserve_e820(startt, mpc_new_length, 4); | ||
842 | } | ||
843 | } | 839 | } |
844 | 840 | ||
845 | static int __init update_mp_table(void) | 841 | static int __init update_mp_table(void) |
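The memblock conversion running through this file (and through setup.c further down) is mechanical: the x86-private helper took an end-exclusive start/end pair plus a debug label, while the generic call takes base and size. A sketch of the before/after shape (kernel context assumed):

#include <linux/memblock.h>

static void __init reserve_example(phys_addr_t base, phys_addr_t size)
{
	/* old x86-private form:
	 *	memblock_x86_reserve_range(base, base + size, "* label");
	 * generic replacement: */
	memblock_reserve(base, size);

	/* frees convert the same way:
	 *	memblock_x86_free_range(base, base + size);
	 * becomes:
	 *	memblock_free(base, size);
	 */
}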
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 12fcbe2c143e..96356762a51d 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c | |||
@@ -236,7 +236,7 @@ static struct notifier_block __refdata msr_class_cpu_notifier = { | |||
236 | .notifier_call = msr_class_cpu_callback, | 236 | .notifier_call = msr_class_cpu_callback, |
237 | }; | 237 | }; |
238 | 238 | ||
239 | static char *msr_devnode(struct device *dev, mode_t *mode) | 239 | static char *msr_devnode(struct device *dev, umode_t *mode) |
240 | { | 240 | { |
241 | return kasprintf(GFP_KERNEL, "cpu/%u/msr", MINOR(dev->devt)); | 241 | return kasprintf(GFP_KERNEL, "cpu/%u/msr", MINOR(dev->devt)); |
242 | } | 242 | } |
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index e88f37b58ddd..47acaf319165 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c | |||
@@ -405,9 +405,108 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) | |||
405 | unknown_nmi_error(reason, regs); | 405 | unknown_nmi_error(reason, regs); |
406 | } | 406 | } |
407 | 407 | ||
408 | /* | ||
409 | * NMIs can hit breakpoints which will cause the CPU to lose its | ||
410 | * NMI context when the breakpoint does an iret. | ||
411 | */ | ||
412 | #ifdef CONFIG_X86_32 | ||
413 | /* | ||
414 | * For i386, NMIs use the same stack as the kernel, and we can | ||
415 | * add a workaround to the iret problem in C. Simply have 3 states | ||
416 | * the NMI can be in. | ||
417 | * | ||
418 | * 1) not running | ||
419 | * 2) executing | ||
420 | * 3) latched | ||
421 | * | ||
422 | * When no NMI is in progress, it is in the "not running" state. | ||
423 | * When an NMI comes in, it goes into the "executing" state. | ||
424 | * Normally, if another NMI is triggered, it does not interrupt | ||
425 | * the running NMI and the HW will simply latch it so that when | ||
426 | * the first NMI finishes, it will restart the second NMI. | ||
427 | * (Note: the latch is binary, so multiple NMIs triggering while | ||
428 | * one is running are ignored; only one NMI is restarted.) | ||
429 | * | ||
430 | * If an NMI hits a breakpoint that executes an iret, another | ||
431 | * NMI can preempt it. We do not want to allow this new NMI | ||
432 | * to run, but we want to execute it when the first one finishes. | ||
433 | * We set the state to "latched", and the first NMI will perform | ||
434 | * a cmpxchg on the state, and if it doesn't successfully | ||
435 | * reset the state to "not running" it will restart the next | ||
436 | * NMI. | ||
437 | */ | ||
438 | enum nmi_states { | ||
439 | NMI_NOT_RUNNING, | ||
440 | NMI_EXECUTING, | ||
441 | NMI_LATCHED, | ||
442 | }; | ||
443 | static DEFINE_PER_CPU(enum nmi_states, nmi_state); | ||
444 | |||
445 | #define nmi_nesting_preprocess(regs) \ | ||
446 | do { \ | ||
447 | if (__get_cpu_var(nmi_state) != NMI_NOT_RUNNING) { \ | ||
448 | __get_cpu_var(nmi_state) = NMI_LATCHED; \ | ||
449 | return; \ | ||
450 | } \ | ||
451 | nmi_restart: \ | ||
452 | __get_cpu_var(nmi_state) = NMI_EXECUTING; \ | ||
453 | } while (0) | ||
454 | |||
455 | #define nmi_nesting_postprocess() \ | ||
456 | do { \ | ||
457 | if (cmpxchg(&__get_cpu_var(nmi_state), \ | ||
458 | NMI_EXECUTING, NMI_NOT_RUNNING) != NMI_EXECUTING) \ | ||
459 | goto nmi_restart; \ | ||
460 | } while (0) | ||
461 | #else /* x86_64 */ | ||
462 | /* | ||
463 | * On x86_64 things are a bit more difficult. It has the same problem | ||
464 | * where an NMI hitting a breakpoint that calls iret will remove the | ||
465 | * NMI context, allowing a nested NMI to enter. What makes this more | ||
466 | * difficult is that both NMIs and breakpoints have their own stack. | ||
467 | * When a new NMI or breakpoint is executed, the stack is set to a fixed | ||
468 | * point. If an NMI is nested, it will have its stack set at that same | ||
469 | * fixed address that the first NMI had, and will start corrupting the | ||
470 | * stack. This is handled in entry_64.S, but the same problem exists with | ||
471 | * the breakpoint stack. | ||
472 | * | ||
473 | * If a breakpoint is being processed on the debug stack and an NMI | ||
474 | * comes in that also hits a breakpoint, the stack pointer will be | ||
475 | * set to the same fixed address as the interrupted breakpoint, | ||
476 | * corrupting that stack. To handle this case, | ||
477 | * check if the stack that was interrupted is the debug stack, and if | ||
478 | * so, change the IDT so that new breakpoints will use the current stack | ||
479 | * and not switch to the fixed address. On return of the NMI, switch back | ||
480 | * to the original IDT. | ||
481 | */ | ||
482 | static DEFINE_PER_CPU(int, update_debug_stack); | ||
483 | |||
484 | static inline void nmi_nesting_preprocess(struct pt_regs *regs) | ||
485 | { | ||
486 | /* | ||
487 | * If we interrupted a breakpoint, it is possible that | ||
488 | * the nmi handler will have breakpoints too. We need to | ||
489 | * change the IDT such that breakpoints that happen here | ||
490 | * continue to use the NMI stack. | ||
491 | */ | ||
492 | if (unlikely(is_debug_stack(regs->sp))) { | ||
493 | debug_stack_set_zero(); | ||
494 | __get_cpu_var(update_debug_stack) = 1; | ||
495 | } | ||
496 | } | ||
497 | |||
498 | static inline void nmi_nesting_postprocess(void) | ||
499 | { | ||
500 | if (unlikely(__get_cpu_var(update_debug_stack))) | ||
501 | debug_stack_reset(); | ||
502 | } | ||
503 | #endif | ||
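The i386 three-state latch above is the whole trick: a nested NMI merely flips the state to latched and returns, so the outer handler's cmpxchg fails and it loops back to service the missed NMI. A standalone userspace sketch of that state machine (single-threaded; the nested NMI is simulated inside the handler body):

#include <stdio.h>
#include <stdatomic.h>

enum nmi_states { NMI_NOT_RUNNING, NMI_EXECUTING, NMI_LATCHED };

static _Atomic int nmi_state = NMI_NOT_RUNNING;
static int pending_nested = 1;	/* one nested NMI arrives mid-handler */

static void nmi_body(void)
{
	puts("servicing NMI");
	if (pending_nested) {
		/* A real nested NMI would see state != NMI_NOT_RUNNING,
		 * set NMI_LATCHED and immediately iret. */
		pending_nested = 0;
		atomic_store(&nmi_state, NMI_LATCHED);
	}
}

static void do_nmi(void)
{
	int expected;

nmi_restart:
	atomic_store(&nmi_state, NMI_EXECUTING);
	nmi_body();
	expected = NMI_EXECUTING;
	/* If a nested NMI latched itself, this fails and we rerun. */
	if (!atomic_compare_exchange_strong(&nmi_state, &expected,
					    NMI_NOT_RUNNING))
		goto nmi_restart;
}

int main(void)
{
	do_nmi();	/* prints "servicing NMI" twice */
	return 0;
}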
504 | |||
408 | dotraplinkage notrace __kprobes void | 505 | dotraplinkage notrace __kprobes void |
409 | do_nmi(struct pt_regs *regs, long error_code) | 506 | do_nmi(struct pt_regs *regs, long error_code) |
410 | { | 507 | { |
508 | nmi_nesting_preprocess(regs); | ||
509 | |||
411 | nmi_enter(); | 510 | nmi_enter(); |
412 | 511 | ||
413 | inc_irq_stat(__nmi_count); | 512 | inc_irq_stat(__nmi_count); |
@@ -416,6 +515,9 @@ do_nmi(struct pt_regs *regs, long error_code) | |||
416 | default_do_nmi(regs); | 515 | default_do_nmi(regs); |
417 | 516 | ||
418 | nmi_exit(); | 517 | nmi_exit(); |
518 | |||
519 | /* On i386, may loop back to preprocess */ | ||
520 | nmi_nesting_postprocess(); | ||
419 | } | 521 | } |
420 | 522 | ||
421 | void stop_nmi(void) | 523 | void stop_nmi(void) |
diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c new file mode 100644 index 000000000000..0d01a8ea4e11 --- /dev/null +++ b/arch/x86/kernel/nmi_selftest.c | |||
@@ -0,0 +1,180 @@ | |||
1 | /* | ||
2 | * arch/x86/kernel/nmi_selftest.c | ||
3 | * | ||
4 | * Testsuite for NMI: IPIs | ||
5 | * | ||
6 | * Started by Don Zickus: | ||
7 | * (using lib/locking-selftest.c as a guide) | ||
8 | * | ||
9 | * Copyright (C) 2011 Red Hat, Inc., Don Zickus <dzickus@redhat.com> | ||
10 | */ | ||
11 | |||
12 | #include <linux/smp.h> | ||
13 | #include <linux/cpumask.h> | ||
14 | #include <linux/delay.h> | ||
15 | |||
16 | #include <asm/apic.h> | ||
17 | #include <asm/nmi.h> | ||
18 | |||
19 | #define SUCCESS 0 | ||
20 | #define FAILURE 1 | ||
21 | #define TIMEOUT 2 | ||
22 | |||
23 | static int nmi_fail; | ||
24 | |||
25 | /* check to see if NMI IPIs work on this machine */ | ||
26 | static DECLARE_BITMAP(nmi_ipi_mask, NR_CPUS) __read_mostly; | ||
27 | |||
28 | static int testcase_total; | ||
29 | static int testcase_successes; | ||
30 | static int expected_testcase_failures; | ||
31 | static int unexpected_testcase_failures; | ||
32 | static int unexpected_testcase_unknowns; | ||
33 | |||
34 | static int nmi_unk_cb(unsigned int val, struct pt_regs *regs) | ||
35 | { | ||
36 | unexpected_testcase_unknowns++; | ||
37 | return NMI_HANDLED; | ||
38 | } | ||
39 | |||
40 | static void init_nmi_testsuite(void) | ||
41 | { | ||
42 | /* trap all the unknown NMIs we may generate */ | ||
43 | register_nmi_handler(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk"); | ||
44 | } | ||
45 | |||
46 | static void cleanup_nmi_testsuite(void) | ||
47 | { | ||
48 | unregister_nmi_handler(NMI_UNKNOWN, "nmi_selftest_unk"); | ||
49 | } | ||
50 | |||
51 | static int test_nmi_ipi_callback(unsigned int val, struct pt_regs *regs) | ||
52 | { | ||
53 | int cpu = raw_smp_processor_id(); | ||
54 | |||
55 | if (cpumask_test_and_clear_cpu(cpu, to_cpumask(nmi_ipi_mask))) | ||
56 | return NMI_HANDLED; | ||
57 | |||
58 | return NMI_DONE; | ||
59 | } | ||
60 | |||
61 | static void test_nmi_ipi(struct cpumask *mask) | ||
62 | { | ||
63 | unsigned long timeout; | ||
64 | |||
65 | if (register_nmi_handler(NMI_LOCAL, test_nmi_ipi_callback, | ||
66 | NMI_FLAG_FIRST, "nmi_selftest")) { | ||
67 | nmi_fail = FAILURE; | ||
68 | return; | ||
69 | } | ||
70 | |||
71 | /* sync above data before sending NMI */ | ||
72 | wmb(); | ||
73 | |||
74 | apic->send_IPI_mask(mask, NMI_VECTOR); | ||
75 | |||
76 | /* Don't wait longer than a second */ | ||
77 | timeout = USEC_PER_SEC; | ||
78 | while (!cpumask_empty(mask) && timeout--) | ||
79 | udelay(1); | ||
80 | |||
81 | /* What happens if we time out? Do we still unregister?? */ | ||
82 | unregister_nmi_handler(NMI_LOCAL, "nmi_selftest"); | ||
83 | |||
84 | if (!timeout) | ||
85 | nmi_fail = TIMEOUT; | ||
86 | return; | ||
87 | } | ||
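One subtlety in the wait loop above: with the post-decrement, exhausting the budget leaves timeout wrapped to ULONG_MAX, so the later if (!timeout) check can misclassify both outcomes. A runnable sketch of the pre-decrement form that avoids this (plain C; the budget value is illustrative and must be at least 1):

#include <stdio.h>

/* Busy-wait until done() or the budget runs out; returns 0 on
 * success, 1 on timeout.  Pre-decrement leaves timeout == 0 exactly
 * when the budget was exhausted. */
static int wait_with_budget(unsigned long timeout, int (*done)(void))
{
	while (!done() && --timeout)
		;	/* stand-in for udelay(1) */
	return timeout ? 0 : 1;
}

static int never_done(void)  { return 0; }
static int always_done(void) { return 1; }

int main(void)
{
	printf("never done:  %s\n",
	       wait_with_budget(1000, never_done) ? "timeout" : "ok");
	printf("always done: %s\n",
	       wait_with_budget(1000, always_done) ? "timeout" : "ok");
	return 0;
}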
88 | |||
89 | static void remote_ipi(void) | ||
90 | { | ||
91 | cpumask_copy(to_cpumask(nmi_ipi_mask), cpu_online_mask); | ||
92 | cpumask_clear_cpu(smp_processor_id(), to_cpumask(nmi_ipi_mask)); | ||
93 | if (!cpumask_empty(to_cpumask(nmi_ipi_mask))) | ||
94 | test_nmi_ipi(to_cpumask(nmi_ipi_mask)); | ||
95 | } | ||
96 | |||
97 | static void local_ipi(void) | ||
98 | { | ||
99 | cpumask_clear(to_cpumask(nmi_ipi_mask)); | ||
100 | cpumask_set_cpu(smp_processor_id(), to_cpumask(nmi_ipi_mask)); | ||
101 | test_nmi_ipi(to_cpumask(nmi_ipi_mask)); | ||
102 | } | ||
103 | |||
104 | static void reset_nmi(void) | ||
105 | { | ||
106 | nmi_fail = 0; | ||
107 | } | ||
108 | |||
109 | static void dotest(void (*testcase_fn)(void), int expected) | ||
110 | { | ||
111 | testcase_fn(); | ||
112 | /* | ||
113 | * Filter out expected failures: | ||
114 | */ | ||
115 | if (nmi_fail != expected) { | ||
116 | unexpected_testcase_failures++; | ||
117 | |||
118 | if (nmi_fail == FAILURE) | ||
119 | printk("FAILED |"); | ||
120 | else if (nmi_fail == TIMEOUT) | ||
121 | printk("TIMEOUT|"); | ||
122 | else | ||
123 | printk("ERROR |"); | ||
124 | dump_stack(); | ||
125 | } else { | ||
126 | testcase_successes++; | ||
127 | printk(" ok |"); | ||
128 | } | ||
129 | testcase_total++; | ||
130 | |||
131 | reset_nmi(); | ||
132 | } | ||
133 | |||
134 | static inline void print_testname(const char *testname) | ||
135 | { | ||
136 | printk("%12s:", testname); | ||
137 | } | ||
138 | |||
139 | void nmi_selftest(void) | ||
140 | { | ||
141 | init_nmi_testsuite(); | ||
142 | |||
143 | /* | ||
144 | * Run the testsuite: | ||
145 | */ | ||
146 | printk("----------------\n"); | ||
147 | printk("| NMI testsuite:\n"); | ||
148 | printk("--------------------\n"); | ||
149 | |||
150 | print_testname("remote IPI"); | ||
151 | dotest(remote_ipi, SUCCESS); | ||
152 | printk("\n"); | ||
153 | print_testname("local IPI"); | ||
154 | dotest(local_ipi, SUCCESS); | ||
155 | printk("\n"); | ||
156 | |||
157 | cleanup_nmi_testsuite(); | ||
158 | |||
159 | if (unexpected_testcase_failures) { | ||
160 | printk("--------------------\n"); | ||
161 | printk("BUG: %3d unexpected failures (out of %3d) - debugging disabled! |\n", | ||
162 | unexpected_testcase_failures, testcase_total); | ||
163 | printk("-----------------------------------------------------------------\n"); | ||
164 | } else if (expected_testcase_failures && testcase_successes) { | ||
165 | printk("--------------------\n"); | ||
166 | printk("%3d out of %3d testcases failed, as expected. |\n", | ||
167 | expected_testcase_failures, testcase_total); | ||
168 | printk("----------------------------------------------------\n"); | ||
169 | } else if (expected_testcase_failures && !testcase_successes) { | ||
170 | printk("--------------------\n"); | ||
171 | printk("All %3d testcases failed, as expected. |\n", | ||
172 | expected_testcase_failures); | ||
173 | printk("----------------------------------------\n"); | ||
174 | } else { | ||
175 | printk("--------------------\n"); | ||
176 | printk("Good, all %3d testcases passed! |\n", | ||
177 | testcase_successes); | ||
178 | printk("---------------------------------\n"); | ||
179 | } | ||
180 | } | ||
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 80dc793b3f63..1c4d769e21ea 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
@@ -45,6 +45,15 @@ int iommu_detected __read_mostly = 0; | |||
45 | */ | 45 | */ |
46 | int iommu_pass_through __read_mostly; | 46 | int iommu_pass_through __read_mostly; |
47 | 47 | ||
48 | /* | ||
49 | * Group multi-function PCI devices into a single device-group for the | ||
50 | * iommu_device_group interface. This tells the iommu driver to pretend | ||
51 | * it cannot distinguish between functions of a device, exposing only one | ||
52 | * group for the device. Useful for disallowing use of individual PCI | ||
53 | * functions from userspace drivers. | ||
54 | */ | ||
55 | int iommu_group_mf __read_mostly; | ||
56 | |||
48 | extern struct iommu_table_entry __iommu_table[], __iommu_table_end[]; | 57 | extern struct iommu_table_entry __iommu_table[], __iommu_table_end[]; |
49 | 58 | ||
50 | /* Dummy device used for NULL arguments (normally ISA). */ | 59 | /* Dummy device used for NULL arguments (normally ISA). */ |
@@ -169,6 +178,8 @@ static __init int iommu_setup(char *p) | |||
169 | #endif | 178 | #endif |
170 | if (!strncmp(p, "pt", 2)) | 179 | if (!strncmp(p, "pt", 2)) |
171 | iommu_pass_through = 1; | 180 | iommu_pass_through = 1; |
181 | if (!strncmp(p, "group_mf", 8)) | ||
182 | iommu_group_mf = 1; | ||
172 | 183 | ||
173 | gart_parse_options(p); | 184 | gart_parse_options(p); |
174 | 185 | ||
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index b9b3b1a51643..15763af7bfe3 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -293,7 +293,7 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) | |||
293 | regs.orig_ax = -1; | 293 | regs.orig_ax = -1; |
294 | regs.ip = (unsigned long) kernel_thread_helper; | 294 | regs.ip = (unsigned long) kernel_thread_helper; |
295 | regs.cs = __KERNEL_CS | get_kernel_rpl(); | 295 | regs.cs = __KERNEL_CS | get_kernel_rpl(); |
296 | regs.flags = X86_EFLAGS_IF | 0x2; | 296 | regs.flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1; |
297 | 297 | ||
298 | /* Ok, create the new process.. */ | 298 | /* Ok, create the new process.. */ |
299 | return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); | 299 | return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); |
@@ -403,6 +403,14 @@ void default_idle(void) | |||
403 | EXPORT_SYMBOL(default_idle); | 403 | EXPORT_SYMBOL(default_idle); |
404 | #endif | 404 | #endif |
405 | 405 | ||
406 | bool set_pm_idle_to_default(void) | ||
407 | { | ||
408 | bool ret = !!pm_idle; | ||
409 | |||
410 | pm_idle = default_idle; | ||
411 | |||
412 | return ret; | ||
413 | } | ||
406 | void stop_this_cpu(void *dummy) | 414 | void stop_this_cpu(void *dummy) |
407 | { | 415 | { |
408 | local_irq_disable(); | 416 | local_irq_disable(); |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 795b79f984c2..c08d1ff12b7c 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -99,7 +99,8 @@ void cpu_idle(void) | |||
99 | 99 | ||
100 | /* endless idle loop with no priority at all */ | 100 | /* endless idle loop with no priority at all */ |
101 | while (1) { | 101 | while (1) { |
102 | tick_nohz_stop_sched_tick(1); | 102 | tick_nohz_idle_enter(); |
103 | rcu_idle_enter(); | ||
103 | while (!need_resched()) { | 104 | while (!need_resched()) { |
104 | 105 | ||
105 | check_pgt_cache(); | 106 | check_pgt_cache(); |
@@ -116,7 +117,8 @@ void cpu_idle(void) | |||
116 | pm_idle(); | 117 | pm_idle(); |
117 | start_critical_timings(); | 118 | start_critical_timings(); |
118 | } | 119 | } |
119 | tick_nohz_restart_sched_tick(); | 120 | rcu_idle_exit(); |
121 | tick_nohz_idle_exit(); | ||
120 | preempt_enable_no_resched(); | 122 | preempt_enable_no_resched(); |
121 | schedule(); | 123 | schedule(); |
122 | preempt_disable(); | 124 | preempt_disable(); |
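The tick/RCU bracketing introduced here has a strict nesting: the tick is stopped first and restarted last, with the RCU idle window inside it. A skeleton of the 32-bit loop after this change (kernel context; the 64-bit loop below narrows the RCU window to the idle call itself, since enter_idle() runs notifiers that still need RCU):

while (1) {
	tick_nohz_idle_enter();		/* outermost: stop the tick */
	rcu_idle_enter();		/* RCU idle window begins   */
	while (!need_resched())
		;			/* pm_idle() etc. runs here */
	rcu_idle_exit();		/* RCU window ends          */
	tick_nohz_idle_exit();		/* restart the tick last    */
	preempt_enable_no_resched();
	schedule();
	preempt_disable();
}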
@@ -212,6 +214,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
212 | 214 | ||
213 | task_user_gs(p) = get_user_gs(regs); | 215 | task_user_gs(p) = get_user_gs(regs); |
214 | 216 | ||
217 | p->fpu_counter = 0; | ||
215 | p->thread.io_bitmap_ptr = NULL; | 218 | p->thread.io_bitmap_ptr = NULL; |
216 | tsk = current; | 219 | tsk = current; |
217 | err = -ENOMEM; | 220 | err = -ENOMEM; |
@@ -297,22 +300,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
297 | *next = &next_p->thread; | 300 | *next = &next_p->thread; |
298 | int cpu = smp_processor_id(); | 301 | int cpu = smp_processor_id(); |
299 | struct tss_struct *tss = &per_cpu(init_tss, cpu); | 302 | struct tss_struct *tss = &per_cpu(init_tss, cpu); |
300 | bool preload_fpu; | 303 | fpu_switch_t fpu; |
301 | 304 | ||
302 | /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ | 305 | /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ |
303 | 306 | ||
304 | /* | 307 | fpu = switch_fpu_prepare(prev_p, next_p, cpu); |
305 | * If the task has used fpu the last 5 timeslices, just do a full | ||
306 | * restore of the math state immediately to avoid the trap; the | ||
307 | * chances of needing FPU soon are obviously high now | ||
308 | */ | ||
309 | preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5; | ||
310 | |||
311 | __unlazy_fpu(prev_p); | ||
312 | |||
313 | /* we're going to use this soon, after a few expensive things */ | ||
314 | if (preload_fpu) | ||
315 | prefetch(next->fpu.state); | ||
316 | 308 | ||
317 | /* | 309 | /* |
318 | * Reload esp0. | 310 | * Reload esp0. |
@@ -352,11 +344,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
352 | task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) | 344 | task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) |
353 | __switch_to_xtra(prev_p, next_p, tss); | 345 | __switch_to_xtra(prev_p, next_p, tss); |
354 | 346 | ||
355 | /* If we're going to preload the fpu context, make sure clts | ||
356 | is run while we're batching the cpu state updates. */ | ||
357 | if (preload_fpu) | ||
358 | clts(); | ||
359 | |||
360 | /* | 347 | /* |
361 | * Leave lazy mode, flushing any hypercalls made here. | 348 | * Leave lazy mode, flushing any hypercalls made here. |
362 | * This must be done before restoring TLS segments so | 349 | * This must be done before restoring TLS segments so |
@@ -366,15 +353,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
366 | */ | 353 | */ |
367 | arch_end_context_switch(next_p); | 354 | arch_end_context_switch(next_p); |
368 | 355 | ||
369 | if (preload_fpu) | ||
370 | __math_state_restore(); | ||
371 | |||
372 | /* | 356 | /* |
373 | * Restore %gs if needed (which is common) | 357 | * Restore %gs if needed (which is common) |
374 | */ | 358 | */ |
375 | if (prev->gs | next->gs) | 359 | if (prev->gs | next->gs) |
376 | lazy_load_gs(next->gs); | 360 | lazy_load_gs(next->gs); |
377 | 361 | ||
362 | switch_fpu_finish(next_p, fpu); | ||
363 | |||
378 | percpu_write(current_task, next_p); | 364 | percpu_write(current_task, next_p); |
379 | 365 | ||
380 | return prev_p; | 366 | return prev_p; |
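The prepare/finish pair replaces the scattered preload logic with two well-placed hooks: all save and preload decisions happen before the registers are switched, and the optional restore happens once the new task is current. A heavily simplified sketch of the protocol (the real fpu_switch_t and helpers live in the companion asm/i387.h change, not shown in this diff; treat the bodies as illustrative):

typedef struct { int preload; } fpu_switch_t;

static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old,
					      struct task_struct *new)
{
	fpu_switch_t fpu;

	/* preload if the incoming task used the FPU recently */
	fpu.preload = tsk_used_math(new) && new->fpu_counter > 5;
	__unlazy_fpu(old);			/* save outgoing state */
	if (fpu.preload)
		prefetch(new->thread.fpu.state);
	return fpu;
}

static inline void switch_fpu_finish(struct task_struct *new,
				     fpu_switch_t fpu)
{
	if (fpu.preload)
		__math_state_restore();		/* restore for new task */
}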
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 3bd7e6eebf31..cfa5c90c01db 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -122,7 +122,7 @@ void cpu_idle(void) | |||
122 | 122 | ||
123 | /* endless idle loop with no priority at all */ | 123 | /* endless idle loop with no priority at all */ |
124 | while (1) { | 124 | while (1) { |
125 | tick_nohz_stop_sched_tick(1); | 125 | tick_nohz_idle_enter(); |
126 | while (!need_resched()) { | 126 | while (!need_resched()) { |
127 | 127 | ||
128 | rmb(); | 128 | rmb(); |
@@ -139,8 +139,14 @@ void cpu_idle(void) | |||
139 | enter_idle(); | 139 | enter_idle(); |
140 | /* Don't trace irqs off for idle */ | 140 | /* Don't trace irqs off for idle */ |
141 | stop_critical_timings(); | 141 | stop_critical_timings(); |
142 | |||
143 | /* enter_idle() needs rcu for notifiers */ | ||
144 | rcu_idle_enter(); | ||
145 | |||
142 | if (cpuidle_idle_call()) | 146 | if (cpuidle_idle_call()) |
143 | pm_idle(); | 147 | pm_idle(); |
148 | |||
149 | rcu_idle_exit(); | ||
144 | start_critical_timings(); | 150 | start_critical_timings(); |
145 | 151 | ||
146 | /* In many cases the interrupt that ended idle | 152 | /* In many cases the interrupt that ended idle |
@@ -149,7 +155,7 @@ void cpu_idle(void) | |||
149 | __exit_idle(); | 155 | __exit_idle(); |
150 | } | 156 | } |
151 | 157 | ||
152 | tick_nohz_restart_sched_tick(); | 158 | tick_nohz_idle_exit(); |
153 | preempt_enable_no_resched(); | 159 | preempt_enable_no_resched(); |
154 | schedule(); | 160 | schedule(); |
155 | preempt_disable(); | 161 | preempt_disable(); |
@@ -280,6 +286,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
280 | 286 | ||
281 | set_tsk_thread_flag(p, TIF_FORK); | 287 | set_tsk_thread_flag(p, TIF_FORK); |
282 | 288 | ||
289 | p->fpu_counter = 0; | ||
283 | p->thread.io_bitmap_ptr = NULL; | 290 | p->thread.io_bitmap_ptr = NULL; |
284 | 291 | ||
285 | savesegment(gs, p->thread.gsindex); | 292 | savesegment(gs, p->thread.gsindex); |
@@ -293,13 +300,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
293 | memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); | 300 | memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); |
294 | 301 | ||
295 | if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { | 302 | if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { |
296 | p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); | 303 | p->thread.io_bitmap_ptr = kmemdup(me->thread.io_bitmap_ptr, |
304 | IO_BITMAP_BYTES, GFP_KERNEL); | ||
297 | if (!p->thread.io_bitmap_ptr) { | 305 | if (!p->thread.io_bitmap_ptr) { |
298 | p->thread.io_bitmap_max = 0; | 306 | p->thread.io_bitmap_max = 0; |
299 | return -ENOMEM; | 307 | return -ENOMEM; |
300 | } | 308 | } |
301 | memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, | ||
302 | IO_BITMAP_BYTES); | ||
303 | set_tsk_thread_flag(p, TIF_IO_BITMAP); | 309 | set_tsk_thread_flag(p, TIF_IO_BITMAP); |
304 | } | 310 | } |
305 | 311 | ||
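kmemdup() folds the kmalloc-plus-memcpy pair into one call with the same failure semantics (NULL on allocation failure). A minimal kernel-context sketch:

#include <linux/slab.h>
#include <linux/string.h>

static void *dup_buffer(const void *src, size_t len)
{
	/* replaces: dst = kmalloc(len, GFP_KERNEL);
	 *           if (!dst) return NULL;
	 *           memcpy(dst, src, len);        */
	return kmemdup(src, len, GFP_KERNEL);
}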
@@ -381,18 +387,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
381 | int cpu = smp_processor_id(); | 387 | int cpu = smp_processor_id(); |
382 | struct tss_struct *tss = &per_cpu(init_tss, cpu); | 388 | struct tss_struct *tss = &per_cpu(init_tss, cpu); |
383 | unsigned fsindex, gsindex; | 389 | unsigned fsindex, gsindex; |
384 | bool preload_fpu; | 390 | fpu_switch_t fpu; |
385 | |||
386 | /* | ||
387 | * If the task has used fpu the last 5 timeslices, just do a full | ||
388 | * restore of the math state immediately to avoid the trap; the | ||
389 | * chances of needing FPU soon are obviously high now | ||
390 | */ | ||
391 | preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5; | ||
392 | 391 | ||
393 | /* we're going to use this soon, after a few expensive things */ | 392 | fpu = switch_fpu_prepare(prev_p, next_p, cpu); |
394 | if (preload_fpu) | ||
395 | prefetch(next->fpu.state); | ||
396 | 393 | ||
397 | /* | 394 | /* |
398 | * Reload esp0, LDT and the page table pointer: | 395 | * Reload esp0, LDT and the page table pointer: |
@@ -422,13 +419,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
422 | 419 | ||
423 | load_TLS(next, cpu); | 420 | load_TLS(next, cpu); |
424 | 421 | ||
425 | /* Must be after DS reload */ | ||
426 | __unlazy_fpu(prev_p); | ||
427 | |||
428 | /* Make sure cpu is ready for new context */ | ||
429 | if (preload_fpu) | ||
430 | clts(); | ||
431 | |||
432 | /* | 422 | /* |
433 | * Leave lazy mode, flushing any hypercalls made here. | 423 | * Leave lazy mode, flushing any hypercalls made here. |
434 | * This must be done before restoring TLS segments so | 424 | * This must be done before restoring TLS segments so |
@@ -469,6 +459,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
469 | wrmsrl(MSR_KERNEL_GS_BASE, next->gs); | 459 | wrmsrl(MSR_KERNEL_GS_BASE, next->gs); |
470 | prev->gsindex = gsindex; | 460 | prev->gsindex = gsindex; |
471 | 461 | ||
462 | switch_fpu_finish(next_p, fpu); | ||
463 | |||
472 | /* | 464 | /* |
473 | * Switch the PDA and FPU contexts. | 465 | * Switch the PDA and FPU contexts. |
474 | */ | 466 | */ |
@@ -487,13 +479,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
487 | task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV)) | 479 | task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV)) |
488 | __switch_to_xtra(prev_p, next_p, tss); | 480 | __switch_to_xtra(prev_p, next_p, tss); |
489 | 481 | ||
490 | /* | ||
491 | * Preload the FPU context, now that we've determined that the | ||
492 | * task is likely to be using it. | ||
493 | */ | ||
494 | if (preload_fpu) | ||
495 | __math_state_restore(); | ||
496 | |||
497 | return prev_p; | 482 | return prev_p; |
498 | } | 483 | } |
499 | 484 | ||
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 82528799c5de..50267386b766 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -749,7 +749,8 @@ put: | |||
749 | /* | 749 | /* |
750 | * Handle PTRACE_POKEUSR calls for the debug register area. | 750 | * Handle PTRACE_POKEUSR calls for the debug register area. |
751 | */ | 751 | */ |
752 | int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val) | 752 | static int ptrace_set_debugreg(struct task_struct *tsk, int n, |
753 | unsigned long val) | ||
753 | { | 754 | { |
754 | struct thread_struct *thread = &(tsk->thread); | 755 | struct thread_struct *thread = &(tsk->thread); |
755 | int rc = 0; | 756 | int rc = 0; |
@@ -1391,20 +1392,18 @@ long syscall_trace_enter(struct pt_regs *regs) | |||
1391 | if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) | 1392 | if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) |
1392 | trace_sys_enter(regs, regs->orig_ax); | 1393 | trace_sys_enter(regs, regs->orig_ax); |
1393 | 1394 | ||
1394 | if (unlikely(current->audit_context)) { | 1395 | if (IS_IA32) |
1395 | if (IS_IA32) | 1396 | audit_syscall_entry(AUDIT_ARCH_I386, |
1396 | audit_syscall_entry(AUDIT_ARCH_I386, | 1397 | regs->orig_ax, |
1397 | regs->orig_ax, | 1398 | regs->bx, regs->cx, |
1398 | regs->bx, regs->cx, | 1399 | regs->dx, regs->si); |
1399 | regs->dx, regs->si); | ||
1400 | #ifdef CONFIG_X86_64 | 1400 | #ifdef CONFIG_X86_64 |
1401 | else | 1401 | else |
1402 | audit_syscall_entry(AUDIT_ARCH_X86_64, | 1402 | audit_syscall_entry(AUDIT_ARCH_X86_64, |
1403 | regs->orig_ax, | 1403 | regs->orig_ax, |
1404 | regs->di, regs->si, | 1404 | regs->di, regs->si, |
1405 | regs->dx, regs->r10); | 1405 | regs->dx, regs->r10); |
1406 | #endif | 1406 | #endif |
1407 | } | ||
1408 | 1407 | ||
1409 | return ret ?: regs->orig_ax; | 1408 | return ret ?: regs->orig_ax; |
1410 | } | 1409 | } |
@@ -1413,8 +1412,7 @@ void syscall_trace_leave(struct pt_regs *regs) | |||
1413 | { | 1412 | { |
1414 | bool step; | 1413 | bool step; |
1415 | 1414 | ||
1416 | if (unlikely(current->audit_context)) | 1415 | audit_syscall_exit(regs); |
1417 | audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); | ||
1418 | 1416 | ||
1419 | if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) | 1417 | if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) |
1420 | trace_sys_exit(regs, regs->ax); | 1418 | trace_sys_exit(regs, regs->ax); |
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index b78643d0f9a5..03920a15a632 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c | |||
@@ -553,4 +553,17 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC, | |||
553 | quirk_amd_nb_node); | 553 | quirk_amd_nb_node); |
554 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_LINK, | 554 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_LINK, |
555 | quirk_amd_nb_node); | 555 | quirk_amd_nb_node); |
556 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F0, | ||
557 | quirk_amd_nb_node); | ||
558 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F1, | ||
559 | quirk_amd_nb_node); | ||
560 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F2, | ||
561 | quirk_amd_nb_node); | ||
562 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3, | ||
563 | quirk_amd_nb_node); | ||
564 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4, | ||
565 | quirk_amd_nb_node); | ||
566 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F5, | ||
567 | quirk_amd_nb_node); | ||
568 | |||
556 | #endif | 569 | #endif |
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index e334be1182b9..d840e69a853c 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
@@ -39,6 +39,14 @@ static int reboot_mode; | |||
39 | enum reboot_type reboot_type = BOOT_ACPI; | 39 | enum reboot_type reboot_type = BOOT_ACPI; |
40 | int reboot_force; | 40 | int reboot_force; |
41 | 41 | ||
42 | /* This variable is used privately to keep track of whether or not | ||
43 | * reboot_type is still set to its default value (i.e., reboot= hasn't | ||
44 | * been set on the command line). This is needed so that we can | ||
45 | * suppress DMI scanning for reboot quirks. Without it, it's | ||
46 | * impossible to override a faulty reboot quirk without recompiling. | ||
47 | */ | ||
48 | static int reboot_default = 1; | ||
49 | |||
42 | #if defined(CONFIG_X86_32) && defined(CONFIG_SMP) | 50 | #if defined(CONFIG_X86_32) && defined(CONFIG_SMP) |
43 | static int reboot_cpu = -1; | 51 | static int reboot_cpu = -1; |
44 | #endif | 52 | #endif |
@@ -67,6 +75,12 @@ bool port_cf9_safe = false; | |||
67 | static int __init reboot_setup(char *str) | 75 | static int __init reboot_setup(char *str) |
68 | { | 76 | { |
69 | for (;;) { | 77 | for (;;) { |
78 | /* Having anything passed on the command line via | ||
79 | * reboot= will cause us to disable DMI checking | ||
80 | * below. | ||
81 | */ | ||
82 | reboot_default = 0; | ||
83 | |||
70 | switch (*str) { | 84 | switch (*str) { |
71 | case 'w': | 85 | case 'w': |
72 | reboot_mode = 0x1234; | 86 | reboot_mode = 0x1234; |
@@ -124,7 +138,7 @@ __setup("reboot=", reboot_setup); | |||
124 | */ | 138 | */ |
125 | 139 | ||
126 | /* | 140 | /* |
127 | * Some machines require the "reboot=b" commandline option, | 141 | * Some machines require the "reboot=b" or "reboot=k" commandline options, |
128 | * this quirk makes that automatic. | 142 | * this quirk makes that automatic. |
129 | */ | 143 | */ |
130 | static int __init set_bios_reboot(const struct dmi_system_id *d) | 144 | static int __init set_bios_reboot(const struct dmi_system_id *d) |
@@ -136,6 +150,15 @@ static int __init set_bios_reboot(const struct dmi_system_id *d) | |||
136 | return 0; | 150 | return 0; |
137 | } | 151 | } |
138 | 152 | ||
153 | static int __init set_kbd_reboot(const struct dmi_system_id *d) | ||
154 | { | ||
155 | if (reboot_type != BOOT_KBD) { | ||
156 | reboot_type = BOOT_KBD; | ||
157 | printk(KERN_INFO "%s series board detected. Selecting KBD-method for reboot.\n", d->ident); | ||
158 | } | ||
159 | return 0; | ||
160 | } | ||
161 | |||
139 | static struct dmi_system_id __initdata reboot_dmi_table[] = { | 162 | static struct dmi_system_id __initdata reboot_dmi_table[] = { |
140 | { /* Handle problems with rebooting on Dell E520's */ | 163 | { /* Handle problems with rebooting on Dell E520's */ |
141 | .callback = set_bios_reboot, | 164 | .callback = set_bios_reboot, |
@@ -286,16 +309,8 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { | |||
286 | DMI_MATCH(DMI_BOARD_NAME, "P4S800"), | 309 | DMI_MATCH(DMI_BOARD_NAME, "P4S800"), |
287 | }, | 310 | }, |
288 | }, | 311 | }, |
289 | { /* Handle problems with rebooting on VersaLogic Menlow boards */ | ||
290 | .callback = set_bios_reboot, | ||
291 | .ident = "VersaLogic Menlow based board", | ||
292 | .matches = { | ||
293 | DMI_MATCH(DMI_BOARD_VENDOR, "VersaLogic Corporation"), | ||
294 | DMI_MATCH(DMI_BOARD_NAME, "VersaLogic Menlow board"), | ||
295 | }, | ||
296 | }, | ||
297 | { /* Handle reboot issue on Acer Aspire one */ | 312 | { /* Handle reboot issue on Acer Aspire one */ |
298 | .callback = set_bios_reboot, | 313 | .callback = set_kbd_reboot, |
299 | .ident = "Acer Aspire One A110", | 314 | .ident = "Acer Aspire One A110", |
300 | .matches = { | 315 | .matches = { |
301 | DMI_MATCH(DMI_SYS_VENDOR, "Acer"), | 316 | DMI_MATCH(DMI_SYS_VENDOR, "Acer"), |
@@ -307,7 +322,12 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { | |||
307 | 322 | ||
308 | static int __init reboot_init(void) | 323 | static int __init reboot_init(void) |
309 | { | 324 | { |
310 | dmi_check_system(reboot_dmi_table); | 325 | /* Only do the DMI check if reboot_type hasn't been overridden |
326 | * on the command line | ||
327 | */ | ||
328 | if (reboot_default) { | ||
329 | dmi_check_system(reboot_dmi_table); | ||
330 | } | ||
311 | return 0; | 331 | return 0; |
312 | } | 332 | } |
313 | core_initcall(reboot_init); | 333 | core_initcall(reboot_init); |
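The reboot_default flag encodes a simple precedence rule: an explicit reboot= on the command line beats any DMI quirk. A runnable userspace reduction of the pattern (names and values illustrative):

#include <stdio.h>

static int reboot_default = 1;	/* no reboot= seen yet */
static char reboot_type = 'a';	/* BOOT_ACPI stand-in  */

static void parse_reboot_param(const char *arg)
{
	reboot_default = 0;	/* any reboot= disables DMI quirks */
	reboot_type = arg[0];
}

static void apply_dmi_quirks(void)
{
	if (!reboot_default)
		return;		/* user override wins */
	reboot_type = 'b';	/* pretend a quirk matched */
}

int main(void)
{
	parse_reboot_param("k");
	apply_dmi_quirks();
	printf("reboot_type=%c\n", reboot_type);	/* 'k', quirk skipped */
	return 0;
}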
@@ -443,12 +463,25 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = { | |||
443 | DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6420"), | 463 | DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6420"), |
444 | }, | 464 | }, |
445 | }, | 465 | }, |
466 | { /* Handle problems with rebooting on the OptiPlex 990. */ | ||
467 | .callback = set_pci_reboot, | ||
468 | .ident = "Dell OptiPlex 990", | ||
469 | .matches = { | ||
470 | DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), | ||
471 | DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 990"), | ||
472 | }, | ||
473 | }, | ||
446 | { } | 474 | { } |
447 | }; | 475 | }; |
448 | 476 | ||
449 | static int __init pci_reboot_init(void) | 477 | static int __init pci_reboot_init(void) |
450 | { | 478 | { |
451 | dmi_check_system(pci_reboot_dmi_table); | 479 | /* Only do the DMI check if reboot_type hasn't been overridden |
480 | * on the command line | ||
481 | */ | ||
482 | if (reboot_default) { | ||
483 | dmi_check_system(pci_reboot_dmi_table); | ||
484 | } | ||
452 | return 0; | 485 | return 0; |
453 | } | 486 | } |
454 | core_initcall(pci_reboot_init); | 487 | core_initcall(pci_reboot_init); |
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 348ce016a835..af6db6ec5b2a 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <asm/vsyscall.h> | 12 | #include <asm/vsyscall.h> |
13 | #include <asm/x86_init.h> | 13 | #include <asm/x86_init.h> |
14 | #include <asm/time.h> | 14 | #include <asm/time.h> |
15 | #include <asm/mrst.h> | ||
15 | 16 | ||
16 | #ifdef CONFIG_X86_32 | 17 | #ifdef CONFIG_X86_32 |
17 | /* | 18 | /* |
@@ -242,6 +243,10 @@ static __init int add_rtc_cmos(void) | |||
242 | if (of_have_populated_dt()) | 243 | if (of_have_populated_dt()) |
243 | return 0; | 244 | return 0; |
244 | 245 | ||
246 | /* Intel MID platforms don't have ioport rtc */ | ||
247 | if (mrst_identify_cpu()) | ||
248 | return -ENODEV; | ||
249 | |||
245 | platform_device_register(&rtc_device); | 250 | platform_device_register(&rtc_device); |
246 | dev_info(&rtc_device.dev, | 251 | dev_info(&rtc_device.dev, |
247 | "registered platform RTC device (no PNP device found)\n"); | 252 | "registered platform RTC device (no PNP device found)\n"); |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index cf0ef986cb6d..d7d5099fe874 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -306,7 +306,8 @@ static void __init cleanup_highmap(void) | |||
306 | static void __init reserve_brk(void) | 306 | static void __init reserve_brk(void) |
307 | { | 307 | { |
308 | if (_brk_end > _brk_start) | 308 | if (_brk_end > _brk_start) |
309 | memblock_x86_reserve_range(__pa(_brk_start), __pa(_brk_end), "BRK"); | 309 | memblock_reserve(__pa(_brk_start), |
310 | __pa(_brk_end) - __pa(_brk_start)); | ||
310 | 311 | ||
311 | /* Mark brk area as locked down and no longer taking any | 312 | /* Mark brk area as locked down and no longer taking any |
312 | new allocations */ | 313 | new allocations */ |
@@ -331,13 +332,13 @@ static void __init relocate_initrd(void) | |||
331 | ramdisk_here = memblock_find_in_range(0, end_of_lowmem, area_size, | 332 | ramdisk_here = memblock_find_in_range(0, end_of_lowmem, area_size, |
332 | PAGE_SIZE); | 333 | PAGE_SIZE); |
333 | 334 | ||
334 | if (ramdisk_here == MEMBLOCK_ERROR) | 335 | if (!ramdisk_here) |
335 | panic("Cannot find place for new RAMDISK of size %lld\n", | 336 | panic("Cannot find place for new RAMDISK of size %lld\n", |
336 | ramdisk_size); | 337 | ramdisk_size); |
337 | 338 | ||
338 | /* Note: this includes all the lowmem currently occupied by | 339 | /* Note: this includes all the lowmem currently occupied by |
339 | the initrd, we rely on that fact to keep the data intact. */ | 340 | the initrd, we rely on that fact to keep the data intact. */ |
340 | memblock_x86_reserve_range(ramdisk_here, ramdisk_here + area_size, "NEW RAMDISK"); | 341 | memblock_reserve(ramdisk_here, area_size); |
341 | initrd_start = ramdisk_here + PAGE_OFFSET; | 342 | initrd_start = ramdisk_here + PAGE_OFFSET; |
342 | initrd_end = initrd_start + ramdisk_size; | 343 | initrd_end = initrd_start + ramdisk_size; |
343 | printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n", | 344 | printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n", |
@@ -393,7 +394,7 @@ static void __init reserve_initrd(void) | |||
393 | initrd_start = 0; | 394 | initrd_start = 0; |
394 | 395 | ||
395 | if (ramdisk_size >= (end_of_lowmem>>1)) { | 396 | if (ramdisk_size >= (end_of_lowmem>>1)) { |
396 | memblock_x86_free_range(ramdisk_image, ramdisk_end); | 397 | memblock_free(ramdisk_image, ramdisk_end - ramdisk_image); |
397 | printk(KERN_ERR "initrd too large to handle, " | 398 | printk(KERN_ERR "initrd too large to handle, " |
398 | "disabling initrd\n"); | 399 | "disabling initrd\n"); |
399 | return; | 400 | return; |
@@ -416,7 +417,7 @@ static void __init reserve_initrd(void) | |||
416 | 417 | ||
417 | relocate_initrd(); | 418 | relocate_initrd(); |
418 | 419 | ||
419 | memblock_x86_free_range(ramdisk_image, ramdisk_end); | 420 | memblock_free(ramdisk_image, ramdisk_end - ramdisk_image); |
420 | } | 421 | } |
421 | #else | 422 | #else |
422 | static void __init reserve_initrd(void) | 423 | static void __init reserve_initrd(void) |
@@ -490,15 +491,13 @@ static void __init memblock_x86_reserve_range_setup_data(void) | |||
490 | { | 491 | { |
491 | struct setup_data *data; | 492 | struct setup_data *data; |
492 | u64 pa_data; | 493 | u64 pa_data; |
493 | char buf[32]; | ||
494 | 494 | ||
495 | if (boot_params.hdr.version < 0x0209) | 495 | if (boot_params.hdr.version < 0x0209) |
496 | return; | 496 | return; |
497 | pa_data = boot_params.hdr.setup_data; | 497 | pa_data = boot_params.hdr.setup_data; |
498 | while (pa_data) { | 498 | while (pa_data) { |
499 | data = early_memremap(pa_data, sizeof(*data)); | 499 | data = early_memremap(pa_data, sizeof(*data)); |
500 | sprintf(buf, "setup data %x", data->type); | 500 | memblock_reserve(pa_data, sizeof(*data) + data->len); |
501 | memblock_x86_reserve_range(pa_data, pa_data+sizeof(*data)+data->len, buf); | ||
502 | pa_data = data->next; | 501 | pa_data = data->next; |
503 | early_iounmap(data, sizeof(*data)); | 502 | early_iounmap(data, sizeof(*data)); |
504 | } | 503 | } |
@@ -554,7 +553,7 @@ static void __init reserve_crashkernel(void) | |||
554 | crash_base = memblock_find_in_range(alignment, | 553 | crash_base = memblock_find_in_range(alignment, |
555 | CRASH_KERNEL_ADDR_MAX, crash_size, alignment); | 554 | CRASH_KERNEL_ADDR_MAX, crash_size, alignment); |
556 | 555 | ||
557 | if (crash_base == MEMBLOCK_ERROR) { | 556 | if (!crash_base) { |
558 | pr_info("crashkernel reservation failed - No suitable area found.\n"); | 557 | pr_info("crashkernel reservation failed - No suitable area found.\n"); |
559 | return; | 558 | return; |
560 | } | 559 | } |
@@ -568,7 +567,7 @@ static void __init reserve_crashkernel(void) | |||
568 | return; | 567 | return; |
569 | } | 568 | } |
570 | } | 569 | } |
571 | memblock_x86_reserve_range(crash_base, crash_base + crash_size, "CRASH KERNEL"); | 570 | memblock_reserve(crash_base, crash_size); |
572 | 571 | ||
573 | printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " | 572 | printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " |
574 | "for crashkernel (System RAM: %ldMB)\n", | 573 | "for crashkernel (System RAM: %ldMB)\n", |
@@ -626,7 +625,7 @@ static __init void reserve_ibft_region(void) | |||
626 | addr = find_ibft_region(&size); | 625 | addr = find_ibft_region(&size); |
627 | 626 | ||
628 | if (size) | 627 | if (size) |
629 | memblock_x86_reserve_range(addr, addr + size, "* ibft"); | 628 | memblock_reserve(addr, size); |
630 | } | 629 | } |
631 | 630 | ||
632 | static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10; | 631 | static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10; |
@@ -750,12 +749,7 @@ void __init setup_arch(char **cmdline_p) | |||
750 | #endif | 749 | #endif |
751 | #ifdef CONFIG_EFI | 750 | #ifdef CONFIG_EFI |
752 | if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, | 751 | if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, |
753 | #ifdef CONFIG_X86_32 | 752 | EFI_LOADER_SIGNATURE, 4)) { |
754 | "EL32", | ||
755 | #else | ||
756 | "EL64", | ||
757 | #endif | ||
758 | 4)) { | ||
759 | efi_enabled = 1; | 753 | efi_enabled = 1; |
760 | efi_memblock_x86_reserve_range(); | 754 | efi_memblock_x86_reserve_range(); |
761 | } | 755 | } |
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 54ddaeb221c1..46a01bdc27e2 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
@@ -682,7 +682,6 @@ static int | |||
682 | handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | 682 | handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, |
683 | struct pt_regs *regs) | 683 | struct pt_regs *regs) |
684 | { | 684 | { |
685 | sigset_t blocked; | ||
686 | int ret; | 685 | int ret; |
687 | 686 | ||
688 | /* Are we from a system call? */ | 687 | /* Are we from a system call? */ |
@@ -733,10 +732,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | |||
733 | */ | 732 | */ |
734 | regs->flags &= ~X86_EFLAGS_TF; | 733 | regs->flags &= ~X86_EFLAGS_TF; |
735 | 734 | ||
736 | sigorsets(&blocked, ¤t->blocked, &ka->sa.sa_mask); | 735 | block_sigmask(ka, sig); |
737 | if (!(ka->sa.sa_flags & SA_NODEFER)) | ||
738 | sigaddset(&blocked, sig); | ||
739 | set_current_blocked(&blocked); | ||
740 | 736 | ||
741 | tracehook_signal_handler(sig, info, ka, regs, | 737 | tracehook_signal_handler(sig, info, ka, regs, |
742 | test_thread_flag(TIF_SINGLESTEP)); | 738 | test_thread_flag(TIF_SINGLESTEP)); |
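block_sigmask() is exactly the open-coded block deleted above, factored into a helper in kernel/signal.c: it ors the handler's sa_mask into the blocked set and, unless SA_NODEFER is set, blocks the delivered signal too. A sketch of the equivalent (mirrors the removed lines; kernel context assumed):

static void block_sigmask_sketch(struct k_sigaction *ka, int sig)
{
	sigset_t blocked;

	sigorsets(&blocked, &current->blocked, &ka->sa.sa_mask);
	if (!(ka->sa.sa_flags & SA_NODEFER))
		sigaddset(&blocked, sig);
	set_current_blocked(&blocked);
}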
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 16204dc15484..66c74f481cab 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <asm/mmu_context.h> | 29 | #include <asm/mmu_context.h> |
30 | #include <asm/proto.h> | 30 | #include <asm/proto.h> |
31 | #include <asm/apic.h> | 31 | #include <asm/apic.h> |
32 | #include <asm/nmi.h> | ||
32 | /* | 33 | /* |
33 | * Some notes on x86 processor bugs affecting SMP operation: | 34 | * Some notes on x86 processor bugs affecting SMP operation: |
34 | * | 35 | * |
@@ -148,6 +149,60 @@ void native_send_call_func_ipi(const struct cpumask *mask) | |||
148 | free_cpumask_var(allbutself); | 149 | free_cpumask_var(allbutself); |
149 | } | 150 | } |
150 | 151 | ||
152 | static atomic_t stopping_cpu = ATOMIC_INIT(-1); | ||
153 | |||
154 | static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs) | ||
155 | { | ||
156 | /* We are registered on the stopping CPU too; avoid a spurious NMI */ | ||
157 | if (raw_smp_processor_id() == atomic_read(&stopping_cpu)) | ||
158 | return NMI_HANDLED; | ||
159 | |||
160 | stop_this_cpu(NULL); | ||
161 | |||
162 | return NMI_HANDLED; | ||
163 | } | ||
164 | |||
165 | static void native_nmi_stop_other_cpus(int wait) | ||
166 | { | ||
167 | unsigned long flags; | ||
168 | unsigned long timeout; | ||
169 | |||
170 | if (reboot_force) | ||
171 | return; | ||
172 | |||
173 | /* | ||
174 | * Use our own vector here because smp_call_function | ||
175 | * does lots of things not suitable in a panic situation. | ||
176 | */ | ||
177 | if (num_online_cpus() > 1) { | ||
178 | /* did someone beat us here? */ | ||
179 | if (atomic_cmpxchg(&stopping_cpu, -1, safe_smp_processor_id()) != -1) | ||
180 | return; | ||
181 | |||
182 | if (register_nmi_handler(NMI_LOCAL, smp_stop_nmi_callback, | ||
183 | NMI_FLAG_FIRST, "smp_stop")) | ||
184 | /* Note: we ignore failures here */ | ||
185 | return; | ||
186 | |||
187 | /* sync above data before sending NMI */ | ||
188 | wmb(); | ||
189 | |||
190 | apic->send_IPI_allbutself(NMI_VECTOR); | ||
191 | |||
192 | /* | ||
193 | * Don't wait longer than a second if the caller | ||
194 | * didn't ask us to wait. | ||
195 | */ | ||
196 | timeout = USEC_PER_SEC; | ||
197 | while (num_online_cpus() > 1 && (wait || timeout--)) | ||
198 | udelay(1); | ||
199 | } | ||
200 | |||
201 | local_irq_save(flags); | ||
202 | disable_local_APIC(); | ||
203 | local_irq_restore(flags); | ||
204 | } | ||
205 | |||
151 | /* | 206 | /* |
152 | * this function calls the 'stop' function on all other CPUs in the system. | 207 | * this function calls the 'stop' function on all other CPUs in the system. |
153 | */ | 208 | */ |
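The NMI path must have exactly one sender: atomic_cmpxchg() on stopping_cpu lets the first caller swap -1 for its CPU id and makes every later caller bail out, and the NMI callback compares against the stored id so the sender ignores its own broadcast. The election pattern in isolation:

    /* illustration of the single-winner election used above */
    static atomic_t stopping_cpu = ATOMIC_INIT(-1);

    static bool try_become_stopper(int cpu)
    {
            /* true for exactly one caller: the first to swap -1 for its id */
            return atomic_cmpxchg(&stopping_cpu, -1, cpu) == -1;
    }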
@@ -160,7 +215,7 @@ asmlinkage void smp_reboot_interrupt(void) | |||
160 | irq_exit(); | 215 | irq_exit(); |
161 | } | 216 | } |
162 | 217 | ||
163 | static void native_stop_other_cpus(int wait) | 218 | static void native_irq_stop_other_cpus(int wait) |
164 | { | 219 | { |
165 | unsigned long flags; | 220 | unsigned long flags; |
166 | unsigned long timeout; | 221 | unsigned long timeout; |
@@ -194,6 +249,11 @@ static void native_stop_other_cpus(int wait) | |||
194 | local_irq_restore(flags); | 249 | local_irq_restore(flags); |
195 | } | 250 | } |
196 | 251 | ||
252 | static void native_smp_disable_nmi_ipi(void) | ||
253 | { | ||
254 | smp_ops.stop_other_cpus = native_irq_stop_other_cpus; | ||
255 | } | ||
256 | |||
197 | /* | 257 | /* |
198 | * Reschedule call back. | 258 | * Reschedule call back. |
199 | */ | 259 | */ |
@@ -225,12 +285,20 @@ void smp_call_function_single_interrupt(struct pt_regs *regs) | |||
225 | irq_exit(); | 285 | irq_exit(); |
226 | } | 286 | } |
227 | 287 | ||
288 | static int __init nonmi_ipi_setup(char *str) | ||
289 | { | ||
290 | native_smp_disable_nmi_ipi(); | ||
291 | return 1; | ||
292 | } | ||
293 | |||
294 | __setup("nonmi_ipi", nonmi_ipi_setup); | ||
295 | |||
228 | struct smp_ops smp_ops = { | 296 | struct smp_ops smp_ops = { |
229 | .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, | 297 | .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, |
230 | .smp_prepare_cpus = native_smp_prepare_cpus, | 298 | .smp_prepare_cpus = native_smp_prepare_cpus, |
231 | .smp_cpus_done = native_smp_cpus_done, | 299 | .smp_cpus_done = native_smp_cpus_done, |
232 | 300 | ||
233 | .stop_other_cpus = native_stop_other_cpus, | 301 | .stop_other_cpus = native_nmi_stop_other_cpus, |
234 | .smp_send_reschedule = native_smp_send_reschedule, | 302 | .smp_send_reschedule = native_smp_send_reschedule, |
235 | 303 | ||
236 | .cpu_up = native_cpu_up, | 304 | .cpu_up = native_cpu_up, |
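Because of the __setup("nonmi_ipi", ...) hook above, the NMI-based stop can be disabled from the kernel command line, which swaps smp_ops.stop_other_cpus back to the IRQ-based native_irq_stop_other_cpus(). For example (boot entry is illustrative):

    linux /boot/vmlinuz-3.3 root=/dev/sda1 ro nonmi_ipi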
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 9f548cb4a958..66d250c00d11 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -207,23 +207,29 @@ static void __cpuinit smp_callin(void) | |||
207 | * Need to setup vector mappings before we enable interrupts. | 207 | * Need to setup vector mappings before we enable interrupts. |
208 | */ | 208 | */ |
209 | setup_vector_irq(smp_processor_id()); | 209 | setup_vector_irq(smp_processor_id()); |
210 | |||
211 | /* | ||
212 | * Save our processor parameters. Note: this information | ||
213 | * is needed for clock calibration. | ||
214 | */ | ||
215 | smp_store_cpu_info(cpuid); | ||
216 | |||
210 | /* | 217 | /* |
211 | * Get our bogomips. | 218 | * Get our bogomips. |
219 | * Update loops_per_jiffy in cpu_data. Previous call to | ||
220 | * smp_store_cpu_info() stored a value that is close but not as | ||
221 | * accurate as the value just calculated. | ||
212 | * | 222 | * |
213 | * Need to enable IRQs because it can take longer and then | 223 | * Need to enable IRQs because it can take longer and then |
214 | * the NMI watchdog might kill us. | 224 | * the NMI watchdog might kill us. |
215 | */ | 225 | */ |
216 | local_irq_enable(); | 226 | local_irq_enable(); |
217 | calibrate_delay(); | 227 | calibrate_delay(); |
228 | cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy; | ||
218 | local_irq_disable(); | 229 | local_irq_disable(); |
219 | pr_debug("Stack at about %p\n", &cpuid); | 230 | pr_debug("Stack at about %p\n", &cpuid); |
220 | 231 | ||
221 | /* | 232 | /* |
222 | * Save our processor parameters | ||
223 | */ | ||
224 | smp_store_cpu_info(cpuid); | ||
225 | |||
226 | /* | ||
227 | * This must be done before setting cpu_online_mask | 233 | * This must be done before setting cpu_online_mask |
228 | * or calling notify_cpu_starting. | 234 | * or calling notify_cpu_starting. |
229 | */ | 235 | */ |
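Condensed, the new smp_callin() ordering is: store the rough per-CPU parameters first (clock calibration needs them), then overwrite the loops_per_jiffy estimate with the freshly calibrated value:

    smp_store_cpu_info(cpuid);      /* rough data, incl. an initial
                                       loops_per_jiffy estimate */
    local_irq_enable();             /* calibration can be slow; keep the
                                       NMI watchdog fed */
    calibrate_delay();
    cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy;  /* accurate value */
    local_irq_disable();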
@@ -840,7 +846,8 @@ int __cpuinit native_cpu_up(unsigned int cpu) | |||
840 | pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu); | 846 | pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu); |
841 | 847 | ||
842 | if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid || | 848 | if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid || |
843 | !physid_isset(apicid, phys_cpu_present_map)) { | 849 | !physid_isset(apicid, phys_cpu_present_map) || |
850 | (!x2apic_mode && apicid >= 255)) { | ||
844 | printk(KERN_ERR "%s: bad cpu %d\n", __func__, cpu); | 851 | printk(KERN_ERR "%s: bad cpu %d\n", __func__, cpu); |
845 | return -EINVAL; | 852 | return -EINVAL; |
846 | } | 853 | } |
@@ -1142,6 +1149,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus) | |||
1142 | { | 1149 | { |
1143 | pr_debug("Boot done.\n"); | 1150 | pr_debug("Boot done.\n"); |
1144 | 1151 | ||
1152 | nmi_selftest(); | ||
1145 | impress_friends(); | 1153 | impress_friends(); |
1146 | #ifdef CONFIG_X86_IO_APIC | 1154 | #ifdef CONFIG_X86_IO_APIC |
1147 | setup_ioapic_dest(); | 1155 | setup_ioapic_dest(); |
diff --git a/arch/x86/kernel/syscall_32.c b/arch/x86/kernel/syscall_32.c new file mode 100644 index 000000000000..147fcd4941c4 --- /dev/null +++ b/arch/x86/kernel/syscall_32.c | |||
@@ -0,0 +1,25 @@ | |||
1 | /* System call table for i386. */ | ||
2 | |||
3 | #include <linux/linkage.h> | ||
4 | #include <linux/sys.h> | ||
5 | #include <linux/cache.h> | ||
6 | #include <asm/asm-offsets.h> | ||
7 | |||
8 | #define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void sym(void) ; | ||
9 | #include <asm/syscalls_32.h> | ||
10 | #undef __SYSCALL_I386 | ||
11 | |||
12 | #define __SYSCALL_I386(nr, sym, compat) [nr] = sym, | ||
13 | |||
14 | typedef asmlinkage void (*sys_call_ptr_t)(void); | ||
15 | |||
16 | extern asmlinkage void sys_ni_syscall(void); | ||
17 | |||
18 | const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { | ||
19 | /* | ||
20 | * Smells like a compiler bug -- it doesn't work | ||
21 | * when the & below is removed. | ||
22 | */ | ||
23 | [0 ... __NR_syscall_max] = &sys_ni_syscall, | ||
24 | #include <asm/syscalls_32.h> | ||
25 | }; | ||
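The double include is the usual X-macro trick: asm/syscalls_32.h is pulled in twice with different definitions of __SYSCALL_I386, first to declare the externs and then to emit the initializers. Each generated line presumably looks like this (numbering taken from the deleted syscall_table_32.S below):

    __SYSCALL_I386(0, sys_restart_syscall, sys_restart_syscall)
    __SYSCALL_I386(1, sys_exit, sys_exit)
    __SYSCALL_I386(3, sys_read, sys_read)

so the first pass expands `__SYSCALL_I386(3, sys_read, sys_read)` to `extern asmlinkage void sys_read(void);` and the second to `[3] = sys_read,` inside the array.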
diff --git a/arch/x86/kernel/syscall_64.c b/arch/x86/kernel/syscall_64.c index de87d6008295..7ac7943be02c 100644 --- a/arch/x86/kernel/syscall_64.c +++ b/arch/x86/kernel/syscall_64.c | |||
@@ -5,15 +5,11 @@ | |||
5 | #include <linux/cache.h> | 5 | #include <linux/cache.h> |
6 | #include <asm/asm-offsets.h> | 6 | #include <asm/asm-offsets.h> |
7 | 7 | ||
8 | #define __NO_STUBS | 8 | #define __SYSCALL_64(nr, sym, compat) extern asmlinkage void sym(void) ; |
9 | #include <asm/syscalls_64.h> | ||
10 | #undef __SYSCALL_64 | ||
9 | 11 | ||
10 | #define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ; | 12 | #define __SYSCALL_64(nr, sym, compat) [nr] = sym, |
11 | #undef _ASM_X86_UNISTD_64_H | ||
12 | #include <asm/unistd_64.h> | ||
13 | |||
14 | #undef __SYSCALL | ||
15 | #define __SYSCALL(nr, sym) [nr] = sym, | ||
16 | #undef _ASM_X86_UNISTD_64_H | ||
17 | 13 | ||
18 | typedef void (*sys_call_ptr_t)(void); | 14 | typedef void (*sys_call_ptr_t)(void); |
19 | 15 | ||
@@ -21,9 +17,9 @@ extern void sys_ni_syscall(void); | |||
21 | 17 | ||
22 | const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { | 18 | const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { |
23 | /* | 19 | /* |
24 | *Smells like a like a compiler bug -- it doesn't work | 20 | * Smells like a compiler bug -- it doesn't work |
25 | *when the & below is removed. | 21 | * when the & below is removed. |
26 | */ | 22 | */ |
27 | [0 ... __NR_syscall_max] = &sys_ni_syscall, | 23 | [0 ... __NR_syscall_max] = &sys_ni_syscall, |
28 | #include <asm/unistd_64.h> | 24 | #include <asm/syscalls_64.h> |
29 | }; | 25 | }; |
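The `[0 ... __NR_syscall_max] = &sys_ni_syscall` line uses GCC's designated range initializers; later designators from the included header override the earlier range, so every slot the table does not mention falls back to sys_ni_syscall. A self-contained illustration of the semantics:

    /* GNU C: a range designator filled first, then overridden per slot */
    int tbl[4] = { [0 ... 3] = -1, [2] = 7 };   /* yields {-1, -1, 7, -1} */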
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S deleted file mode 100644 index 9a0e31293920..000000000000 --- a/arch/x86/kernel/syscall_table_32.S +++ /dev/null | |||
@@ -1,350 +0,0 @@ | |||
1 | ENTRY(sys_call_table) | ||
2 | .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ | ||
3 | .long sys_exit | ||
4 | .long ptregs_fork | ||
5 | .long sys_read | ||
6 | .long sys_write | ||
7 | .long sys_open /* 5 */ | ||
8 | .long sys_close | ||
9 | .long sys_waitpid | ||
10 | .long sys_creat | ||
11 | .long sys_link | ||
12 | .long sys_unlink /* 10 */ | ||
13 | .long ptregs_execve | ||
14 | .long sys_chdir | ||
15 | .long sys_time | ||
16 | .long sys_mknod | ||
17 | .long sys_chmod /* 15 */ | ||
18 | .long sys_lchown16 | ||
19 | .long sys_ni_syscall /* old break syscall holder */ | ||
20 | .long sys_stat | ||
21 | .long sys_lseek | ||
22 | .long sys_getpid /* 20 */ | ||
23 | .long sys_mount | ||
24 | .long sys_oldumount | ||
25 | .long sys_setuid16 | ||
26 | .long sys_getuid16 | ||
27 | .long sys_stime /* 25 */ | ||
28 | .long sys_ptrace | ||
29 | .long sys_alarm | ||
30 | .long sys_fstat | ||
31 | .long sys_pause | ||
32 | .long sys_utime /* 30 */ | ||
33 | .long sys_ni_syscall /* old stty syscall holder */ | ||
34 | .long sys_ni_syscall /* old gtty syscall holder */ | ||
35 | .long sys_access | ||
36 | .long sys_nice | ||
37 | .long sys_ni_syscall /* 35 - old ftime syscall holder */ | ||
38 | .long sys_sync | ||
39 | .long sys_kill | ||
40 | .long sys_rename | ||
41 | .long sys_mkdir | ||
42 | .long sys_rmdir /* 40 */ | ||
43 | .long sys_dup | ||
44 | .long sys_pipe | ||
45 | .long sys_times | ||
46 | .long sys_ni_syscall /* old prof syscall holder */ | ||
47 | .long sys_brk /* 45 */ | ||
48 | .long sys_setgid16 | ||
49 | .long sys_getgid16 | ||
50 | .long sys_signal | ||
51 | .long sys_geteuid16 | ||
52 | .long sys_getegid16 /* 50 */ | ||
53 | .long sys_acct | ||
54 | .long sys_umount /* recycled never used phys() */ | ||
55 | .long sys_ni_syscall /* old lock syscall holder */ | ||
56 | .long sys_ioctl | ||
57 | .long sys_fcntl /* 55 */ | ||
58 | .long sys_ni_syscall /* old mpx syscall holder */ | ||
59 | .long sys_setpgid | ||
60 | .long sys_ni_syscall /* old ulimit syscall holder */ | ||
61 | .long sys_olduname | ||
62 | .long sys_umask /* 60 */ | ||
63 | .long sys_chroot | ||
64 | .long sys_ustat | ||
65 | .long sys_dup2 | ||
66 | .long sys_getppid | ||
67 | .long sys_getpgrp /* 65 */ | ||
68 | .long sys_setsid | ||
69 | .long sys_sigaction | ||
70 | .long sys_sgetmask | ||
71 | .long sys_ssetmask | ||
72 | .long sys_setreuid16 /* 70 */ | ||
73 | .long sys_setregid16 | ||
74 | .long sys_sigsuspend | ||
75 | .long sys_sigpending | ||
76 | .long sys_sethostname | ||
77 | .long sys_setrlimit /* 75 */ | ||
78 | .long sys_old_getrlimit | ||
79 | .long sys_getrusage | ||
80 | .long sys_gettimeofday | ||
81 | .long sys_settimeofday | ||
82 | .long sys_getgroups16 /* 80 */ | ||
83 | .long sys_setgroups16 | ||
84 | .long sys_old_select | ||
85 | .long sys_symlink | ||
86 | .long sys_lstat | ||
87 | .long sys_readlink /* 85 */ | ||
88 | .long sys_uselib | ||
89 | .long sys_swapon | ||
90 | .long sys_reboot | ||
91 | .long sys_old_readdir | ||
92 | .long sys_old_mmap /* 90 */ | ||
93 | .long sys_munmap | ||
94 | .long sys_truncate | ||
95 | .long sys_ftruncate | ||
96 | .long sys_fchmod | ||
97 | .long sys_fchown16 /* 95 */ | ||
98 | .long sys_getpriority | ||
99 | .long sys_setpriority | ||
100 | .long sys_ni_syscall /* old profil syscall holder */ | ||
101 | .long sys_statfs | ||
102 | .long sys_fstatfs /* 100 */ | ||
103 | .long sys_ioperm | ||
104 | .long sys_socketcall | ||
105 | .long sys_syslog | ||
106 | .long sys_setitimer | ||
107 | .long sys_getitimer /* 105 */ | ||
108 | .long sys_newstat | ||
109 | .long sys_newlstat | ||
110 | .long sys_newfstat | ||
111 | .long sys_uname | ||
112 | .long ptregs_iopl /* 110 */ | ||
113 | .long sys_vhangup | ||
114 | .long sys_ni_syscall /* old "idle" system call */ | ||
115 | .long ptregs_vm86old | ||
116 | .long sys_wait4 | ||
117 | .long sys_swapoff /* 115 */ | ||
118 | .long sys_sysinfo | ||
119 | .long sys_ipc | ||
120 | .long sys_fsync | ||
121 | .long ptregs_sigreturn | ||
122 | .long ptregs_clone /* 120 */ | ||
123 | .long sys_setdomainname | ||
124 | .long sys_newuname | ||
125 | .long sys_modify_ldt | ||
126 | .long sys_adjtimex | ||
127 | .long sys_mprotect /* 125 */ | ||
128 | .long sys_sigprocmask | ||
129 | .long sys_ni_syscall /* old "create_module" */ | ||
130 | .long sys_init_module | ||
131 | .long sys_delete_module | ||
132 | .long sys_ni_syscall /* 130: old "get_kernel_syms" */ | ||
133 | .long sys_quotactl | ||
134 | .long sys_getpgid | ||
135 | .long sys_fchdir | ||
136 | .long sys_bdflush | ||
137 | .long sys_sysfs /* 135 */ | ||
138 | .long sys_personality | ||
139 | .long sys_ni_syscall /* reserved for afs_syscall */ | ||
140 | .long sys_setfsuid16 | ||
141 | .long sys_setfsgid16 | ||
142 | .long sys_llseek /* 140 */ | ||
143 | .long sys_getdents | ||
144 | .long sys_select | ||
145 | .long sys_flock | ||
146 | .long sys_msync | ||
147 | .long sys_readv /* 145 */ | ||
148 | .long sys_writev | ||
149 | .long sys_getsid | ||
150 | .long sys_fdatasync | ||
151 | .long sys_sysctl | ||
152 | .long sys_mlock /* 150 */ | ||
153 | .long sys_munlock | ||
154 | .long sys_mlockall | ||
155 | .long sys_munlockall | ||
156 | .long sys_sched_setparam | ||
157 | .long sys_sched_getparam /* 155 */ | ||
158 | .long sys_sched_setscheduler | ||
159 | .long sys_sched_getscheduler | ||
160 | .long sys_sched_yield | ||
161 | .long sys_sched_get_priority_max | ||
162 | .long sys_sched_get_priority_min /* 160 */ | ||
163 | .long sys_sched_rr_get_interval | ||
164 | .long sys_nanosleep | ||
165 | .long sys_mremap | ||
166 | .long sys_setresuid16 | ||
167 | .long sys_getresuid16 /* 165 */ | ||
168 | .long ptregs_vm86 | ||
169 | .long sys_ni_syscall /* Old sys_query_module */ | ||
170 | .long sys_poll | ||
171 | .long sys_ni_syscall /* Old nfsservctl */ | ||
172 | .long sys_setresgid16 /* 170 */ | ||
173 | .long sys_getresgid16 | ||
174 | .long sys_prctl | ||
175 | .long ptregs_rt_sigreturn | ||
176 | .long sys_rt_sigaction | ||
177 | .long sys_rt_sigprocmask /* 175 */ | ||
178 | .long sys_rt_sigpending | ||
179 | .long sys_rt_sigtimedwait | ||
180 | .long sys_rt_sigqueueinfo | ||
181 | .long sys_rt_sigsuspend | ||
182 | .long sys_pread64 /* 180 */ | ||
183 | .long sys_pwrite64 | ||
184 | .long sys_chown16 | ||
185 | .long sys_getcwd | ||
186 | .long sys_capget | ||
187 | .long sys_capset /* 185 */ | ||
188 | .long ptregs_sigaltstack | ||
189 | .long sys_sendfile | ||
190 | .long sys_ni_syscall /* reserved for streams1 */ | ||
191 | .long sys_ni_syscall /* reserved for streams2 */ | ||
192 | .long ptregs_vfork /* 190 */ | ||
193 | .long sys_getrlimit | ||
194 | .long sys_mmap_pgoff | ||
195 | .long sys_truncate64 | ||
196 | .long sys_ftruncate64 | ||
197 | .long sys_stat64 /* 195 */ | ||
198 | .long sys_lstat64 | ||
199 | .long sys_fstat64 | ||
200 | .long sys_lchown | ||
201 | .long sys_getuid | ||
202 | .long sys_getgid /* 200 */ | ||
203 | .long sys_geteuid | ||
204 | .long sys_getegid | ||
205 | .long sys_setreuid | ||
206 | .long sys_setregid | ||
207 | .long sys_getgroups /* 205 */ | ||
208 | .long sys_setgroups | ||
209 | .long sys_fchown | ||
210 | .long sys_setresuid | ||
211 | .long sys_getresuid | ||
212 | .long sys_setresgid /* 210 */ | ||
213 | .long sys_getresgid | ||
214 | .long sys_chown | ||
215 | .long sys_setuid | ||
216 | .long sys_setgid | ||
217 | .long sys_setfsuid /* 215 */ | ||
218 | .long sys_setfsgid | ||
219 | .long sys_pivot_root | ||
220 | .long sys_mincore | ||
221 | .long sys_madvise | ||
222 | .long sys_getdents64 /* 220 */ | ||
223 | .long sys_fcntl64 | ||
224 | .long sys_ni_syscall /* reserved for TUX */ | ||
225 | .long sys_ni_syscall | ||
226 | .long sys_gettid | ||
227 | .long sys_readahead /* 225 */ | ||
228 | .long sys_setxattr | ||
229 | .long sys_lsetxattr | ||
230 | .long sys_fsetxattr | ||
231 | .long sys_getxattr | ||
232 | .long sys_lgetxattr /* 230 */ | ||
233 | .long sys_fgetxattr | ||
234 | .long sys_listxattr | ||
235 | .long sys_llistxattr | ||
236 | .long sys_flistxattr | ||
237 | .long sys_removexattr /* 235 */ | ||
238 | .long sys_lremovexattr | ||
239 | .long sys_fremovexattr | ||
240 | .long sys_tkill | ||
241 | .long sys_sendfile64 | ||
242 | .long sys_futex /* 240 */ | ||
243 | .long sys_sched_setaffinity | ||
244 | .long sys_sched_getaffinity | ||
245 | .long sys_set_thread_area | ||
246 | .long sys_get_thread_area | ||
247 | .long sys_io_setup /* 245 */ | ||
248 | .long sys_io_destroy | ||
249 | .long sys_io_getevents | ||
250 | .long sys_io_submit | ||
251 | .long sys_io_cancel | ||
252 | .long sys_fadvise64 /* 250 */ | ||
253 | .long sys_ni_syscall | ||
254 | .long sys_exit_group | ||
255 | .long sys_lookup_dcookie | ||
256 | .long sys_epoll_create | ||
257 | .long sys_epoll_ctl /* 255 */ | ||
258 | .long sys_epoll_wait | ||
259 | .long sys_remap_file_pages | ||
260 | .long sys_set_tid_address | ||
261 | .long sys_timer_create | ||
262 | .long sys_timer_settime /* 260 */ | ||
263 | .long sys_timer_gettime | ||
264 | .long sys_timer_getoverrun | ||
265 | .long sys_timer_delete | ||
266 | .long sys_clock_settime | ||
267 | .long sys_clock_gettime /* 265 */ | ||
268 | .long sys_clock_getres | ||
269 | .long sys_clock_nanosleep | ||
270 | .long sys_statfs64 | ||
271 | .long sys_fstatfs64 | ||
272 | .long sys_tgkill /* 270 */ | ||
273 | .long sys_utimes | ||
274 | .long sys_fadvise64_64 | ||
275 | .long sys_ni_syscall /* sys_vserver */ | ||
276 | .long sys_mbind | ||
277 | .long sys_get_mempolicy | ||
278 | .long sys_set_mempolicy | ||
279 | .long sys_mq_open | ||
280 | .long sys_mq_unlink | ||
281 | .long sys_mq_timedsend | ||
282 | .long sys_mq_timedreceive /* 280 */ | ||
283 | .long sys_mq_notify | ||
284 | .long sys_mq_getsetattr | ||
285 | .long sys_kexec_load | ||
286 | .long sys_waitid | ||
287 | .long sys_ni_syscall /* 285 */ /* available */ | ||
288 | .long sys_add_key | ||
289 | .long sys_request_key | ||
290 | .long sys_keyctl | ||
291 | .long sys_ioprio_set | ||
292 | .long sys_ioprio_get /* 290 */ | ||
293 | .long sys_inotify_init | ||
294 | .long sys_inotify_add_watch | ||
295 | .long sys_inotify_rm_watch | ||
296 | .long sys_migrate_pages | ||
297 | .long sys_openat /* 295 */ | ||
298 | .long sys_mkdirat | ||
299 | .long sys_mknodat | ||
300 | .long sys_fchownat | ||
301 | .long sys_futimesat | ||
302 | .long sys_fstatat64 /* 300 */ | ||
303 | .long sys_unlinkat | ||
304 | .long sys_renameat | ||
305 | .long sys_linkat | ||
306 | .long sys_symlinkat | ||
307 | .long sys_readlinkat /* 305 */ | ||
308 | .long sys_fchmodat | ||
309 | .long sys_faccessat | ||
310 | .long sys_pselect6 | ||
311 | .long sys_ppoll | ||
312 | .long sys_unshare /* 310 */ | ||
313 | .long sys_set_robust_list | ||
314 | .long sys_get_robust_list | ||
315 | .long sys_splice | ||
316 | .long sys_sync_file_range | ||
317 | .long sys_tee /* 315 */ | ||
318 | .long sys_vmsplice | ||
319 | .long sys_move_pages | ||
320 | .long sys_getcpu | ||
321 | .long sys_epoll_pwait | ||
322 | .long sys_utimensat /* 320 */ | ||
323 | .long sys_signalfd | ||
324 | .long sys_timerfd_create | ||
325 | .long sys_eventfd | ||
326 | .long sys_fallocate | ||
327 | .long sys_timerfd_settime /* 325 */ | ||
328 | .long sys_timerfd_gettime | ||
329 | .long sys_signalfd4 | ||
330 | .long sys_eventfd2 | ||
331 | .long sys_epoll_create1 | ||
332 | .long sys_dup3 /* 330 */ | ||
333 | .long sys_pipe2 | ||
334 | .long sys_inotify_init1 | ||
335 | .long sys_preadv | ||
336 | .long sys_pwritev | ||
337 | .long sys_rt_tgsigqueueinfo /* 335 */ | ||
338 | .long sys_perf_event_open | ||
339 | .long sys_recvmmsg | ||
340 | .long sys_fanotify_init | ||
341 | .long sys_fanotify_mark | ||
342 | .long sys_prlimit64 /* 340 */ | ||
343 | .long sys_name_to_handle_at | ||
344 | .long sys_open_by_handle_at | ||
345 | .long sys_clock_adjtime | ||
346 | .long sys_syncfs | ||
347 | .long sys_sendmmsg /* 345 */ | ||
348 | .long sys_setns | ||
349 | .long sys_process_vm_readv | ||
350 | .long sys_process_vm_writev | ||
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c index a91ae7709b49..a73b61055ad6 100644 --- a/arch/x86/kernel/trampoline.c +++ b/arch/x86/kernel/trampoline.c | |||
@@ -14,11 +14,11 @@ void __init setup_trampolines(void) | |||
14 | 14 | ||
15 | /* Has to be in very low memory so we can execute real-mode AP code. */ | 15 | /* Has to be in very low memory so we can execute real-mode AP code. */ |
16 | mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE); | 16 | mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE); |
17 | if (mem == MEMBLOCK_ERROR) | 17 | if (!mem) |
18 | panic("Cannot allocate trampoline\n"); | 18 | panic("Cannot allocate trampoline\n"); |
19 | 19 | ||
20 | x86_trampoline_base = __va(mem); | 20 | x86_trampoline_base = __va(mem); |
21 | memblock_x86_reserve_range(mem, mem + size, "TRAMPOLINE"); | 21 | memblock_reserve(mem, size); |
22 | 22 | ||
23 | printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n", | 23 | printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n", |
24 | x86_trampoline_base, (unsigned long long)mem, size); | 24 | x86_trampoline_base, (unsigned long long)mem, size); |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index a8e3eb83466c..4bbe04d96744 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -306,19 +306,20 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) | |||
306 | == NOTIFY_STOP) | 306 | == NOTIFY_STOP) |
307 | return; | 307 | return; |
308 | #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ | 308 | #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ |
309 | #ifdef CONFIG_KPROBES | 309 | |
310 | if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) | 310 | if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) |
311 | == NOTIFY_STOP) | 311 | == NOTIFY_STOP) |
312 | return; | 312 | return; |
313 | #else | ||
314 | if (notify_die(DIE_TRAP, "int3", regs, error_code, 3, SIGTRAP) | ||
315 | == NOTIFY_STOP) | ||
316 | return; | ||
317 | #endif | ||
318 | 313 | ||
314 | /* | ||
315 | * Let others (NMI) know that the debug stack is in use | ||
316 | * as we may switch to the interrupt stack. | ||
317 | */ | ||
318 | debug_stack_usage_inc(); | ||
319 | preempt_conditional_sti(regs); | 319 | preempt_conditional_sti(regs); |
320 | do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); | 320 | do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); |
321 | preempt_conditional_cli(regs); | 321 | preempt_conditional_cli(regs); |
322 | debug_stack_usage_dec(); | ||
322 | } | 323 | } |
323 | 324 | ||
324 | #ifdef CONFIG_X86_64 | 325 | #ifdef CONFIG_X86_64 |
@@ -411,6 +412,12 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) | |||
411 | SIGTRAP) == NOTIFY_STOP) | 412 | SIGTRAP) == NOTIFY_STOP) |
412 | return; | 413 | return; |
413 | 414 | ||
415 | /* | ||
416 | * Let others (NMI) know that the debug stack is in use | ||
417 | * as we may switch to the interrupt stack. | ||
418 | */ | ||
419 | debug_stack_usage_inc(); | ||
420 | |||
414 | /* It's safe to allow irq's after DR6 has been saved */ | 421 | /* It's safe to allow irq's after DR6 has been saved */ |
415 | preempt_conditional_sti(regs); | 422 | preempt_conditional_sti(regs); |
416 | 423 | ||
@@ -418,6 +425,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) | |||
418 | handle_vm86_trap((struct kernel_vm86_regs *) regs, | 425 | handle_vm86_trap((struct kernel_vm86_regs *) regs, |
419 | error_code, 1); | 426 | error_code, 1); |
420 | preempt_conditional_cli(regs); | 427 | preempt_conditional_cli(regs); |
428 | debug_stack_usage_dec(); | ||
421 | return; | 429 | return; |
422 | } | 430 | } |
423 | 431 | ||
@@ -437,6 +445,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) | |||
437 | if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp) | 445 | if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp) |
438 | send_sigtrap(tsk, regs, error_code, si_code); | 446 | send_sigtrap(tsk, regs, error_code, si_code); |
439 | preempt_conditional_cli(regs); | 447 | preempt_conditional_cli(regs); |
448 | debug_stack_usage_dec(); | ||
440 | 449 | ||
441 | return; | 450 | return; |
442 | } | 451 | } |
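Both the breakpoint and debug traps now bracket their work with debug_stack_usage_inc()/dec(), telling the NMI code that the per-CPU debug stack may be live so a nested NMI can move to a safe stack. The helpers are not in this diff; presumably they are just a per-CPU counter, along the lines of:

    /* sketch: assumed helpers, not shown in this diff */
    DECLARE_PER_CPU(u32, debug_stack_usage);

    static inline void debug_stack_usage_inc(void)
    {
            __get_cpu_var(debug_stack_usage)++;
    }

    static inline void debug_stack_usage_dec(void)
    {
            __get_cpu_var(debug_stack_usage)--;
    }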
@@ -562,41 +571,18 @@ asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void) | |||
562 | } | 571 | } |
563 | 572 | ||
564 | /* | 573 | /* |
565 | * __math_state_restore assumes that cr0.TS is already clear and the | ||
566 | * fpu state is all ready for use. Used during context switch. | ||
567 | */ | ||
568 | void __math_state_restore(void) | ||
569 | { | ||
570 | struct thread_info *thread = current_thread_info(); | ||
571 | struct task_struct *tsk = thread->task; | ||
572 | |||
573 | /* | ||
574 | * Paranoid restore. send a SIGSEGV if we fail to restore the state. | ||
575 | */ | ||
576 | if (unlikely(restore_fpu_checking(tsk))) { | ||
577 | stts(); | ||
578 | force_sig(SIGSEGV, tsk); | ||
579 | return; | ||
580 | } | ||
581 | |||
582 | thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ | ||
583 | tsk->fpu_counter++; | ||
584 | } | ||
585 | |||
586 | /* | ||
587 | * 'math_state_restore()' saves the current math information in the | 574 | * 'math_state_restore()' saves the current math information in the |
588 | * old math state array, and gets the new ones from the current task | 575 | * old math state array, and gets the new ones from the current task |
589 | * | 576 | * |
590 | * Careful.. There are problems with IBM-designed IRQ13 behaviour. | 577 | * Careful.. There are problems with IBM-designed IRQ13 behaviour. |
591 | * Don't touch unless you *really* know how it works. | 578 | * Don't touch unless you *really* know how it works. |
592 | * | 579 | * |
593 | * Must be called with kernel preemption disabled (in this case, | 580 | * Must be called with kernel preemption disabled (eg with local |
594 | * local interrupts are disabled at the call-site in entry.S). | 581 | * interrupts disabled, as in the case of do_device_not_available). |
595 | */ | 582 | */ |
596 | asmlinkage void math_state_restore(void) | 583 | void math_state_restore(void) |
597 | { | 584 | { |
598 | struct thread_info *thread = current_thread_info(); | 585 | struct task_struct *tsk = current; |
599 | struct task_struct *tsk = thread->task; | ||
600 | 586 | ||
601 | if (!tsk_used_math(tsk)) { | 587 | if (!tsk_used_math(tsk)) { |
602 | local_irq_enable(); | 588 | local_irq_enable(); |
@@ -613,9 +599,17 @@ asmlinkage void math_state_restore(void) | |||
613 | local_irq_disable(); | 599 | local_irq_disable(); |
614 | } | 600 | } |
615 | 601 | ||
616 | clts(); /* Allow maths ops (or we recurse) */ | 602 | __thread_fpu_begin(tsk); |
603 | /* | ||
604 | * Paranoid restore. Send a SIGSEGV if we fail to restore the state. | ||
605 | */ | ||
606 | if (unlikely(restore_fpu_checking(tsk))) { | ||
607 | __thread_fpu_end(tsk); | ||
608 | force_sig(SIGSEGV, tsk); | ||
609 | return; | ||
610 | } | ||
617 | 611 | ||
618 | __math_state_restore(); | 612 | tsk->fpu_counter++; |
619 | } | 613 | } |
620 | EXPORT_SYMBOL_GPL(math_state_restore); | 614 | EXPORT_SYMBOL_GPL(math_state_restore); |
621 | 615 | ||
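math_state_restore() now folds the old __math_state_restore() body in and swaps the bare clts()/stts() calls for __thread_fpu_begin()/__thread_fpu_end(). Judging from what they replace, those helpers presumably pair the CR0.TS flip with ownership bookkeeping (the helper-internal names here are assumptions):

    static inline void __thread_fpu_begin(struct task_struct *tsk)
    {
            __thread_set_has_fpu(tsk);  /* record tsk as the FPU owner */
            clts();                     /* clear CR0.TS: FPU ops allowed */
    }

    static inline void __thread_fpu_end(struct task_struct *tsk)
    {
            __thread_clear_has_fpu(tsk);
            stts();                     /* set CR0.TS: next FPU op traps */
    }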
@@ -723,4 +717,10 @@ void __init trap_init(void) | |||
723 | cpu_init(); | 717 | cpu_init(); |
724 | 718 | ||
725 | x86_init.irqs.trap_init(); | 719 | x86_init.irqs.trap_init(); |
720 | |||
721 | #ifdef CONFIG_X86_64 | ||
722 | memcpy(&nmi_idt_table, &idt_table, IDT_ENTRIES * 16); | ||
723 | set_nmi_gate(1, &debug); | ||
724 | set_nmi_gate(3, &int3); | ||
725 | #endif | ||
726 | } | 726 | } |
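The memcpy length works out because a 64-bit IDT gate descriptor is 16 bytes; expressed with the type instead of the literal, the copy is equivalent to:

    /* equivalent spelling of the copy above (gate_desc is 16 bytes on x86-64) */
    BUILD_BUG_ON(sizeof(gate_desc) != 16);
    memcpy(&nmi_idt_table, &idt_table, IDT_ENTRIES * sizeof(gate_desc));

After the copy, set_nmi_gate() points vectors 1 (#DB) and 3 (#BP) in the NMI-time IDT at the same debug/int3 handlers.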
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index db483369f10b..a62c201c97ec 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -35,7 +35,7 @@ static int __read_mostly tsc_unstable; | |||
35 | erroneous rdtsc usage on !cpu_has_tsc processors */ | 35 | erroneous rdtsc usage on !cpu_has_tsc processors */ |
36 | static int __read_mostly tsc_disabled = -1; | 36 | static int __read_mostly tsc_disabled = -1; |
37 | 37 | ||
38 | static int tsc_clocksource_reliable; | 38 | int tsc_clocksource_reliable; |
39 | /* | 39 | /* |
40 | * Scheduler clock - returns current time in nanosec units. | 40 | * Scheduler clock - returns current time in nanosec units. |
41 | */ | 41 | */ |
@@ -178,11 +178,11 @@ static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2) | |||
178 | } | 178 | } |
179 | 179 | ||
180 | #define CAL_MS 10 | 180 | #define CAL_MS 10 |
181 | #define CAL_LATCH (CLOCK_TICK_RATE / (1000 / CAL_MS)) | 181 | #define CAL_LATCH (PIT_TICK_RATE / (1000 / CAL_MS)) |
182 | #define CAL_PIT_LOOPS 1000 | 182 | #define CAL_PIT_LOOPS 1000 |
183 | 183 | ||
184 | #define CAL2_MS 50 | 184 | #define CAL2_MS 50 |
185 | #define CAL2_LATCH (CLOCK_TICK_RATE / (1000 / CAL2_MS)) | 185 | #define CAL2_LATCH (PIT_TICK_RATE / (1000 / CAL2_MS)) |
186 | #define CAL2_PIT_LOOPS 5000 | 186 | #define CAL2_PIT_LOOPS 5000 |
187 | 187 | ||
188 | 188 | ||
@@ -290,14 +290,15 @@ static inline int pit_verify_msb(unsigned char val) | |||
290 | static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap) | 290 | static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap) |
291 | { | 291 | { |
292 | int count; | 292 | int count; |
293 | u64 tsc = 0; | 293 | u64 tsc = 0, prev_tsc = 0; |
294 | 294 | ||
295 | for (count = 0; count < 50000; count++) { | 295 | for (count = 0; count < 50000; count++) { |
296 | if (!pit_verify_msb(val)) | 296 | if (!pit_verify_msb(val)) |
297 | break; | 297 | break; |
298 | prev_tsc = tsc; | ||
298 | tsc = get_cycles(); | 299 | tsc = get_cycles(); |
299 | } | 300 | } |
300 | *deltap = get_cycles() - tsc; | 301 | *deltap = get_cycles() - prev_tsc; |
301 | *tscp = tsc; | 302 | *tscp = tsc; |
302 | 303 | ||
303 | /* | 304 | /* |
@@ -311,9 +312,9 @@ static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *de | |||
311 | * How many MSB values do we want to see? We aim for | 312 | * How many MSB values do we want to see? We aim for |
312 | * a maximum error rate of 500ppm (in practice the | 313 | * a maximum error rate of 500ppm (in practice the |
313 | * real error is much smaller), but refuse to spend | 314 | * real error is much smaller), but refuse to spend |
314 | * more than 25ms on it. | 315 | * more than 50ms on it. |
315 | */ | 316 | */ |
316 | #define MAX_QUICK_PIT_MS 25 | 317 | #define MAX_QUICK_PIT_MS 50 |
317 | #define MAX_QUICK_PIT_ITERATIONS (MAX_QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256) | 318 | #define MAX_QUICK_PIT_ITERATIONS (MAX_QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256) |
318 | 319 | ||
319 | static unsigned long quick_pit_calibrate(void) | 320 | static unsigned long quick_pit_calibrate(void) |
@@ -383,15 +384,12 @@ success: | |||
383 | * | 384 | * |
384 | * As a result, we can depend on there not being | 385 | * As a result, we can depend on there not being |
385 | * any odd delays anywhere, and the TSC reads are | 386 | * any odd delays anywhere, and the TSC reads are |
386 | * reliable (within the error). We also adjust the | 387 | * reliable (within the error). |
387 | * delta to the middle of the error bars, just | ||
388 | * because it looks nicer. | ||
389 | * | 388 | * |
390 | * kHz = ticks / time-in-seconds / 1000; | 389 | * kHz = ticks / time-in-seconds / 1000; |
391 | * kHz = (t2 - t1) / (I * 256 / PIT_TICK_RATE) / 1000 | 390 | * kHz = (t2 - t1) / (I * 256 / PIT_TICK_RATE) / 1000 |
392 | * kHz = ((t2 - t1) * PIT_TICK_RATE) / (I * 256 * 1000) | 391 | * kHz = ((t2 - t1) * PIT_TICK_RATE) / (I * 256 * 1000) |
393 | */ | 392 | */ |
394 | delta += (long)(d2 - d1)/2; | ||
395 | delta *= PIT_TICK_RATE; | 393 | delta *= PIT_TICK_RATE; |
396 | do_div(delta, i*256*1000); | 394 | do_div(delta, i*256*1000); |
397 | printk("Fast TSC calibration using PIT\n"); | 395 | printk("Fast TSC calibration using PIT\n"); |
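With the cosmetic mid-error-bar adjustment dropped, the frequency follows directly from the formula in the comment. A worked example with illustrative numbers: PIT_TICK_RATE is 1193182 Hz, so i = 117 MSB steps span 117 * 256 / 1193182 s, about 25.1 ms; if delta = t2 - t1 = 75,000,000 TSC cycles over that window, then

    kHz = 75000000 * 1193182 / (117 * 256 * 1000)  /* ~= 2987737 */

i.e. a ~2.99 GHz TSC, matching 75e6 cycles / 25.1 ms computed directly.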
@@ -995,3 +993,23 @@ void __init tsc_init(void) | |||
995 | check_system_tsc_reliable(); | 993 | check_system_tsc_reliable(); |
996 | } | 994 | } |
997 | 995 | ||
996 | #ifdef CONFIG_SMP | ||
997 | /* | ||
998 | * If we have a constant TSC and are using the TSC for the delay loop, | ||
999 | * we can skip clock calibration if another cpu in the same socket has already | ||
1000 | * been calibrated. This assumes that CONSTANT_TSC applies to all | ||
1001 | * cpus in the socket - this should be a safe assumption. | ||
1002 | */ | ||
1003 | unsigned long __cpuinit calibrate_delay_is_known(void) | ||
1004 | { | ||
1005 | int i, cpu = smp_processor_id(); | ||
1006 | |||
1007 | if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC)) | ||
1008 | return 0; | ||
1009 | |||
1010 | for_each_online_cpu(i) | ||
1011 | if (cpu_data(i).phys_proc_id == cpu_data(cpu).phys_proc_id) | ||
1012 | return cpu_data(i).loops_per_jiffy; | ||
1013 | return 0; | ||
1014 | } | ||
1015 | #endif | ||
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index 0aa5fed8b9e6..9eba29b46cb7 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c | |||
@@ -113,7 +113,7 @@ void __cpuinit check_tsc_sync_source(int cpu) | |||
113 | if (unsynchronized_tsc()) | 113 | if (unsynchronized_tsc()) |
114 | return; | 114 | return; |
115 | 115 | ||
116 | if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) { | 116 | if (tsc_clocksource_reliable) { |
117 | if (cpu == (nr_cpu_ids-1) || system_state != SYSTEM_BOOTING) | 117 | if (cpu == (nr_cpu_ids-1) || system_state != SYSTEM_BOOTING) |
118 | pr_info( | 118 | pr_info( |
119 | "Skipped synchronization checks as TSC is reliable.\n"); | 119 | "Skipped synchronization checks as TSC is reliable.\n"); |
@@ -172,7 +172,7 @@ void __cpuinit check_tsc_sync_target(void) | |||
172 | { | 172 | { |
173 | int cpus = 2; | 173 | int cpus = 2; |
174 | 174 | ||
175 | if (unsynchronized_tsc() || boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) | 175 | if (unsynchronized_tsc() || tsc_clocksource_reliable) |
176 | return; | 176 | return; |
177 | 177 | ||
178 | /* | 178 | /* |
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 863f8753ab0a..b466cab5ba15 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c | |||
@@ -335,9 +335,11 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk | |||
335 | if (info->flags & VM86_SCREEN_BITMAP) | 335 | if (info->flags & VM86_SCREEN_BITMAP) |
336 | mark_screen_rdonly(tsk->mm); | 336 | mark_screen_rdonly(tsk->mm); |
337 | 337 | ||
339 | /* call audit_syscall_exit since we do not exit via the normal paths */ | 338 | /* call __audit_syscall_exit since we do not exit via the normal paths */ |
339 | #ifdef CONFIG_AUDITSYSCALL | ||
339 | if (unlikely(current->audit_context)) | 340 | if (unlikely(current->audit_context)) |
340 | audit_syscall_exit(AUDITSC_RESULT(0), 0); | 341 | __audit_syscall_exit(1, 0); |
342 | #endif | ||
341 | 343 | ||
342 | __asm__ __volatile__( | 344 | __asm__ __volatile__( |
343 | "movl %0,%%esp\n\t" | 345 | "movl %0,%%esp\n\t" |
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index e4d4a22e8b94..b07ba9393564 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c | |||
@@ -57,7 +57,7 @@ DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) = | |||
57 | .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock), | 57 | .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock), |
58 | }; | 58 | }; |
59 | 59 | ||
60 | static enum { EMULATE, NATIVE, NONE } vsyscall_mode = NATIVE; | 60 | static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE; |
61 | 61 | ||
62 | static int __init vsyscall_setup(char *str) | 62 | static int __init vsyscall_setup(char *str) |
63 | { | 63 | { |
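The default vsyscall handling flips from NATIVE to EMULATE; vsyscall_setup() still honors the boot-time override, so the mode remains selectable on the kernel command line:

    vsyscall=native    # real executable vsyscall page (fixed-address gadgets)
    vsyscall=emulate   # new default: fault on the page and emulate the call
    vsyscall=none      # no vsyscall compatibility at all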
@@ -140,11 +140,40 @@ static int addr_to_vsyscall_nr(unsigned long addr) | |||
140 | return nr; | 140 | return nr; |
141 | } | 141 | } |
142 | 142 | ||
143 | static bool write_ok_or_segv(unsigned long ptr, size_t size) | ||
144 | { | ||
145 | /* | ||
146 | * XXX: if access_ok, get_user, and put_user handled | ||
147 | * sig_on_uaccess_error, this could go away. | ||
148 | */ | ||
149 | |||
150 | if (!access_ok(VERIFY_WRITE, (void __user *)ptr, size)) { | ||
151 | siginfo_t info; | ||
152 | struct thread_struct *thread = ¤t->thread; | ||
153 | |||
154 | thread->error_code = 6; /* user fault, no page, write */ | ||
155 | thread->cr2 = ptr; | ||
156 | thread->trap_no = 14; | ||
157 | |||
158 | memset(&info, 0, sizeof(info)); | ||
159 | info.si_signo = SIGSEGV; | ||
160 | info.si_errno = 0; | ||
161 | info.si_code = SEGV_MAPERR; | ||
162 | info.si_addr = (void __user *)ptr; | ||
163 | |||
164 | force_sig_info(SIGSEGV, &info, current); | ||
165 | return false; | ||
166 | } else { | ||
167 | return true; | ||
168 | } | ||
169 | } | ||
170 | |||
143 | bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) | 171 | bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) |
144 | { | 172 | { |
145 | struct task_struct *tsk; | 173 | struct task_struct *tsk; |
146 | unsigned long caller; | 174 | unsigned long caller; |
147 | int vsyscall_nr; | 175 | int vsyscall_nr; |
176 | int prev_sig_on_uaccess_error; | ||
148 | long ret; | 177 | long ret; |
149 | 178 | ||
150 | /* | 179 | /* |
@@ -180,35 +209,65 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) | |||
180 | if (seccomp_mode(&tsk->seccomp)) | 209 | if (seccomp_mode(&tsk->seccomp)) |
181 | do_exit(SIGKILL); | 210 | do_exit(SIGKILL); |
182 | 211 | ||
212 | /* | ||
213 | * With a real vsyscall, page faults cause SIGSEGV. We want to | ||
214 | * preserve that behavior to make writing exploits harder. | ||
215 | */ | ||
216 | prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error; | ||
217 | current_thread_info()->sig_on_uaccess_error = 1; | ||
218 | |||
219 | /* | ||
220 | * 0 is a valid user pointer (in the access_ok sense) on 32-bit and | ||
221 | * 64-bit, so we don't need to special-case it here. For all the | ||
222 | * vsyscalls, 0 means "don't write anything" not "write it at | ||
223 | * address 0". | ||
224 | */ | ||
225 | ret = -EFAULT; | ||
183 | switch (vsyscall_nr) { | 226 | switch (vsyscall_nr) { |
184 | case 0: | 227 | case 0: |
228 | if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) || | ||
229 | !write_ok_or_segv(regs->si, sizeof(struct timezone))) | ||
230 | break; | ||
231 | |||
185 | ret = sys_gettimeofday( | 232 | ret = sys_gettimeofday( |
186 | (struct timeval __user *)regs->di, | 233 | (struct timeval __user *)regs->di, |
187 | (struct timezone __user *)regs->si); | 234 | (struct timezone __user *)regs->si); |
188 | break; | 235 | break; |
189 | 236 | ||
190 | case 1: | 237 | case 1: |
238 | if (!write_ok_or_segv(regs->di, sizeof(time_t))) | ||
239 | break; | ||
240 | |||
191 | ret = sys_time((time_t __user *)regs->di); | 241 | ret = sys_time((time_t __user *)regs->di); |
192 | break; | 242 | break; |
193 | 243 | ||
194 | case 2: | 244 | case 2: |
245 | if (!write_ok_or_segv(regs->di, sizeof(unsigned)) || | ||
246 | !write_ok_or_segv(regs->si, sizeof(unsigned))) | ||
247 | break; | ||
248 | |||
195 | ret = sys_getcpu((unsigned __user *)regs->di, | 249 | ret = sys_getcpu((unsigned __user *)regs->di, |
196 | (unsigned __user *)regs->si, | 250 | (unsigned __user *)regs->si, |
197 | 0); | 251 | 0); |
198 | break; | 252 | break; |
199 | } | 253 | } |
200 | 254 | ||
255 | current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error; | ||
256 | |||
201 | if (ret == -EFAULT) { | 257 | if (ret == -EFAULT) { |
202 | /* | 258 | /* Bad news -- userspace fed a bad pointer to a vsyscall. */ |
203 | * Bad news -- userspace fed a bad pointer to a vsyscall. | ||
204 | * | ||
205 | * With a real vsyscall, that would have caused SIGSEGV. | ||
206 | * To make writing reliable exploits using the emulated | ||
207 | * vsyscalls harder, generate SIGSEGV here as well. | ||
208 | */ | ||
209 | warn_bad_vsyscall(KERN_INFO, regs, | 259 | warn_bad_vsyscall(KERN_INFO, regs, |
210 | "vsyscall fault (exploit attempt?)"); | 260 | "vsyscall fault (exploit attempt?)"); |
211 | goto sigsegv; | 261 | |
262 | /* | ||
263 | * If we failed to generate a signal for any reason, | ||
264 | * generate one here. (This should be impossible.) | ||
265 | */ | ||
266 | if (WARN_ON_ONCE(!sigismember(&tsk->pending.signal, SIGBUS) && | ||
267 | !sigismember(&tsk->pending.signal, SIGSEGV))) | ||
268 | goto sigsegv; | ||
269 | |||
270 | return true; /* Don't emulate the ret. */ | ||
212 | } | 271 | } |
213 | 272 | ||
214 | regs->ax = ret; | 273 | regs->ax = ret; |
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index c1d6cd549397..947a06ccc673 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c | |||
@@ -92,6 +92,7 @@ struct x86_init_ops x86_init __initdata = { | |||
92 | 92 | ||
93 | struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { | 93 | struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { |
94 | .setup_percpu_clockev = setup_secondary_APIC_clock, | 94 | .setup_percpu_clockev = setup_secondary_APIC_clock, |
95 | .fixup_cpu_id = x86_default_fixup_cpu_id, | ||
95 | }; | 96 | }; |
96 | 97 | ||
97 | static void default_nmi_init(void) { }; | 98 | static void default_nmi_init(void) { }; |
@@ -114,4 +115,5 @@ struct x86_msi_ops x86_msi = { | |||
114 | .setup_msi_irqs = native_setup_msi_irqs, | 115 | .setup_msi_irqs = native_setup_msi_irqs, |
115 | .teardown_msi_irq = native_teardown_msi_irq, | 116 | .teardown_msi_irq = native_teardown_msi_irq, |
116 | .teardown_msi_irqs = default_teardown_msi_irqs, | 117 | .teardown_msi_irqs = default_teardown_msi_irqs, |
118 | .restore_msi_irqs = default_restore_msi_irqs, | ||
117 | }; | 119 | }; |
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index a3911343976b..711091114119 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c | |||
@@ -47,7 +47,7 @@ void __sanitize_i387_state(struct task_struct *tsk) | |||
47 | if (!fx) | 47 | if (!fx) |
48 | return; | 48 | return; |
49 | 49 | ||
50 | BUG_ON(task_thread_info(tsk)->status & TS_USEDFPU); | 50 | BUG_ON(__thread_has_fpu(tsk)); |
51 | 51 | ||
52 | xstate_bv = tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv; | 52 | xstate_bv = tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv; |
53 | 53 | ||
@@ -168,7 +168,7 @@ int save_i387_xstate(void __user *buf) | |||
168 | if (!used_math()) | 168 | if (!used_math()) |
169 | return 0; | 169 | return 0; |
170 | 170 | ||
171 | if (task_thread_info(tsk)->status & TS_USEDFPU) { | 171 | if (user_has_fpu()) { |
172 | if (use_xsave()) | 172 | if (use_xsave()) |
173 | err = xsave_user(buf); | 173 | err = xsave_user(buf); |
174 | else | 174 | else |
@@ -176,8 +176,7 @@ int save_i387_xstate(void __user *buf) | |||
176 | 176 | ||
177 | if (err) | 177 | if (err) |
178 | return err; | 178 | return err; |
179 | task_thread_info(tsk)->status &= ~TS_USEDFPU; | 179 | user_fpu_end(); |
180 | stts(); | ||
181 | } else { | 180 | } else { |
182 | sanitize_i387_state(tsk); | 181 | sanitize_i387_state(tsk); |
183 | if (__copy_to_user(buf, &tsk->thread.fpu.state->fxsave, | 182 | if (__copy_to_user(buf, &tsk->thread.fpu.state->fxsave, |
@@ -292,10 +291,7 @@ int restore_i387_xstate(void __user *buf) | |||
292 | return err; | 291 | return err; |
293 | } | 292 | } |
294 | 293 | ||
295 | if (!(task_thread_info(current)->status & TS_USEDFPU)) { | 294 | user_fpu_begin(); |
296 | clts(); | ||
297 | task_thread_info(current)->status |= TS_USEDFPU; | ||
298 | } | ||
299 | if (use_xsave()) | 295 | if (use_xsave()) |
300 | err = restore_user_xstate(buf); | 296 | err = restore_user_xstate(buf); |
301 | else | 297 | else |
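The xsave.c changes replace open-coded TS_USEDFPU manipulation with the user_has_fpu()/user_fpu_begin()/user_fpu_end() family. Their bodies are not in this diff; inferred from the removed lines, they presumably wrap the __thread_fpu_* helpers with preemption protection:

    /* sketch inferred from the removed lines; details are assumptions */
    static inline void user_fpu_end(void)
    {
            preempt_disable();
            __thread_fpu_end(current);      /* drop ownership + stts() */
            preempt_enable();
    }

    static inline void user_fpu_begin(void)
    {
            preempt_disable();
            if (!user_has_fpu())
                    __thread_fpu_begin(current);  /* clts() + take ownership */
            preempt_enable();
    }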