diff options
Diffstat (limited to 'arch/x86/kernel')
30 files changed, 383 insertions, 164 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 0925676266bd..fedf32a8c3ec 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
| @@ -11,6 +11,8 @@ ifdef CONFIG_FUNCTION_TRACER | |||
| 11 | CFLAGS_REMOVE_tsc.o = -pg | 11 | CFLAGS_REMOVE_tsc.o = -pg |
| 12 | CFLAGS_REMOVE_rtc.o = -pg | 12 | CFLAGS_REMOVE_rtc.o = -pg |
| 13 | CFLAGS_REMOVE_paravirt-spinlocks.o = -pg | 13 | CFLAGS_REMOVE_paravirt-spinlocks.o = -pg |
| 14 | CFLAGS_REMOVE_pvclock.o = -pg | ||
| 15 | CFLAGS_REMOVE_kvmclock.o = -pg | ||
| 14 | CFLAGS_REMOVE_ftrace.o = -pg | 16 | CFLAGS_REMOVE_ftrace.o = -pg |
| 15 | CFLAGS_REMOVE_early_printk.o = -pg | 17 | CFLAGS_REMOVE_early_printk.o = -pg |
| 16 | endif | 18 | endif |
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index fb7a5f052e2b..fb16f17e59be 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c | |||
| @@ -61,7 +61,7 @@ struct cstate_entry { | |||
| 61 | unsigned int ecx; | 61 | unsigned int ecx; |
| 62 | } states[ACPI_PROCESSOR_MAX_POWER]; | 62 | } states[ACPI_PROCESSOR_MAX_POWER]; |
| 63 | }; | 63 | }; |
| 64 | static struct cstate_entry *cpu_cstate_entry; /* per CPU ptr */ | 64 | static struct cstate_entry __percpu *cpu_cstate_entry; /* per CPU ptr */ |
| 65 | 65 | ||
| 66 | static short mwait_supported[ACPI_PROCESSOR_MAX_POWER]; | 66 | static short mwait_supported[ACPI_PROCESSOR_MAX_POWER]; |
| 67 | 67 | ||
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index fa044e1e30a2..679b6450382b 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c | |||
| @@ -1953,6 +1953,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, | |||
| 1953 | size_t size, | 1953 | size_t size, |
| 1954 | int dir) | 1954 | int dir) |
| 1955 | { | 1955 | { |
| 1956 | dma_addr_t flush_addr; | ||
| 1956 | dma_addr_t i, start; | 1957 | dma_addr_t i, start; |
| 1957 | unsigned int pages; | 1958 | unsigned int pages; |
| 1958 | 1959 | ||
| @@ -1960,6 +1961,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, | |||
| 1960 | (dma_addr + size > dma_dom->aperture_size)) | 1961 | (dma_addr + size > dma_dom->aperture_size)) |
| 1961 | return; | 1962 | return; |
| 1962 | 1963 | ||
| 1964 | flush_addr = dma_addr; | ||
| 1963 | pages = iommu_num_pages(dma_addr, size, PAGE_SIZE); | 1965 | pages = iommu_num_pages(dma_addr, size, PAGE_SIZE); |
| 1964 | dma_addr &= PAGE_MASK; | 1966 | dma_addr &= PAGE_MASK; |
| 1965 | start = dma_addr; | 1967 | start = dma_addr; |
| @@ -1974,7 +1976,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, | |||
| 1974 | dma_ops_free_addresses(dma_dom, dma_addr, pages); | 1976 | dma_ops_free_addresses(dma_dom, dma_addr, pages); |
| 1975 | 1977 | ||
| 1976 | if (amd_iommu_unmap_flush || dma_dom->need_flush) { | 1978 | if (amd_iommu_unmap_flush || dma_dom->need_flush) { |
| 1977 | iommu_flush_pages(&dma_dom->domain, dma_addr, size); | 1979 | iommu_flush_pages(&dma_dom->domain, flush_addr, size); |
| 1978 | dma_dom->need_flush = false; | 1980 | dma_dom->need_flush = false; |
| 1979 | } | 1981 | } |
| 1980 | } | 1982 | } |
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 3cc63e2b8dd4..5a170cbbbed8 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c | |||
| @@ -632,6 +632,13 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu) | |||
| 632 | iommu->last_device = calc_devid(MMIO_GET_BUS(range), | 632 | iommu->last_device = calc_devid(MMIO_GET_BUS(range), |
| 633 | MMIO_GET_LD(range)); | 633 | MMIO_GET_LD(range)); |
| 634 | iommu->evt_msi_num = MMIO_MSI_NUM(misc); | 634 | iommu->evt_msi_num = MMIO_MSI_NUM(misc); |
| 635 | |||
| 636 | if (is_rd890_iommu(iommu->dev)) { | ||
| 637 | pci_read_config_dword(iommu->dev, 0xf0, &iommu->cache_cfg[0]); | ||
| 638 | pci_read_config_dword(iommu->dev, 0xf4, &iommu->cache_cfg[1]); | ||
| 639 | pci_read_config_dword(iommu->dev, 0xf8, &iommu->cache_cfg[2]); | ||
| 640 | pci_read_config_dword(iommu->dev, 0xfc, &iommu->cache_cfg[3]); | ||
| 641 | } | ||
| 635 | } | 642 | } |
| 636 | 643 | ||
| 637 | /* | 644 | /* |
| @@ -649,29 +656,9 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, | |||
| 649 | struct ivhd_entry *e; | 656 | struct ivhd_entry *e; |
| 650 | 657 | ||
| 651 | /* | 658 | /* |
| 652 | * First set the recommended feature enable bits from ACPI | 659 | * First save the recommended feature enable bits from ACPI |
| 653 | * into the IOMMU control registers | ||
| 654 | */ | 660 | */ |
| 655 | h->flags & IVHD_FLAG_HT_TUN_EN_MASK ? | 661 | iommu->acpi_flags = h->flags; |
| 656 | iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) : | ||
| 657 | iommu_feature_disable(iommu, CONTROL_HT_TUN_EN); | ||
| 658 | |||
| 659 | h->flags & IVHD_FLAG_PASSPW_EN_MASK ? | ||
| 660 | iommu_feature_enable(iommu, CONTROL_PASSPW_EN) : | ||
| 661 | iommu_feature_disable(iommu, CONTROL_PASSPW_EN); | ||
| 662 | |||
| 663 | h->flags & IVHD_FLAG_RESPASSPW_EN_MASK ? | ||
| 664 | iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) : | ||
| 665 | iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN); | ||
| 666 | |||
| 667 | h->flags & IVHD_FLAG_ISOC_EN_MASK ? | ||
| 668 | iommu_feature_enable(iommu, CONTROL_ISOC_EN) : | ||
| 669 | iommu_feature_disable(iommu, CONTROL_ISOC_EN); | ||
| 670 | |||
| 671 | /* | ||
| 672 | * make IOMMU memory accesses cache coherent | ||
| 673 | */ | ||
| 674 | iommu_feature_enable(iommu, CONTROL_COHERENT_EN); | ||
| 675 | 662 | ||
| 676 | /* | 663 | /* |
| 677 | * Done. Now parse the device entries | 664 | * Done. Now parse the device entries |
| @@ -1116,6 +1103,40 @@ static void init_device_table(void) | |||
| 1116 | } | 1103 | } |
| 1117 | } | 1104 | } |
| 1118 | 1105 | ||
| 1106 | static void iommu_init_flags(struct amd_iommu *iommu) | ||
| 1107 | { | ||
| 1108 | iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ? | ||
| 1109 | iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) : | ||
| 1110 | iommu_feature_disable(iommu, CONTROL_HT_TUN_EN); | ||
| 1111 | |||
| 1112 | iommu->acpi_flags & IVHD_FLAG_PASSPW_EN_MASK ? | ||
| 1113 | iommu_feature_enable(iommu, CONTROL_PASSPW_EN) : | ||
| 1114 | iommu_feature_disable(iommu, CONTROL_PASSPW_EN); | ||
| 1115 | |||
| 1116 | iommu->acpi_flags & IVHD_FLAG_RESPASSPW_EN_MASK ? | ||
| 1117 | iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) : | ||
| 1118 | iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN); | ||
| 1119 | |||
| 1120 | iommu->acpi_flags & IVHD_FLAG_ISOC_EN_MASK ? | ||
| 1121 | iommu_feature_enable(iommu, CONTROL_ISOC_EN) : | ||
| 1122 | iommu_feature_disable(iommu, CONTROL_ISOC_EN); | ||
| 1123 | |||
| 1124 | /* | ||
| 1125 | * make IOMMU memory accesses cache coherent | ||
| 1126 | */ | ||
| 1127 | iommu_feature_enable(iommu, CONTROL_COHERENT_EN); | ||
| 1128 | } | ||
| 1129 | |||
| 1130 | static void iommu_apply_quirks(struct amd_iommu *iommu) | ||
| 1131 | { | ||
| 1132 | if (is_rd890_iommu(iommu->dev)) { | ||
| 1133 | pci_write_config_dword(iommu->dev, 0xf0, iommu->cache_cfg[0]); | ||
| 1134 | pci_write_config_dword(iommu->dev, 0xf4, iommu->cache_cfg[1]); | ||
| 1135 | pci_write_config_dword(iommu->dev, 0xf8, iommu->cache_cfg[2]); | ||
| 1136 | pci_write_config_dword(iommu->dev, 0xfc, iommu->cache_cfg[3]); | ||
| 1137 | } | ||
| 1138 | } | ||
| 1139 | |||
| 1119 | /* | 1140 | /* |
| 1120 | * This function finally enables all IOMMUs found in the system after | 1141 | * This function finally enables all IOMMUs found in the system after |
| 1121 | * they have been initialized | 1142 | * they have been initialized |
| @@ -1126,6 +1147,8 @@ static void enable_iommus(void) | |||
| 1126 | 1147 | ||
| 1127 | for_each_iommu(iommu) { | 1148 | for_each_iommu(iommu) { |
| 1128 | iommu_disable(iommu); | 1149 | iommu_disable(iommu); |
| 1150 | iommu_apply_quirks(iommu); | ||
| 1151 | iommu_init_flags(iommu); | ||
| 1129 | iommu_set_device_table(iommu); | 1152 | iommu_set_device_table(iommu); |
| 1130 | iommu_enable_command_buffer(iommu); | 1153 | iommu_enable_command_buffer(iommu); |
| 1131 | iommu_enable_event_buffer(iommu); | 1154 | iommu_enable_event_buffer(iommu); |
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 4dc0084ec1b1..5c5b8f3dddb5 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c | |||
| @@ -306,14 +306,19 @@ void arch_init_copy_chip_data(struct irq_desc *old_desc, | |||
| 306 | 306 | ||
| 307 | old_cfg = old_desc->chip_data; | 307 | old_cfg = old_desc->chip_data; |
| 308 | 308 | ||
| 309 | memcpy(cfg, old_cfg, sizeof(struct irq_cfg)); | 309 | cfg->vector = old_cfg->vector; |
| 310 | cfg->move_in_progress = old_cfg->move_in_progress; | ||
| 311 | cpumask_copy(cfg->domain, old_cfg->domain); | ||
| 312 | cpumask_copy(cfg->old_domain, old_cfg->old_domain); | ||
| 310 | 313 | ||
| 311 | init_copy_irq_2_pin(old_cfg, cfg, node); | 314 | init_copy_irq_2_pin(old_cfg, cfg, node); |
| 312 | } | 315 | } |
| 313 | 316 | ||
| 314 | static void free_irq_cfg(struct irq_cfg *old_cfg) | 317 | static void free_irq_cfg(struct irq_cfg *cfg) |
| 315 | { | 318 | { |
| 316 | kfree(old_cfg); | 319 | free_cpumask_var(cfg->domain); |
| 320 | free_cpumask_var(cfg->old_domain); | ||
| 321 | kfree(cfg); | ||
| 317 | } | 322 | } |
| 318 | 323 | ||
| 319 | void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) | 324 | void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) |
| @@ -1728,6 +1733,8 @@ __apicdebuginit(void) print_IO_APIC(void) | |||
| 1728 | struct irq_pin_list *entry; | 1733 | struct irq_pin_list *entry; |
| 1729 | 1734 | ||
| 1730 | cfg = desc->chip_data; | 1735 | cfg = desc->chip_data; |
| 1736 | if (!cfg) | ||
| 1737 | continue; | ||
| 1731 | entry = cfg->irq_2_pin; | 1738 | entry = cfg->irq_2_pin; |
| 1732 | if (!entry) | 1739 | if (!entry) |
| 1733 | continue; | 1740 | continue; |
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 7b598b84c902..f744f54cb248 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c | |||
| @@ -698,9 +698,11 @@ void __init uv_system_init(void) | |||
| 698 | for (j = 0; j < 64; j++) { | 698 | for (j = 0; j < 64; j++) { |
| 699 | if (!test_bit(j, &present)) | 699 | if (!test_bit(j, &present)) |
| 700 | continue; | 700 | continue; |
| 701 | uv_blade_info[blade].pnode = (i * 64 + j); | 701 | pnode = (i * 64 + j); |
| 702 | uv_blade_info[blade].pnode = pnode; | ||
| 702 | uv_blade_info[blade].nr_possible_cpus = 0; | 703 | uv_blade_info[blade].nr_possible_cpus = 0; |
| 703 | uv_blade_info[blade].nr_online_cpus = 0; | 704 | uv_blade_info[blade].nr_online_cpus = 0; |
| 705 | max_pnode = max(pnode, max_pnode); | ||
| 704 | blade++; | 706 | blade++; |
| 705 | } | 707 | } |
| 706 | } | 708 | } |
| @@ -738,7 +740,6 @@ void __init uv_system_init(void) | |||
| 738 | uv_cpu_hub_info(cpu)->scir.offset = uv_scir_offset(apicid); | 740 | uv_cpu_hub_info(cpu)->scir.offset = uv_scir_offset(apicid); |
| 739 | uv_node_to_blade[nid] = blade; | 741 | uv_node_to_blade[nid] = blade; |
| 740 | uv_cpu_to_blade[cpu] = blade; | 742 | uv_cpu_to_blade[cpu] = blade; |
| 741 | max_pnode = max(pnode, max_pnode); | ||
| 742 | } | 743 | } |
| 743 | 744 | ||
| 744 | /* Add blade/pnode info for nodes without cpus */ | 745 | /* Add blade/pnode info for nodes without cpus */ |
| @@ -750,7 +751,6 @@ void __init uv_system_init(void) | |||
| 750 | pnode = (paddr >> m_val) & pnode_mask; | 751 | pnode = (paddr >> m_val) & pnode_mask; |
| 751 | blade = boot_pnode_to_blade(pnode); | 752 | blade = boot_pnode_to_blade(pnode); |
| 752 | uv_node_to_blade[nid] = blade; | 753 | uv_node_to_blade[nid] = blade; |
| 753 | max_pnode = max(pnode, max_pnode); | ||
| 754 | } | 754 | } |
| 755 | 755 | ||
| 756 | map_gru_high(max_pnode); | 756 | map_gru_high(max_pnode); |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 60a57b13082d..ba5f62f45f01 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
| @@ -669,7 +669,7 @@ bool cpu_has_amd_erratum(const int *erratum) | |||
| 669 | } | 669 | } |
| 670 | 670 | ||
| 671 | /* OSVW unavailable or ID unknown, match family-model-stepping range */ | 671 | /* OSVW unavailable or ID unknown, match family-model-stepping range */ |
| 672 | ms = (cpu->x86_model << 8) | cpu->x86_mask; | 672 | ms = (cpu->x86_model << 4) | cpu->x86_mask; |
| 673 | while ((range = *erratum++)) | 673 | while ((range = *erratum++)) |
| 674 | if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) && | 674 | if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) && |
| 675 | (ms >= AMD_MODEL_RANGE_START(range)) && | 675 | (ms >= AMD_MODEL_RANGE_START(range)) && |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 490dac63c2d2..f2f9ac7da25c 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
| @@ -545,7 +545,7 @@ void __cpuinit cpu_detect(struct cpuinfo_x86 *c) | |||
| 545 | } | 545 | } |
| 546 | } | 546 | } |
| 547 | 547 | ||
| 548 | static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) | 548 | void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) |
| 549 | { | 549 | { |
| 550 | u32 tfms, xlvl; | 550 | u32 tfms, xlvl; |
| 551 | u32 ebx; | 551 | u32 ebx; |
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 3624e8a0f71b..f668bb1f7d43 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h | |||
| @@ -33,5 +33,6 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[], | |||
| 33 | *const __x86_cpu_dev_end[]; | 33 | *const __x86_cpu_dev_end[]; |
| 34 | 34 | ||
| 35 | extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); | 35 | extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); |
| 36 | extern void get_cpu_cap(struct cpuinfo_x86 *c); | ||
| 36 | 37 | ||
| 37 | #endif | 38 | #endif |
diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c index 994230d4dc4e..4f6f679f2799 100644 --- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c | |||
| @@ -368,16 +368,22 @@ static int __init pcc_cpufreq_do_osc(acpi_handle *handle) | |||
| 368 | return -ENODEV; | 368 | return -ENODEV; |
| 369 | 369 | ||
| 370 | out_obj = output.pointer; | 370 | out_obj = output.pointer; |
| 371 | if (out_obj->type != ACPI_TYPE_BUFFER) | 371 | if (out_obj->type != ACPI_TYPE_BUFFER) { |
| 372 | return -ENODEV; | 372 | ret = -ENODEV; |
| 373 | goto out_free; | ||
| 374 | } | ||
| 373 | 375 | ||
| 374 | errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0); | 376 | errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0); |
| 375 | if (errors) | 377 | if (errors) { |
| 376 | return -ENODEV; | 378 | ret = -ENODEV; |
| 379 | goto out_free; | ||
| 380 | } | ||
| 377 | 381 | ||
| 378 | supported = *((u32 *)(out_obj->buffer.pointer + 4)); | 382 | supported = *((u32 *)(out_obj->buffer.pointer + 4)); |
| 379 | if (!(supported & 0x1)) | 383 | if (!(supported & 0x1)) { |
| 380 | return -ENODEV; | 384 | ret = -ENODEV; |
| 385 | goto out_free; | ||
| 386 | } | ||
| 381 | 387 | ||
| 382 | out_free: | 388 | out_free: |
| 383 | kfree(output.pointer); | 389 | kfree(output.pointer); |
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 85f69cdeae10..b4389441efbb 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
| @@ -39,6 +39,7 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) | |||
| 39 | misc_enable &= ~MSR_IA32_MISC_ENABLE_LIMIT_CPUID; | 39 | misc_enable &= ~MSR_IA32_MISC_ENABLE_LIMIT_CPUID; |
| 40 | wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable); | 40 | wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable); |
| 41 | c->cpuid_level = cpuid_eax(0); | 41 | c->cpuid_level = cpuid_eax(0); |
| 42 | get_cpu_cap(c); | ||
| 42 | } | 43 | } |
| 43 | } | 44 | } |
| 44 | 45 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 224392d8fe8c..5e975298fa81 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c | |||
| @@ -530,7 +530,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
| 530 | err = -ENOMEM; | 530 | err = -ENOMEM; |
| 531 | goto out; | 531 | goto out; |
| 532 | } | 532 | } |
| 533 | if (!alloc_cpumask_var(&b->cpus, GFP_KERNEL)) { | 533 | if (!zalloc_cpumask_var(&b->cpus, GFP_KERNEL)) { |
| 534 | kfree(b); | 534 | kfree(b); |
| 535 | err = -ENOMEM; | 535 | err = -ENOMEM; |
| 536 | goto out; | 536 | goto out; |
| @@ -543,7 +543,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
| 543 | #ifndef CONFIG_SMP | 543 | #ifndef CONFIG_SMP |
| 544 | cpumask_setall(b->cpus); | 544 | cpumask_setall(b->cpus); |
| 545 | #else | 545 | #else |
| 546 | cpumask_copy(b->cpus, c->llc_shared_map); | 546 | cpumask_set_cpu(cpu, b->cpus); |
| 547 | #endif | 547 | #endif |
| 548 | 548 | ||
| 549 | per_cpu(threshold_banks, cpu)[bank] = b; | 549 | per_cpu(threshold_banks, cpu)[bank] = b; |
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index c2a8b26d4fea..d9368eeda309 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
| @@ -202,10 +202,11 @@ static int therm_throt_process(bool new_event, int event, int level) | |||
| 202 | 202 | ||
| 203 | #ifdef CONFIG_SYSFS | 203 | #ifdef CONFIG_SYSFS |
| 204 | /* Add/Remove thermal_throttle interface for CPU device: */ | 204 | /* Add/Remove thermal_throttle interface for CPU device: */ |
| 205 | static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev) | 205 | static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev, |
| 206 | unsigned int cpu) | ||
| 206 | { | 207 | { |
| 207 | int err; | 208 | int err; |
| 208 | struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); | 209 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
| 209 | 210 | ||
| 210 | err = sysfs_create_group(&sys_dev->kobj, &thermal_attr_group); | 211 | err = sysfs_create_group(&sys_dev->kobj, &thermal_attr_group); |
| 211 | if (err) | 212 | if (err) |
| @@ -251,7 +252,7 @@ thermal_throttle_cpu_callback(struct notifier_block *nfb, | |||
| 251 | case CPU_UP_PREPARE: | 252 | case CPU_UP_PREPARE: |
| 252 | case CPU_UP_PREPARE_FROZEN: | 253 | case CPU_UP_PREPARE_FROZEN: |
| 253 | mutex_lock(&therm_cpu_lock); | 254 | mutex_lock(&therm_cpu_lock); |
| 254 | err = thermal_throttle_add_dev(sys_dev); | 255 | err = thermal_throttle_add_dev(sys_dev, cpu); |
| 255 | mutex_unlock(&therm_cpu_lock); | 256 | mutex_unlock(&therm_cpu_lock); |
| 256 | WARN_ON(err); | 257 | WARN_ON(err); |
| 257 | break; | 258 | break; |
| @@ -287,7 +288,7 @@ static __init int thermal_throttle_init_device(void) | |||
| 287 | #endif | 288 | #endif |
| 288 | /* connect live CPUs to sysfs */ | 289 | /* connect live CPUs to sysfs */ |
| 289 | for_each_online_cpu(cpu) { | 290 | for_each_online_cpu(cpu) { |
| 290 | err = thermal_throttle_add_dev(get_cpu_sysdev(cpu)); | 291 | err = thermal_throttle_add_dev(get_cpu_sysdev(cpu), cpu); |
| 291 | WARN_ON(err); | 292 | WARN_ON(err); |
| 292 | } | 293 | } |
| 293 | #ifdef CONFIG_HOTPLUG_CPU | 294 | #ifdef CONFIG_HOTPLUG_CPU |
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index f2da20fda02d..03a5b0385ad6 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
| @@ -102,6 +102,7 @@ struct cpu_hw_events { | |||
| 102 | */ | 102 | */ |
| 103 | struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ | 103 | struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ |
| 104 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | 104 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
| 105 | unsigned long running[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | ||
| 105 | int enabled; | 106 | int enabled; |
| 106 | 107 | ||
| 107 | int n_events; | 108 | int n_events; |
| @@ -1010,6 +1011,7 @@ static int x86_pmu_start(struct perf_event *event) | |||
| 1010 | x86_perf_event_set_period(event); | 1011 | x86_perf_event_set_period(event); |
| 1011 | cpuc->events[idx] = event; | 1012 | cpuc->events[idx] = event; |
| 1012 | __set_bit(idx, cpuc->active_mask); | 1013 | __set_bit(idx, cpuc->active_mask); |
| 1014 | __set_bit(idx, cpuc->running); | ||
| 1013 | x86_pmu.enable(event); | 1015 | x86_pmu.enable(event); |
| 1014 | perf_event_update_userpage(event); | 1016 | perf_event_update_userpage(event); |
| 1015 | 1017 | ||
| @@ -1141,8 +1143,16 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) | |||
| 1141 | cpuc = &__get_cpu_var(cpu_hw_events); | 1143 | cpuc = &__get_cpu_var(cpu_hw_events); |
| 1142 | 1144 | ||
| 1143 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | 1145 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
| 1144 | if (!test_bit(idx, cpuc->active_mask)) | 1146 | if (!test_bit(idx, cpuc->active_mask)) { |
| 1147 | /* | ||
| 1148 | * Though we deactivated the counter some cpus | ||
| 1149 | * might deliver spurious interrupts that are still | ||
| 1150 | * in flight. Catch them: | ||
| 1151 | */ | ||
| 1152 | if (__test_and_clear_bit(idx, cpuc->running)) | ||
| 1153 | handled++; | ||
| 1145 | continue; | 1154 | continue; |
| 1155 | } | ||
| 1146 | 1156 | ||
| 1147 | event = cpuc->events[idx]; | 1157 | event = cpuc->events[idx]; |
| 1148 | hwc = &event->hw; | 1158 | hwc = &event->hw; |
| @@ -1154,7 +1164,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) | |||
| 1154 | /* | 1164 | /* |
| 1155 | * event overflow | 1165 | * event overflow |
| 1156 | */ | 1166 | */ |
| 1157 | handled = 1; | 1167 | handled++; |
| 1158 | data.period = event->hw.last_period; | 1168 | data.period = event->hw.last_period; |
| 1159 | 1169 | ||
| 1160 | if (!x86_perf_event_set_period(event)) | 1170 | if (!x86_perf_event_set_period(event)) |
| @@ -1200,12 +1210,20 @@ void perf_events_lapic_init(void) | |||
| 1200 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 1210 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
| 1201 | } | 1211 | } |
| 1202 | 1212 | ||
| 1213 | struct pmu_nmi_state { | ||
| 1214 | unsigned int marked; | ||
| 1215 | int handled; | ||
| 1216 | }; | ||
| 1217 | |||
| 1218 | static DEFINE_PER_CPU(struct pmu_nmi_state, pmu_nmi); | ||
| 1219 | |||
| 1203 | static int __kprobes | 1220 | static int __kprobes |
| 1204 | perf_event_nmi_handler(struct notifier_block *self, | 1221 | perf_event_nmi_handler(struct notifier_block *self, |
| 1205 | unsigned long cmd, void *__args) | 1222 | unsigned long cmd, void *__args) |
| 1206 | { | 1223 | { |
| 1207 | struct die_args *args = __args; | 1224 | struct die_args *args = __args; |
| 1208 | struct pt_regs *regs; | 1225 | unsigned int this_nmi; |
| 1226 | int handled; | ||
| 1209 | 1227 | ||
| 1210 | if (!atomic_read(&active_events)) | 1228 | if (!atomic_read(&active_events)) |
| 1211 | return NOTIFY_DONE; | 1229 | return NOTIFY_DONE; |
| @@ -1214,22 +1232,47 @@ perf_event_nmi_handler(struct notifier_block *self, | |||
| 1214 | case DIE_NMI: | 1232 | case DIE_NMI: |
| 1215 | case DIE_NMI_IPI: | 1233 | case DIE_NMI_IPI: |
| 1216 | break; | 1234 | break; |
| 1217 | 1235 | case DIE_NMIUNKNOWN: | |
| 1236 | this_nmi = percpu_read(irq_stat.__nmi_count); | ||
| 1237 | if (this_nmi != __get_cpu_var(pmu_nmi).marked) | ||
| 1238 | /* let the kernel handle the unknown nmi */ | ||
| 1239 | return NOTIFY_DONE; | ||
| 1240 | /* | ||
| 1241 | * This one is a PMU back-to-back nmi. Two events | ||
| 1242 | * trigger 'simultaneously' raising two back-to-back | ||
| 1243 | * NMIs. If the first NMI handles both, the latter | ||
| 1244 | * will be empty and daze the CPU. So, we drop it to | ||
| 1245 | * avoid false-positive 'unknown nmi' messages. | ||
| 1246 | */ | ||
| 1247 | return NOTIFY_STOP; | ||
| 1218 | default: | 1248 | default: |
| 1219 | return NOTIFY_DONE; | 1249 | return NOTIFY_DONE; |
| 1220 | } | 1250 | } |
| 1221 | 1251 | ||
| 1222 | regs = args->regs; | ||
| 1223 | |||
| 1224 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 1252 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
| 1225 | /* | 1253 | |
| 1226 | * Can't rely on the handled return value to say it was our NMI, two | 1254 | handled = x86_pmu.handle_irq(args->regs); |
| 1227 | * events could trigger 'simultaneously' raising two back-to-back NMIs. | 1255 | if (!handled) |
| 1228 | * | 1256 | return NOTIFY_DONE; |
| 1229 | * If the first NMI handles both, the latter will be empty and daze | 1257 | |
| 1230 | * the CPU. | 1258 | this_nmi = percpu_read(irq_stat.__nmi_count); |
| 1231 | */ | 1259 | if ((handled > 1) || |
| 1232 | x86_pmu.handle_irq(regs); | 1260 | /* the next nmi could be a back-to-back nmi */ |
| 1261 | ((__get_cpu_var(pmu_nmi).marked == this_nmi) && | ||
| 1262 | (__get_cpu_var(pmu_nmi).handled > 1))) { | ||
| 1263 | /* | ||
| 1264 | * We could have two subsequent back-to-back nmis: The | ||
| 1265 | * first handles more than one counter, the 2nd | ||
| 1266 | * handles only one counter and the 3rd handles no | ||
| 1267 | * counter. | ||
| 1268 | * | ||
| 1269 | * This is the 2nd nmi because the previous was | ||
| 1270 | * handling more than one counter. We will mark the | ||
| 1271 | * next (3rd) and then drop it if unhandled. | ||
| 1272 | */ | ||
| 1273 | __get_cpu_var(pmu_nmi).marked = this_nmi + 1; | ||
| 1274 | __get_cpu_var(pmu_nmi).handled = handled; | ||
| 1275 | } | ||
| 1233 | 1276 | ||
| 1234 | return NOTIFY_STOP; | 1277 | return NOTIFY_STOP; |
| 1235 | } | 1278 | } |
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 214ac860ebe0..ee05c90012d2 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
| @@ -491,33 +491,78 @@ static void intel_pmu_enable_all(int added) | |||
| 491 | * Intel Errata AAP53 (model 30) | 491 | * Intel Errata AAP53 (model 30) |
| 492 | * Intel Errata BD53 (model 44) | 492 | * Intel Errata BD53 (model 44) |
| 493 | * | 493 | * |
| 494 | * These chips need to be 'reset' when adding counters by programming | 494 | * The official story: |
| 495 | * the magic three (non counting) events 0x4300D2, 0x4300B1 and 0x4300B5 | 495 | * These chips need to be 'reset' when adding counters by programming the |
| 496 | * either in sequence on the same PMC or on different PMCs. | 496 | * magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either |
| 497 | * in sequence on the same PMC or on different PMCs. | ||
| 498 | * | ||
| 499 | * In practice it appears some of these events do in fact count, and | ||
| 500 | * we need to program all 4 events. | ||
| 497 | */ | 501 | */ |
| 498 | static void intel_pmu_nhm_enable_all(int added) | 502 | static void intel_pmu_nhm_workaround(void) |
| 499 | { | 503 | { |
| 500 | if (added) { | 504 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| 501 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 505 | static const unsigned long nhm_magic[4] = { |
| 502 | int i; | 506 | 0x4300B5, |
| 507 | 0x4300D2, | ||
| 508 | 0x4300B1, | ||
| 509 | 0x4300B1 | ||
| 510 | }; | ||
| 511 | struct perf_event *event; | ||
| 512 | int i; | ||
| 513 | |||
| 514 | /* | ||
| 515 | * The Errata requires below steps: | ||
| 516 | * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL; | ||
| 517 | * 2) Configure 4 PERFEVTSELx with the magic events and clear | ||
| 518 | * the corresponding PMCx; | ||
| 519 | * 3) set bit0~bit3 of MSR_CORE_PERF_GLOBAL_CTRL; | ||
| 520 | * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL; | ||
| 521 | * 5) Clear 4 pairs of PERFEVTSELx and PMCx; | ||
| 522 | */ | ||
| 503 | 523 | ||
| 504 | wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 0, 0x4300D2); | 524 | /* |
| 505 | wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 1, 0x4300B1); | 525 | * The real steps we choose are a little different from above. |
| 506 | wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 2, 0x4300B5); | 526 | * A) To reduce MSR operations, we don't run step 1) as they |
| 527 | * are already cleared before this function is called; | ||
| 528 | * B) Call x86_perf_event_update to save PMCx before configuring | ||
| 529 | * PERFEVTSELx with magic number; | ||
| 530 | * C) With step 5), we do clear only when the PERFEVTSELx is | ||
| 531 | * not used currently. | ||
| 532 | * D) Call x86_perf_event_set_period to restore PMCx; | ||
| 533 | */ | ||
| 534 | |||
| 535 | /* We always operate 4 pairs of PERF Counters */ | ||
| 536 | for (i = 0; i < 4; i++) { | ||
| 537 | event = cpuc->events[i]; | ||
| 538 | if (event) | ||
| 539 | x86_perf_event_update(event); | ||
| 540 | } | ||
| 507 | 541 | ||
| 508 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x3); | 542 | for (i = 0; i < 4; i++) { |
| 509 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0); | 543 | wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]); |
| 544 | wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0); | ||
| 545 | } | ||
| 510 | 546 | ||
| 511 | for (i = 0; i < 3; i++) { | 547 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf); |
| 512 | struct perf_event *event = cpuc->events[i]; | 548 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0); |
| 513 | 549 | ||
| 514 | if (!event) | 550 | for (i = 0; i < 4; i++) { |
| 515 | continue; | 551 | event = cpuc->events[i]; |
| 516 | 552 | ||
| 553 | if (event) { | ||
| 554 | x86_perf_event_set_period(event); | ||
| 517 | __x86_pmu_enable_event(&event->hw, | 555 | __x86_pmu_enable_event(&event->hw, |
| 518 | ARCH_PERFMON_EVENTSEL_ENABLE); | 556 | ARCH_PERFMON_EVENTSEL_ENABLE); |
| 519 | } | 557 | } else |
| 558 | wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0); | ||
| 520 | } | 559 | } |
| 560 | } | ||
| 561 | |||
| 562 | static void intel_pmu_nhm_enable_all(int added) | ||
| 563 | { | ||
| 564 | if (added) | ||
| 565 | intel_pmu_nhm_workaround(); | ||
| 521 | intel_pmu_enable_all(added); | 566 | intel_pmu_enable_all(added); |
| 522 | } | 567 | } |
| 523 | 568 | ||
| @@ -667,7 +712,8 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) | |||
| 667 | struct perf_sample_data data; | 712 | struct perf_sample_data data; |
| 668 | struct cpu_hw_events *cpuc; | 713 | struct cpu_hw_events *cpuc; |
| 669 | int bit, loops; | 714 | int bit, loops; |
| 670 | u64 ack, status; | 715 | u64 status; |
| 716 | int handled = 0; | ||
| 671 | 717 | ||
| 672 | perf_sample_data_init(&data, 0); | 718 | perf_sample_data_init(&data, 0); |
| 673 | 719 | ||
| @@ -683,6 +729,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) | |||
| 683 | 729 | ||
| 684 | loops = 0; | 730 | loops = 0; |
| 685 | again: | 731 | again: |
| 732 | intel_pmu_ack_status(status); | ||
| 686 | if (++loops > 100) { | 733 | if (++loops > 100) { |
| 687 | WARN_ONCE(1, "perfevents: irq loop stuck!\n"); | 734 | WARN_ONCE(1, "perfevents: irq loop stuck!\n"); |
| 688 | perf_event_print_debug(); | 735 | perf_event_print_debug(); |
| @@ -691,19 +738,22 @@ again: | |||
| 691 | } | 738 | } |
| 692 | 739 | ||
| 693 | inc_irq_stat(apic_perf_irqs); | 740 | inc_irq_stat(apic_perf_irqs); |
| 694 | ack = status; | ||
| 695 | 741 | ||
| 696 | intel_pmu_lbr_read(); | 742 | intel_pmu_lbr_read(); |
| 697 | 743 | ||
| 698 | /* | 744 | /* |
| 699 | * PEBS overflow sets bit 62 in the global status register | 745 | * PEBS overflow sets bit 62 in the global status register |
| 700 | */ | 746 | */ |
| 701 | if (__test_and_clear_bit(62, (unsigned long *)&status)) | 747 | if (__test_and_clear_bit(62, (unsigned long *)&status)) { |
| 748 | handled++; | ||
| 702 | x86_pmu.drain_pebs(regs); | 749 | x86_pmu.drain_pebs(regs); |
| 750 | } | ||
| 703 | 751 | ||
| 704 | for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { | 752 | for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { |
| 705 | struct perf_event *event = cpuc->events[bit]; | 753 | struct perf_event *event = cpuc->events[bit]; |
| 706 | 754 | ||
| 755 | handled++; | ||
| 756 | |||
| 707 | if (!test_bit(bit, cpuc->active_mask)) | 757 | if (!test_bit(bit, cpuc->active_mask)) |
| 708 | continue; | 758 | continue; |
| 709 | 759 | ||
| @@ -716,8 +766,6 @@ again: | |||
| 716 | x86_pmu_stop(event); | 766 | x86_pmu_stop(event); |
| 717 | } | 767 | } |
| 718 | 768 | ||
| 719 | intel_pmu_ack_status(ack); | ||
| 720 | |||
| 721 | /* | 769 | /* |
| 722 | * Repeat if there is more work to be done: | 770 | * Repeat if there is more work to be done: |
| 723 | */ | 771 | */ |
| @@ -727,7 +775,7 @@ again: | |||
| 727 | 775 | ||
| 728 | done: | 776 | done: |
| 729 | intel_pmu_enable_all(0); | 777 | intel_pmu_enable_all(0); |
| 730 | return 1; | 778 | return handled; |
| 731 | } | 779 | } |
| 732 | 780 | ||
| 733 | static struct event_constraint * | 781 | static struct event_constraint * |
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index febb12cea795..b560db3305be 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c | |||
| @@ -497,6 +497,8 @@ static int p4_hw_config(struct perf_event *event) | |||
| 497 | event->hw.config |= event->attr.config & | 497 | event->hw.config |= event->attr.config & |
| 498 | (p4_config_pack_escr(P4_ESCR_MASK_HT) | | 498 | (p4_config_pack_escr(P4_ESCR_MASK_HT) | |
| 499 | p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED)); | 499 | p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED)); |
| 500 | |||
| 501 | event->hw.config &= ~P4_CCCR_FORCE_OVF; | ||
| 500 | } | 502 | } |
| 501 | 503 | ||
| 502 | rc = x86_setup_perfctr(event); | 504 | rc = x86_setup_perfctr(event); |
| @@ -690,7 +692,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) | |||
| 690 | inc_irq_stat(apic_perf_irqs); | 692 | inc_irq_stat(apic_perf_irqs); |
| 691 | } | 693 | } |
| 692 | 694 | ||
| 693 | return handled > 0; | 695 | return handled; |
| 694 | } | 696 | } |
| 695 | 697 | ||
| 696 | /* | 698 | /* |
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 34b4dad6f0b8..d49079515122 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c | |||
| @@ -31,6 +31,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) | |||
| 31 | const struct cpuid_bit *cb; | 31 | const struct cpuid_bit *cb; |
| 32 | 32 | ||
| 33 | static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { | 33 | static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { |
| 34 | { X86_FEATURE_DTS, CR_EAX, 0, 0x00000006, 0 }, | ||
| 34 | { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006, 0 }, | 35 | { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006, 0 }, |
| 35 | { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006, 0 }, | 36 | { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006, 0 }, |
| 36 | { X86_FEATURE_PLN, CR_EAX, 4, 0x00000006, 0 }, | 37 | { X86_FEATURE_PLN, CR_EAX, 4, 0x00000006, 0 }, |
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index e5cc7e82e60d..ebdb85cf2686 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c | |||
| @@ -18,7 +18,6 @@ | |||
| 18 | #include <asm/apic.h> | 18 | #include <asm/apic.h> |
| 19 | #include <asm/iommu.h> | 19 | #include <asm/iommu.h> |
| 20 | #include <asm/gart.h> | 20 | #include <asm/gart.h> |
| 21 | #include <asm/hpet.h> | ||
| 22 | 21 | ||
| 23 | static void __init fix_hypertransport_config(int num, int slot, int func) | 22 | static void __init fix_hypertransport_config(int num, int slot, int func) |
| 24 | { | 23 | { |
| @@ -192,21 +191,6 @@ static void __init ati_bugs_contd(int num, int slot, int func) | |||
| 192 | } | 191 | } |
| 193 | #endif | 192 | #endif |
| 194 | 193 | ||
| 195 | /* | ||
| 196 | * Force the read back of the CMP register in hpet_next_event() | ||
| 197 | * to work around the problem that the CMP register write seems to be | ||
| 198 | * delayed. See hpet_next_event() for details. | ||
| 199 | * | ||
| 200 | * We do this on all SMBUS incarnations for now until we have more | ||
| 201 | * information about the affected chipsets. | ||
| 202 | */ | ||
| 203 | static void __init ati_hpet_bugs(int num, int slot, int func) | ||
| 204 | { | ||
| 205 | #ifdef CONFIG_HPET_TIMER | ||
| 206 | hpet_readback_cmp = 1; | ||
| 207 | #endif | ||
| 208 | } | ||
| 209 | |||
| 210 | #define QFLAG_APPLY_ONCE 0x1 | 194 | #define QFLAG_APPLY_ONCE 0x1 |
| 211 | #define QFLAG_APPLIED 0x2 | 195 | #define QFLAG_APPLIED 0x2 |
| 212 | #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) | 196 | #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) |
| @@ -236,8 +220,6 @@ static struct chipset early_qrk[] __initdata = { | |||
| 236 | PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs }, | 220 | PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs }, |
| 237 | { PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, | 221 | { PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, |
| 238 | PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd }, | 222 | PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd }, |
| 239 | { PCI_VENDOR_ID_ATI, PCI_ANY_ID, | ||
| 240 | PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_hpet_bugs }, | ||
| 241 | {} | 223 | {} |
| 242 | }; | 224 | }; |
| 243 | 225 | ||
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index ff4c453e13f3..fa8c1b8e09fb 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
| @@ -334,7 +334,7 @@ ENTRY(startup_32_smp) | |||
| 334 | /* | 334 | /* |
| 335 | * Enable paging | 335 | * Enable paging |
| 336 | */ | 336 | */ |
| 337 | movl $pa(swapper_pg_dir),%eax | 337 | movl pa(initial_page_table), %eax |
| 338 | movl %eax,%cr3 /* set the page table pointer.. */ | 338 | movl %eax,%cr3 /* set the page table pointer.. */ |
| 339 | movl %cr0,%eax | 339 | movl %cr0,%eax |
| 340 | orl $X86_CR0_PG,%eax | 340 | orl $X86_CR0_PG,%eax |
| @@ -614,6 +614,8 @@ ignore_int: | |||
| 614 | .align 4 | 614 | .align 4 |
| 615 | ENTRY(initial_code) | 615 | ENTRY(initial_code) |
| 616 | .long i386_start_kernel | 616 | .long i386_start_kernel |
| 617 | ENTRY(initial_page_table) | ||
| 618 | .long pa(swapper_pg_dir) | ||
| 617 | 619 | ||
| 618 | /* | 620 | /* |
| 619 | * BSS section | 621 | * BSS section |
| @@ -629,6 +631,10 @@ ENTRY(swapper_pg_dir) | |||
| 629 | #endif | 631 | #endif |
| 630 | swapper_pg_fixmap: | 632 | swapper_pg_fixmap: |
| 631 | .fill 1024,4,0 | 633 | .fill 1024,4,0 |
| 634 | #ifdef CONFIG_X86_TRAMPOLINE | ||
| 635 | ENTRY(trampoline_pg_dir) | ||
| 636 | .fill 1024,4,0 | ||
| 637 | #endif | ||
| 632 | ENTRY(empty_zero_page) | 638 | ENTRY(empty_zero_page) |
| 633 | .fill 4096,1,0 | 639 | .fill 4096,1,0 |
| 634 | 640 | ||
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 351f9c0fea1f..7494999141b3 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
| @@ -35,7 +35,6 @@ | |||
| 35 | unsigned long hpet_address; | 35 | unsigned long hpet_address; |
| 36 | u8 hpet_blockid; /* OS timer block num */ | 36 | u8 hpet_blockid; /* OS timer block num */ |
| 37 | u8 hpet_msi_disable; | 37 | u8 hpet_msi_disable; |
| 38 | u8 hpet_readback_cmp; | ||
| 39 | 38 | ||
| 40 | #ifdef CONFIG_PCI_MSI | 39 | #ifdef CONFIG_PCI_MSI |
| 41 | static unsigned long hpet_num_timers; | 40 | static unsigned long hpet_num_timers; |
| @@ -395,23 +394,27 @@ static int hpet_next_event(unsigned long delta, | |||
| 395 | * at that point and we would wait for the next hpet interrupt | 394 | * at that point and we would wait for the next hpet interrupt |
| 396 | * forever. We found out that reading the CMP register back | 395 | * forever. We found out that reading the CMP register back |
| 397 | * forces the transfer so we can rely on the comparison with | 396 | * forces the transfer so we can rely on the comparison with |
| 398 | * the counter register below. | 397 | * the counter register below. If the read back from the |
| 398 | * compare register does not match the value we programmed | ||
| 399 | * then we might have a real hardware problem. We can not do | ||
| 400 | * much about it here, but at least alert the user/admin with | ||
| 401 | * a prominent warning. | ||
| 399 | * | 402 | * |
| 400 | * That works fine on those ATI chipsets, but on newer Intel | 403 | * An erratum on some chipsets (ICH9,..), results in |
| 401 | * chipsets (ICH9...) this triggers due to an erratum: Reading | 404 | * comparator read immediately following a write returning old |
| 402 | * the comparator immediately following a write is returning | 405 | * value. Workaround for this is to read this value second |
| 403 | * the old value. | 406 | * time, when first read returns old value. |
| 404 | * | 407 | * |
| 405 | * We restrict the read back to the affected ATI chipsets (set | 408 | * In fact the write to the comparator register is delayed up |
| 406 | * by quirks) and also run it with hpet=verbose for debugging | 409 | * to two HPET cycles so the workaround we tried to restrict |
| 407 | * purposes. | 410 | * the readback to those known to be borked ATI chipsets |
| 411 | * failed miserably. So we give up on optimizations forever | ||
| 412 | * and penalize all HPET incarnations unconditionally. | ||
| 408 | */ | 413 | */ |
| 409 | if (hpet_readback_cmp || hpet_verbose) { | 414 | if (unlikely((u32)hpet_readl(HPET_Tn_CMP(timer)) != cnt)) { |
| 410 | u32 cmp = hpet_readl(HPET_Tn_CMP(timer)); | 415 | if (hpet_readl(HPET_Tn_CMP(timer)) != cnt) |
| 411 | |||
| 412 | if (cmp != cnt) | ||
| 413 | printk_once(KERN_WARNING | 416 | printk_once(KERN_WARNING |
| 414 | "hpet: compare register read back failed.\n"); | 417 | "hpet: compare register read back failed.\n"); |
| 415 | } | 418 | } |
| 416 | 419 | ||
| 417 | return (s32)(hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; | 420 | return (s32)(hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; |
| @@ -503,7 +506,7 @@ static int hpet_assign_irq(struct hpet_dev *dev) | |||
| 503 | { | 506 | { |
| 504 | unsigned int irq; | 507 | unsigned int irq; |
| 505 | 508 | ||
| 506 | irq = create_irq(); | 509 | irq = create_irq_nr(0, -1); |
| 507 | if (!irq) | 510 | if (!irq) |
| 508 | return -EINVAL; | 511 | return -EINVAL; |
| 509 | 512 | ||
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index a474ec37c32f..ff15c9dcc25d 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c | |||
| @@ -206,11 +206,27 @@ int arch_check_bp_in_kernelspace(struct perf_event *bp) | |||
| 206 | int arch_bp_generic_fields(int x86_len, int x86_type, | 206 | int arch_bp_generic_fields(int x86_len, int x86_type, |
| 207 | int *gen_len, int *gen_type) | 207 | int *gen_len, int *gen_type) |
| 208 | { | 208 | { |
| 209 | /* Len */ | 209 | /* Type */ |
| 210 | switch (x86_len) { | 210 | switch (x86_type) { |
| 211 | case X86_BREAKPOINT_LEN_X: | 211 | case X86_BREAKPOINT_EXECUTE: |
| 212 | if (x86_len != X86_BREAKPOINT_LEN_X) | ||
| 213 | return -EINVAL; | ||
| 214 | |||
| 215 | *gen_type = HW_BREAKPOINT_X; | ||
| 212 | *gen_len = sizeof(long); | 216 | *gen_len = sizeof(long); |
| 217 | return 0; | ||
| 218 | case X86_BREAKPOINT_WRITE: | ||
| 219 | *gen_type = HW_BREAKPOINT_W; | ||
| 213 | break; | 220 | break; |
| 221 | case X86_BREAKPOINT_RW: | ||
| 222 | *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R; | ||
| 223 | break; | ||
| 224 | default: | ||
| 225 | return -EINVAL; | ||
| 226 | } | ||
| 227 | |||
| 228 | /* Len */ | ||
| 229 | switch (x86_len) { | ||
| 214 | case X86_BREAKPOINT_LEN_1: | 230 | case X86_BREAKPOINT_LEN_1: |
| 215 | *gen_len = HW_BREAKPOINT_LEN_1; | 231 | *gen_len = HW_BREAKPOINT_LEN_1; |
| 216 | break; | 232 | break; |
| @@ -229,21 +245,6 @@ int arch_bp_generic_fields(int x86_len, int x86_type, | |||
| 229 | return -EINVAL; | 245 | return -EINVAL; |
| 230 | } | 246 | } |
| 231 | 247 | ||
| 232 | /* Type */ | ||
| 233 | switch (x86_type) { | ||
| 234 | case X86_BREAKPOINT_EXECUTE: | ||
| 235 | *gen_type = HW_BREAKPOINT_X; | ||
| 236 | break; | ||
| 237 | case X86_BREAKPOINT_WRITE: | ||
| 238 | *gen_type = HW_BREAKPOINT_W; | ||
| 239 | break; | ||
| 240 | case X86_BREAKPOINT_RW: | ||
| 241 | *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R; | ||
| 242 | break; | ||
| 243 | default: | ||
| 244 | return -EINVAL; | ||
| 245 | } | ||
| 246 | |||
| 247 | return 0; | 248 | return 0; |
| 248 | } | 249 | } |
| 249 | 250 | ||
| @@ -316,9 +317,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) | |||
| 316 | ret = -EINVAL; | 317 | ret = -EINVAL; |
| 317 | 318 | ||
| 318 | switch (info->len) { | 319 | switch (info->len) { |
| 319 | case X86_BREAKPOINT_LEN_X: | ||
| 320 | align = sizeof(long) -1; | ||
| 321 | break; | ||
| 322 | case X86_BREAKPOINT_LEN_1: | 320 | case X86_BREAKPOINT_LEN_1: |
| 323 | align = 0; | 321 | align = 0; |
| 324 | break; | 322 | break; |
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 1f11f5ce668f..a46cb3522c0c 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c | |||
| @@ -40,6 +40,7 @@ | |||
| 40 | 40 | ||
| 41 | static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; | 41 | static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; |
| 42 | unsigned int xstate_size; | 42 | unsigned int xstate_size; |
| 43 | EXPORT_SYMBOL_GPL(xstate_size); | ||
| 43 | unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32); | 44 | unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32); |
| 44 | static struct i387_fxsave_struct fx_scratch __cpuinitdata; | 45 | static struct i387_fxsave_struct fx_scratch __cpuinitdata; |
| 45 | 46 | ||
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index ef10940e1af0..852b81967a37 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c | |||
| @@ -194,7 +194,7 @@ static struct hw_breakpoint { | |||
| 194 | unsigned long addr; | 194 | unsigned long addr; |
| 195 | int len; | 195 | int len; |
| 196 | int type; | 196 | int type; |
| 197 | struct perf_event **pev; | 197 | struct perf_event * __percpu *pev; |
| 198 | } breakinfo[HBP_NUM]; | 198 | } breakinfo[HBP_NUM]; |
| 199 | 199 | ||
| 200 | static unsigned long early_dr7; | 200 | static unsigned long early_dr7; |
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 1bfb6cf4dd55..770ebfb349e9 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
| @@ -709,6 +709,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) | |||
| 709 | struct hlist_node *node, *tmp; | 709 | struct hlist_node *node, *tmp; |
| 710 | unsigned long flags, orig_ret_address = 0; | 710 | unsigned long flags, orig_ret_address = 0; |
| 711 | unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; | 711 | unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; |
| 712 | kprobe_opcode_t *correct_ret_addr = NULL; | ||
| 712 | 713 | ||
| 713 | INIT_HLIST_HEAD(&empty_rp); | 714 | INIT_HLIST_HEAD(&empty_rp); |
| 714 | kretprobe_hash_lock(current, &head, &flags); | 715 | kretprobe_hash_lock(current, &head, &flags); |
| @@ -740,14 +741,34 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) | |||
| 740 | /* another task is sharing our hash bucket */ | 741 | /* another task is sharing our hash bucket */ |
| 741 | continue; | 742 | continue; |
| 742 | 743 | ||
| 744 | orig_ret_address = (unsigned long)ri->ret_addr; | ||
| 745 | |||
| 746 | if (orig_ret_address != trampoline_address) | ||
| 747 | /* | ||
| 748 | * This is the real return address. Any other | ||
| 749 | * instances associated with this task are for | ||
| 750 | * other calls deeper on the call stack | ||
| 751 | */ | ||
| 752 | break; | ||
| 753 | } | ||
| 754 | |||
| 755 | kretprobe_assert(ri, orig_ret_address, trampoline_address); | ||
| 756 | |||
| 757 | correct_ret_addr = ri->ret_addr; | ||
| 758 | hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { | ||
| 759 | if (ri->task != current) | ||
| 760 | /* another task is sharing our hash bucket */ | ||
| 761 | continue; | ||
| 762 | |||
| 763 | orig_ret_address = (unsigned long)ri->ret_addr; | ||
| 743 | if (ri->rp && ri->rp->handler) { | 764 | if (ri->rp && ri->rp->handler) { |
| 744 | __get_cpu_var(current_kprobe) = &ri->rp->kp; | 765 | __get_cpu_var(current_kprobe) = &ri->rp->kp; |
| 745 | get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; | 766 | get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; |
| 767 | ri->ret_addr = correct_ret_addr; | ||
| 746 | ri->rp->handler(ri, regs); | 768 | ri->rp->handler(ri, regs); |
| 747 | __get_cpu_var(current_kprobe) = NULL; | 769 | __get_cpu_var(current_kprobe) = NULL; |
| 748 | } | 770 | } |
| 749 | 771 | ||
| 750 | orig_ret_address = (unsigned long)ri->ret_addr; | ||
| 751 | recycle_rp_inst(ri, &empty_rp); | 772 | recycle_rp_inst(ri, &empty_rp); |
| 752 | 773 | ||
| 753 | if (orig_ret_address != trampoline_address) | 774 | if (orig_ret_address != trampoline_address) |
| @@ -759,8 +780,6 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) | |||
| 759 | break; | 780 | break; |
| 760 | } | 781 | } |
| 761 | 782 | ||
| 762 | kretprobe_assert(ri, orig_ret_address, trampoline_address); | ||
| 763 | |||
| 764 | kretprobe_hash_unlock(current, &flags); | 783 | kretprobe_hash_unlock(current, &flags); |
| 765 | 784 | ||
| 766 | hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { | 785 | hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 64ecaf0af9af..57d1868a86aa 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
| @@ -301,8 +301,9 @@ EXPORT_SYMBOL(kernel_thread); | |||
| 301 | /* | 301 | /* |
| 302 | * sys_execve() executes a new program. | 302 | * sys_execve() executes a new program. |
| 303 | */ | 303 | */ |
| 304 | long sys_execve(const char __user *name, char __user * __user *argv, | 304 | long sys_execve(const char __user *name, |
| 305 | char __user * __user *envp, struct pt_regs *regs) | 305 | const char __user *const __user *argv, |
| 306 | const char __user *const __user *envp, struct pt_regs *regs) | ||
| 306 | { | 307 | { |
| 307 | long error; | 308 | long error; |
| 308 | char *filename; | 309 | char *filename; |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b008e7883207..c3a4fbb2b996 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
| @@ -1014,6 +1014,8 @@ void __init setup_arch(char **cmdline_p) | |||
| 1014 | paging_init(); | 1014 | paging_init(); |
| 1015 | x86_init.paging.pagetable_setup_done(swapper_pg_dir); | 1015 | x86_init.paging.pagetable_setup_done(swapper_pg_dir); |
| 1016 | 1016 | ||
| 1017 | setup_trampoline_page_table(); | ||
| 1018 | |||
| 1017 | tboot_probe(); | 1019 | tboot_probe(); |
| 1018 | 1020 | ||
| 1019 | #ifdef CONFIG_X86_64 | 1021 | #ifdef CONFIG_X86_64 |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index a5e928b0cb5f..8b3bfc4dd708 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
| @@ -73,7 +73,6 @@ | |||
| 73 | 73 | ||
| 74 | #ifdef CONFIG_X86_32 | 74 | #ifdef CONFIG_X86_32 |
| 75 | u8 apicid_2_node[MAX_APICID]; | 75 | u8 apicid_2_node[MAX_APICID]; |
| 76 | static int low_mappings; | ||
| 77 | #endif | 76 | #endif |
| 78 | 77 | ||
| 79 | /* State of each CPU */ | 78 | /* State of each CPU */ |
| @@ -91,6 +90,25 @@ DEFINE_PER_CPU(int, cpu_state) = { 0 }; | |||
| 91 | static DEFINE_PER_CPU(struct task_struct *, idle_thread_array); | 90 | static DEFINE_PER_CPU(struct task_struct *, idle_thread_array); |
| 92 | #define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x)) | 91 | #define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x)) |
| 93 | #define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p)) | 92 | #define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p)) |
| 93 | |||
| 94 | /* | ||
| 95 | * We need this for trampoline_base protection from concurrent accesses when | ||
| 96 | * off- and onlining cores wildly. | ||
| 97 | */ | ||
| 98 | static DEFINE_MUTEX(x86_cpu_hotplug_driver_mutex); | ||
| 99 | |||
| 100 | void cpu_hotplug_driver_lock() | ||
| 101 | { | ||
| 102 | mutex_lock(&x86_cpu_hotplug_driver_mutex); | ||
| 103 | } | ||
| 104 | |||
| 105 | void cpu_hotplug_driver_unlock() | ||
| 106 | { | ||
| 107 | mutex_unlock(&x86_cpu_hotplug_driver_mutex); | ||
| 108 | } | ||
| 109 | |||
| 110 | ssize_t arch_cpu_probe(const char *buf, size_t count) { return -1; } | ||
| 111 | ssize_t arch_cpu_release(const char *buf, size_t count) { return -1; } | ||
| 94 | #else | 112 | #else |
| 95 | static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ; | 113 | static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ; |
| 96 | #define get_idle_for_cpu(x) (idle_thread_array[(x)]) | 114 | #define get_idle_for_cpu(x) (idle_thread_array[(x)]) |
| @@ -281,6 +299,18 @@ notrace static void __cpuinit start_secondary(void *unused) | |||
| 281 | * fragile that we want to limit the things done here to the | 299 | * fragile that we want to limit the things done here to the |
| 282 | * most necessary things. | 300 | * most necessary things. |
| 283 | */ | 301 | */ |
| 302 | |||
| 303 | #ifdef CONFIG_X86_32 | ||
| 304 | /* | ||
| 305 | * Switch away from the trampoline page-table | ||
| 306 | * | ||
| 307 | * Do this before cpu_init() because it needs to access per-cpu | ||
| 308 | * data which may not be mapped in the trampoline page-table. | ||
| 309 | */ | ||
| 310 | load_cr3(swapper_pg_dir); | ||
| 311 | __flush_tlb_all(); | ||
| 312 | #endif | ||
| 313 | |||
| 284 | vmi_bringup(); | 314 | vmi_bringup(); |
| 285 | cpu_init(); | 315 | cpu_init(); |
| 286 | preempt_disable(); | 316 | preempt_disable(); |
| @@ -299,12 +329,6 @@ notrace static void __cpuinit start_secondary(void *unused) | |||
| 299 | legacy_pic->chip->unmask(0); | 329 | legacy_pic->chip->unmask(0); |
| 300 | } | 330 | } |
| 301 | 331 | ||
| 302 | #ifdef CONFIG_X86_32 | ||
| 303 | while (low_mappings) | ||
| 304 | cpu_relax(); | ||
| 305 | __flush_tlb_all(); | ||
| 306 | #endif | ||
| 307 | |||
| 308 | /* This must be done before setting cpu_online_mask */ | 332 | /* This must be done before setting cpu_online_mask */ |
| 309 | set_cpu_sibling_map(raw_smp_processor_id()); | 333 | set_cpu_sibling_map(raw_smp_processor_id()); |
| 310 | wmb(); | 334 | wmb(); |
| @@ -750,6 +774,7 @@ do_rest: | |||
| 750 | #ifdef CONFIG_X86_32 | 774 | #ifdef CONFIG_X86_32 |
| 751 | /* Stack for startup_32 can be just as for start_secondary onwards */ | 775 | /* Stack for startup_32 can be just as for start_secondary onwards */ |
| 752 | irq_ctx_init(cpu); | 776 | irq_ctx_init(cpu); |
| 777 | initial_page_table = __pa(&trampoline_pg_dir); | ||
| 753 | #else | 778 | #else |
| 754 | clear_tsk_thread_flag(c_idle.idle, TIF_FORK); | 779 | clear_tsk_thread_flag(c_idle.idle, TIF_FORK); |
| 755 | initial_gs = per_cpu_offset(cpu); | 780 | initial_gs = per_cpu_offset(cpu); |
| @@ -897,20 +922,8 @@ int __cpuinit native_cpu_up(unsigned int cpu) | |||
| 897 | 922 | ||
| 898 | per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; | 923 | per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; |
| 899 | 924 | ||
| 900 | #ifdef CONFIG_X86_32 | ||
| 901 | /* init low mem mapping */ | ||
| 902 | clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY, | ||
| 903 | min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); | ||
| 904 | flush_tlb_all(); | ||
| 905 | low_mappings = 1; | ||
| 906 | |||
| 907 | err = do_boot_cpu(apicid, cpu); | 925 | err = do_boot_cpu(apicid, cpu); |
| 908 | 926 | ||
| 909 | zap_low_mappings(false); | ||
| 910 | low_mappings = 0; | ||
| 911 | #else | ||
| 912 | err = do_boot_cpu(apicid, cpu); | ||
| 913 | #endif | ||
| 914 | if (err) { | 927 | if (err) { |
| 915 | pr_debug("do_boot_cpu failed %d\n", err); | 928 | pr_debug("do_boot_cpu failed %d\n", err); |
| 916 | return -EIO; | 929 | return -EIO; |
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c index 196552bb412c..d5e06624e34a 100644 --- a/arch/x86/kernel/sys_i386_32.c +++ b/arch/x86/kernel/sys_i386_32.c | |||
| @@ -28,7 +28,9 @@ | |||
| 28 | * Do a system call from kernel instead of calling sys_execve so we | 28 | * Do a system call from kernel instead of calling sys_execve so we |
| 29 | * end up with proper pt_regs. | 29 | * end up with proper pt_regs. |
| 30 | */ | 30 | */ |
| 31 | int kernel_execve(const char *filename, char *const argv[], char *const envp[]) | 31 | int kernel_execve(const char *filename, |
| 32 | const char *const argv[], | ||
| 33 | const char *const envp[]) | ||
| 32 | { | 34 | { |
| 33 | long __res; | 35 | long __res; |
| 34 | asm volatile ("push %%ebx ; movl %2,%%ebx ; int $0x80 ; pop %%ebx" | 36 | asm volatile ("push %%ebx ; movl %2,%%ebx ; int $0x80 ; pop %%ebx" |
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c index c652ef62742d..e2a595257390 100644 --- a/arch/x86/kernel/trampoline.c +++ b/arch/x86/kernel/trampoline.c | |||
| @@ -1,6 +1,7 @@ | |||
| 1 | #include <linux/io.h> | 1 | #include <linux/io.h> |
| 2 | 2 | ||
| 3 | #include <asm/trampoline.h> | 3 | #include <asm/trampoline.h> |
| 4 | #include <asm/pgtable.h> | ||
| 4 | #include <asm/e820.h> | 5 | #include <asm/e820.h> |
| 5 | 6 | ||
| 6 | #if defined(CONFIG_X86_64) && defined(CONFIG_ACPI_SLEEP) | 7 | #if defined(CONFIG_X86_64) && defined(CONFIG_ACPI_SLEEP) |
| @@ -37,3 +38,19 @@ unsigned long __trampinit setup_trampoline(void) | |||
| 37 | memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE); | 38 | memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE); |
| 38 | return virt_to_phys(trampoline_base); | 39 | return virt_to_phys(trampoline_base); |
| 39 | } | 40 | } |
| 41 | |||
| 42 | void __init setup_trampoline_page_table(void) | ||
| 43 | { | ||
| 44 | #ifdef CONFIG_X86_32 | ||
| 45 | /* Copy kernel address range */ | ||
| 46 | clone_pgd_range(trampoline_pg_dir + KERNEL_PGD_BOUNDARY, | ||
| 47 | swapper_pg_dir + KERNEL_PGD_BOUNDARY, | ||
| 48 | KERNEL_PGD_PTRS); | ||
| 49 | |||
| 50 | /* Initialize low mappings */ | ||
| 51 | clone_pgd_range(trampoline_pg_dir, | ||
| 52 | swapper_pg_dir + KERNEL_PGD_BOUNDARY, | ||
| 53 | min_t(unsigned long, KERNEL_PGD_PTRS, | ||
| 54 | KERNEL_PGD_BOUNDARY)); | ||
| 55 | #endif | ||
| 56 | } | ||
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index ce8e50239332..26a863a9c2a8 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
| @@ -626,6 +626,44 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) | |||
| 626 | local_irq_restore(flags); | 626 | local_irq_restore(flags); |
| 627 | } | 627 | } |
| 628 | 628 | ||
| 629 | static unsigned long long cyc2ns_suspend; | ||
| 630 | |||
| 631 | void save_sched_clock_state(void) | ||
| 632 | { | ||
| 633 | if (!sched_clock_stable) | ||
| 634 | return; | ||
| 635 | |||
| 636 | cyc2ns_suspend = sched_clock(); | ||
| 637 | } | ||
| 638 | |||
| 639 | /* | ||
| 640 | * Even on processors with invariant TSC, TSC gets reset in some the | ||
| 641 | * ACPI system sleep states. And in some systems BIOS seem to reinit TSC to | ||
| 642 | * arbitrary value (still sync'd across cpu's) during resume from such sleep | ||
| 643 | * states. To cope up with this, recompute the cyc2ns_offset for each cpu so | ||
| 644 | * that sched_clock() continues from the point where it was left off during | ||
| 645 | * suspend. | ||
| 646 | */ | ||
| 647 | void restore_sched_clock_state(void) | ||
| 648 | { | ||
| 649 | unsigned long long offset; | ||
| 650 | unsigned long flags; | ||
| 651 | int cpu; | ||
| 652 | |||
| 653 | if (!sched_clock_stable) | ||
| 654 | return; | ||
| 655 | |||
| 656 | local_irq_save(flags); | ||
| 657 | |||
| 658 | __get_cpu_var(cyc2ns_offset) = 0; | ||
| 659 | offset = cyc2ns_suspend - sched_clock(); | ||
| 660 | |||
| 661 | for_each_possible_cpu(cpu) | ||
| 662 | per_cpu(cyc2ns_offset, cpu) = offset; | ||
| 663 | |||
| 664 | local_irq_restore(flags); | ||
| 665 | } | ||
| 666 | |||
| 629 | #ifdef CONFIG_CPU_FREQ | 667 | #ifdef CONFIG_CPU_FREQ |
| 630 | 668 | ||
| 631 | /* Frequency scaling support. Adjust the TSC based timer when the cpu frequency | 669 | /* Frequency scaling support. Adjust the TSC based timer when the cpu frequency |
