aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2009-08-15 06:06:12 -0400
committerIngo Molnar <mingo@elte.hu>2009-08-15 06:06:12 -0400
commitbe750231ce1599b86fbba213e3da8344ece262e2 (patch)
treea506c461082692bb5bab8b9bb63a762816329454 /arch/x86
parent18408ddc01136f505ae357c03f0d8e50b10e0db6 (diff)
parent39e6dd73502f64e2ae3236b304e160ae30de9384 (diff)
Merge branch 'perfcounters/urgent' into perfcounters/core
Conflicts: kernel/perf_counter.c Merge reason: update to latest upstream (-rc6) and resolve the conflict with urgent fixes. Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c8
-rw-r--r--arch/x86/kernel/apic/x2apic_phys.c8
-rw-r--r--arch/x86/kernel/cpu/amd.c7
-rw-r--r--arch/x86/kernel/cpu/common.c48
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c18
-rw-r--r--arch/x86/kernel/cpu/perf_counter.c40
-rw-r--r--arch/x86/kernel/efi.c2
-rw-r--r--arch/x86/kernel/reboot.c16
-rw-r--r--arch/x86/kernel/tsc.c29
-rw-r--r--arch/x86/kernel/vmi_32.c2
-rw-r--r--arch/x86/kvm/i8254.c3
-rw-r--r--arch/x86/kvm/mmu.c48
-rw-r--r--arch/x86/kvm/svm.c6
-rw-r--r--arch/x86/kvm/vmx.c6
-rw-r--r--arch/x86/kvm/x86.c44
16 files changed, 207 insertions, 80 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 738bdc6b0f8b..13ffa5df37d7 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -24,6 +24,7 @@ config X86
24 select HAVE_UNSTABLE_SCHED_CLOCK 24 select HAVE_UNSTABLE_SCHED_CLOCK
25 select HAVE_IDE 25 select HAVE_IDE
26 select HAVE_OPROFILE 26 select HAVE_OPROFILE
27 select HAVE_PERF_COUNTERS if (!M386 && !M486)
27 select HAVE_IOREMAP_PROT 28 select HAVE_IOREMAP_PROT
28 select HAVE_KPROBES 29 select HAVE_KPROBES
29 select ARCH_WANT_OPTIONAL_GPIOLIB 30 select ARCH_WANT_OPTIONAL_GPIOLIB
@@ -742,7 +743,6 @@ config X86_UP_IOAPIC
742config X86_LOCAL_APIC 743config X86_LOCAL_APIC
743 def_bool y 744 def_bool y
744 depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC 745 depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC
745 select HAVE_PERF_COUNTERS if (!M386 && !M486)
746 746
747config X86_IO_APIC 747config X86_IO_APIC
748 def_bool y 748 def_bool y
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 2ed4e2bb3b32..a5371ec36776 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -17,11 +17,13 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
17 return x2apic_enabled(); 17 return x2apic_enabled();
18} 18}
19 19
20/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ 20/*
21 21 * need to use more than cpu 0, because we need more vectors when
22 * MSI-X are used.
23 */
22static const struct cpumask *x2apic_target_cpus(void) 24static const struct cpumask *x2apic_target_cpus(void)
23{ 25{
24 return cpumask_of(0); 26 return cpu_online_mask;
25} 27}
26 28
27/* 29/*
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 0b631c6a2e00..a8989aadc99a 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -27,11 +27,13 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
27 return 0; 27 return 0;
28} 28}
29 29
30/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ 30/*
31 31 * need to use more than cpu 0, because we need more vectors when
32 * MSI-X are used.
33 */
32static const struct cpumask *x2apic_target_cpus(void) 34static const struct cpumask *x2apic_target_cpus(void)
33{ 35{
34 return cpumask_of(0); 36 return cpu_online_mask;
35} 37}
36 38
37static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) 39static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index e2485b03f1cf..63fddcd082cd 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -400,6 +400,13 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
400 level = cpuid_eax(1); 400 level = cpuid_eax(1);
401 if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) 401 if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
402 set_cpu_cap(c, X86_FEATURE_REP_GOOD); 402 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
403
404 /*
405 * Some BIOSes incorrectly force this feature, but only K8
406 * revision D (model = 0x14) and later actually support it.
407 */
408 if (c->x86_model < 0x14)
409 clear_cpu_cap(c, X86_FEATURE_LAHF_LM);
403 } 410 }
404 if (c->x86 == 0x10 || c->x86 == 0x11) 411 if (c->x86 == 0x10 || c->x86 == 0x11)
405 set_cpu_cap(c, X86_FEATURE_REP_GOOD); 412 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index f1961c07af9a..5ce60a88027b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -59,7 +59,30 @@ void __init setup_cpu_local_masks(void)
59 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); 59 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
60} 60}
61 61
62static const struct cpu_dev *this_cpu __cpuinitdata; 62static void __cpuinit default_init(struct cpuinfo_x86 *c)
63{
64#ifdef CONFIG_X86_64
65 display_cacheinfo(c);
66#else
67 /* Not much we can do here... */
68 /* Check if at least it has cpuid */
69 if (c->cpuid_level == -1) {
70 /* No cpuid. It must be an ancient CPU */
71 if (c->x86 == 4)
72 strcpy(c->x86_model_id, "486");
73 else if (c->x86 == 3)
74 strcpy(c->x86_model_id, "386");
75 }
76#endif
77}
78
79static const struct cpu_dev __cpuinitconst default_cpu = {
80 .c_init = default_init,
81 .c_vendor = "Unknown",
82 .c_x86_vendor = X86_VENDOR_UNKNOWN,
83};
84
85static const struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
63 86
64DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { 87DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
65#ifdef CONFIG_X86_64 88#ifdef CONFIG_X86_64
@@ -332,29 +355,6 @@ void switch_to_new_gdt(int cpu)
332 355
333static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {}; 356static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {};
334 357
335static void __cpuinit default_init(struct cpuinfo_x86 *c)
336{
337#ifdef CONFIG_X86_64
338 display_cacheinfo(c);
339#else
340 /* Not much we can do here... */
341 /* Check if at least it has cpuid */
342 if (c->cpuid_level == -1) {
343 /* No cpuid. It must be an ancient CPU */
344 if (c->x86 == 4)
345 strcpy(c->x86_model_id, "486");
346 else if (c->x86 == 3)
347 strcpy(c->x86_model_id, "386");
348 }
349#endif
350}
351
352static const struct cpu_dev __cpuinitconst default_cpu = {
353 .c_init = default_init,
354 .c_vendor = "Unknown",
355 .c_x86_vendor = X86_VENDOR_UNKNOWN,
356};
357
358static void __cpuinit get_model_name(struct cpuinfo_x86 *c) 358static void __cpuinit get_model_name(struct cpuinfo_x86 *c)
359{ 359{
360 unsigned int *v; 360 unsigned int *v;
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index bff8dd191dd5..8bc64cfbe936 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -36,6 +36,7 @@
36 36
37static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; 37static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES;
38static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); 38static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
39static DEFINE_PER_CPU(bool, thermal_throttle_active);
39 40
40static atomic_t therm_throt_en = ATOMIC_INIT(0); 41static atomic_t therm_throt_en = ATOMIC_INIT(0);
41 42
@@ -96,24 +97,27 @@ static int therm_throt_process(int curr)
96{ 97{
97 unsigned int cpu = smp_processor_id(); 98 unsigned int cpu = smp_processor_id();
98 __u64 tmp_jiffs = get_jiffies_64(); 99 __u64 tmp_jiffs = get_jiffies_64();
100 bool was_throttled = __get_cpu_var(thermal_throttle_active);
101 bool is_throttled = __get_cpu_var(thermal_throttle_active) = curr;
99 102
100 if (curr) 103 if (is_throttled)
101 __get_cpu_var(thermal_throttle_count)++; 104 __get_cpu_var(thermal_throttle_count)++;
102 105
103 if (time_before64(tmp_jiffs, __get_cpu_var(next_check))) 106 if (!(was_throttled ^ is_throttled) &&
107 time_before64(tmp_jiffs, __get_cpu_var(next_check)))
104 return 0; 108 return 0;
105 109
106 __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL; 110 __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL;
107 111
108 /* if we just entered the thermal event */ 112 /* if we just entered the thermal event */
109 if (curr) { 113 if (is_throttled) {
110 printk(KERN_CRIT "CPU%d: Temperature above threshold, " 114 printk(KERN_CRIT "CPU%d: Temperature above threshold, "
111 "cpu clock throttled (total events = %lu)\n", cpu, 115 "cpu clock throttled (total events = %lu)\n",
112 __get_cpu_var(thermal_throttle_count)); 116 cpu, __get_cpu_var(thermal_throttle_count));
113 117
114 add_taint(TAINT_MACHINE_CHECK); 118 add_taint(TAINT_MACHINE_CHECK);
115 } else { 119 } else if (was_throttled) {
116 printk(KERN_CRIT "CPU%d: Temperature/speed normal\n", cpu); 120 printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
117 } 121 }
118 122
119 return 1; 123 return 1;
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index b237c181aa41..396e35db7058 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -97,6 +97,7 @@ struct x86_pmu {
97 int num_counters_fixed; 97 int num_counters_fixed;
98 int counter_bits; 98 int counter_bits;
99 u64 counter_mask; 99 u64 counter_mask;
100 int apic;
100 u64 max_period; 101 u64 max_period;
101 u64 intel_ctrl; 102 u64 intel_ctrl;
102 void (*enable_bts)(u64 config); 103 void (*enable_bts)(u64 config);
@@ -116,8 +117,8 @@ static const u64 p6_perfmon_event_map[] =
116{ 117{
117 [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, 118 [PERF_COUNT_HW_CPU_CYCLES] = 0x0079,
118 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, 119 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
119 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0000, 120 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e,
120 [PERF_COUNT_HW_CACHE_MISSES] = 0x0000, 121 [PERF_COUNT_HW_CACHE_MISSES] = 0x012e,
121 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, 122 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
122 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, 123 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
123 [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, 124 [PERF_COUNT_HW_BUS_CYCLES] = 0x0062,
@@ -660,6 +661,7 @@ static DEFINE_MUTEX(pmc_reserve_mutex);
660 661
661static bool reserve_pmc_hardware(void) 662static bool reserve_pmc_hardware(void)
662{ 663{
664#ifdef CONFIG_X86_LOCAL_APIC
663 int i; 665 int i;
664 666
665 if (nmi_watchdog == NMI_LOCAL_APIC) 667 if (nmi_watchdog == NMI_LOCAL_APIC)
@@ -674,9 +676,11 @@ static bool reserve_pmc_hardware(void)
674 if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) 676 if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
675 goto eventsel_fail; 677 goto eventsel_fail;
676 } 678 }
679#endif
677 680
678 return true; 681 return true;
679 682
683#ifdef CONFIG_X86_LOCAL_APIC
680eventsel_fail: 684eventsel_fail:
681 for (i--; i >= 0; i--) 685 for (i--; i >= 0; i--)
682 release_evntsel_nmi(x86_pmu.eventsel + i); 686 release_evntsel_nmi(x86_pmu.eventsel + i);
@@ -691,10 +695,12 @@ perfctr_fail:
691 enable_lapic_nmi_watchdog(); 695 enable_lapic_nmi_watchdog();
692 696
693 return false; 697 return false;
698#endif
694} 699}
695 700
696static void release_pmc_hardware(void) 701static void release_pmc_hardware(void)
697{ 702{
703#ifdef CONFIG_X86_LOCAL_APIC
698 int i; 704 int i;
699 705
700 for (i = 0; i < x86_pmu.num_counters; i++) { 706 for (i = 0; i < x86_pmu.num_counters; i++) {
@@ -704,6 +710,7 @@ static void release_pmc_hardware(void)
704 710
705 if (nmi_watchdog == NMI_LOCAL_APIC) 711 if (nmi_watchdog == NMI_LOCAL_APIC)
706 enable_lapic_nmi_watchdog(); 712 enable_lapic_nmi_watchdog();
713#endif
707} 714}
708 715
709static inline bool bts_available(void) 716static inline bool bts_available(void)
@@ -934,6 +941,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
934 hwc->sample_period = x86_pmu.max_period; 941 hwc->sample_period = x86_pmu.max_period;
935 hwc->last_period = hwc->sample_period; 942 hwc->last_period = hwc->sample_period;
936 atomic64_set(&hwc->period_left, hwc->sample_period); 943 atomic64_set(&hwc->period_left, hwc->sample_period);
944 } else {
945 /*
946 * If we have a PMU initialized but no APIC
947 * interrupts, we cannot sample hardware
948 * counters (user-space has to fall back and
949 * sample via a hrtimer based software counter):
950 */
951 if (!x86_pmu.apic)
952 return -EOPNOTSUPP;
937 } 953 }
938 954
939 counter->destroy = hw_perf_counter_destroy; 955 counter->destroy = hw_perf_counter_destroy;
@@ -1755,18 +1771,22 @@ void smp_perf_pending_interrupt(struct pt_regs *regs)
1755 1771
1756void set_perf_counter_pending(void) 1772void set_perf_counter_pending(void)
1757{ 1773{
1774#ifdef CONFIG_X86_LOCAL_APIC
1758 apic->send_IPI_self(LOCAL_PENDING_VECTOR); 1775 apic->send_IPI_self(LOCAL_PENDING_VECTOR);
1776#endif
1759} 1777}
1760 1778
1761void perf_counters_lapic_init(void) 1779void perf_counters_lapic_init(void)
1762{ 1780{
1763 if (!x86_pmu_initialized()) 1781#ifdef CONFIG_X86_LOCAL_APIC
1782 if (!x86_pmu.apic || !x86_pmu_initialized())
1764 return; 1783 return;
1765 1784
1766 /* 1785 /*
1767 * Always use NMI for PMU 1786 * Always use NMI for PMU
1768 */ 1787 */
1769 apic_write(APIC_LVTPC, APIC_DM_NMI); 1788 apic_write(APIC_LVTPC, APIC_DM_NMI);
1789#endif
1770} 1790}
1771 1791
1772static int __kprobes 1792static int __kprobes
@@ -1790,7 +1810,9 @@ perf_counter_nmi_handler(struct notifier_block *self,
1790 1810
1791 regs = args->regs; 1811 regs = args->regs;
1792 1812
1813#ifdef CONFIG_X86_LOCAL_APIC
1793 apic_write(APIC_LVTPC, APIC_DM_NMI); 1814 apic_write(APIC_LVTPC, APIC_DM_NMI);
1815#endif
1794 /* 1816 /*
1795 * Can't rely on the handled return value to say it was our NMI, two 1817 * Can't rely on the handled return value to say it was our NMI, two
1796 * counters could trigger 'simultaneously' raising two back-to-back NMIs. 1818 * counters could trigger 'simultaneously' raising two back-to-back NMIs.
@@ -1821,6 +1843,7 @@ static struct x86_pmu p6_pmu = {
1821 .event_map = p6_pmu_event_map, 1843 .event_map = p6_pmu_event_map,
1822 .raw_event = p6_pmu_raw_event, 1844 .raw_event = p6_pmu_raw_event,
1823 .max_events = ARRAY_SIZE(p6_perfmon_event_map), 1845 .max_events = ARRAY_SIZE(p6_perfmon_event_map),
1846 .apic = 1,
1824 .max_period = (1ULL << 31) - 1, 1847 .max_period = (1ULL << 31) - 1,
1825 .version = 0, 1848 .version = 0,
1826 .num_counters = 2, 1849 .num_counters = 2,
@@ -1847,6 +1870,7 @@ static struct x86_pmu intel_pmu = {
1847 .event_map = intel_pmu_event_map, 1870 .event_map = intel_pmu_event_map,
1848 .raw_event = intel_pmu_raw_event, 1871 .raw_event = intel_pmu_raw_event,
1849 .max_events = ARRAY_SIZE(intel_perfmon_event_map), 1872 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
1873 .apic = 1,
1850 /* 1874 /*
1851 * Intel PMCs cannot be accessed sanely above 32 bit width, 1875 * Intel PMCs cannot be accessed sanely above 32 bit width,
1852 * so we install an artificial 1<<31 period regardless of 1876 * so we install an artificial 1<<31 period regardless of
@@ -1872,6 +1896,7 @@ static struct x86_pmu amd_pmu = {
1872 .num_counters = 4, 1896 .num_counters = 4,
1873 .counter_bits = 48, 1897 .counter_bits = 48,
1874 .counter_mask = (1ULL << 48) - 1, 1898 .counter_mask = (1ULL << 48) - 1,
1899 .apic = 1,
1875 /* use highest bit to detect overflow */ 1900 /* use highest bit to detect overflow */
1876 .max_period = (1ULL << 47) - 1, 1901 .max_period = (1ULL << 47) - 1,
1877}; 1902};
@@ -1897,13 +1922,14 @@ static int p6_pmu_init(void)
1897 return -ENODEV; 1922 return -ENODEV;
1898 } 1923 }
1899 1924
1925 x86_pmu = p6_pmu;
1926
1900 if (!cpu_has_apic) { 1927 if (!cpu_has_apic) {
1901 pr_info("no Local APIC, try rebooting with lapic"); 1928 pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
1902 return -ENODEV; 1929 pr_info("no hardware sampling interrupt available.\n");
1930 x86_pmu.apic = 0;
1903 } 1931 }
1904 1932
1905 x86_pmu = p6_pmu;
1906
1907 return 0; 1933 return 0;
1908} 1934}
1909 1935
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 19ccf6d0dccf..fe26ba3e3451 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -354,7 +354,7 @@ void __init efi_init(void)
354 */ 354 */
355 c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2); 355 c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2);
356 if (c16) { 356 if (c16) {
357 for (i = 0; i < sizeof(vendor) && *c16; ++i) 357 for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i)
358 vendor[i] = *c16++; 358 vendor[i] = *c16++;
359 vendor[i] = '\0'; 359 vendor[i] = '\0';
360 } else 360 } else
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 834c9da8bf9d..a06e8d101844 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -405,7 +405,7 @@ EXPORT_SYMBOL(machine_real_restart);
405#endif /* CONFIG_X86_32 */ 405#endif /* CONFIG_X86_32 */
406 406
407/* 407/*
408 * Apple MacBook5,2 (2009 MacBook) needs reboot=p 408 * Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot
409 */ 409 */
410static int __init set_pci_reboot(const struct dmi_system_id *d) 410static int __init set_pci_reboot(const struct dmi_system_id *d)
411{ 411{
@@ -418,12 +418,20 @@ static int __init set_pci_reboot(const struct dmi_system_id *d)
418} 418}
419 419
420static struct dmi_system_id __initdata pci_reboot_dmi_table[] = { 420static struct dmi_system_id __initdata pci_reboot_dmi_table[] = {
421 { /* Handle problems with rebooting on Apple MacBook5,2 */ 421 { /* Handle problems with rebooting on Apple MacBook5 */
422 .callback = set_pci_reboot, 422 .callback = set_pci_reboot,
423 .ident = "Apple MacBook", 423 .ident = "Apple MacBook5",
424 .matches = { 424 .matches = {
425 DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), 425 DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
426 DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5,2"), 426 DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5"),
427 },
428 },
429 { /* Handle problems with rebooting on Apple MacBookPro5 */
430 .callback = set_pci_reboot,
431 .ident = "Apple MacBookPro5",
432 .matches = {
433 DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
434 DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro5"),
427 }, 435 },
428 }, 436 },
429 { } 437 { }
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 6e1a368d21d4..71f4368b357e 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -275,15 +275,20 @@ static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
275 * use the TSC value at the transitions to calculate a pretty 275 * use the TSC value at the transitions to calculate a pretty
276 * good value for the TSC frequencty. 276 * good value for the TSC frequencty.
277 */ 277 */
278static inline int pit_verify_msb(unsigned char val)
279{
280 /* Ignore LSB */
281 inb(0x42);
282 return inb(0x42) == val;
283}
284
278static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap) 285static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap)
279{ 286{
280 int count; 287 int count;
281 u64 tsc = 0; 288 u64 tsc = 0;
282 289
283 for (count = 0; count < 50000; count++) { 290 for (count = 0; count < 50000; count++) {
284 /* Ignore LSB */ 291 if (!pit_verify_msb(val))
285 inb(0x42);
286 if (inb(0x42) != val)
287 break; 292 break;
288 tsc = get_cycles(); 293 tsc = get_cycles();
289 } 294 }
@@ -336,8 +341,7 @@ static unsigned long quick_pit_calibrate(void)
336 * to do that is to just read back the 16-bit counter 341 * to do that is to just read back the 16-bit counter
337 * once from the PIT. 342 * once from the PIT.
338 */ 343 */
339 inb(0x42); 344 pit_verify_msb(0);
340 inb(0x42);
341 345
342 if (pit_expect_msb(0xff, &tsc, &d1)) { 346 if (pit_expect_msb(0xff, &tsc, &d1)) {
343 for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) { 347 for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) {
@@ -348,8 +352,19 @@ static unsigned long quick_pit_calibrate(void)
348 * Iterate until the error is less than 500 ppm 352 * Iterate until the error is less than 500 ppm
349 */ 353 */
350 delta -= tsc; 354 delta -= tsc;
351 if (d1+d2 < delta >> 11) 355 if (d1+d2 >= delta >> 11)
352 goto success; 356 continue;
357
358 /*
359 * Check the PIT one more time to verify that
360 * all TSC reads were stable wrt the PIT.
361 *
362 * This also guarantees serialization of the
363 * last cycle read ('d2') in pit_expect_msb.
364 */
365 if (!pit_verify_msb(0xfe - i))
366 break;
367 goto success;
353 } 368 }
354 } 369 }
355 printk("Fast TSC calibration failed\n"); 370 printk("Fast TSC calibration failed\n");
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index b263423fbe2a..95a7289e4b0c 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -441,7 +441,7 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip,
441 ap.ds = __USER_DS; 441 ap.ds = __USER_DS;
442 ap.es = __USER_DS; 442 ap.es = __USER_DS;
443 ap.fs = __KERNEL_PERCPU; 443 ap.fs = __KERNEL_PERCPU;
444 ap.gs = 0; 444 ap.gs = __KERNEL_STACK_CANARY;
445 445
446 ap.eflags = 0; 446 ap.eflags = 0;
447 447
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 4d6f0d293ee2..21f68e00524f 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -104,6 +104,9 @@ static s64 __kpit_elapsed(struct kvm *kvm)
104 ktime_t remaining; 104 ktime_t remaining;
105 struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state; 105 struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
106 106
107 if (!ps->pit_timer.period)
108 return 0;
109
107 /* 110 /*
108 * The Counter does not stop when it reaches zero. In 111 * The Counter does not stop when it reaches zero. In
109 * Modes 0, 1, 4, and 5 the Counter ``wraps around'' to 112 * Modes 0, 1, 4, and 5 the Counter ``wraps around'' to
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 7030b5f911bf..0ef5bb2b4043 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -489,16 +489,20 @@ static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int lpage)
489 * 489 *
490 * If rmapp bit zero is one, (then rmap & ~1) points to a struct kvm_rmap_desc 490 * If rmapp bit zero is one, (then rmap & ~1) points to a struct kvm_rmap_desc
491 * containing more mappings. 491 * containing more mappings.
492 *
493 * Returns the number of rmap entries before the spte was added or zero if
494 * the spte was not added.
495 *
492 */ 496 */
493static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage) 497static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage)
494{ 498{
495 struct kvm_mmu_page *sp; 499 struct kvm_mmu_page *sp;
496 struct kvm_rmap_desc *desc; 500 struct kvm_rmap_desc *desc;
497 unsigned long *rmapp; 501 unsigned long *rmapp;
498 int i; 502 int i, count = 0;
499 503
500 if (!is_rmap_pte(*spte)) 504 if (!is_rmap_pte(*spte))
501 return; 505 return count;
502 gfn = unalias_gfn(vcpu->kvm, gfn); 506 gfn = unalias_gfn(vcpu->kvm, gfn);
503 sp = page_header(__pa(spte)); 507 sp = page_header(__pa(spte));
504 sp->gfns[spte - sp->spt] = gfn; 508 sp->gfns[spte - sp->spt] = gfn;
@@ -515,8 +519,10 @@ static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage)
515 } else { 519 } else {
516 rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte); 520 rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte);
517 desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul); 521 desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
518 while (desc->shadow_ptes[RMAP_EXT-1] && desc->more) 522 while (desc->shadow_ptes[RMAP_EXT-1] && desc->more) {
519 desc = desc->more; 523 desc = desc->more;
524 count += RMAP_EXT;
525 }
520 if (desc->shadow_ptes[RMAP_EXT-1]) { 526 if (desc->shadow_ptes[RMAP_EXT-1]) {
521 desc->more = mmu_alloc_rmap_desc(vcpu); 527 desc->more = mmu_alloc_rmap_desc(vcpu);
522 desc = desc->more; 528 desc = desc->more;
@@ -525,6 +531,7 @@ static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage)
525 ; 531 ;
526 desc->shadow_ptes[i] = spte; 532 desc->shadow_ptes[i] = spte;
527 } 533 }
534 return count;
528} 535}
529 536
530static void rmap_desc_remove_entry(unsigned long *rmapp, 537static void rmap_desc_remove_entry(unsigned long *rmapp,
@@ -754,6 +761,19 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp)
754 return young; 761 return young;
755} 762}
756 763
764#define RMAP_RECYCLE_THRESHOLD 1000
765
766static void rmap_recycle(struct kvm_vcpu *vcpu, gfn_t gfn, int lpage)
767{
768 unsigned long *rmapp;
769
770 gfn = unalias_gfn(vcpu->kvm, gfn);
771 rmapp = gfn_to_rmap(vcpu->kvm, gfn, lpage);
772
773 kvm_unmap_rmapp(vcpu->kvm, rmapp);
774 kvm_flush_remote_tlbs(vcpu->kvm);
775}
776
757int kvm_age_hva(struct kvm *kvm, unsigned long hva) 777int kvm_age_hva(struct kvm *kvm, unsigned long hva)
758{ 778{
759 return kvm_handle_hva(kvm, hva, kvm_age_rmapp); 779 return kvm_handle_hva(kvm, hva, kvm_age_rmapp);
@@ -1407,24 +1427,25 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
1407 */ 1427 */
1408void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages) 1428void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages)
1409{ 1429{
1430 int used_pages;
1431
1432 used_pages = kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages;
1433 used_pages = max(0, used_pages);
1434
1410 /* 1435 /*
1411 * If we set the number of mmu pages to be smaller be than the 1436 * If we set the number of mmu pages to be smaller be than the
1412 * number of actived pages , we must to free some mmu pages before we 1437 * number of actived pages , we must to free some mmu pages before we
1413 * change the value 1438 * change the value
1414 */ 1439 */
1415 1440
1416 if ((kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages) > 1441 if (used_pages > kvm_nr_mmu_pages) {
1417 kvm_nr_mmu_pages) { 1442 while (used_pages > kvm_nr_mmu_pages) {
1418 int n_used_mmu_pages = kvm->arch.n_alloc_mmu_pages
1419 - kvm->arch.n_free_mmu_pages;
1420
1421 while (n_used_mmu_pages > kvm_nr_mmu_pages) {
1422 struct kvm_mmu_page *page; 1443 struct kvm_mmu_page *page;
1423 1444
1424 page = container_of(kvm->arch.active_mmu_pages.prev, 1445 page = container_of(kvm->arch.active_mmu_pages.prev,
1425 struct kvm_mmu_page, link); 1446 struct kvm_mmu_page, link);
1426 kvm_mmu_zap_page(kvm, page); 1447 kvm_mmu_zap_page(kvm, page);
1427 n_used_mmu_pages--; 1448 used_pages--;
1428 } 1449 }
1429 kvm->arch.n_free_mmu_pages = 0; 1450 kvm->arch.n_free_mmu_pages = 0;
1430 } 1451 }
@@ -1740,6 +1761,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1740{ 1761{
1741 int was_rmapped = 0; 1762 int was_rmapped = 0;
1742 int was_writeble = is_writeble_pte(*shadow_pte); 1763 int was_writeble = is_writeble_pte(*shadow_pte);
1764 int rmap_count;
1743 1765
1744 pgprintk("%s: spte %llx access %x write_fault %d" 1766 pgprintk("%s: spte %llx access %x write_fault %d"
1745 " user_fault %d gfn %lx\n", 1767 " user_fault %d gfn %lx\n",
@@ -1781,9 +1803,11 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1781 1803
1782 page_header_update_slot(vcpu->kvm, shadow_pte, gfn); 1804 page_header_update_slot(vcpu->kvm, shadow_pte, gfn);
1783 if (!was_rmapped) { 1805 if (!was_rmapped) {
1784 rmap_add(vcpu, shadow_pte, gfn, largepage); 1806 rmap_count = rmap_add(vcpu, shadow_pte, gfn, largepage);
1785 if (!is_rmap_pte(*shadow_pte)) 1807 if (!is_rmap_pte(*shadow_pte))
1786 kvm_release_pfn_clean(pfn); 1808 kvm_release_pfn_clean(pfn);
1809 if (rmap_count > RMAP_RECYCLE_THRESHOLD)
1810 rmap_recycle(vcpu, gfn, largepage);
1787 } else { 1811 } else {
1788 if (was_writeble) 1812 if (was_writeble)
1789 kvm_release_pfn_dirty(pfn); 1813 kvm_release_pfn_dirty(pfn);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 71510e07e69e..b1f658ad2f06 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -711,6 +711,7 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
711 svm->vmcb->control.tsc_offset += delta; 711 svm->vmcb->control.tsc_offset += delta;
712 vcpu->cpu = cpu; 712 vcpu->cpu = cpu;
713 kvm_migrate_timers(vcpu); 713 kvm_migrate_timers(vcpu);
714 svm->asid_generation = 0;
714 } 715 }
715 716
716 for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) 717 for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
@@ -1031,7 +1032,6 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *svm_data)
1031 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID; 1032 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
1032 } 1033 }
1033 1034
1034 svm->vcpu.cpu = svm_data->cpu;
1035 svm->asid_generation = svm_data->asid_generation; 1035 svm->asid_generation = svm_data->asid_generation;
1036 svm->vmcb->control.asid = svm_data->next_asid++; 1036 svm->vmcb->control.asid = svm_data->next_asid++;
1037} 1037}
@@ -2300,8 +2300,8 @@ static void pre_svm_run(struct vcpu_svm *svm)
2300 struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu); 2300 struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu);
2301 2301
2302 svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING; 2302 svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
2303 if (svm->vcpu.cpu != cpu || 2303 /* FIXME: handle wraparound of asid_generation */
2304 svm->asid_generation != svm_data->asid_generation) 2304 if (svm->asid_generation != svm_data->asid_generation)
2305 new_asid(svm, svm_data); 2305 new_asid(svm, svm_data);
2306} 2306}
2307 2307
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 356a0ce85c68..29f912927a58 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3157,8 +3157,8 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
3157 struct vcpu_vmx *vmx = to_vmx(vcpu); 3157 struct vcpu_vmx *vmx = to_vmx(vcpu);
3158 enum emulation_result err = EMULATE_DONE; 3158 enum emulation_result err = EMULATE_DONE;
3159 3159
3160 preempt_enable();
3161 local_irq_enable(); 3160 local_irq_enable();
3161 preempt_enable();
3162 3162
3163 while (!guest_state_valid(vcpu)) { 3163 while (!guest_state_valid(vcpu)) {
3164 err = emulate_instruction(vcpu, kvm_run, 0, 0, 0); 3164 err = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
@@ -3168,7 +3168,7 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
3168 3168
3169 if (err != EMULATE_DONE) { 3169 if (err != EMULATE_DONE) {
3170 kvm_report_emulation_failure(vcpu, "emulation failure"); 3170 kvm_report_emulation_failure(vcpu, "emulation failure");
3171 return; 3171 break;
3172 } 3172 }
3173 3173
3174 if (signal_pending(current)) 3174 if (signal_pending(current))
@@ -3177,8 +3177,8 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
3177 schedule(); 3177 schedule();
3178 } 3178 }
3179 3179
3180 local_irq_disable();
3181 preempt_disable(); 3180 preempt_disable();
3181 local_irq_disable();
3182 3182
3183 vmx->invalid_state_emulation_result = err; 3183 vmx->invalid_state_emulation_result = err;
3184} 3184}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fe5474aec41a..3d4529011828 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -704,11 +704,48 @@ static bool msr_mtrr_valid(unsigned msr)
704 return false; 704 return false;
705} 705}
706 706
707static bool valid_pat_type(unsigned t)
708{
709 return t < 8 && (1 << t) & 0xf3; /* 0, 1, 4, 5, 6, 7 */
710}
711
712static bool valid_mtrr_type(unsigned t)
713{
714 return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */
715}
716
717static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
718{
719 int i;
720
721 if (!msr_mtrr_valid(msr))
722 return false;
723
724 if (msr == MSR_IA32_CR_PAT) {
725 for (i = 0; i < 8; i++)
726 if (!valid_pat_type((data >> (i * 8)) & 0xff))
727 return false;
728 return true;
729 } else if (msr == MSR_MTRRdefType) {
730 if (data & ~0xcff)
731 return false;
732 return valid_mtrr_type(data & 0xff);
733 } else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) {
734 for (i = 0; i < 8 ; i++)
735 if (!valid_mtrr_type((data >> (i * 8)) & 0xff))
736 return false;
737 return true;
738 }
739
740 /* variable MTRRs */
741 return valid_mtrr_type(data & 0xff);
742}
743
707static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data) 744static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
708{ 745{
709 u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges; 746 u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
710 747
711 if (!msr_mtrr_valid(msr)) 748 if (!mtrr_valid(vcpu, msr, data))
712 return 1; 749 return 1;
713 750
714 if (msr == MSR_MTRRdefType) { 751 if (msr == MSR_MTRRdefType) {
@@ -1079,14 +1116,13 @@ long kvm_arch_dev_ioctl(struct file *filp,
1079 if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list)) 1116 if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
1080 goto out; 1117 goto out;
1081 r = -E2BIG; 1118 r = -E2BIG;
1082 if (n < num_msrs_to_save) 1119 if (n < msr_list.nmsrs)
1083 goto out; 1120 goto out;
1084 r = -EFAULT; 1121 r = -EFAULT;
1085 if (copy_to_user(user_msr_list->indices, &msrs_to_save, 1122 if (copy_to_user(user_msr_list->indices, &msrs_to_save,
1086 num_msrs_to_save * sizeof(u32))) 1123 num_msrs_to_save * sizeof(u32)))
1087 goto out; 1124 goto out;
1088 if (copy_to_user(user_msr_list->indices 1125 if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
1089 + num_msrs_to_save * sizeof(u32),
1090 &emulated_msrs, 1126 &emulated_msrs,
1091 ARRAY_SIZE(emulated_msrs) * sizeof(u32))) 1127 ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
1092 goto out; 1128 goto out;