aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c4
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c9
-rw-r--r--arch/x86/kernel/cpu/perf_event.c59
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c15
-rw-r--r--arch/x86/kernel/cpu/perf_event_p4.c4
-rw-r--r--arch/x86/kernel/i387.c1
-rw-r--r--arch/x86/kernel/trampoline.c3
-rw-r--r--arch/x86/kernel/tsc.c38
8 files changed, 105 insertions, 28 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 224392d8fe8c..5e975298fa81 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -530,7 +530,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
530 err = -ENOMEM; 530 err = -ENOMEM;
531 goto out; 531 goto out;
532 } 532 }
533 if (!alloc_cpumask_var(&b->cpus, GFP_KERNEL)) { 533 if (!zalloc_cpumask_var(&b->cpus, GFP_KERNEL)) {
534 kfree(b); 534 kfree(b);
535 err = -ENOMEM; 535 err = -ENOMEM;
536 goto out; 536 goto out;
@@ -543,7 +543,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
543#ifndef CONFIG_SMP 543#ifndef CONFIG_SMP
544 cpumask_setall(b->cpus); 544 cpumask_setall(b->cpus);
545#else 545#else
546 cpumask_copy(b->cpus, c->llc_shared_map); 546 cpumask_set_cpu(cpu, b->cpus);
547#endif 547#endif
548 548
549 per_cpu(threshold_banks, cpu)[bank] = b; 549 per_cpu(threshold_banks, cpu)[bank] = b;
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index c2a8b26d4fea..d9368eeda309 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -202,10 +202,11 @@ static int therm_throt_process(bool new_event, int event, int level)
202 202
203#ifdef CONFIG_SYSFS 203#ifdef CONFIG_SYSFS
204/* Add/Remove thermal_throttle interface for CPU device: */ 204/* Add/Remove thermal_throttle interface for CPU device: */
205static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev) 205static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev,
206 unsigned int cpu)
206{ 207{
207 int err; 208 int err;
208 struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); 209 struct cpuinfo_x86 *c = &cpu_data(cpu);
209 210
210 err = sysfs_create_group(&sys_dev->kobj, &thermal_attr_group); 211 err = sysfs_create_group(&sys_dev->kobj, &thermal_attr_group);
211 if (err) 212 if (err)
@@ -251,7 +252,7 @@ thermal_throttle_cpu_callback(struct notifier_block *nfb,
251 case CPU_UP_PREPARE: 252 case CPU_UP_PREPARE:
252 case CPU_UP_PREPARE_FROZEN: 253 case CPU_UP_PREPARE_FROZEN:
253 mutex_lock(&therm_cpu_lock); 254 mutex_lock(&therm_cpu_lock);
254 err = thermal_throttle_add_dev(sys_dev); 255 err = thermal_throttle_add_dev(sys_dev, cpu);
255 mutex_unlock(&therm_cpu_lock); 256 mutex_unlock(&therm_cpu_lock);
256 WARN_ON(err); 257 WARN_ON(err);
257 break; 258 break;
@@ -287,7 +288,7 @@ static __init int thermal_throttle_init_device(void)
287#endif 288#endif
288 /* connect live CPUs to sysfs */ 289 /* connect live CPUs to sysfs */
289 for_each_online_cpu(cpu) { 290 for_each_online_cpu(cpu) {
290 err = thermal_throttle_add_dev(get_cpu_sysdev(cpu)); 291 err = thermal_throttle_add_dev(get_cpu_sysdev(cpu), cpu);
291 WARN_ON(err); 292 WARN_ON(err);
292 } 293 }
293#ifdef CONFIG_HOTPLUG_CPU 294#ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index f2da20fda02d..3efdf2870a35 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1154,7 +1154,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
1154 /* 1154 /*
1155 * event overflow 1155 * event overflow
1156 */ 1156 */
1157 handled = 1; 1157 handled++;
1158 data.period = event->hw.last_period; 1158 data.period = event->hw.last_period;
1159 1159
1160 if (!x86_perf_event_set_period(event)) 1160 if (!x86_perf_event_set_period(event))
@@ -1200,12 +1200,20 @@ void perf_events_lapic_init(void)
1200 apic_write(APIC_LVTPC, APIC_DM_NMI); 1200 apic_write(APIC_LVTPC, APIC_DM_NMI);
1201} 1201}
1202 1202
1203struct pmu_nmi_state {
1204 unsigned int marked;
1205 int handled;
1206};
1207
1208static DEFINE_PER_CPU(struct pmu_nmi_state, pmu_nmi);
1209
1203static int __kprobes 1210static int __kprobes
1204perf_event_nmi_handler(struct notifier_block *self, 1211perf_event_nmi_handler(struct notifier_block *self,
1205 unsigned long cmd, void *__args) 1212 unsigned long cmd, void *__args)
1206{ 1213{
1207 struct die_args *args = __args; 1214 struct die_args *args = __args;
1208 struct pt_regs *regs; 1215 unsigned int this_nmi;
1216 int handled;
1209 1217
1210 if (!atomic_read(&active_events)) 1218 if (!atomic_read(&active_events))
1211 return NOTIFY_DONE; 1219 return NOTIFY_DONE;
@@ -1214,22 +1222,47 @@ perf_event_nmi_handler(struct notifier_block *self,
1214 case DIE_NMI: 1222 case DIE_NMI:
1215 case DIE_NMI_IPI: 1223 case DIE_NMI_IPI:
1216 break; 1224 break;
1217 1225 case DIE_NMIUNKNOWN:
1226 this_nmi = percpu_read(irq_stat.__nmi_count);
1227 if (this_nmi != __get_cpu_var(pmu_nmi).marked)
1228 /* let the kernel handle the unknown nmi */
1229 return NOTIFY_DONE;
1230 /*
1231 * This one is a PMU back-to-back nmi. Two events
1232 * trigger 'simultaneously' raising two back-to-back
1233 * NMIs. If the first NMI handles both, the latter
1234 * will be empty and daze the CPU. So, we drop it to
1235 * avoid false-positive 'unknown nmi' messages.
1236 */
1237 return NOTIFY_STOP;
1218 default: 1238 default:
1219 return NOTIFY_DONE; 1239 return NOTIFY_DONE;
1220 } 1240 }
1221 1241
1222 regs = args->regs;
1223
1224 apic_write(APIC_LVTPC, APIC_DM_NMI); 1242 apic_write(APIC_LVTPC, APIC_DM_NMI);
1225 /* 1243
1226 * Can't rely on the handled return value to say it was our NMI, two 1244 handled = x86_pmu.handle_irq(args->regs);
1227 * events could trigger 'simultaneously' raising two back-to-back NMIs. 1245 if (!handled)
1228 * 1246 return NOTIFY_DONE;
1229 * If the first NMI handles both, the latter will be empty and daze 1247
1230 * the CPU. 1248 this_nmi = percpu_read(irq_stat.__nmi_count);
1231 */ 1249 if ((handled > 1) ||
1232 x86_pmu.handle_irq(regs); 1250 /* the next nmi could be a back-to-back nmi */
1251 ((__get_cpu_var(pmu_nmi).marked == this_nmi) &&
1252 (__get_cpu_var(pmu_nmi).handled > 1))) {
1253 /*
1254 * We could have two subsequent back-to-back nmis: The
1255 * first handles more than one counter, the 2nd
1256 * handles only one counter and the 3rd handles no
1257 * counter.
1258 *
1259 * This is the 2nd nmi because the previous was
1260 * handling more than one counter. We will mark the
1261 * next (3rd) and then drop it if unhandled.
1262 */
1263 __get_cpu_var(pmu_nmi).marked = this_nmi + 1;
1264 __get_cpu_var(pmu_nmi).handled = handled;
1265 }
1233 1266
1234 return NOTIFY_STOP; 1267 return NOTIFY_STOP;
1235} 1268}
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index d8d86d014008..ee05c90012d2 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -712,7 +712,8 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
712 struct perf_sample_data data; 712 struct perf_sample_data data;
713 struct cpu_hw_events *cpuc; 713 struct cpu_hw_events *cpuc;
714 int bit, loops; 714 int bit, loops;
715 u64 ack, status; 715 u64 status;
716 int handled = 0;
716 717
717 perf_sample_data_init(&data, 0); 718 perf_sample_data_init(&data, 0);
718 719
@@ -728,6 +729,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
728 729
729 loops = 0; 730 loops = 0;
730again: 731again:
732 intel_pmu_ack_status(status);
731 if (++loops > 100) { 733 if (++loops > 100) {
732 WARN_ONCE(1, "perfevents: irq loop stuck!\n"); 734 WARN_ONCE(1, "perfevents: irq loop stuck!\n");
733 perf_event_print_debug(); 735 perf_event_print_debug();
@@ -736,19 +738,22 @@ again:
736 } 738 }
737 739
738 inc_irq_stat(apic_perf_irqs); 740 inc_irq_stat(apic_perf_irqs);
739 ack = status;
740 741
741 intel_pmu_lbr_read(); 742 intel_pmu_lbr_read();
742 743
743 /* 744 /*
744 * PEBS overflow sets bit 62 in the global status register 745 * PEBS overflow sets bit 62 in the global status register
745 */ 746 */
746 if (__test_and_clear_bit(62, (unsigned long *)&status)) 747 if (__test_and_clear_bit(62, (unsigned long *)&status)) {
748 handled++;
747 x86_pmu.drain_pebs(regs); 749 x86_pmu.drain_pebs(regs);
750 }
748 751
749 for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { 752 for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
750 struct perf_event *event = cpuc->events[bit]; 753 struct perf_event *event = cpuc->events[bit];
751 754
755 handled++;
756
752 if (!test_bit(bit, cpuc->active_mask)) 757 if (!test_bit(bit, cpuc->active_mask))
753 continue; 758 continue;
754 759
@@ -761,8 +766,6 @@ again:
761 x86_pmu_stop(event); 766 x86_pmu_stop(event);
762 } 767 }
763 768
764 intel_pmu_ack_status(ack);
765
766 /* 769 /*
767 * Repeat if there is more work to be done: 770 * Repeat if there is more work to be done:
768 */ 771 */
@@ -772,7 +775,7 @@ again:
772 775
773done: 776done:
774 intel_pmu_enable_all(0); 777 intel_pmu_enable_all(0);
775 return 1; 778 return handled;
776} 779}
777 780
778static struct event_constraint * 781static struct event_constraint *
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index febb12cea795..b560db3305be 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -497,6 +497,8 @@ static int p4_hw_config(struct perf_event *event)
497 event->hw.config |= event->attr.config & 497 event->hw.config |= event->attr.config &
498 (p4_config_pack_escr(P4_ESCR_MASK_HT) | 498 (p4_config_pack_escr(P4_ESCR_MASK_HT) |
499 p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED)); 499 p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED));
500
501 event->hw.config &= ~P4_CCCR_FORCE_OVF;
500 } 502 }
501 503
502 rc = x86_setup_perfctr(event); 504 rc = x86_setup_perfctr(event);
@@ -690,7 +692,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
690 inc_irq_stat(apic_perf_irqs); 692 inc_irq_stat(apic_perf_irqs);
691 } 693 }
692 694
693 return handled > 0; 695 return handled;
694} 696}
695 697
696/* 698/*
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 1f11f5ce668f..a46cb3522c0c 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -40,6 +40,7 @@
40 40
41static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; 41static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
42unsigned int xstate_size; 42unsigned int xstate_size;
43EXPORT_SYMBOL_GPL(xstate_size);
43unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32); 44unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32);
44static struct i387_fxsave_struct fx_scratch __cpuinitdata; 45static struct i387_fxsave_struct fx_scratch __cpuinitdata;
45 46
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c
index a874495b3673..e2a595257390 100644
--- a/arch/x86/kernel/trampoline.c
+++ b/arch/x86/kernel/trampoline.c
@@ -45,8 +45,7 @@ void __init setup_trampoline_page_table(void)
45 /* Copy kernel address range */ 45 /* Copy kernel address range */
46 clone_pgd_range(trampoline_pg_dir + KERNEL_PGD_BOUNDARY, 46 clone_pgd_range(trampoline_pg_dir + KERNEL_PGD_BOUNDARY,
47 swapper_pg_dir + KERNEL_PGD_BOUNDARY, 47 swapper_pg_dir + KERNEL_PGD_BOUNDARY,
48 min_t(unsigned long, KERNEL_PGD_PTRS, 48 KERNEL_PGD_PTRS);
49 KERNEL_PGD_BOUNDARY));
50 49
51 /* Initialize low mappings */ 50 /* Initialize low mappings */
52 clone_pgd_range(trampoline_pg_dir, 51 clone_pgd_range(trampoline_pg_dir,
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index ce8e50239332..d632934cb638 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -626,6 +626,44 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
626 local_irq_restore(flags); 626 local_irq_restore(flags);
627} 627}
628 628
629static unsigned long long cyc2ns_suspend;
630
631void save_sched_clock_state(void)
632{
633 if (!sched_clock_stable)
634 return;
635
636 cyc2ns_suspend = sched_clock();
637}
638
639/*
640 * Even on processors with invariant TSC, TSC gets reset in some the
641 * ACPI system sleep states. And in some systems BIOS seem to reinit TSC to
642 * arbitrary value (still sync'd across cpu's) during resume from such sleep
643 * states. To cope up with this, recompute the cyc2ns_offset for each cpu so
644 * that sched_clock() continues from the point where it was left off during
645 * suspend.
646 */
647void restore_sched_clock_state(void)
648{
649 unsigned long long offset;
650 unsigned long flags;
651 int cpu;
652
653 if (!sched_clock_stable)
654 return;
655
656 local_irq_save(flags);
657
658 get_cpu_var(cyc2ns_offset) = 0;
659 offset = cyc2ns_suspend - sched_clock();
660
661 for_each_possible_cpu(cpu)
662 per_cpu(cyc2ns_offset, cpu) = offset;
663
664 local_irq_restore(flags);
665}
666
629#ifdef CONFIG_CPU_FREQ 667#ifdef CONFIG_CPU_FREQ
630 668
631/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency 669/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency