author    Zachary Amsden <zach@vmware.com>    2007-03-05 03:30:34 -0500
committer Linus Torvalds <torvalds@woody.linux-foundation.org>    2007-03-05 10:57:52 -0500
commit    7507ba34e827ca3c6bbcd34d20a8df8ba365fca6 (patch)
tree      afa186f38ab55667074a06ade7ad6a1d1ec84d9e
parent    0dc952dc3e6d96d554a19fa7bee3f3b1d55e3cff (diff)
[PATCH] vmi: timer fixes round two
Critical bugfixes for the VMI-Timer code.

1) Do not set up a one-shot alarm if we are keeping the periodic alarm
   armed. Additionally, since the periodic alarm can be run at a lower
   rate than HZ, fix up the guard for no-idle-hz mode appropriately.
   This fixes the bug where no-idle-hz mode could have a higher
   interrupt rate than the non-idle case.

2) The interrupt handler can no longer adjust xtime due to nested lock
   acquisition. Drop this. We don't need to check for wallclock time at
   every tick; it can be done in userspace instead.

3) Add a bypass to disable noidle operation. This is useful as a
   last-minute workaround, or as a testing measure.

4) The code to skip the IO_APIC timer testing (no_timer_check) should be
   conditional on IO_APIC, not SMP, since UP kernels can have this
   configured in as well.

Signed-off-by: Dan Hecht <dhecht@vmware.com>
Signed-off-by: Zachary Amsden <zach@vmware.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
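A quick usage note on item 3: the bypass only sets a disable_noidle flag
that activate_vmi() consults when picking the safe_halt implementation
(see the vmi.c hunks below). Assuming parse_vmi() is registered as the
handler for a "vmi=" boot option, as its existing disable_* keywords
suggest, the workaround would be enabled from the kernel command line
with something like:

    vmi=disable_noidle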
-rw-r--r--    arch/i386/kernel/vmi.c        23
-rw-r--r--    arch/i386/kernel/vmitime.c    30
2 files changed, 24 insertions, 29 deletions
diff --git a/arch/i386/kernel/vmi.c b/arch/i386/kernel/vmi.c
index bb5a7abf949c..8417f741fac8 100644
--- a/arch/i386/kernel/vmi.c
+++ b/arch/i386/kernel/vmi.c
@@ -54,6 +54,7 @@ static int disable_pse;
 static int disable_sep;
 static int disable_tsc;
 static int disable_mtrr;
+static int disable_noidle;
 
 /* Cached VMI operations */
 struct {
@@ -255,7 +256,6 @@ static void vmi_nop(void)
 }
 
 /* For NO_IDLE_HZ, we stop the clock when halting the kernel */
-#ifdef CONFIG_NO_IDLE_HZ
 static fastcall void vmi_safe_halt(void)
 {
         int idle = vmi_stop_hz_timer();
@@ -266,7 +266,6 @@ static fastcall void vmi_safe_halt(void)
                 local_irq_enable();
         }
 }
-#endif
 
 #ifdef CONFIG_DEBUG_PAGE_TYPE
 
@@ -742,12 +741,7 @@ static inline int __init activate_vmi(void)
                      (char *)paravirt_ops.save_fl);
         patch_offset(&irq_save_disable_callout[IRQ_PATCH_DISABLE],
                      (char *)paravirt_ops.irq_disable);
-#ifndef CONFIG_NO_IDLE_HZ
-        para_fill(safe_halt, Halt);
-#else
-        vmi_ops.halt = vmi_get_function(VMI_CALL_Halt);
-        paravirt_ops.safe_halt = vmi_safe_halt;
-#endif
+
         para_fill(wbinvd, WBINVD);
         /* paravirt_ops.read_msr = vmi_rdmsr */
         /* paravirt_ops.write_msr = vmi_wrmsr */
@@ -881,6 +875,12 @@ static inline int __init activate_vmi(void)
 #endif
                 custom_sched_clock = vmi_sched_clock;
         }
+        if (!disable_noidle)
+                para_fill(safe_halt, Halt);
+        else {
+                vmi_ops.halt = vmi_get_function(VMI_CALL_Halt);
+                paravirt_ops.safe_halt = vmi_safe_halt;
+        }
 
         /*
          * Alternative instruction rewriting doesn't happen soon enough
@@ -914,9 +914,11 @@ void __init vmi_init(void)
 
         local_irq_save(flags);
         activate_vmi();
-#ifdef CONFIG_SMP
+
+#ifdef CONFIG_X86_IO_APIC
         no_timer_check = 1;
 #endif
+
         local_irq_restore(flags & X86_EFLAGS_IF);
 }
 
@@ -942,7 +944,8 @@ static int __init parse_vmi(char *arg)
         } else if (!strcmp(arg, "disable_mtrr")) {
                 clear_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability);
                 disable_mtrr = 1;
-        }
+        } else if (!strcmp(arg, "disable_noidle"))
+                disable_noidle = 1;
         return 0;
 }
 
diff --git a/arch/i386/kernel/vmitime.c b/arch/i386/kernel/vmitime.c
index 76d2adcae5a3..694aa85d22c2 100644
--- a/arch/i386/kernel/vmitime.c
+++ b/arch/i386/kernel/vmitime.c
@@ -276,16 +276,13 @@ static void vmi_account_real_cycles(unsigned long long cur_real_cycles)
 
         cycles_not_accounted = cur_real_cycles - real_cycles_accounted_system;
         while (cycles_not_accounted >= cycles_per_jiffy) {
-                /* systems wide jiffies and wallclock. */
+                /* systems wide jiffies. */
                 do_timer(1);
 
                 cycles_not_accounted -= cycles_per_jiffy;
                 real_cycles_accounted_system += cycles_per_jiffy;
         }
 
-        if (vmi_timer_ops.wallclock_updated())
-                update_xtime_from_wallclock();
-
         write_sequnlock(&xtime_lock);
 }
 
@@ -380,7 +377,6 @@ int vmi_stop_hz_timer(void)
         unsigned long seq, next;
         unsigned long long real_cycles_expiry;
         int cpu = smp_processor_id();
-        int idle;
 
         BUG_ON(!irqs_disabled());
         if (sysctl_hz_timer != 0)
@@ -388,13 +384,13 @@ int vmi_stop_hz_timer(void)
 
         cpu_set(cpu, nohz_cpu_mask);
         smp_mb();
+
         if (rcu_needs_cpu(cpu) || local_softirq_pending() ||
-            (next = next_timer_interrupt(), time_before_eq(next, jiffies))) {
+            (next = next_timer_interrupt(),
+             time_before_eq(next, jiffies + HZ/CONFIG_VMI_ALARM_HZ))) {
                 cpu_clear(cpu, nohz_cpu_mask);
-                next = jiffies;
-                idle = 0;
-        } else
-                idle = 1;
+                return 0;
+        }
 
         /* Convert jiffies to the real cycle counter. */
         do {
@@ -404,17 +400,13 @@ int vmi_stop_hz_timer(void)
         } while (read_seqretry(&xtime_lock, seq));
 
         /* This cpu is going idle. Disable the periodic alarm. */
-        if (idle) {
-                vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE);
-                per_cpu(idle_start_jiffies, cpu) = jiffies;
-        }
-
+        vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE);
+        per_cpu(idle_start_jiffies, cpu) = jiffies;
         /* Set the real time alarm to expire at the next event. */
         vmi_timer_ops.set_alarm(
                       VMI_ALARM_WIRING | VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL,
                       real_cycles_expiry, 0);
-
-        return idle;
+        return 1;
 }
 
 static void vmi_reenable_hz_timer(int cpu)