diff options
author | Rusty Russell <rusty@rustcorp.com.au> | 2007-07-26 23:42:52 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-07-28 22:54:33 -0400 |
commit | 6c8dca5d53f95009d4fff00195bf38f277dc4366 (patch) | |
tree | 60cc83cf949d6e598e6dc80dc668aebd42c65540 /drivers/lguest/lguest.c | |
parent | a8a11f06973fa63ad692a8f97694cb5eeb70b3f3 (diff) |
Provide timespec to guests rather than jiffies clock.
A non-periodic clock_event_device and the "jiffies" clock don't mix well:
tick_handle_periodic() can go into an infinite loop.
Currently lguest guests use the jiffies clock when the TSC is
unusable. Instead, make the Host write the current time into the lguest
page on every interrupt. This doesn't cost much but is more precise
and at least as accurate as the jiffies clock. It also gets rid of
the GET_WALLCLOCK hypercall.
Also, delay setting sched_clock until our clock is set up, otherwise
the early printk timestamps can go backwards (not harmful, just ugly).
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'drivers/lguest/lguest.c')
-rw-r--r-- | drivers/lguest/lguest.c | 52 |
1 files changed, 36 insertions, 16 deletions
diff --git a/drivers/lguest/lguest.c b/drivers/lguest/lguest.c index 3386b0e76900..1bc1546c7fd0 100644 --- a/drivers/lguest/lguest.c +++ b/drivers/lguest/lguest.c | |||
@@ -643,21 +643,42 @@ static void __init lguest_init_IRQ(void) | |||
643 | * Time. | 643 | * Time. |
644 | * | 644 | * |
645 | * It would be far better for everyone if the Guest had its own clock, but | 645 | * It would be far better for everyone if the Guest had its own clock, but |
646 | * until then it must ask the Host for the time. | 646 | * until then the Host gives us the time on every interrupt. |
647 | */ | 647 | */ |
648 | static unsigned long lguest_get_wallclock(void) | 648 | static unsigned long lguest_get_wallclock(void) |
649 | { | 649 | { |
650 | return hcall(LHCALL_GET_WALLCLOCK, 0, 0, 0); | 650 | return lguest_data.time.tv_sec; |
651 | } | 651 | } |
652 | 652 | ||
653 | /* If the Host tells us we can trust the TSC, we use that, otherwise we simply | ||
654 | * use the imprecise but reliable "jiffies" counter. */ | ||
655 | static cycle_t lguest_clock_read(void) | 653 | static cycle_t lguest_clock_read(void) |
656 | { | 654 | { |
655 | unsigned long sec, nsec; | ||
656 | |||
657 | /* If the Host tells us the TSC speed, we can trust that. */ | ||
657 | if (lguest_data.tsc_khz) | 658 | if (lguest_data.tsc_khz) |
658 | return native_read_tsc(); | 659 | return native_read_tsc(); |
659 | else | 660 | |
660 | return jiffies; | 661 | /* If we can't use the TSC, we read the time value written by the Host. |
662 | * Since it's in two parts (seconds and nanoseconds), we risk reading | ||
663 | * it just as it's changing from 99 & 0.999999999 to 100 and 0, and | ||
664 | * getting 99 and 0. As Linux tends to come apart under the stress of | ||
665 | * time travel, we must be careful: */ | ||
666 | do { | ||
667 | /* First we read the seconds part. */ | ||
668 | sec = lguest_data.time.tv_sec; | ||
669 | /* This read memory barrier tells the compiler and the CPU that | ||
670 | * this can't be reordered: we have to complete the above | ||
671 | * before going on. */ | ||
672 | rmb(); | ||
673 | /* Now we read the nanoseconds part. */ | ||
674 | nsec = lguest_data.time.tv_nsec; | ||
675 | /* Make sure we've done that. */ | ||
676 | rmb(); | ||
677 | /* Now if the seconds part has changed, try again. */ | ||
678 | } while (unlikely(lguest_data.time.tv_sec != sec)); | ||
679 | |||
680 | /* Our non-TSC clock is in real nanoseconds. */ | ||
681 | return sec*1000000000ULL + nsec; | ||
661 | } | 682 | } |
662 | 683 | ||
663 | /* This is what we tell the kernel is our clocksource. */ | 684 | /* This is what we tell the kernel is our clocksource. */ |
@@ -665,8 +686,11 @@ static struct clocksource lguest_clock = { | |||
665 | .name = "lguest", | 686 | .name = "lguest", |
666 | .rating = 400, | 687 | .rating = 400, |
667 | .read = lguest_clock_read, | 688 | .read = lguest_clock_read, |
689 | .mask = CLOCKSOURCE_MASK(64), | ||
690 | .mult = 1, | ||
668 | }; | 691 | }; |
669 | 692 | ||
693 | /* The "scheduler clock" is just our real clock, adjusted to start at zero */ | ||
670 | static unsigned long long lguest_sched_clock(void) | 694 | static unsigned long long lguest_sched_clock(void) |
671 | { | 695 | { |
672 | return cyc2ns(&lguest_clock, lguest_clock_read() - clock_base); | 696 | return cyc2ns(&lguest_clock, lguest_clock_read() - clock_base); |
@@ -742,24 +766,21 @@ static void lguest_time_init(void) | |||
742 | set_irq_handler(0, lguest_time_irq); | 766 | set_irq_handler(0, lguest_time_irq); |
743 | 767 | ||
744 | /* Our clock structure looks like arch/i386/kernel/tsc.c if we can use | 768 | /* Our clock structure looks like arch/i386/kernel/tsc.c if we can use |
745 | * the TSC, otherwise it looks like kernel/time/jiffies.c. Either way, | 769 | * the TSC, otherwise it's a dumb nanosecond-resolution clock. Either |
746 | * the "rating" is initialized so high that it's always chosen over any | 770 | * way, the "rating" is initialized so high that it's always chosen |
747 | * other clocksource. */ | 771 | * over any other clocksource. */ |
748 | if (lguest_data.tsc_khz) { | 772 | if (lguest_data.tsc_khz) { |
749 | lguest_clock.shift = 22; | 773 | lguest_clock.shift = 22; |
750 | lguest_clock.mult = clocksource_khz2mult(lguest_data.tsc_khz, | 774 | lguest_clock.mult = clocksource_khz2mult(lguest_data.tsc_khz, |
751 | lguest_clock.shift); | 775 | lguest_clock.shift); |
752 | lguest_clock.mask = CLOCKSOURCE_MASK(64); | ||
753 | lguest_clock.flags = CLOCK_SOURCE_IS_CONTINUOUS; | 776 | lguest_clock.flags = CLOCK_SOURCE_IS_CONTINUOUS; |
754 | } else { | ||
755 | /* To understand this, start at kernel/time/jiffies.c... */ | ||
756 | lguest_clock.shift = 8; | ||
757 | lguest_clock.mult = (((u64)NSEC_PER_SEC<<8)/ACTHZ) << 8; | ||
758 | lguest_clock.mask = CLOCKSOURCE_MASK(32); | ||
759 | } | 777 | } |
760 | clock_base = lguest_clock_read(); | 778 | clock_base = lguest_clock_read(); |
761 | clocksource_register(&lguest_clock); | 779 | clocksource_register(&lguest_clock); |
762 | 780 | ||
781 | /* Now we've set up our clock, we can use it as the scheduler clock */ | ||
782 | paravirt_ops.sched_clock = lguest_sched_clock; | ||
783 | |||
763 | /* We can't set cpumask in the initializer: damn C limitations! Set it | 784 | /* We can't set cpumask in the initializer: damn C limitations! Set it |
764 | * here and register our timer device. */ | 785 | * here and register our timer device. */ |
765 | lguest_clockevent.cpumask = cpumask_of_cpu(0); | 786 | lguest_clockevent.cpumask = cpumask_of_cpu(0); |
@@ -996,7 +1017,6 @@ __init void lguest_init(void *boot) | |||
996 | paravirt_ops.time_init = lguest_time_init; | 1017 | paravirt_ops.time_init = lguest_time_init; |
997 | paravirt_ops.set_lazy_mode = lguest_lazy_mode; | 1018 | paravirt_ops.set_lazy_mode = lguest_lazy_mode; |
998 | paravirt_ops.wbinvd = lguest_wbinvd; | 1019 | paravirt_ops.wbinvd = lguest_wbinvd; |
999 | paravirt_ops.sched_clock = lguest_sched_clock; | ||
1000 | /* Now is a good time to look at the implementations of these functions | 1020 | /* Now is a good time to look at the implementations of these functions |
1001 | * before returning to the rest of lguest_init(). */ | 1021 | * before returning to the rest of lguest_init(). */ |
1002 | 1022 | ||