diff options
author | Rusty Russell <rusty@rustcorp.com.au> | 2007-07-26 23:42:52 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-07-28 22:54:33 -0400 |
commit | 6c8dca5d53f95009d4fff00195bf38f277dc4366 (patch) | |
tree | 60cc83cf949d6e598e6dc80dc668aebd42c65540 | |
parent | a8a11f06973fa63ad692a8f97694cb5eeb70b3f3 (diff) |
Provide timespec to guests rather than jiffies clock.
A non-periodic clock_event_device and the "jiffies" clock don't mix well:
tick_handle_periodic() can go into an infinite loop.
Currently lguest guests use the jiffies clock when the TSC is
unusable. Instead, make the Host write the current time into the lguest
page on every interrupt. This doesn't cost much but is more precise
and at least as accurate as the jiffies clock. It also gets rid of
the GET_WALLCLOCK hypercall.
Also, delay setting sched_clock until our clock is set up, otherwise
the early printk timestamps can go backwards (not harmful, just ugly).
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | drivers/lguest/hypercalls.c | 21 | ||||
-rw-r--r-- | drivers/lguest/interrupts_and_traps.c | 7 | ||||
-rw-r--r-- | drivers/lguest/lg.h | 1 | ||||
-rw-r--r-- | drivers/lguest/lguest.c | 52 | ||||
-rw-r--r-- | include/linux/lguest.h | 4 |
5 files changed, 60 insertions, 25 deletions
diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c index 7a5299f9679d..db6caace3b9c 100644 --- a/drivers/lguest/hypercalls.c +++ b/drivers/lguest/hypercalls.c | |||
@@ -64,14 +64,6 @@ static void do_hcall(struct lguest *lg, struct lguest_regs *regs) | |||
64 | else | 64 | else |
65 | guest_pagetable_flush_user(lg); | 65 | guest_pagetable_flush_user(lg); |
66 | break; | 66 | break; |
67 | case LHCALL_GET_WALLCLOCK: { | ||
68 | /* The Guest wants to know the real time in seconds since 1970, | ||
69 | * in good Unix tradition. */ | ||
70 | struct timespec ts; | ||
71 | ktime_get_real_ts(&ts); | ||
72 | regs->eax = ts.tv_sec; | ||
73 | break; | ||
74 | } | ||
75 | case LHCALL_BIND_DMA: | 67 | case LHCALL_BIND_DMA: |
76 | /* BIND_DMA really wants four arguments, but it's the only call | 68 | /* BIND_DMA really wants four arguments, but it's the only call |
77 | * which does. So the Guest packs the number of buffers and | 69 | * which does. So the Guest packs the number of buffers and |
@@ -235,6 +227,9 @@ static void initialize(struct lguest *lg) | |||
235 | || put_user(lg->guestid, &lg->lguest_data->guestid)) | 227 | || put_user(lg->guestid, &lg->lguest_data->guestid)) |
236 | kill_guest(lg, "bad guest page %p", lg->lguest_data); | 228 | kill_guest(lg, "bad guest page %p", lg->lguest_data); |
237 | 229 | ||
230 | /* We write the current time into the Guest's data page once now. */ | ||
231 | write_timestamp(lg); | ||
232 | |||
238 | /* This is the one case where the above accesses might have been the | 233 | /* This is the one case where the above accesses might have been the |
239 | * first write to a Guest page. This may have caused a copy-on-write | 234 | * first write to a Guest page. This may have caused a copy-on-write |
240 | * fault, but the Guest might be referring to the old (read-only) | 235 | * fault, but the Guest might be referring to the old (read-only) |
@@ -293,3 +288,13 @@ void do_hypercalls(struct lguest *lg) | |||
293 | clear_hcall(lg); | 288 | clear_hcall(lg); |
294 | } | 289 | } |
295 | } | 290 | } |
291 | |||
292 | /* This routine supplies the Guest with time: it's used for wallclock time at | ||
293 | * initial boot and as a rough time source if the TSC isn't available. */ | ||
294 | void write_timestamp(struct lguest *lg) | ||
295 | { | ||
296 | struct timespec now; | ||
297 | ktime_get_real_ts(&now); | ||
298 | if (put_user(now, &lg->lguest_data->time)) | ||
299 | kill_guest(lg, "Writing timestamp"); | ||
300 | } | ||
diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c index bd0091bf79ec..49787e964a0d 100644 --- a/drivers/lguest/interrupts_and_traps.c +++ b/drivers/lguest/interrupts_and_traps.c | |||
@@ -175,6 +175,13 @@ void maybe_do_interrupt(struct lguest *lg) | |||
175 | * the stack as well: virtual interrupts never do. */ | 175 | * the stack as well: virtual interrupts never do. */ |
176 | set_guest_interrupt(lg, idt->a, idt->b, 0); | 176 | set_guest_interrupt(lg, idt->a, idt->b, 0); |
177 | } | 177 | } |
178 | |||
179 | /* Every time we deliver an interrupt, we update the timestamp in the | ||
180 | * Guest's lguest_data struct. It would be better for the Guest if we | ||
181 | * did this more often, but it can actually be quite slow: doing it | ||
182 | * here is a compromise which means at least it gets updated every | ||
183 | * timer interrupt. */ | ||
184 | write_timestamp(lg); | ||
178 | } | 185 | } |
179 | 186 | ||
180 | /*H:220 Now we've got the routines to deliver interrupts, delivering traps | 187 | /*H:220 Now we've got the routines to deliver interrupts, delivering traps |
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index 269116eee85f..64f0abed317c 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h | |||
@@ -256,6 +256,7 @@ unsigned long get_dma_buffer(struct lguest *lg, unsigned long key, | |||
256 | 256 | ||
257 | /* hypercalls.c: */ | 257 | /* hypercalls.c: */ |
258 | void do_hypercalls(struct lguest *lg); | 258 | void do_hypercalls(struct lguest *lg); |
259 | void write_timestamp(struct lguest *lg); | ||
259 | 260 | ||
260 | /*L:035 | 261 | /*L:035 |
261 | * Let's step aside for the moment, to study one important routine that's used | 262 | * Let's step aside for the moment, to study one important routine that's used |
diff --git a/drivers/lguest/lguest.c b/drivers/lguest/lguest.c index 3386b0e76900..1bc1546c7fd0 100644 --- a/drivers/lguest/lguest.c +++ b/drivers/lguest/lguest.c | |||
@@ -643,21 +643,42 @@ static void __init lguest_init_IRQ(void) | |||
643 | * Time. | 643 | * Time. |
644 | * | 644 | * |
645 | * It would be far better for everyone if the Guest had its own clock, but | 645 | * It would be far better for everyone if the Guest had its own clock, but |
646 | * until then it must ask the Host for the time. | 646 | * until then the Host gives us the time on every interrupt. |
647 | */ | 647 | */ |
648 | static unsigned long lguest_get_wallclock(void) | 648 | static unsigned long lguest_get_wallclock(void) |
649 | { | 649 | { |
650 | return hcall(LHCALL_GET_WALLCLOCK, 0, 0, 0); | 650 | return lguest_data.time.tv_sec; |
651 | } | 651 | } |
652 | 652 | ||
653 | /* If the Host tells us we can trust the TSC, we use that, otherwise we simply | ||
654 | * use the imprecise but reliable "jiffies" counter. */ | ||
655 | static cycle_t lguest_clock_read(void) | 653 | static cycle_t lguest_clock_read(void) |
656 | { | 654 | { |
655 | unsigned long sec, nsec; | ||
656 | |||
657 | /* If the Host tells the TSC speed, we can trust that. */ | ||
657 | if (lguest_data.tsc_khz) | 658 | if (lguest_data.tsc_khz) |
658 | return native_read_tsc(); | 659 | return native_read_tsc(); |
659 | else | 660 | |
660 | return jiffies; | 661 | /* If we can't use the TSC, we read the time value written by the Host. |
662 | * Since it's in two parts (seconds and nanoseconds), we risk reading | ||
663 | * it just as it's changing from 99 & 0.999999999 to 100 and 0, and | ||
664 | * getting 99 and 0. As Linux tends to come apart under the stress of | ||
665 | * time travel, we must be careful: */ | ||
666 | do { | ||
667 | /* First we read the seconds part. */ | ||
668 | sec = lguest_data.time.tv_sec; | ||
669 | /* This read memory barrier tells the compiler and the CPU that | ||
670 | * this can't be reordered: we have to complete the above | ||
671 | * before going on. */ | ||
672 | rmb(); | ||
673 | /* Now we read the nanoseconds part. */ | ||
674 | nsec = lguest_data.time.tv_nsec; | ||
675 | /* Make sure we've done that. */ | ||
676 | rmb(); | ||
677 | /* Now if the seconds part has changed, try again. */ | ||
678 | } while (unlikely(lguest_data.time.tv_sec != sec)); | ||
679 | |||
680 | /* Our non-TSC clock is in real nanoseconds. */ | ||
681 | return sec*1000000000ULL + nsec; | ||
661 | } | 682 | } |
662 | 683 | ||
663 | /* This is what we tell the kernel is our clocksource. */ | 684 | /* This is what we tell the kernel is our clocksource. */ |
@@ -665,8 +686,11 @@ static struct clocksource lguest_clock = { | |||
665 | .name = "lguest", | 686 | .name = "lguest", |
666 | .rating = 400, | 687 | .rating = 400, |
667 | .read = lguest_clock_read, | 688 | .read = lguest_clock_read, |
689 | .mask = CLOCKSOURCE_MASK(64), | ||
690 | .mult = 1, | ||
668 | }; | 691 | }; |
669 | 692 | ||
693 | /* The "scheduler clock" is just our real clock, adjusted to start at zero */ | ||
670 | static unsigned long long lguest_sched_clock(void) | 694 | static unsigned long long lguest_sched_clock(void) |
671 | { | 695 | { |
672 | return cyc2ns(&lguest_clock, lguest_clock_read() - clock_base); | 696 | return cyc2ns(&lguest_clock, lguest_clock_read() - clock_base); |
@@ -742,24 +766,21 @@ static void lguest_time_init(void) | |||
742 | set_irq_handler(0, lguest_time_irq); | 766 | set_irq_handler(0, lguest_time_irq); |
743 | 767 | ||
744 | /* Our clock structure look like arch/i386/kernel/tsc.c if we can use | 768 | /* Our clock structure look like arch/i386/kernel/tsc.c if we can use |
745 | * the TSC, otherwise it looks like kernel/time/jiffies.c. Either way, | 769 | * the TSC, otherwise it's a dumb nanosecond-resolution clock. Either |
746 | * the "rating" is initialized so high that it's always chosen over any | 770 | * way, the "rating" is initialized so high that it's always chosen |
747 | * other clocksource. */ | 771 | * over any other clocksource. */ |
748 | if (lguest_data.tsc_khz) { | 772 | if (lguest_data.tsc_khz) { |
749 | lguest_clock.shift = 22; | 773 | lguest_clock.shift = 22; |
750 | lguest_clock.mult = clocksource_khz2mult(lguest_data.tsc_khz, | 774 | lguest_clock.mult = clocksource_khz2mult(lguest_data.tsc_khz, |
751 | lguest_clock.shift); | 775 | lguest_clock.shift); |
752 | lguest_clock.mask = CLOCKSOURCE_MASK(64); | ||
753 | lguest_clock.flags = CLOCK_SOURCE_IS_CONTINUOUS; | 776 | lguest_clock.flags = CLOCK_SOURCE_IS_CONTINUOUS; |
754 | } else { | ||
755 | /* To understand this, start at kernel/time/jiffies.c... */ | ||
756 | lguest_clock.shift = 8; | ||
757 | lguest_clock.mult = (((u64)NSEC_PER_SEC<<8)/ACTHZ) << 8; | ||
758 | lguest_clock.mask = CLOCKSOURCE_MASK(32); | ||
759 | } | 777 | } |
760 | clock_base = lguest_clock_read(); | 778 | clock_base = lguest_clock_read(); |
761 | clocksource_register(&lguest_clock); | 779 | clocksource_register(&lguest_clock); |
762 | 780 | ||
781 | /* Now we've set up our clock, we can use it as the scheduler clock */ | ||
782 | paravirt_ops.sched_clock = lguest_sched_clock; | ||
783 | |||
763 | /* We can't set cpumask in the initializer: damn C limitations! Set it | 784 | /* We can't set cpumask in the initializer: damn C limitations! Set it |
764 | * here and register our timer device. */ | 785 | * here and register our timer device. */ |
765 | lguest_clockevent.cpumask = cpumask_of_cpu(0); | 786 | lguest_clockevent.cpumask = cpumask_of_cpu(0); |
@@ -996,7 +1017,6 @@ __init void lguest_init(void *boot) | |||
996 | paravirt_ops.time_init = lguest_time_init; | 1017 | paravirt_ops.time_init = lguest_time_init; |
997 | paravirt_ops.set_lazy_mode = lguest_lazy_mode; | 1018 | paravirt_ops.set_lazy_mode = lguest_lazy_mode; |
998 | paravirt_ops.wbinvd = lguest_wbinvd; | 1019 | paravirt_ops.wbinvd = lguest_wbinvd; |
999 | paravirt_ops.sched_clock = lguest_sched_clock; | ||
1000 | /* Now is a good time to look at the implementations of these functions | 1020 | /* Now is a good time to look at the implementations of these functions |
1001 | * before returning to the rest of lguest_init(). */ | 1021 | * before returning to the rest of lguest_init(). */ |
1002 | 1022 | ||
diff --git a/include/linux/lguest.h b/include/linux/lguest.h index e76c151c7129..157ad64aa7ce 100644 --- a/include/linux/lguest.h +++ b/include/linux/lguest.h | |||
@@ -17,7 +17,6 @@ | |||
17 | #define LHCALL_TS 8 | 17 | #define LHCALL_TS 8 |
18 | #define LHCALL_SET_CLOCKEVENT 9 | 18 | #define LHCALL_SET_CLOCKEVENT 9 |
19 | #define LHCALL_HALT 10 | 19 | #define LHCALL_HALT 10 |
20 | #define LHCALL_GET_WALLCLOCK 11 | ||
21 | #define LHCALL_BIND_DMA 12 | 20 | #define LHCALL_BIND_DMA 12 |
22 | #define LHCALL_SEND_DMA 13 | 21 | #define LHCALL_SEND_DMA 13 |
23 | #define LHCALL_SET_PTE 14 | 22 | #define LHCALL_SET_PTE 14 |
@@ -88,6 +87,9 @@ struct lguest_data | |||
88 | * this address would normally be found. */ | 87 | * this address would normally be found. */ |
89 | unsigned long cr2; | 88 | unsigned long cr2; |
90 | 89 | ||
90 | /* Wallclock time set by the Host. */ | ||
91 | struct timespec time; | ||
92 | |||
91 | /* Async hypercall ring. Instead of directly making hypercalls, we can | 93 | /* Async hypercall ring. Instead of directly making hypercalls, we can |
92 | * place them in here for processing the next time the Host wants. | 94 | * place them in here for processing the next time the Host wants. |
93 | * This batching can be quite efficient. */ | 95 | * This batching can be quite efficient. */ |