aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Rusty Russell <rusty@rustcorp.com.au> 2007-07-26 23:42:52 -0400
committer Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-07-28 22:54:33 -0400
commit 6c8dca5d53f95009d4fff00195bf38f277dc4366 (patch)
tree 60cc83cf949d6e598e6dc80dc668aebd42c65540
parent a8a11f06973fa63ad692a8f97694cb5eeb70b3f3 (diff)
Provide timespec to guests rather than jiffies clock.
A non-periodic clock_event_device and the "jiffies" clock don't mix well: tick_handle_periodic() can go into an infinite loop.

Currently lguest guests use the jiffies clock when the TSC is unusable. Instead, make the Host write the current time into the lguest page on every interrupt. This doesn't cost much but is more precise and at least as accurate as the jiffies clock. It also gets rid of the GET_WALLCLOCK hypercall.

Also, delay setting sched_clock until our clock is set up, otherwise the early printk timestamps can go backwards (not harmful, just ugly).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- drivers/lguest/hypercalls.c 21
-rw-r--r-- drivers/lguest/interrupts_and_traps.c 7
-rw-r--r-- drivers/lguest/lg.h 1
-rw-r--r-- drivers/lguest/lguest.c 52
-rw-r--r-- include/linux/lguest.h 4
5 files changed, 60 insertions, 25 deletions
diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c
index 7a5299f9679d..db6caace3b9c 100644
--- a/drivers/lguest/hypercalls.c
+++ b/drivers/lguest/hypercalls.c
@@ -64,14 +64,6 @@ static void do_hcall(struct lguest *lg, struct lguest_regs *regs)
64 else 64 else
65 guest_pagetable_flush_user(lg); 65 guest_pagetable_flush_user(lg);
66 break; 66 break;
67 case LHCALL_GET_WALLCLOCK: {
68 /* The Guest wants to know the real time in seconds since 1970,
69 * in good Unix tradition. */
70 struct timespec ts;
71 ktime_get_real_ts(&ts);
72 regs->eax = ts.tv_sec;
73 break;
74 }
75 case LHCALL_BIND_DMA: 67 case LHCALL_BIND_DMA:
76 /* BIND_DMA really wants four arguments, but it's the only call 68 /* BIND_DMA really wants four arguments, but it's the only call
77 * which does. So the Guest packs the number of buffers and 69 * which does. So the Guest packs the number of buffers and
@@ -235,6 +227,9 @@ static void initialize(struct lguest *lg)
235 || put_user(lg->guestid, &lg->lguest_data->guestid)) 227 || put_user(lg->guestid, &lg->lguest_data->guestid))
236 kill_guest(lg, "bad guest page %p", lg->lguest_data); 228 kill_guest(lg, "bad guest page %p", lg->lguest_data);
237 229
230 /* We write the current time into the Guest's data page once now. */
231 write_timestamp(lg);
232
238 /* This is the one case where the above accesses might have been the 233 /* This is the one case where the above accesses might have been the
239 * first write to a Guest page. This may have caused a copy-on-write 234 * first write to a Guest page. This may have caused a copy-on-write
240 * fault, but the Guest might be referring to the old (read-only) 235 * fault, but the Guest might be referring to the old (read-only)
@@ -293,3 +288,13 @@ void do_hypercalls(struct lguest *lg)
293 clear_hcall(lg); 288 clear_hcall(lg);
294 } 289 }
295} 290}
291
292/* This routine supplies the Guest with time: it's used for wallclock time at
293 * initial boot and as a rough time source if the TSC isn't available. */
294void write_timestamp(struct lguest *lg)
295{
296 struct timespec now;
297 ktime_get_real_ts(&now);
298 if (put_user(now, &lg->lguest_data->time))
299 kill_guest(lg, "Writing timestamp");
300}
diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c
index bd0091bf79ec..49787e964a0d 100644
--- a/drivers/lguest/interrupts_and_traps.c
+++ b/drivers/lguest/interrupts_and_traps.c
@@ -175,6 +175,13 @@ void maybe_do_interrupt(struct lguest *lg)
175 * the stack as well: virtual interrupts never do. */ 175 * the stack as well: virtual interrupts never do. */
176 set_guest_interrupt(lg, idt->a, idt->b, 0); 176 set_guest_interrupt(lg, idt->a, idt->b, 0);
177 } 177 }
178
179 /* Every time we deliver an interrupt, we update the timestamp in the
180 * Guest's lguest_data struct. It would be better for the Guest if we
181 * did this more often, but it can actually be quite slow: doing it
182 * here is a compromise which means at least it gets updated every
183 * timer interrupt. */
184 write_timestamp(lg);
178} 185}
179 186
180/*H:220 Now we've got the routines to deliver interrupts, delivering traps 187/*H:220 Now we've got the routines to deliver interrupts, delivering traps
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h
index 269116eee85f..64f0abed317c 100644
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -256,6 +256,7 @@ unsigned long get_dma_buffer(struct lguest *lg, unsigned long key,
256 256
257/* hypercalls.c: */ 257/* hypercalls.c: */
258void do_hypercalls(struct lguest *lg); 258void do_hypercalls(struct lguest *lg);
259void write_timestamp(struct lguest *lg);
259 260
260/*L:035 261/*L:035
261 * Let's step aside for the moment, to study one important routine that's used 262 * Let's step aside for the moment, to study one important routine that's used
diff --git a/drivers/lguest/lguest.c b/drivers/lguest/lguest.c
index 3386b0e76900..1bc1546c7fd0 100644
--- a/drivers/lguest/lguest.c
+++ b/drivers/lguest/lguest.c
@@ -643,21 +643,42 @@ static void __init lguest_init_IRQ(void)
643 * Time. 643 * Time.
644 * 644 *
645 * It would be far better for everyone if the Guest had its own clock, but 645 * It would be far better for everyone if the Guest had its own clock, but
646 * until then it must ask the Host for the time. 646 * until then the Host gives us the time on every interrupt.
647 */ 647 */
648static unsigned long lguest_get_wallclock(void) 648static unsigned long lguest_get_wallclock(void)
649{ 649{
650 return hcall(LHCALL_GET_WALLCLOCK, 0, 0, 0); 650 return lguest_data.time.tv_sec;
651} 651}
652 652
653/* If the Host tells us we can trust the TSC, we use that, otherwise we simply
654 * use the imprecise but reliable "jiffies" counter. */
655static cycle_t lguest_clock_read(void) 653static cycle_t lguest_clock_read(void)
656{ 654{
655 unsigned long sec, nsec;
656
657 /* If the Host tells the TSC speed, we can trust that. */
657 if (lguest_data.tsc_khz) 658 if (lguest_data.tsc_khz)
658 return native_read_tsc(); 659 return native_read_tsc();
659 else 660
660 return jiffies; 661 /* If we can't use the TSC, we read the time value written by the Host.
662 * Since it's in two parts (seconds and nanoseconds), we risk reading
663 * it just as it's changing from 99 & 0.999999999 to 100 and 0, and
664 * getting 99 and 0. As Linux tends to come apart under the stress of
665 * time travel, we must be careful: */
666 do {
667 /* First we read the seconds part. */
668 sec = lguest_data.time.tv_sec;
669 /* This read memory barrier tells the compiler and the CPU that
670 * this can't be reordered: we have to complete the above
671 * before going on. */
672 rmb();
673 /* Now we read the nanoseconds part. */
674 nsec = lguest_data.time.tv_nsec;
675 /* Make sure we've done that. */
676 rmb();
677 /* Now if the seconds part has changed, try again. */
678 } while (unlikely(lguest_data.time.tv_sec != sec));
679
680 /* Our non-TSC clock is in real nanoseconds. */
681 return sec*1000000000ULL + nsec;
661} 682}
662 683
663/* This is what we tell the kernel is our clocksource. */ 684/* This is what we tell the kernel is our clocksource. */
@@ -665,8 +686,11 @@ static struct clocksource lguest_clock = {
665 .name = "lguest", 686 .name = "lguest",
666 .rating = 400, 687 .rating = 400,
667 .read = lguest_clock_read, 688 .read = lguest_clock_read,
689 .mask = CLOCKSOURCE_MASK(64),
690 .mult = 1,
668}; 691};
669 692
693/* The "scheduler clock" is just our real clock, adjusted to start at zero */
670static unsigned long long lguest_sched_clock(void) 694static unsigned long long lguest_sched_clock(void)
671{ 695{
672 return cyc2ns(&lguest_clock, lguest_clock_read() - clock_base); 696 return cyc2ns(&lguest_clock, lguest_clock_read() - clock_base);
@@ -742,24 +766,21 @@ static void lguest_time_init(void)
742 set_irq_handler(0, lguest_time_irq); 766 set_irq_handler(0, lguest_time_irq);
743 767
744 /* Our clock structure look like arch/i386/kernel/tsc.c if we can use 768 /* Our clock structure look like arch/i386/kernel/tsc.c if we can use
745 * the TSC, otherwise it looks like kernel/time/jiffies.c. Either way, 769 * the TSC, otherwise it's a dumb nanosecond-resolution clock. Either
746 * the "rating" is initialized so high that it's always chosen over any 770 * way, the "rating" is initialized so high that it's always chosen
747 * other clocksource. */ 771 * over any other clocksource. */
748 if (lguest_data.tsc_khz) { 772 if (lguest_data.tsc_khz) {
749 lguest_clock.shift = 22; 773 lguest_clock.shift = 22;
750 lguest_clock.mult = clocksource_khz2mult(lguest_data.tsc_khz, 774 lguest_clock.mult = clocksource_khz2mult(lguest_data.tsc_khz,
751 lguest_clock.shift); 775 lguest_clock.shift);
752 lguest_clock.mask = CLOCKSOURCE_MASK(64);
753 lguest_clock.flags = CLOCK_SOURCE_IS_CONTINUOUS; 776 lguest_clock.flags = CLOCK_SOURCE_IS_CONTINUOUS;
754 } else {
755 /* To understand this, start at kernel/time/jiffies.c... */
756 lguest_clock.shift = 8;
757 lguest_clock.mult = (((u64)NSEC_PER_SEC<<8)/ACTHZ) << 8;
758 lguest_clock.mask = CLOCKSOURCE_MASK(32);
759 } 777 }
760 clock_base = lguest_clock_read(); 778 clock_base = lguest_clock_read();
761 clocksource_register(&lguest_clock); 779 clocksource_register(&lguest_clock);
762 780
781 /* Now we've set up our clock, we can use it as the scheduler clock */
782 paravirt_ops.sched_clock = lguest_sched_clock;
783
763 /* We can't set cpumask in the initializer: damn C limitations! Set it 784 /* We can't set cpumask in the initializer: damn C limitations! Set it
764 * here and register our timer device. */ 785 * here and register our timer device. */
765 lguest_clockevent.cpumask = cpumask_of_cpu(0); 786 lguest_clockevent.cpumask = cpumask_of_cpu(0);
@@ -996,7 +1017,6 @@ __init void lguest_init(void *boot)
996 paravirt_ops.time_init = lguest_time_init; 1017 paravirt_ops.time_init = lguest_time_init;
997 paravirt_ops.set_lazy_mode = lguest_lazy_mode; 1018 paravirt_ops.set_lazy_mode = lguest_lazy_mode;
998 paravirt_ops.wbinvd = lguest_wbinvd; 1019 paravirt_ops.wbinvd = lguest_wbinvd;
999 paravirt_ops.sched_clock = lguest_sched_clock;
1000 /* Now is a good time to look at the implementations of these functions 1020 /* Now is a good time to look at the implementations of these functions
1001 * before returning to the rest of lguest_init(). */ 1021 * before returning to the rest of lguest_init(). */
1002 1022
diff --git a/include/linux/lguest.h b/include/linux/lguest.h
index e76c151c7129..157ad64aa7ce 100644
--- a/include/linux/lguest.h
+++ b/include/linux/lguest.h
@@ -17,7 +17,6 @@
17#define LHCALL_TS 8 17#define LHCALL_TS 8
18#define LHCALL_SET_CLOCKEVENT 9 18#define LHCALL_SET_CLOCKEVENT 9
19#define LHCALL_HALT 10 19#define LHCALL_HALT 10
20#define LHCALL_GET_WALLCLOCK 11
21#define LHCALL_BIND_DMA 12 20#define LHCALL_BIND_DMA 12
22#define LHCALL_SEND_DMA 13 21#define LHCALL_SEND_DMA 13
23#define LHCALL_SET_PTE 14 22#define LHCALL_SET_PTE 14
@@ -88,6 +87,9 @@ struct lguest_data
88 * this address would normally be found. */ 87 * this address would normally be found. */
89 unsigned long cr2; 88 unsigned long cr2;
90 89
90 /* Wallclock time set by the Host. */
91 struct timespec time;
92
91 /* Async hypercall ring. Instead of directly making hypercalls, we can 93 /* Async hypercall ring. Instead of directly making hypercalls, we can
92 * place them in here for processing the next time the Host wants. 94 * place them in here for processing the next time the Host wants.
93 * This batching can be quite efficient. */ 95 * This batching can be quite efficient. */