diff options
-rw-r--r-- | drivers/lguest/hypercalls.c | 21 | ||||
-rw-r--r-- | drivers/lguest/interrupts_and_traps.c | 7 | ||||
-rw-r--r-- | drivers/lguest/lg.h | 1 | ||||
-rw-r--r-- | drivers/lguest/lguest.c | 52 | ||||
-rw-r--r-- | include/linux/lguest.h | 4 |
5 files changed, 60 insertions, 25 deletions
diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c index 7a5299f9679d..db6caace3b9c 100644 --- a/drivers/lguest/hypercalls.c +++ b/drivers/lguest/hypercalls.c | |||
@@ -64,14 +64,6 @@ static void do_hcall(struct lguest *lg, struct lguest_regs *regs) | |||
64 | else | 64 | else |
65 | guest_pagetable_flush_user(lg); | 65 | guest_pagetable_flush_user(lg); |
66 | break; | 66 | break; |
67 | case LHCALL_GET_WALLCLOCK: { | ||
68 | /* The Guest wants to know the real time in seconds since 1970, | ||
69 | * in good Unix tradition. */ | ||
70 | struct timespec ts; | ||
71 | ktime_get_real_ts(&ts); | ||
72 | regs->eax = ts.tv_sec; | ||
73 | break; | ||
74 | } | ||
75 | case LHCALL_BIND_DMA: | 67 | case LHCALL_BIND_DMA: |
76 | /* BIND_DMA really wants four arguments, but it's the only call | 68 | /* BIND_DMA really wants four arguments, but it's the only call |
77 | * which does. So the Guest packs the number of buffers and | 69 | * which does. So the Guest packs the number of buffers and |
@@ -235,6 +227,9 @@ static void initialize(struct lguest *lg) | |||
235 | || put_user(lg->guestid, &lg->lguest_data->guestid)) | 227 | || put_user(lg->guestid, &lg->lguest_data->guestid)) |
236 | kill_guest(lg, "bad guest page %p", lg->lguest_data); | 228 | kill_guest(lg, "bad guest page %p", lg->lguest_data); |
237 | 229 | ||
230 | /* We write the current time into the Guest's data page once now. */ | ||
231 | write_timestamp(lg); | ||
232 | |||
238 | /* This is the one case where the above accesses might have been the | 233 | /* This is the one case where the above accesses might have been the |
239 | * first write to a Guest page. This may have caused a copy-on-write | 234 | * first write to a Guest page. This may have caused a copy-on-write |
240 | * fault, but the Guest might be referring to the old (read-only) | 235 | * fault, but the Guest might be referring to the old (read-only) |
@@ -293,3 +288,13 @@ void do_hypercalls(struct lguest *lg) | |||
293 | clear_hcall(lg); | 288 | clear_hcall(lg); |
294 | } | 289 | } |
295 | } | 290 | } |
291 | |||
292 | /* This routine supplies the Guest with time: it's used for wallclock time at | ||
293 | * initial boot and as a rough time source if the TSC isn't available. */ | ||
294 | void write_timestamp(struct lguest *lg) | ||
295 | { | ||
296 | struct timespec now; | ||
297 | ktime_get_real_ts(&now); | ||
298 | if (put_user(now, &lg->lguest_data->time)) | ||
299 | kill_guest(lg, "Writing timestamp"); | ||
300 | } | ||
diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c index bd0091bf79ec..49787e964a0d 100644 --- a/drivers/lguest/interrupts_and_traps.c +++ b/drivers/lguest/interrupts_and_traps.c | |||
@@ -175,6 +175,13 @@ void maybe_do_interrupt(struct lguest *lg) | |||
175 | * the stack as well: virtual interrupts never do. */ | 175 | * the stack as well: virtual interrupts never do. */ |
176 | set_guest_interrupt(lg, idt->a, idt->b, 0); | 176 | set_guest_interrupt(lg, idt->a, idt->b, 0); |
177 | } | 177 | } |
178 | |||
179 | /* Every time we deliver an interrupt, we update the timestamp in the | ||
180 | * Guest's lguest_data struct. It would be better for the Guest if we | ||
181 | * did this more often, but it can actually be quite slow: doing it | ||
182 | * here is a compromise which means at least it gets updated every | ||
183 | * timer interrupt. */ | ||
184 | write_timestamp(lg); | ||
178 | } | 185 | } |
179 | 186 | ||
180 | /*H:220 Now we've got the routines to deliver interrupts, delivering traps | 187 | /*H:220 Now we've got the routines to deliver interrupts, delivering traps |
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index 269116eee85f..64f0abed317c 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h | |||
@@ -256,6 +256,7 @@ unsigned long get_dma_buffer(struct lguest *lg, unsigned long key, | |||
256 | 256 | ||
257 | /* hypercalls.c: */ | 257 | /* hypercalls.c: */ |
258 | void do_hypercalls(struct lguest *lg); | 258 | void do_hypercalls(struct lguest *lg); |
259 | void write_timestamp(struct lguest *lg); | ||
259 | 260 | ||
260 | /*L:035 | 261 | /*L:035 |
261 | * Let's step aside for the moment, to study one important routine that's used | 262 | * Let's step aside for the moment, to study one important routine that's used |
diff --git a/drivers/lguest/lguest.c b/drivers/lguest/lguest.c index 3386b0e76900..1bc1546c7fd0 100644 --- a/drivers/lguest/lguest.c +++ b/drivers/lguest/lguest.c | |||
@@ -643,21 +643,42 @@ static void __init lguest_init_IRQ(void) | |||
643 | * Time. | 643 | * Time. |
644 | * | 644 | * |
645 | * It would be far better for everyone if the Guest had its own clock, but | 645 | * It would be far better for everyone if the Guest had its own clock, but |
646 | * until then it must ask the Host for the time. | 646 | * until then the Host gives us the time on every interrupt. |
647 | */ | 647 | */ |
648 | static unsigned long lguest_get_wallclock(void) | 648 | static unsigned long lguest_get_wallclock(void) |
649 | { | 649 | { |
650 | return hcall(LHCALL_GET_WALLCLOCK, 0, 0, 0); | 650 | return lguest_data.time.tv_sec; |
651 | } | 651 | } |
652 | 652 | ||
653 | /* If the Host tells us we can trust the TSC, we use that, otherwise we simply | ||
654 | * use the imprecise but reliable "jiffies" counter. */ | ||
655 | static cycle_t lguest_clock_read(void) | 653 | static cycle_t lguest_clock_read(void) |
656 | { | 654 | { |
655 | unsigned long sec, nsec; | ||
656 | |||
657 | /* If the Host tells us the TSC speed, we can trust the TSC. */ | ||
657 | if (lguest_data.tsc_khz) | 658 | if (lguest_data.tsc_khz) |
658 | return native_read_tsc(); | 659 | return native_read_tsc(); |
659 | else | 660 | |
660 | return jiffies; | 661 | /* If we can't use the TSC, we read the time value written by the Host. |
662 | * Since it's in two parts (seconds and nanoseconds), we risk reading | ||
663 | * it just as it's changing from 99 & 0.999999999 to 100 and 0, and | ||
664 | * getting 99 and 0. As Linux tends to come apart under the stress of | ||
665 | * time travel, we must be careful: */ | ||
666 | do { | ||
667 | /* First we read the seconds part. */ | ||
668 | sec = lguest_data.time.tv_sec; | ||
669 | /* This read memory barrier tells the compiler and the CPU that | ||
670 | * this can't be reordered: we have to complete the above | ||
671 | * before going on. */ | ||
672 | rmb(); | ||
673 | /* Now we read the nanoseconds part. */ | ||
674 | nsec = lguest_data.time.tv_nsec; | ||
675 | /* Make sure we've done that. */ | ||
676 | rmb(); | ||
677 | /* Now if the seconds part has changed, try again. */ | ||
678 | } while (unlikely(lguest_data.time.tv_sec != sec)); | ||
679 | |||
680 | /* Our non-TSC clock is in real nanoseconds. */ | ||
681 | return sec*1000000000ULL + nsec; | ||
661 | } | 682 | } |
662 | 683 | ||
663 | /* This is what we tell the kernel is our clocksource. */ | 684 | /* This is what we tell the kernel is our clocksource. */ |
@@ -665,8 +686,11 @@ static struct clocksource lguest_clock = { | |||
665 | .name = "lguest", | 686 | .name = "lguest", |
666 | .rating = 400, | 687 | .rating = 400, |
667 | .read = lguest_clock_read, | 688 | .read = lguest_clock_read, |
689 | .mask = CLOCKSOURCE_MASK(64), | ||
690 | .mult = 1, | ||
668 | }; | 691 | }; |
669 | 692 | ||
693 | /* The "scheduler clock" is just our real clock, adjusted to start at zero */ | ||
670 | static unsigned long long lguest_sched_clock(void) | 694 | static unsigned long long lguest_sched_clock(void) |
671 | { | 695 | { |
672 | return cyc2ns(&lguest_clock, lguest_clock_read() - clock_base); | 696 | return cyc2ns(&lguest_clock, lguest_clock_read() - clock_base); |
@@ -742,24 +766,21 @@ static void lguest_time_init(void) | |||
742 | set_irq_handler(0, lguest_time_irq); | 766 | set_irq_handler(0, lguest_time_irq); |
743 | 767 | ||
744 | /* Our clock structure looks like arch/i386/kernel/tsc.c if we can use | 768 | /* Our clock structure looks like arch/i386/kernel/tsc.c if we can use |
745 | * the TSC, otherwise it looks like kernel/time/jiffies.c. Either way, | 769 | * the TSC, otherwise it's a dumb nanosecond-resolution clock. Either |
746 | * the "rating" is initialized so high that it's always chosen over any | 770 | * way, the "rating" is initialized so high that it's always chosen |
747 | * other clocksource. */ | 771 | * over any other clocksource. */ |
748 | if (lguest_data.tsc_khz) { | 772 | if (lguest_data.tsc_khz) { |
749 | lguest_clock.shift = 22; | 773 | lguest_clock.shift = 22; |
750 | lguest_clock.mult = clocksource_khz2mult(lguest_data.tsc_khz, | 774 | lguest_clock.mult = clocksource_khz2mult(lguest_data.tsc_khz, |
751 | lguest_clock.shift); | 775 | lguest_clock.shift); |
752 | lguest_clock.mask = CLOCKSOURCE_MASK(64); | ||
753 | lguest_clock.flags = CLOCK_SOURCE_IS_CONTINUOUS; | 776 | lguest_clock.flags = CLOCK_SOURCE_IS_CONTINUOUS; |
754 | } else { | ||
755 | /* To understand this, start at kernel/time/jiffies.c... */ | ||
756 | lguest_clock.shift = 8; | ||
757 | lguest_clock.mult = (((u64)NSEC_PER_SEC<<8)/ACTHZ) << 8; | ||
758 | lguest_clock.mask = CLOCKSOURCE_MASK(32); | ||
759 | } | 777 | } |
760 | clock_base = lguest_clock_read(); | 778 | clock_base = lguest_clock_read(); |
761 | clocksource_register(&lguest_clock); | 779 | clocksource_register(&lguest_clock); |
762 | 780 | ||
781 | /* Now we've set up our clock, we can use it as the scheduler clock */ | ||
782 | paravirt_ops.sched_clock = lguest_sched_clock; | ||
783 | |||
763 | /* We can't set cpumask in the initializer: damn C limitations! Set it | 784 | /* We can't set cpumask in the initializer: damn C limitations! Set it |
764 | * here and register our timer device. */ | 785 | * here and register our timer device. */ |
765 | lguest_clockevent.cpumask = cpumask_of_cpu(0); | 786 | lguest_clockevent.cpumask = cpumask_of_cpu(0); |
@@ -996,7 +1017,6 @@ __init void lguest_init(void *boot) | |||
996 | paravirt_ops.time_init = lguest_time_init; | 1017 | paravirt_ops.time_init = lguest_time_init; |
997 | paravirt_ops.set_lazy_mode = lguest_lazy_mode; | 1018 | paravirt_ops.set_lazy_mode = lguest_lazy_mode; |
998 | paravirt_ops.wbinvd = lguest_wbinvd; | 1019 | paravirt_ops.wbinvd = lguest_wbinvd; |
999 | paravirt_ops.sched_clock = lguest_sched_clock; | ||
1000 | /* Now is a good time to look at the implementations of these functions | 1020 | /* Now is a good time to look at the implementations of these functions |
1001 | * before returning to the rest of lguest_init(). */ | 1021 | * before returning to the rest of lguest_init(). */ |
1002 | 1022 | ||
diff --git a/include/linux/lguest.h b/include/linux/lguest.h index e76c151c7129..157ad64aa7ce 100644 --- a/include/linux/lguest.h +++ b/include/linux/lguest.h | |||
@@ -17,7 +17,6 @@ | |||
17 | #define LHCALL_TS 8 | 17 | #define LHCALL_TS 8 |
18 | #define LHCALL_SET_CLOCKEVENT 9 | 18 | #define LHCALL_SET_CLOCKEVENT 9 |
19 | #define LHCALL_HALT 10 | 19 | #define LHCALL_HALT 10 |
20 | #define LHCALL_GET_WALLCLOCK 11 | ||
21 | #define LHCALL_BIND_DMA 12 | 20 | #define LHCALL_BIND_DMA 12 |
22 | #define LHCALL_SEND_DMA 13 | 21 | #define LHCALL_SEND_DMA 13 |
23 | #define LHCALL_SET_PTE 14 | 22 | #define LHCALL_SET_PTE 14 |
@@ -88,6 +87,9 @@ struct lguest_data | |||
88 | * this address would normally be found. */ | 87 | * this address would normally be found. */ |
89 | unsigned long cr2; | 88 | unsigned long cr2; |
90 | 89 | ||
90 | /* Wallclock time set by the Host. */ | ||
91 | struct timespec time; | ||
92 | |||
91 | /* Async hypercall ring. Instead of directly making hypercalls, we can | 93 | /* Async hypercall ring. Instead of directly making hypercalls, we can |
92 | * place them in here for processing the next time the Host wants. | 94 | * place them in here for processing the next time the Host wants. |
93 | * This batching can be quite efficient. */ | 95 | * This batching can be quite efficient. */ |