diff options
Diffstat (limited to 'arch/x86/lguest/boot.c')
-rw-r--r-- | arch/x86/lguest/boot.c | 63 |
1 files changed, 22 insertions, 41 deletions
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 5afdde4895dc..a104c532ff70 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -57,6 +57,7 @@ | |||
57 | #include <linux/lguest_launcher.h> | 57 | #include <linux/lguest_launcher.h> |
58 | #include <linux/virtio_console.h> | 58 | #include <linux/virtio_console.h> |
59 | #include <linux/pm.h> | 59 | #include <linux/pm.h> |
60 | #include <asm/lguest.h> | ||
60 | #include <asm/paravirt.h> | 61 | #include <asm/paravirt.h> |
61 | #include <asm/param.h> | 62 | #include <asm/param.h> |
62 | #include <asm/page.h> | 63 | #include <asm/page.h> |
@@ -75,15 +76,6 @@ | |||
75 | * behaving in simplified but equivalent ways. In particular, the Guest is the | 76 | * behaving in simplified but equivalent ways. In particular, the Guest is the |
76 | * same kernel as the Host (or at least, built from the same source code). :*/ | 77 | * same kernel as the Host (or at least, built from the same source code). :*/ |
77 | 78 | ||
78 | /* Declarations for definitions in lguest_guest.S */ | ||
79 | extern char lguest_noirq_start[], lguest_noirq_end[]; | ||
80 | extern const char lgstart_cli[], lgend_cli[]; | ||
81 | extern const char lgstart_sti[], lgend_sti[]; | ||
82 | extern const char lgstart_popf[], lgend_popf[]; | ||
83 | extern const char lgstart_pushf[], lgend_pushf[]; | ||
84 | extern const char lgstart_iret[], lgend_iret[]; | ||
85 | extern void lguest_iret(void); | ||
86 | |||
87 | struct lguest_data lguest_data = { | 79 | struct lguest_data lguest_data = { |
88 | .hcall_status = { [0 ... LHCALL_RING_SIZE-1] = 0xFF }, | 80 | .hcall_status = { [0 ... LHCALL_RING_SIZE-1] = 0xFF }, |
89 | .noirq_start = (u32)lguest_noirq_start, | 81 | .noirq_start = (u32)lguest_noirq_start, |
@@ -92,7 +84,6 @@ struct lguest_data lguest_data = { | |||
92 | .blocked_interrupts = { 1 }, /* Block timer interrupts */ | 84 | .blocked_interrupts = { 1 }, /* Block timer interrupts */ |
93 | .syscall_vec = SYSCALL_VECTOR, | 85 | .syscall_vec = SYSCALL_VECTOR, |
94 | }; | 86 | }; |
95 | static cycle_t clock_base; | ||
96 | 87 | ||
97 | /*G:037 async_hcall() is pretty simple: I'm quite proud of it really. We have a | 88 | /*G:037 async_hcall() is pretty simple: I'm quite proud of it really. We have a |
98 | * ring buffer of stored hypercalls which the Host will run through next time we | 89 | * ring buffer of stored hypercalls which the Host will run through next time we |
@@ -335,8 +326,8 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx, | |||
335 | case 1: /* Basic feature request. */ | 326 | case 1: /* Basic feature request. */ |
336 | /* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */ | 327 | /* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */ |
337 | *cx &= 0x00002201; | 328 | *cx &= 0x00002201; |
338 | /* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, FPU. */ | 329 | /* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU. */ |
339 | *dx &= 0x07808101; | 330 | *dx &= 0x07808111; |
340 | /* The Host can do a nice optimization if it knows that the | 331 | /* The Host can do a nice optimization if it knows that the |
341 | * kernel mappings (addresses above 0xC0000000 or whatever | 332 | * kernel mappings (addresses above 0xC0000000 or whatever |
342 | * PAGE_OFFSET is set to) haven't changed. But Linux calls | 333 | * PAGE_OFFSET is set to) haven't changed. But Linux calls |
@@ -603,19 +594,25 @@ static unsigned long lguest_get_wallclock(void) | |||
603 | return lguest_data.time.tv_sec; | 594 | return lguest_data.time.tv_sec; |
604 | } | 595 | } |
605 | 596 | ||
597 | /* The TSC is a Time Stamp Counter. The Host tells us what speed it runs at, | ||
598 | * or 0 if it's unusable as a reliable clock source. This matches what we want | ||
599 | * here: if we return 0 from this function, the x86 TSC clock will not register | ||
600 | * itself. */ | ||
601 | static unsigned long lguest_cpu_khz(void) | ||
602 | { | ||
603 | return lguest_data.tsc_khz; | ||
604 | } | ||
605 | |||
606 | /* If we can't use the TSC, the kernel falls back to our "lguest_clock", where | ||
607 | * we read the time value given to us by the Host. */ | ||
606 | static cycle_t lguest_clock_read(void) | 608 | static cycle_t lguest_clock_read(void) |
607 | { | 609 | { |
608 | unsigned long sec, nsec; | 610 | unsigned long sec, nsec; |
609 | 611 | ||
610 | /* If the Host tells the TSC speed, we can trust that. */ | 612 | /* Since the time is in two parts (seconds and nanoseconds), we risk |
611 | if (lguest_data.tsc_khz) | 613 | * reading it just as it's changing from 99 & 0.999999999 to 100 and 0, |
612 | return native_read_tsc(); | 614 | * and getting 99 and 0. As Linux tends to come apart under the stress |
613 | 615 | * of time travel, we must be careful: */ | |
614 | /* If we can't use the TSC, we read the time value written by the Host. | ||
615 | * Since it's in two parts (seconds and nanoseconds), we risk reading | ||
616 | * it just as it's changing from 99 & 0.999999999 to 100 and 0, and | ||
617 | * getting 99 and 0. As Linux tends to come apart under the stress of | ||
618 | * time travel, we must be careful: */ | ||
619 | do { | 616 | do { |
620 | /* First we read the seconds part. */ | 617 | /* First we read the seconds part. */ |
621 | sec = lguest_data.time.tv_sec; | 618 | sec = lguest_data.time.tv_sec; |
@@ -630,14 +627,14 @@ static cycle_t lguest_clock_read(void) | |||
630 | /* Now if the seconds part has changed, try again. */ | 627 | /* Now if the seconds part has changed, try again. */ |
631 | } while (unlikely(lguest_data.time.tv_sec != sec)); | 628 | } while (unlikely(lguest_data.time.tv_sec != sec)); |
632 | 629 | ||
633 | /* Our non-TSC clock is in real nanoseconds. */ | 630 | /* Our lguest clock is in real nanoseconds. */ |
634 | return sec*1000000000ULL + nsec; | 631 | return sec*1000000000ULL + nsec; |
635 | } | 632 | } |
636 | 633 | ||
637 | /* This is what we tell the kernel is our clocksource. */ | 634 | /* This is the fallback clocksource: lower priority than the TSC clocksource. */ |
638 | static struct clocksource lguest_clock = { | 635 | static struct clocksource lguest_clock = { |
639 | .name = "lguest", | 636 | .name = "lguest", |
640 | .rating = 400, | 637 | .rating = 200, |
641 | .read = lguest_clock_read, | 638 | .read = lguest_clock_read, |
642 | .mask = CLOCKSOURCE_MASK(64), | 639 | .mask = CLOCKSOURCE_MASK(64), |
643 | .mult = 1 << 22, | 640 | .mult = 1 << 22, |
@@ -645,12 +642,6 @@ static struct clocksource lguest_clock = { | |||
645 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | 642 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
646 | }; | 643 | }; |
647 | 644 | ||
648 | /* The "scheduler clock" is just our real clock, adjusted to start at zero */ | ||
649 | static unsigned long long lguest_sched_clock(void) | ||
650 | { | ||
651 | return cyc2ns(&lguest_clock, lguest_clock_read() - clock_base); | ||
652 | } | ||
653 | |||
654 | /* We also need a "struct clock_event_device": Linux asks us to set it to go | 645 | /* We also need a "struct clock_event_device": Linux asks us to set it to go |
655 | * off some time in the future. Actually, James Morris figured all this out, I | 646 | * off some time in the future. Actually, James Morris figured all this out, I |
656 | * just applied the patch. */ | 647 | * just applied the patch. */ |
@@ -720,19 +711,8 @@ static void lguest_time_init(void) | |||
720 | /* Set up the timer interrupt (0) to go to our simple timer routine */ | 711 | /* Set up the timer interrupt (0) to go to our simple timer routine */ |
721 | set_irq_handler(0, lguest_time_irq); | 712 | set_irq_handler(0, lguest_time_irq); |
722 | 713 | ||
723 | /* Our clock structure looks like arch/x86/kernel/tsc_32.c if we can | ||
724 | * use the TSC, otherwise it's a dumb nanosecond-resolution clock. | ||
725 | * Either way, the "rating" is set so high that it's always chosen over | ||
726 | * any other clocksource. */ | ||
727 | if (lguest_data.tsc_khz) | ||
728 | lguest_clock.mult = clocksource_khz2mult(lguest_data.tsc_khz, | ||
729 | lguest_clock.shift); | ||
730 | clock_base = lguest_clock_read(); | ||
731 | clocksource_register(&lguest_clock); | 714 | clocksource_register(&lguest_clock); |
732 | 715 | ||
733 | /* Now we've set up our clock, we can use it as the scheduler clock */ | ||
734 | pv_time_ops.sched_clock = lguest_sched_clock; | ||
735 | |||
736 | /* We can't set cpumask in the initializer: damn C limitations! Set it | 716 | /* We can't set cpumask in the initializer: damn C limitations! Set it |
737 | * here and register our timer device. */ | 717 | * here and register our timer device. */ |
738 | lguest_clockevent.cpumask = cpumask_of_cpu(0); | 718 | lguest_clockevent.cpumask = cpumask_of_cpu(0); |
@@ -1003,6 +983,7 @@ __init void lguest_init(void) | |||
1003 | /* time operations */ | 983 | /* time operations */ |
1004 | pv_time_ops.get_wallclock = lguest_get_wallclock; | 984 | pv_time_ops.get_wallclock = lguest_get_wallclock; |
1005 | pv_time_ops.time_init = lguest_time_init; | 985 | pv_time_ops.time_init = lguest_time_init; |
986 | pv_time_ops.get_cpu_khz = lguest_cpu_khz; | ||
1006 | 987 | ||
1007 | /* Now is a good time to look at the implementations of these functions | 988 | /* Now is a good time to look at the implementations of these functions |
1008 | * before returning to the rest of lguest_init(). */ | 989 | * before returning to the rest of lguest_init(). */ |