diff options
author | Rusty Russell <rusty@rustcorp.com.au> | 2007-07-19 04:49:23 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-07-19 13:04:52 -0400 |
commit | d7e28ffe6c74416b54345d6004fd0964c115b12c (patch) | |
tree | 844beb4f400d5400098538e0c1e5f12d20a9504a /drivers/lguest/lguest.c | |
parent | 07ad157f6e5d228be78acd5cea0291e5d0360398 (diff) |
lguest: the host code
This is the code for the "lg.ko" module, which allows lguest guests to
be launched.
[akpm@linux-foundation.org: update for futex-new-private-futexes]
[akpm@linux-foundation.org: build fix]
[jmorris@namei.org: lguest: use hrtimers]
[akpm@linux-foundation.org: x86_64 build fix]
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Andi Kleen <ak@suse.de>
Cc: Eric Dumazet <dada1@cosmosbay.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'drivers/lguest/lguest.c')
-rw-r--r-- | drivers/lguest/lguest.c | 125 |
1 file changed, 101 insertions, 24 deletions
diff --git a/drivers/lguest/lguest.c b/drivers/lguest/lguest.c index b3a72bd8d6f5..b9a58b78c990 100644 --- a/drivers/lguest/lguest.c +++ b/drivers/lguest/lguest.c | |||
@@ -25,6 +25,8 @@ | |||
25 | #include <linux/screen_info.h> | 25 | #include <linux/screen_info.h> |
26 | #include <linux/irq.h> | 26 | #include <linux/irq.h> |
27 | #include <linux/interrupt.h> | 27 | #include <linux/interrupt.h> |
28 | #include <linux/clocksource.h> | ||
29 | #include <linux/clockchips.h> | ||
28 | #include <linux/lguest.h> | 30 | #include <linux/lguest.h> |
29 | #include <linux/lguest_launcher.h> | 31 | #include <linux/lguest_launcher.h> |
30 | #include <linux/lguest_bus.h> | 32 | #include <linux/lguest_bus.h> |
@@ -37,6 +39,7 @@ | |||
37 | #include <asm/e820.h> | 39 | #include <asm/e820.h> |
38 | #include <asm/mce.h> | 40 | #include <asm/mce.h> |
39 | #include <asm/io.h> | 41 | #include <asm/io.h> |
42 | //#include <asm/sched-clock.h> | ||
40 | 43 | ||
41 | /* Declarations for definitions in lguest_guest.S */ | 44 | /* Declarations for definitions in lguest_guest.S */ |
42 | extern char lguest_noirq_start[], lguest_noirq_end[]; | 45 | extern char lguest_noirq_start[], lguest_noirq_end[]; |
@@ -54,7 +57,6 @@ struct lguest_data lguest_data = { | |||
54 | .blocked_interrupts = { 1 }, /* Block timer interrupts */ | 57 | .blocked_interrupts = { 1 }, /* Block timer interrupts */ |
55 | }; | 58 | }; |
56 | struct lguest_device_desc *lguest_devices; | 59 | struct lguest_device_desc *lguest_devices; |
57 | static __initdata const struct lguest_boot_info *boot = __va(0); | ||
58 | 60 | ||
59 | static enum paravirt_lazy_mode lazy_mode; | 61 | static enum paravirt_lazy_mode lazy_mode; |
60 | static void lguest_lazy_mode(enum paravirt_lazy_mode mode) | 62 | static void lguest_lazy_mode(enum paravirt_lazy_mode mode) |
@@ -210,7 +212,7 @@ static void lguest_cpuid(unsigned int *eax, unsigned int *ebx, | |||
210 | case 1: /* Basic feature request. */ | 212 | case 1: /* Basic feature request. */ |
211 | /* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */ | 213 | /* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */ |
212 | *ecx &= 0x00002201; | 214 | *ecx &= 0x00002201; |
213 | /* Similarly: SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, FPU. */ | 215 | /* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, FPU. */ |
214 | *edx &= 0x07808101; | 216 | *edx &= 0x07808101; |
215 | /* Host wants to know when we flush kernel pages: set PGE. */ | 217 | /* Host wants to know when we flush kernel pages: set PGE. */ |
216 | *edx |= 0x00002000; | 218 | *edx |= 0x00002000; |
@@ -346,24 +348,104 @@ static unsigned long lguest_get_wallclock(void) | |||
346 | return hcall(LHCALL_GET_WALLCLOCK, 0, 0, 0); | 348 | return hcall(LHCALL_GET_WALLCLOCK, 0, 0, 0); |
347 | } | 349 | } |
348 | 350 | ||
351 | static cycle_t lguest_clock_read(void) | ||
352 | { | ||
353 | if (lguest_data.tsc_khz) | ||
354 | return native_read_tsc(); | ||
355 | else | ||
356 | return jiffies; | ||
357 | } | ||
358 | |||
359 | /* This is what we tell the kernel is our clocksource. */ | ||
360 | static struct clocksource lguest_clock = { | ||
361 | .name = "lguest", | ||
362 | .rating = 400, | ||
363 | .read = lguest_clock_read, | ||
364 | }; | ||
365 | |||
366 | /* We also need a "struct clock_event_device": Linux asks us to set it to go | ||
367 | * off some time in the future. Actually, James Morris figured all this out, I | ||
368 | * just applied the patch. */ | ||
369 | static int lguest_clockevent_set_next_event(unsigned long delta, | ||
370 | struct clock_event_device *evt) | ||
371 | { | ||
372 | if (delta < LG_CLOCK_MIN_DELTA) { | ||
373 | if (printk_ratelimit()) | ||
374 | printk(KERN_DEBUG "%s: small delta %lu ns\n", | ||
375 | __FUNCTION__, delta); | ||
376 | return -ETIME; | ||
377 | } | ||
378 | hcall(LHCALL_SET_CLOCKEVENT, delta, 0, 0); | ||
379 | return 0; | ||
380 | } | ||
381 | |||
382 | static void lguest_clockevent_set_mode(enum clock_event_mode mode, | ||
383 | struct clock_event_device *evt) | ||
384 | { | ||
385 | switch (mode) { | ||
386 | case CLOCK_EVT_MODE_UNUSED: | ||
387 | case CLOCK_EVT_MODE_SHUTDOWN: | ||
388 | /* A 0 argument shuts the clock down. */ | ||
389 | hcall(LHCALL_SET_CLOCKEVENT, 0, 0, 0); | ||
390 | break; | ||
391 | case CLOCK_EVT_MODE_ONESHOT: | ||
392 | /* This is what we expect. */ | ||
393 | break; | ||
394 | case CLOCK_EVT_MODE_PERIODIC: | ||
395 | BUG(); | ||
396 | } | ||
397 | } | ||
398 | |||
399 | /* This describes our primitive timer chip. */ | ||
400 | static struct clock_event_device lguest_clockevent = { | ||
401 | .name = "lguest", | ||
402 | .features = CLOCK_EVT_FEAT_ONESHOT, | ||
403 | .set_next_event = lguest_clockevent_set_next_event, | ||
404 | .set_mode = lguest_clockevent_set_mode, | ||
405 | .rating = INT_MAX, | ||
406 | .mult = 1, | ||
407 | .shift = 0, | ||
408 | .min_delta_ns = LG_CLOCK_MIN_DELTA, | ||
409 | .max_delta_ns = LG_CLOCK_MAX_DELTA, | ||
410 | }; | ||
411 | |||
412 | /* This is the Guest timer interrupt handler (hardware interrupt 0). We just | ||
413 | * call the clockevent infrastructure and it does whatever needs doing. */ | ||
349 | static void lguest_time_irq(unsigned int irq, struct irq_desc *desc) | 414 | static void lguest_time_irq(unsigned int irq, struct irq_desc *desc) |
350 | { | 415 | { |
351 | do_timer(hcall(LHCALL_TIMER_READ, 0, 0, 0)); | 416 | unsigned long flags; |
352 | update_process_times(user_mode_vm(get_irq_regs())); | 417 | |
418 | /* Don't interrupt us while this is running. */ | ||
419 | local_irq_save(flags); | ||
420 | lguest_clockevent.event_handler(&lguest_clockevent); | ||
421 | local_irq_restore(flags); | ||
353 | } | 422 | } |
354 | 423 | ||
355 | static u64 sched_clock_base; | ||
356 | static void lguest_time_init(void) | 424 | static void lguest_time_init(void) |
357 | { | 425 | { |
358 | set_irq_handler(0, lguest_time_irq); | 426 | set_irq_handler(0, lguest_time_irq); |
359 | hcall(LHCALL_TIMER_READ, 0, 0, 0); | ||
360 | sched_clock_base = jiffies_64; | ||
361 | enable_lguest_irq(0); | ||
362 | } | ||
363 | 427 | ||
364 | static unsigned long long lguest_sched_clock(void) | 428 | /* We use the TSC if the Host tells us we can, otherwise a dumb |
365 | { | 429 | * jiffies-based clock. */ |
366 | return (jiffies_64 - sched_clock_base) * (1000000000 / HZ); | 430 | if (lguest_data.tsc_khz) { |
431 | lguest_clock.shift = 22; | ||
432 | lguest_clock.mult = clocksource_khz2mult(lguest_data.tsc_khz, | ||
433 | lguest_clock.shift); | ||
434 | lguest_clock.mask = CLOCKSOURCE_MASK(64); | ||
435 | lguest_clock.flags = CLOCK_SOURCE_IS_CONTINUOUS; | ||
436 | } else { | ||
437 | /* To understand this, start at kernel/time/jiffies.c... */ | ||
438 | lguest_clock.shift = 8; | ||
439 | lguest_clock.mult = (((u64)NSEC_PER_SEC<<8)/ACTHZ) << 8; | ||
440 | lguest_clock.mask = CLOCKSOURCE_MASK(32); | ||
441 | } | ||
442 | clocksource_register(&lguest_clock); | ||
443 | |||
444 | /* We can't set cpumask in the initializer: damn C limitations! */ | ||
445 | lguest_clockevent.cpumask = cpumask_of_cpu(0); | ||
446 | clockevents_register_device(&lguest_clockevent); | ||
447 | |||
448 | enable_lguest_irq(0); | ||
367 | } | 449 | } |
368 | 450 | ||
369 | static void lguest_load_esp0(struct tss_struct *tss, | 451 | static void lguest_load_esp0(struct tss_struct *tss, |
@@ -418,8 +500,7 @@ static __init char *lguest_memory_setup(void) | |||
418 | /* We do this here because lockcheck barfs if before start_kernel */ | 500 | /* We do this here because lockcheck barfs if before start_kernel */ |
419 | atomic_notifier_chain_register(&panic_notifier_list, &paniced); | 501 | atomic_notifier_chain_register(&panic_notifier_list, &paniced); |
420 | 502 | ||
421 | e820.nr_map = 0; | 503 | add_memory_region(E820_MAP->addr, E820_MAP->size, E820_MAP->type); |
422 | add_memory_region(0, PFN_PHYS(boot->max_pfn), E820_RAM); | ||
423 | return "LGUEST"; | 504 | return "LGUEST"; |
424 | } | 505 | } |
425 | 506 | ||
@@ -450,8 +531,13 @@ static unsigned lguest_patch(u8 type, u16 clobber, void *insns, unsigned len) | |||
450 | return insn_len; | 531 | return insn_len; |
451 | } | 532 | } |
452 | 533 | ||
453 | __init void lguest_init(void) | 534 | __init void lguest_init(void *boot) |
454 | { | 535 | { |
536 | /* Copy boot parameters first. */ | ||
537 | memcpy(&boot_params, boot, PARAM_SIZE); | ||
538 | memcpy(boot_command_line, __va(boot_params.hdr.cmd_line_ptr), | ||
539 | COMMAND_LINE_SIZE); | ||
540 | |||
455 | paravirt_ops.name = "lguest"; | 541 | paravirt_ops.name = "lguest"; |
456 | paravirt_ops.paravirt_enabled = 1; | 542 | paravirt_ops.paravirt_enabled = 1; |
457 | paravirt_ops.kernel_rpl = 1; | 543 | paravirt_ops.kernel_rpl = 1; |
@@ -498,10 +584,8 @@ __init void lguest_init(void) | |||
498 | paravirt_ops.time_init = lguest_time_init; | 584 | paravirt_ops.time_init = lguest_time_init; |
499 | paravirt_ops.set_lazy_mode = lguest_lazy_mode; | 585 | paravirt_ops.set_lazy_mode = lguest_lazy_mode; |
500 | paravirt_ops.wbinvd = lguest_wbinvd; | 586 | paravirt_ops.wbinvd = lguest_wbinvd; |
501 | paravirt_ops.sched_clock = lguest_sched_clock; | ||
502 | 587 | ||
503 | hcall(LHCALL_LGUEST_INIT, __pa(&lguest_data), 0, 0); | 588 | hcall(LHCALL_LGUEST_INIT, __pa(&lguest_data), 0, 0); |
504 | strncpy(boot_command_line, boot->cmdline, COMMAND_LINE_SIZE); | ||
505 | 589 | ||
506 | /* We use top of mem for initial pagetables. */ | 590 | /* We use top of mem for initial pagetables. */ |
507 | init_pg_tables_end = __pa(pg0); | 591 | init_pg_tables_end = __pa(pg0); |
@@ -532,13 +616,6 @@ __init void lguest_init(void) | |||
532 | 616 | ||
533 | add_preferred_console("hvc", 0, NULL); | 617 | add_preferred_console("hvc", 0, NULL); |
534 | 618 | ||
535 | if (boot->initrd_size) { | ||
536 | /* We stash this at top of memory. */ | ||
537 | INITRD_START = boot->max_pfn*PAGE_SIZE - boot->initrd_size; | ||
538 | INITRD_SIZE = boot->initrd_size; | ||
539 | LOADER_TYPE = 0xFF; | ||
540 | } | ||
541 | |||
542 | pm_power_off = lguest_power_off; | 619 | pm_power_off = lguest_power_off; |
543 | start_kernel(); | 620 | start_kernel(); |
544 | } | 621 | } |