diff options
| -rw-r--r-- | Documentation/lguest/lguest.c | 7 | ||||
| -rw-r--r-- | arch/x86/lguest/boot.c | 55 | ||||
| -rw-r--r-- | drivers/lguest/core.c | 15 | ||||
| -rw-r--r-- | drivers/lguest/lguest_user.c | 15 | ||||
| -rw-r--r-- | drivers/lguest/page_tables.c | 2 |
5 files changed, 49 insertions, 45 deletions
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index 0f23d67f958f..bec5a32e4095 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c | |||
| @@ -486,9 +486,12 @@ static void concat(char *dst, char *args[]) | |||
| 486 | unsigned int i, len = 0; | 486 | unsigned int i, len = 0; |
| 487 | 487 | ||
| 488 | for (i = 0; args[i]; i++) { | 488 | for (i = 0; args[i]; i++) { |
| 489 | if (i) { | ||
| 490 | strcat(dst+len, " "); | ||
| 491 | len++; | ||
| 492 | } | ||
| 489 | strcpy(dst+len, args[i]); | 493 | strcpy(dst+len, args[i]); |
| 490 | strcat(dst+len, " "); | 494 | len += strlen(args[i]); |
| 491 | len += strlen(args[i]) + 1; | ||
| 492 | } | 495 | } |
| 493 | /* In case it's empty. */ | 496 | /* In case it's empty. */ |
| 494 | dst[len] = '\0'; | 497 | dst[len] = '\0'; |
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index cccb38a59653..a104c532ff70 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
| @@ -84,7 +84,6 @@ struct lguest_data lguest_data = { | |||
| 84 | .blocked_interrupts = { 1 }, /* Block timer interrupts */ | 84 | .blocked_interrupts = { 1 }, /* Block timer interrupts */ |
| 85 | .syscall_vec = SYSCALL_VECTOR, | 85 | .syscall_vec = SYSCALL_VECTOR, |
| 86 | }; | 86 | }; |
| 87 | static cycle_t clock_base; | ||
| 88 | 87 | ||
| 89 | /*G:037 async_hcall() is pretty simple: I'm quite proud of it really. We have a | 88 | /*G:037 async_hcall() is pretty simple: I'm quite proud of it really. We have a |
| 90 | * ring buffer of stored hypercalls which the Host will run through next time we | 89 | * ring buffer of stored hypercalls which the Host will run through next time we |
| @@ -327,8 +326,8 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx, | |||
| 327 | case 1: /* Basic feature request. */ | 326 | case 1: /* Basic feature request. */ |
| 328 | /* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */ | 327 | /* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */ |
| 329 | *cx &= 0x00002201; | 328 | *cx &= 0x00002201; |
| 330 | /* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, FPU. */ | 329 | /* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU. */ |
| 331 | *dx &= 0x07808101; | 330 | *dx &= 0x07808111; |
| 332 | /* The Host can do a nice optimization if it knows that the | 331 | /* The Host can do a nice optimization if it knows that the |
| 333 | * kernel mappings (addresses above 0xC0000000 or whatever | 332 | * kernel mappings (addresses above 0xC0000000 or whatever |
| 334 | * PAGE_OFFSET is set to) haven't changed. But Linux calls | 333 | * PAGE_OFFSET is set to) haven't changed. But Linux calls |
| @@ -481,7 +480,7 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval) | |||
| 481 | { | 480 | { |
| 482 | *pmdp = pmdval; | 481 | *pmdp = pmdval; |
| 483 | lazy_hcall(LHCALL_SET_PMD, __pa(pmdp)&PAGE_MASK, | 482 | lazy_hcall(LHCALL_SET_PMD, __pa(pmdp)&PAGE_MASK, |
| 484 | (__pa(pmdp)&(PAGE_SIZE-1)), 0); | 483 | (__pa(pmdp)&(PAGE_SIZE-1))/4, 0); |
| 485 | } | 484 | } |
| 486 | 485 | ||
| 487 | /* There are a couple of legacy places where the kernel sets a PTE, but we | 486 | /* There are a couple of legacy places where the kernel sets a PTE, but we |
| @@ -595,19 +594,25 @@ static unsigned long lguest_get_wallclock(void) | |||
| 595 | return lguest_data.time.tv_sec; | 594 | return lguest_data.time.tv_sec; |
| 596 | } | 595 | } |
| 597 | 596 | ||
| 597 | /* The TSC is a Time Stamp Counter. The Host tells us what speed it runs at, | ||
| 598 | * or 0 if it's unusable as a reliable clock source. This matches what we want | ||
| 599 | * here: if we return 0 from this function, the x86 TSC clock will not register | ||
| 600 | * itself. */ | ||
| 601 | static unsigned long lguest_cpu_khz(void) | ||
| 602 | { | ||
| 603 | return lguest_data.tsc_khz; | ||
| 604 | } | ||
| 605 | |||
| 606 | /* If we can't use the TSC, the kernel falls back to our "lguest_clock", where | ||
| 607 | * we read the time value given to us by the Host. */ | ||
| 598 | static cycle_t lguest_clock_read(void) | 608 | static cycle_t lguest_clock_read(void) |
| 599 | { | 609 | { |
| 600 | unsigned long sec, nsec; | 610 | unsigned long sec, nsec; |
| 601 | 611 | ||
| 602 | /* If the Host tells the TSC speed, we can trust that. */ | 612 | /* Since the time is in two parts (seconds and nanoseconds), we risk |
| 603 | if (lguest_data.tsc_khz) | 613 | * reading it just as it's changing from 99 & 0.999999999 to 100 and 0, |
| 604 | return native_read_tsc(); | 614 | * and getting 99 and 0. As Linux tends to come apart under the stress |
| 605 | 615 | * of time travel, we must be careful: */ | |
| 606 | /* If we can't use the TSC, we read the time value written by the Host. | ||
| 607 | * Since it's in two parts (seconds and nanoseconds), we risk reading | ||
| 608 | * it just as it's changing from 99 & 0.999999999 to 100 and 0, and | ||
| 609 | * getting 99 and 0. As Linux tends to come apart under the stress of | ||
| 610 | * time travel, we must be careful: */ | ||
| 611 | do { | 616 | do { |
| 612 | /* First we read the seconds part. */ | 617 | /* First we read the seconds part. */ |
| 613 | sec = lguest_data.time.tv_sec; | 618 | sec = lguest_data.time.tv_sec; |
| @@ -622,14 +627,14 @@ static cycle_t lguest_clock_read(void) | |||
| 622 | /* Now if the seconds part has changed, try again. */ | 627 | /* Now if the seconds part has changed, try again. */ |
| 623 | } while (unlikely(lguest_data.time.tv_sec != sec)); | 628 | } while (unlikely(lguest_data.time.tv_sec != sec)); |
| 624 | 629 | ||
| 625 | /* Our non-TSC clock is in real nanoseconds. */ | 630 | /* Our lguest clock is in real nanoseconds. */ |
| 626 | return sec*1000000000ULL + nsec; | 631 | return sec*1000000000ULL + nsec; |
| 627 | } | 632 | } |
| 628 | 633 | ||
| 629 | /* This is what we tell the kernel is our clocksource. */ | 634 | /* This is the fallback clocksource: lower priority than the TSC clocksource. */ |
| 630 | static struct clocksource lguest_clock = { | 635 | static struct clocksource lguest_clock = { |
| 631 | .name = "lguest", | 636 | .name = "lguest", |
| 632 | .rating = 400, | 637 | .rating = 200, |
| 633 | .read = lguest_clock_read, | 638 | .read = lguest_clock_read, |
| 634 | .mask = CLOCKSOURCE_MASK(64), | 639 | .mask = CLOCKSOURCE_MASK(64), |
| 635 | .mult = 1 << 22, | 640 | .mult = 1 << 22, |
| @@ -637,12 +642,6 @@ static struct clocksource lguest_clock = { | |||
| 637 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | 642 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
| 638 | }; | 643 | }; |
| 639 | 644 | ||
| 640 | /* The "scheduler clock" is just our real clock, adjusted to start at zero */ | ||
| 641 | static unsigned long long lguest_sched_clock(void) | ||
| 642 | { | ||
| 643 | return cyc2ns(&lguest_clock, lguest_clock_read() - clock_base); | ||
| 644 | } | ||
| 645 | |||
| 646 | /* We also need a "struct clock_event_device": Linux asks us to set it to go | 645 | /* We also need a "struct clock_event_device": Linux asks us to set it to go |
| 647 | * off some time in the future. Actually, James Morris figured all this out, I | 646 | * off some time in the future. Actually, James Morris figured all this out, I |
| 648 | * just applied the patch. */ | 647 | * just applied the patch. */ |
| @@ -712,19 +711,8 @@ static void lguest_time_init(void) | |||
| 712 | /* Set up the timer interrupt (0) to go to our simple timer routine */ | 711 | /* Set up the timer interrupt (0) to go to our simple timer routine */ |
| 713 | set_irq_handler(0, lguest_time_irq); | 712 | set_irq_handler(0, lguest_time_irq); |
| 714 | 713 | ||
| 715 | /* Our clock structure looks like arch/x86/kernel/tsc_32.c if we can | ||
| 716 | * use the TSC, otherwise it's a dumb nanosecond-resolution clock. | ||
| 717 | * Either way, the "rating" is set so high that it's always chosen over | ||
| 718 | * any other clocksource. */ | ||
| 719 | if (lguest_data.tsc_khz) | ||
| 720 | lguest_clock.mult = clocksource_khz2mult(lguest_data.tsc_khz, | ||
| 721 | lguest_clock.shift); | ||
| 722 | clock_base = lguest_clock_read(); | ||
| 723 | clocksource_register(&lguest_clock); | 714 | clocksource_register(&lguest_clock); |
| 724 | 715 | ||
| 725 | /* Now we've set up our clock, we can use it as the scheduler clock */ | ||
| 726 | pv_time_ops.sched_clock = lguest_sched_clock; | ||
| 727 | |||
| 728 | /* We can't set cpumask in the initializer: damn C limitations! Set it | 716 | /* We can't set cpumask in the initializer: damn C limitations! Set it |
| 729 | * here and register our timer device. */ | 717 | * here and register our timer device. */ |
| 730 | lguest_clockevent.cpumask = cpumask_of_cpu(0); | 718 | lguest_clockevent.cpumask = cpumask_of_cpu(0); |
| @@ -995,6 +983,7 @@ __init void lguest_init(void) | |||
| 995 | /* time operations */ | 983 | /* time operations */ |
| 996 | pv_time_ops.get_wallclock = lguest_get_wallclock; | 984 | pv_time_ops.get_wallclock = lguest_get_wallclock; |
| 997 | pv_time_ops.time_init = lguest_time_init; | 985 | pv_time_ops.time_init = lguest_time_init; |
| 986 | pv_time_ops.get_cpu_khz = lguest_cpu_khz; | ||
| 998 | 987 | ||
| 999 | /* Now is a good time to look at the implementations of these functions | 988 | /* Now is a good time to look at the implementations of these functions |
| 1000 | * before returning to the rest of lguest_init(). */ | 989 | * before returning to the rest of lguest_init(). */ |
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index 7743d73768df..c632c08cbbdc 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c | |||
| @@ -69,11 +69,22 @@ static __init int map_switcher(void) | |||
| 69 | switcher_page[i] = virt_to_page(addr); | 69 | switcher_page[i] = virt_to_page(addr); |
| 70 | } | 70 | } |
| 71 | 71 | ||
| 72 | /* First we check that the Switcher won't overlap the fixmap area at | ||
| 73 | * the top of memory. It's currently nowhere near, but it could have | ||
| 74 | * very strange effects if it ever happened. */ | ||
| 75 | if (SWITCHER_ADDR + (TOTAL_SWITCHER_PAGES+1)*PAGE_SIZE > FIXADDR_START){ | ||
| 76 | err = -ENOMEM; | ||
| 77 | printk("lguest: mapping switcher would thwack fixmap\n"); | ||
| 78 | goto free_pages; | ||
| 79 | } | ||
| 80 | |||
| 72 | /* Now we reserve the "virtual memory area" we want: 0xFFC00000 | 81 | /* Now we reserve the "virtual memory area" we want: 0xFFC00000 |
| 73 | * (SWITCHER_ADDR). We might not get it in theory, but in practice | 82 | * (SWITCHER_ADDR). We might not get it in theory, but in practice |
| 74 | * it's worked so far. */ | 83 | * it's worked so far. The end address needs +1 because __get_vm_area |
| 84 | * allocates an extra guard page, so we need space for that. */ | ||
| 75 | switcher_vma = __get_vm_area(TOTAL_SWITCHER_PAGES * PAGE_SIZE, | 85 | switcher_vma = __get_vm_area(TOTAL_SWITCHER_PAGES * PAGE_SIZE, |
| 76 | VM_ALLOC, SWITCHER_ADDR, VMALLOC_END); | 86 | VM_ALLOC, SWITCHER_ADDR, SWITCHER_ADDR |
| 87 | + (TOTAL_SWITCHER_PAGES+1) * PAGE_SIZE); | ||
| 77 | if (!switcher_vma) { | 88 | if (!switcher_vma) { |
| 78 | err = -ENOMEM; | 89 | err = -ENOMEM; |
| 79 | printk("lguest: could not map switcher pages high\n"); | 90 | printk("lguest: could not map switcher pages high\n"); |
diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index 85d42d3d01a9..2221485b0773 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c | |||
| @@ -241,15 +241,16 @@ static ssize_t write(struct file *file, const char __user *in, | |||
| 241 | cpu = &lg->cpus[cpu_id]; | 241 | cpu = &lg->cpus[cpu_id]; |
| 242 | if (!cpu) | 242 | if (!cpu) |
| 243 | return -EINVAL; | 243 | return -EINVAL; |
| 244 | } | ||
| 245 | 244 | ||
| 246 | /* Once the Guest is dead, all you can do is read() why it died. */ | 245 | /* Once the Guest is dead, you can only read() why it died. */ |
| 247 | if (lg && lg->dead) | 246 | if (lg->dead) |
| 248 | return -ENOENT; | 247 | return -ENOENT; |
| 249 | 248 | ||
| 250 | /* If you're not the task which owns the Guest, you can only break */ | 249 | /* If you're not the task which owns the Guest, all you can do |
| 251 | if (lg && current != cpu->tsk && req != LHREQ_BREAK) | 250 | * is break the Launcher out of running the Guest. */ |
| 252 | return -EPERM; | 251 | if (current != cpu->tsk && req != LHREQ_BREAK) |
| 252 | return -EPERM; | ||
| 253 | } | ||
| 253 | 254 | ||
| 254 | switch (req) { | 255 | switch (req) { |
| 255 | case LHREQ_INITIALIZE: | 256 | case LHREQ_INITIALIZE: |
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index 275f23c2deb4..a7f64a9d67e0 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c | |||
| @@ -391,7 +391,7 @@ static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable) | |||
| 391 | { | 391 | { |
| 392 | unsigned int i; | 392 | unsigned int i; |
| 393 | for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++) | 393 | for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++) |
| 394 | if (lg->pgdirs[i].gpgdir == pgtable) | 394 | if (lg->pgdirs[i].pgdir && lg->pgdirs[i].gpgdir == pgtable) |
| 395 | break; | 395 | break; |
| 396 | return i; | 396 | return i; |
| 397 | } | 397 | } |
