aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@woody.linux-foundation.org> 2008-03-10 21:03:20 -0400
committer Linus Torvalds <torvalds@woody.linux-foundation.org> 2008-03-10 21:03:20 -0400
commit aeb24d2fb08653a39abb50281b1ffa2d2a6879ab (patch)
tree d7fe503eb64ff1d9b76bf34095819f631a39b660
parent 5c0dea0959356d77d985ecfb2911e7a9e23b95e3 (diff)
parent 1ef36fa64e65079de18ff5179a51af58e44d49a6 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-for-linus
* git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-for-linus:
  lguest: Do not append space to guests kernel command line
  lguest: Revert 1ce70c4fac3c3954bd48c035f448793867592bc0, fix real problem.
  lguest: Sanitize the lguest clock.
  lguest: fix __get_vm_area usage.
  lguest: make sure cpu is initialized before accessing it
-rw-r--r-- Documentation/lguest/lguest.c | 7
-rw-r--r-- arch/x86/lguest/boot.c | 55
-rw-r--r-- drivers/lguest/core.c | 15
-rw-r--r-- drivers/lguest/lguest_user.c | 15
-rw-r--r-- drivers/lguest/page_tables.c | 2
5 files changed, 49 insertions, 45 deletions
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index 0f23d67f958f..bec5a32e4095 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -486,9 +486,12 @@ static void concat(char *dst, char *args[])
486 unsigned int i, len = 0; 486 unsigned int i, len = 0;
487 487
488 for (i = 0; args[i]; i++) { 488 for (i = 0; args[i]; i++) {
489 if (i) {
490 strcat(dst+len, " ");
491 len++;
492 }
489 strcpy(dst+len, args[i]); 493 strcpy(dst+len, args[i]);
490 strcat(dst+len, " "); 494 len += strlen(args[i]);
491 len += strlen(args[i]) + 1;
492 } 495 }
493 /* In case it's empty. */ 496 /* In case it's empty. */
494 dst[len] = '\0'; 497 dst[len] = '\0';
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index cccb38a59653..a104c532ff70 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -84,7 +84,6 @@ struct lguest_data lguest_data = {
84 .blocked_interrupts = { 1 }, /* Block timer interrupts */ 84 .blocked_interrupts = { 1 }, /* Block timer interrupts */
85 .syscall_vec = SYSCALL_VECTOR, 85 .syscall_vec = SYSCALL_VECTOR,
86}; 86};
87static cycle_t clock_base;
88 87
89/*G:037 async_hcall() is pretty simple: I'm quite proud of it really. We have a 88/*G:037 async_hcall() is pretty simple: I'm quite proud of it really. We have a
90 * ring buffer of stored hypercalls which the Host will run though next time we 89 * ring buffer of stored hypercalls which the Host will run though next time we
@@ -327,8 +326,8 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx,
327 case 1: /* Basic feature request. */ 326 case 1: /* Basic feature request. */
328 /* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */ 327 /* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */
329 *cx &= 0x00002201; 328 *cx &= 0x00002201;
330 /* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, FPU. */ 329 /* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU. */
331 *dx &= 0x07808101; 330 *dx &= 0x07808111;
332 /* The Host can do a nice optimization if it knows that the 331 /* The Host can do a nice optimization if it knows that the
333 * kernel mappings (addresses above 0xC0000000 or whatever 332 * kernel mappings (addresses above 0xC0000000 or whatever
334 * PAGE_OFFSET is set to) haven't changed. But Linux calls 333 * PAGE_OFFSET is set to) haven't changed. But Linux calls
@@ -481,7 +480,7 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
481{ 480{
482 *pmdp = pmdval; 481 *pmdp = pmdval;
483 lazy_hcall(LHCALL_SET_PMD, __pa(pmdp)&PAGE_MASK, 482 lazy_hcall(LHCALL_SET_PMD, __pa(pmdp)&PAGE_MASK,
484 (__pa(pmdp)&(PAGE_SIZE-1)), 0); 483 (__pa(pmdp)&(PAGE_SIZE-1))/4, 0);
485} 484}
486 485
487/* There are a couple of legacy places where the kernel sets a PTE, but we 486/* There are a couple of legacy places where the kernel sets a PTE, but we
@@ -595,19 +594,25 @@ static unsigned long lguest_get_wallclock(void)
595 return lguest_data.time.tv_sec; 594 return lguest_data.time.tv_sec;
596} 595}
597 596
597/* The TSC is a Time Stamp Counter. The Host tells us what speed it runs at,
598 * or 0 if it's unusable as a reliable clock source. This matches what we want
599 * here: if we return 0 from this function, the x86 TSC clock will not register
600 * itself. */
601static unsigned long lguest_cpu_khz(void)
602{
603 return lguest_data.tsc_khz;
604}
605
606/* If we can't use the TSC, the kernel falls back to our "lguest_clock", where
607 * we read the time value given to us by the Host. */
598static cycle_t lguest_clock_read(void) 608static cycle_t lguest_clock_read(void)
599{ 609{
600 unsigned long sec, nsec; 610 unsigned long sec, nsec;
601 611
602 /* If the Host tells the TSC speed, we can trust that. */ 612 /* Since the time is in two parts (seconds and nanoseconds), we risk
603 if (lguest_data.tsc_khz) 613 * reading it just as it's changing from 99 & 0.999999999 to 100 and 0,
604 return native_read_tsc(); 614 * and getting 99 and 0. As Linux tends to come apart under the stress
605 615 * of time travel, we must be careful: */
606 /* If we can't use the TSC, we read the time value written by the Host.
607 * Since it's in two parts (seconds and nanoseconds), we risk reading
608 * it just as it's changing from 99 & 0.999999999 to 100 and 0, and
609 * getting 99 and 0. As Linux tends to come apart under the stress of
610 * time travel, we must be careful: */
611 do { 616 do {
612 /* First we read the seconds part. */ 617 /* First we read the seconds part. */
613 sec = lguest_data.time.tv_sec; 618 sec = lguest_data.time.tv_sec;
@@ -622,14 +627,14 @@ static cycle_t lguest_clock_read(void)
622 /* Now if the seconds part has changed, try again. */ 627 /* Now if the seconds part has changed, try again. */
623 } while (unlikely(lguest_data.time.tv_sec != sec)); 628 } while (unlikely(lguest_data.time.tv_sec != sec));
624 629
625 /* Our non-TSC clock is in real nanoseconds. */ 630 /* Our lguest clock is in real nanoseconds. */
626 return sec*1000000000ULL + nsec; 631 return sec*1000000000ULL + nsec;
627} 632}
628 633
629/* This is what we tell the kernel is our clocksource. */ 634/* This is the fallback clocksource: lower priority than the TSC clocksource. */
630static struct clocksource lguest_clock = { 635static struct clocksource lguest_clock = {
631 .name = "lguest", 636 .name = "lguest",
632 .rating = 400, 637 .rating = 200,
633 .read = lguest_clock_read, 638 .read = lguest_clock_read,
634 .mask = CLOCKSOURCE_MASK(64), 639 .mask = CLOCKSOURCE_MASK(64),
635 .mult = 1 << 22, 640 .mult = 1 << 22,
@@ -637,12 +642,6 @@ static struct clocksource lguest_clock = {
637 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 642 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
638}; 643};
639 644
640/* The "scheduler clock" is just our real clock, adjusted to start at zero */
641static unsigned long long lguest_sched_clock(void)
642{
643 return cyc2ns(&lguest_clock, lguest_clock_read() - clock_base);
644}
645
646/* We also need a "struct clock_event_device": Linux asks us to set it to go 645/* We also need a "struct clock_event_device": Linux asks us to set it to go
647 * off some time in the future. Actually, James Morris figured all this out, I 646 * off some time in the future. Actually, James Morris figured all this out, I
648 * just applied the patch. */ 647 * just applied the patch. */
@@ -712,19 +711,8 @@ static void lguest_time_init(void)
712 /* Set up the timer interrupt (0) to go to our simple timer routine */ 711 /* Set up the timer interrupt (0) to go to our simple timer routine */
713 set_irq_handler(0, lguest_time_irq); 712 set_irq_handler(0, lguest_time_irq);
714 713
715 /* Our clock structure looks like arch/x86/kernel/tsc_32.c if we can
716 * use the TSC, otherwise it's a dumb nanosecond-resolution clock.
717 * Either way, the "rating" is set so high that it's always chosen over
718 * any other clocksource. */
719 if (lguest_data.tsc_khz)
720 lguest_clock.mult = clocksource_khz2mult(lguest_data.tsc_khz,
721 lguest_clock.shift);
722 clock_base = lguest_clock_read();
723 clocksource_register(&lguest_clock); 714 clocksource_register(&lguest_clock);
724 715
725 /* Now we've set up our clock, we can use it as the scheduler clock */
726 pv_time_ops.sched_clock = lguest_sched_clock;
727
728 /* We can't set cpumask in the initializer: damn C limitations! Set it 716 /* We can't set cpumask in the initializer: damn C limitations! Set it
729 * here and register our timer device. */ 717 * here and register our timer device. */
730 lguest_clockevent.cpumask = cpumask_of_cpu(0); 718 lguest_clockevent.cpumask = cpumask_of_cpu(0);
@@ -995,6 +983,7 @@ __init void lguest_init(void)
995 /* time operations */ 983 /* time operations */
996 pv_time_ops.get_wallclock = lguest_get_wallclock; 984 pv_time_ops.get_wallclock = lguest_get_wallclock;
997 pv_time_ops.time_init = lguest_time_init; 985 pv_time_ops.time_init = lguest_time_init;
986 pv_time_ops.get_cpu_khz = lguest_cpu_khz;
998 987
999 /* Now is a good time to look at the implementations of these functions 988 /* Now is a good time to look at the implementations of these functions
1000 * before returning to the rest of lguest_init(). */ 989 * before returning to the rest of lguest_init(). */
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index 7743d73768df..c632c08cbbdc 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -69,11 +69,22 @@ static __init int map_switcher(void)
69 switcher_page[i] = virt_to_page(addr); 69 switcher_page[i] = virt_to_page(addr);
70 } 70 }
71 71
72 /* First we check that the Switcher won't overlap the fixmap area at
73 * the top of memory. It's currently nowhere near, but it could have
74 * very strange effects if it ever happened. */
75 if (SWITCHER_ADDR + (TOTAL_SWITCHER_PAGES+1)*PAGE_SIZE > FIXADDR_START){
76 err = -ENOMEM;
77 printk("lguest: mapping switcher would thwack fixmap\n");
78 goto free_pages;
79 }
80
72 /* Now we reserve the "virtual memory area" we want: 0xFFC00000 81 /* Now we reserve the "virtual memory area" we want: 0xFFC00000
73 * (SWITCHER_ADDR). We might not get it in theory, but in practice 82 * (SWITCHER_ADDR). We might not get it in theory, but in practice
74 * it's worked so far. */ 83 * it's worked so far. The end address needs +1 because __get_vm_area
84 * allocates an extra guard page, so we need space for that. */
75 switcher_vma = __get_vm_area(TOTAL_SWITCHER_PAGES * PAGE_SIZE, 85 switcher_vma = __get_vm_area(TOTAL_SWITCHER_PAGES * PAGE_SIZE,
76 VM_ALLOC, SWITCHER_ADDR, VMALLOC_END); 86 VM_ALLOC, SWITCHER_ADDR, SWITCHER_ADDR
87 + (TOTAL_SWITCHER_PAGES+1) * PAGE_SIZE);
77 if (!switcher_vma) { 88 if (!switcher_vma) {
78 err = -ENOMEM; 89 err = -ENOMEM;
79 printk("lguest: could not map switcher pages high\n"); 90 printk("lguest: could not map switcher pages high\n");
diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c
index 85d42d3d01a9..2221485b0773 100644
--- a/drivers/lguest/lguest_user.c
+++ b/drivers/lguest/lguest_user.c
@@ -241,15 +241,16 @@ static ssize_t write(struct file *file, const char __user *in,
241 cpu = &lg->cpus[cpu_id]; 241 cpu = &lg->cpus[cpu_id];
242 if (!cpu) 242 if (!cpu)
243 return -EINVAL; 243 return -EINVAL;
244 }
245 244
246 /* Once the Guest is dead, all you can do is read() why it died. */ 245 /* Once the Guest is dead, you can only read() why it died. */
247 if (lg && lg->dead) 246 if (lg->dead)
248 return -ENOENT; 247 return -ENOENT;
249 248
250 /* If you're not the task which owns the Guest, you can only break */ 249 /* If you're not the task which owns the Guest, all you can do
251 if (lg && current != cpu->tsk && req != LHREQ_BREAK) 250 * is break the Launcher out of running the Guest. */
252 return -EPERM; 251 if (current != cpu->tsk && req != LHREQ_BREAK)
252 return -EPERM;
253 }
253 254
254 switch (req) { 255 switch (req) {
255 case LHREQ_INITIALIZE: 256 case LHREQ_INITIALIZE:
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index 275f23c2deb4..a7f64a9d67e0 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -391,7 +391,7 @@ static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable)
391{ 391{
392 unsigned int i; 392 unsigned int i;
393 for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++) 393 for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++)
394 if (lg->pgdirs[i].gpgdir == pgtable) 394 if (lg->pgdirs[i].pgdir && lg->pgdirs[i].gpgdir == pgtable)
395 break; 395 break;
396 return i; 396 return i;
397} 397}