5 files changed, 49 insertions, 45 deletions
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index 0f23d67f958f..bec5a32e4095 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -486,9 +486,12 @@ static void concat(char *dst, char *args[])
        unsigned int i, len = 0;
        for (i = 0; args[i]; i++) {
+                if (i) {
+                        strcat(dst+len, " ");
+                        len++;
+                }
                strcpy(dst+len, args[i]);
-                strcat(dst+len, " ");
+                len += strlen(args[i]);
-                len += strlen(args[i]) + 1;
        }
        /* In case it's empty. */
        dst[len] = '\0';
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index cccb38a59653..a104c532ff70 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -84,7 +84,6 @@ struct lguest_data lguest_data = {
        .blocked_interrupts = { 1 }, /* Block timer interrupts */
        .syscall_vec = SYSCALL_VECTOR,
 };
-static cycle_t clock_base;
 /*G:037 async_hcall() is pretty simple: I'm quite proud of it really.  We have a
 * ring buffer of stored hypercalls which the Host will run though next time we
@@ -327,8 +326,8 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx,
        case 1: /* Basic feature request. */
                /* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */
                *cx &= 0x00002201;
-                /* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, FPU. */
+                /* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU. */
-                *dx &= 0x07808101;
+                *dx &= 0x07808111;
                /* The Host can do a nice optimization if it knows that the
                 * kernel mappings (addresses above 0xC0000000 or whatever
                 * PAGE_OFFSET is set to) haven't changed.  But Linux calls
@@ -481,7 +480,7 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
 {
        *pmdp = pmdval;
        lazy_hcall(LHCALL_SET_PMD, __pa(pmdp)&PAGE_MASK,
-                   (__pa(pmdp)&(PAGE_SIZE-1)), 0);
+                   (__pa(pmdp)&(PAGE_SIZE-1))/4, 0);
 }
 /* There are a couple of legacy places where the kernel sets a PTE, but we
@@ -595,19 +594,25 @@ static unsigned long lguest_get_wallclock(void)
        return lguest_data.time.tv_sec;
 }
+/* The TSC is a Time Stamp Counter.  The Host tells us what speed it runs at,
+ * or 0 if it's unusable as a reliable clock source.  This matches what we want
+ * here: if we return 0 from this function, the x86 TSC clock will not register
+ * itself. */
+static unsigned long lguest_cpu_khz(void)
+{
+        return lguest_data.tsc_khz;
+}
+/* If we can't use the TSC, the kernel falls back to our "lguest_clock", where
+ * we read the time value given to us by the Host. */
 static cycle_t lguest_clock_read(void)
 {
        unsigned long sec, nsec;
-        /* If the Host tells the TSC speed, we can trust that. */
+        /* Since the time is in two parts (seconds and nanoseconds), we risk
-        if (lguest_data.tsc_khz)
+         * reading it just as it's changing from 99 & 0.999999999 to 100 and 0,
-                return native_read_tsc();
+         * and getting 99 and 0.  As Linux tends to come apart under the stress
+         * of time travel, we must be careful: */
-        /* If we can't use the TSC, we read the time value written by the Host.
-         * Since it's in two parts (seconds and nanoseconds), we risk reading
-         * it just as it's changing from 99 & 0.999999999 to 100 and 0, and
-         * getting 99 and 0.  As Linux tends to come apart under the stress of
-         * time travel, we must be careful: */
        do {
                /* First we read the seconds part. */
                sec = lguest_data.time.tv_sec;
@@ -622,14 +627,14 @@ static cycle_t lguest_clock_read(void)
                /* Now if the seconds part has changed, try again. */
        } while (unlikely(lguest_data.time.tv_sec != sec));
-        /* Our non-TSC clock is in real nanoseconds. */
+        /* Our lguest clock is in real nanoseconds. */
        return sec*1000000000ULL + nsec;
 }
-/* This is what we tell the kernel is our clocksource.  */
+/* This is the fallback clocksource: lower priority than the TSC clocksource. */
 static struct clocksource lguest_clock = {
        .name           = "lguest",
-        .rating         = 400,
+        .rating         = 200,
        .read           = lguest_clock_read,
        .mask           = CLOCKSOURCE_MASK(64),
        .mult           = 1 << 22,
@@ -637,12 +642,6 @@ static struct clocksource lguest_clock = {
        .flags          = CLOCK_SOURCE_IS_CONTINUOUS,
 };
-/* The "scheduler clock" is just our real clock, adjusted to start at zero */
-static unsigned long long lguest_sched_clock(void)
-{
-        return cyc2ns(&lguest_clock, lguest_clock_read() - clock_base);
-}
 /* We also need a "struct clock_event_device": Linux asks us to set it to go
 * off some time in the future.  Actually, James Morris figured all this out, I
 * just applied the patch. */
@@ -712,19 +711,8 @@ static void lguest_time_init(void)
        /* Set up the timer interrupt (0) to go to our simple timer routine */
        set_irq_handler(0, lguest_time_irq);
-        /* Our clock structure looks like arch/x86/kernel/tsc_32.c if we can
-         * use the TSC, otherwise it's a dumb nanosecond-resolution clock.
-         * Either way, the "rating" is set so high that it's always chosen over
-         * any other clocksource. */
-        if (lguest_data.tsc_khz)
-                lguest_clock.mult = clocksource_khz2mult(lguest_data.tsc_khz,
-                                                         lguest_clock.shift);
-        clock_base = lguest_clock_read();
        clocksource_register(&lguest_clock);
-        /* Now we've set up our clock, we can use it as the scheduler clock */
-        pv_time_ops.sched_clock = lguest_sched_clock;
        /* We can't set cpumask in the initializer: damn C limitations!  Set it
         * here and register our timer device. */
        lguest_clockevent.cpumask = cpumask_of_cpu(0);
@@ -995,6 +983,7 @@ __init void lguest_init(void)
        /* time operations */
        pv_time_ops.get_wallclock = lguest_get_wallclock;
        pv_time_ops.time_init = lguest_time_init;
+        pv_time_ops.get_cpu_khz = lguest_cpu_khz;
        /* Now is a good time to look at the implementations of these functions
         * before returning to the rest of lguest_init(). */
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index 7743d73768df..c632c08cbbdc 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -69,11 +69,22 @@ static __init int map_switcher(void)
                switcher_page[i] = virt_to_page(addr);
        }
+        /* First we check that the Switcher won't overlap the fixmap area at
+         * the top of memory.  It's currently nowhere near, but it could have
+         * very strange effects if it ever happened. */
+        if (SWITCHER_ADDR + (TOTAL_SWITCHER_PAGES+1)*PAGE_SIZE > FIXADDR_START){
+                err = -ENOMEM;
+                printk("lguest: mapping switcher would thwack fixmap\n");
+                goto free_pages;
+        }
        /* Now we reserve the "virtual memory area" we want: 0xFFC00000
         * (SWITCHER_ADDR).  We might not get it in theory, but in practice
-         * it's worked so far. */
+         * it's worked so far.  The end address needs +1 because __get_vm_area
+         * allocates an extra guard page, so we need space for that. */
        switcher_vma = __get_vm_area(TOTAL_SWITCHER_PAGES * PAGE_SIZE,
-                                       VM_ALLOC, SWITCHER_ADDR, VMALLOC_END);
+                                     VM_ALLOC, SWITCHER_ADDR, SWITCHER_ADDR
+                                     + (TOTAL_SWITCHER_PAGES+1) * PAGE_SIZE);
        if (!switcher_vma) {
                err = -ENOMEM;
                printk("lguest: could not map switcher pages high\n");
diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c
index 85d42d3d01a9..2221485b0773 100644
--- a/drivers/lguest/lguest_user.c
+++ b/drivers/lguest/lguest_user.c
@@ -241,15 +241,16 @@ static ssize_t write(struct file *file, const char __user *in,
                cpu = &lg->cpus[cpu_id];
                if (!cpu)
                        return -EINVAL;
-        }
-        /* Once the Guest is dead, all you can do is read() why it died. */
+                /* Once the Guest is dead, you can only read() why it died. */
-        if (lg && lg->dead)
+                if (lg->dead)
-                return -ENOENT;
+                        return -ENOENT;
-        /* If you're not the task which owns the Guest, you can only break */
+                /* If you're not the task which owns the Guest, all you can do
-        if (lg && current != cpu->tsk && req != LHREQ_BREAK)
+                 * is break the Launcher out of running the Guest. */
-                return -EPERM;
+                if (current != cpu->tsk && req != LHREQ_BREAK)
+                        return -EPERM;
+        }
        switch (req) {
        case LHREQ_INITIALIZE:
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index 275f23c2deb4..a7f64a9d67e0 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -391,7 +391,7 @@ static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable)
 {
        unsigned int i;
        for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++)
-                if (lg->pgdirs[i].gpgdir == pgtable)
+                if (lg->pgdirs[i].pgdir && lg->pgdirs[i].gpgdir == pgtable)
                        break;
        return i;
 }

diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index 0f23d67f958f..bec5a32e4095 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c
@@ -486,9 +486,12 @@ static void concat(char *dst, char *args[])
486	unsigned int i, len = 0;	486	unsigned int i, len = 0;
487		487
488	for (i = 0; args[i]; i++) {	488	for (i = 0; args[i]; i++) {
		489	if (i) {
		490	strcat(dst+len, " ");
		491	len++;
		492	}
489	strcpy(dst+len, args[i]);	493	strcpy(dst+len, args[i]);
490	strcat(dst+len, " ");	494	len += strlen(args[i]);
491	len += strlen(args[i]) + 1;
492	}	495	}
493	/* In case it's empty. */	496	/* In case it's empty. */
494	dst[len] = '\0';	497	dst[len] = '\0';


diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index cccb38a59653..a104c532ff70 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c
@@ -84,7 +84,6 @@ struct lguest_data lguest_data = {
84	.blocked_interrupts = { 1 }, /* Block timer interrupts */	84	.blocked_interrupts = { 1 }, /* Block timer interrupts */
85	.syscall_vec = SYSCALL_VECTOR,	85	.syscall_vec = SYSCALL_VECTOR,
86	};	86	};
87	static cycle_t clock_base;
88		87
89	/*G:037 async_hcall() is pretty simple: I'm quite proud of it really. We have a	88	/*G:037 async_hcall() is pretty simple: I'm quite proud of it really. We have a
90	* ring buffer of stored hypercalls which the Host will run though next time we	89	* ring buffer of stored hypercalls which the Host will run though next time we
@@ -327,8 +326,8 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx,
327	case 1: /* Basic feature request. */	326	case 1: /* Basic feature request. */
328	/* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */	327	/* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */
329	*cx &= 0x00002201;	328	*cx &= 0x00002201;
330	/* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, FPU. */	329	/* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU. */
331	*dx &= 0x07808101;	330	*dx &= 0x07808111;
332	/* The Host can do a nice optimization if it knows that the	331	/* The Host can do a nice optimization if it knows that the
333	* kernel mappings (addresses above 0xC0000000 or whatever	332	* kernel mappings (addresses above 0xC0000000 or whatever
334	* PAGE_OFFSET is set to) haven't changed. But Linux calls	333	* PAGE_OFFSET is set to) haven't changed. But Linux calls
@@ -481,7 +480,7 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
481	{	480	{
482	*pmdp = pmdval;	481	*pmdp = pmdval;
483	lazy_hcall(LHCALL_SET_PMD, __pa(pmdp)&PAGE_MASK,	482	lazy_hcall(LHCALL_SET_PMD, __pa(pmdp)&PAGE_MASK,
484	(__pa(pmdp)&(PAGE_SIZE-1)), 0);	483	(__pa(pmdp)&(PAGE_SIZE-1))/4, 0);
485	}	484	}
486		485
487	/* There are a couple of legacy places where the kernel sets a PTE, but we	486	/* There are a couple of legacy places where the kernel sets a PTE, but we
@@ -595,19 +594,25 @@ static unsigned long lguest_get_wallclock(void)
595	return lguest_data.time.tv_sec;	594	return lguest_data.time.tv_sec;
596	}	595	}
597		596
		597	/* The TSC is a Time Stamp Counter. The Host tells us what speed it runs at,
		598	* or 0 if it's unusable as a reliable clock source. This matches what we want
		599	* here: if we return 0 from this function, the x86 TSC clock will not register
		600	* itself. */
		601	static unsigned long lguest_cpu_khz(void)
		602	{
		603	return lguest_data.tsc_khz;
		604	}
		605
		606	/* If we can't use the TSC, the kernel falls back to our "lguest_clock", where
		607	* we read the time value given to us by the Host. */
598	static cycle_t lguest_clock_read(void)	608	static cycle_t lguest_clock_read(void)
599	{	609	{
600	unsigned long sec, nsec;	610	unsigned long sec, nsec;
601		611
602	/* If the Host tells the TSC speed, we can trust that. */	612	/* Since the time is in two parts (seconds and nanoseconds), we risk
603	if (lguest_data.tsc_khz)	613	* reading it just as it's changing from 99 & 0.999999999 to 100 and 0,
604	return native_read_tsc();	614	* and getting 99 and 0. As Linux tends to come apart under the stress
605		615	* of time travel, we must be careful: */
606	/* If we can't use the TSC, we read the time value written by the Host.
607	* Since it's in two parts (seconds and nanoseconds), we risk reading
608	* it just as it's changing from 99 & 0.999999999 to 100 and 0, and
609	* getting 99 and 0. As Linux tends to come apart under the stress of
610	* time travel, we must be careful: */
611	do {	616	do {
612	/* First we read the seconds part. */	617	/* First we read the seconds part. */
613	sec = lguest_data.time.tv_sec;	618	sec = lguest_data.time.tv_sec;
@@ -622,14 +627,14 @@ static cycle_t lguest_clock_read(void)
622	/* Now if the seconds part has changed, try again. */	627	/* Now if the seconds part has changed, try again. */
623	} while (unlikely(lguest_data.time.tv_sec != sec));	628	} while (unlikely(lguest_data.time.tv_sec != sec));
624		629
625	/* Our non-TSC clock is in real nanoseconds. */	630	/* Our lguest clock is in real nanoseconds. */
626	return sec*1000000000ULL + nsec;	631	return sec*1000000000ULL + nsec;
627	}	632	}
628		633
629	/* This is what we tell the kernel is our clocksource. */	634	/* This is the fallback clocksource: lower priority than the TSC clocksource. */
630	static struct clocksource lguest_clock = {	635	static struct clocksource lguest_clock = {
631	.name = "lguest",	636	.name = "lguest",
632	.rating = 400,	637	.rating = 200,
633	.read = lguest_clock_read,	638	.read = lguest_clock_read,
634	.mask = CLOCKSOURCE_MASK(64),	639	.mask = CLOCKSOURCE_MASK(64),
635	.mult = 1 << 22,	640	.mult = 1 << 22,
@@ -637,12 +642,6 @@ static struct clocksource lguest_clock = {
637	.flags = CLOCK_SOURCE_IS_CONTINUOUS,	642	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
638	};	643	};
639		644
640	/* The "scheduler clock" is just our real clock, adjusted to start at zero */
641	static unsigned long long lguest_sched_clock(void)
642	{
643	return cyc2ns(&lguest_clock, lguest_clock_read() - clock_base);
644	}
645
646	/* We also need a "struct clock_event_device": Linux asks us to set it to go	645	/* We also need a "struct clock_event_device": Linux asks us to set it to go
647	* off some time in the future. Actually, James Morris figured all this out, I	646	* off some time in the future. Actually, James Morris figured all this out, I
648	* just applied the patch. */	647	* just applied the patch. */
@@ -712,19 +711,8 @@ static void lguest_time_init(void)
712	/* Set up the timer interrupt (0) to go to our simple timer routine */	711	/* Set up the timer interrupt (0) to go to our simple timer routine */
713	set_irq_handler(0, lguest_time_irq);	712	set_irq_handler(0, lguest_time_irq);
714		713
715	/* Our clock structure looks like arch/x86/kernel/tsc_32.c if we can
716	* use the TSC, otherwise it's a dumb nanosecond-resolution clock.
717	* Either way, the "rating" is set so high that it's always chosen over
718	* any other clocksource. */
719	if (lguest_data.tsc_khz)
720	lguest_clock.mult = clocksource_khz2mult(lguest_data.tsc_khz,
721	lguest_clock.shift);
722	clock_base = lguest_clock_read();
723	clocksource_register(&lguest_clock);	714	clocksource_register(&lguest_clock);
724		715
725	/* Now we've set up our clock, we can use it as the scheduler clock */
726	pv_time_ops.sched_clock = lguest_sched_clock;
727
728	/* We can't set cpumask in the initializer: damn C limitations! Set it	716	/* We can't set cpumask in the initializer: damn C limitations! Set it
729	* here and register our timer device. */	717	* here and register our timer device. */
730	lguest_clockevent.cpumask = cpumask_of_cpu(0);	718	lguest_clockevent.cpumask = cpumask_of_cpu(0);
@@ -995,6 +983,7 @@ __init void lguest_init(void)
995	/* time operations */	983	/* time operations */
996	pv_time_ops.get_wallclock = lguest_get_wallclock;	984	pv_time_ops.get_wallclock = lguest_get_wallclock;
997	pv_time_ops.time_init = lguest_time_init;	985	pv_time_ops.time_init = lguest_time_init;
		986	pv_time_ops.get_cpu_khz = lguest_cpu_khz;
998		987
999	/* Now is a good time to look at the implementations of these functions	988	/* Now is a good time to look at the implementations of these functions
1000	* before returning to the rest of lguest_init(). */	989	* before returning to the rest of lguest_init(). */


diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index 7743d73768df..c632c08cbbdc 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c
@@ -69,11 +69,22 @@ static __init int map_switcher(void)
69	switcher_page[i] = virt_to_page(addr);	69	switcher_page[i] = virt_to_page(addr);
70	}	70	}
71		71
		72	/* First we check that the Switcher won't overlap the fixmap area at
		73	* the top of memory. It's currently nowhere near, but it could have
		74	* very strange effects if it ever happened. */
		75	if (SWITCHER_ADDR + (TOTAL_SWITCHER_PAGES+1)*PAGE_SIZE > FIXADDR_START){
		76	err = -ENOMEM;
		77	printk("lguest: mapping switcher would thwack fixmap\n");
		78	goto free_pages;
		79	}
		80
72	/* Now we reserve the "virtual memory area" we want: 0xFFC00000	81	/* Now we reserve the "virtual memory area" we want: 0xFFC00000
73	* (SWITCHER_ADDR). We might not get it in theory, but in practice	82	* (SWITCHER_ADDR). We might not get it in theory, but in practice
74	* it's worked so far. */	83	* it's worked so far. The end address needs +1 because __get_vm_area
		84	* allocates an extra guard page, so we need space for that. */
75	switcher_vma = __get_vm_area(TOTAL_SWITCHER_PAGES * PAGE_SIZE,	85	switcher_vma = __get_vm_area(TOTAL_SWITCHER_PAGES * PAGE_SIZE,
76	VM_ALLOC, SWITCHER_ADDR, VMALLOC_END);	86	VM_ALLOC, SWITCHER_ADDR, SWITCHER_ADDR
		87	+ (TOTAL_SWITCHER_PAGES+1) * PAGE_SIZE);
77	if (!switcher_vma) {	88	if (!switcher_vma) {
78	err = -ENOMEM;	89	err = -ENOMEM;
79	printk("lguest: could not map switcher pages high\n");	90	printk("lguest: could not map switcher pages high\n");


diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index 85d42d3d01a9..2221485b0773 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c
@@ -241,15 +241,16 @@ static ssize_t write(struct file *file, const char __user *in,
241	cpu = &lg->cpus[cpu_id];	241	cpu = &lg->cpus[cpu_id];
242	if (!cpu)	242	if (!cpu)
243	return -EINVAL;	243	return -EINVAL;
244	}
245		244
246	/* Once the Guest is dead, all you can do is read() why it died. */	245	/* Once the Guest is dead, you can only read() why it died. */
247	if (lg && lg->dead)	246	if (lg->dead)
248	return -ENOENT;	247	return -ENOENT;
249		248
250	/* If you're not the task which owns the Guest, you can only break */	249	/* If you're not the task which owns the Guest, all you can do
251	if (lg && current != cpu->tsk && req != LHREQ_BREAK)	250	* is break the Launcher out of running the Guest. */
252	return -EPERM;	251	if (current != cpu->tsk && req != LHREQ_BREAK)
		252	return -EPERM;
		253	}
253		254
254	switch (req) {	255	switch (req) {
255	case LHREQ_INITIALIZE:	256	case LHREQ_INITIALIZE:


diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index 275f23c2deb4..a7f64a9d67e0 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c
@@ -391,7 +391,7 @@ static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable)
391	{	391	{
392	unsigned int i;	392	unsigned int i;
393	for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++)	393	for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++)
394	if (lg->pgdirs[i].gpgdir == pgtable)	394	if (lg->pgdirs[i].pgdir && lg->pgdirs[i].gpgdir == pgtable)
395	break;	395	break;
396	return i;	396	return i;
397	}	397	}