author    Linus Torvalds <torvalds@linux-foundation.org>  2008-01-30 17:35:32 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>  2008-01-30 17:35:32 -0500
commit    d145c7253c8cb2ed8a75a8839621b0bb8f778820 (patch)
tree      fac21920d149a2cddfdfbde65066ff98935a9c57 /drivers
parent    44c3b59102e3ecc7a01e9811862633e670595e51 (diff)
parent    84f12e39c856a8b1ab407f8216ecebaf4204b94d (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-for-linus
* git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-for-linus: (27 commits)
  lguest: use __PAGE_KERNEL instead of _PAGE_KERNEL
  lguest: Use explicit includes rateher than indirect
  lguest: get rid of lg variable assignments
  lguest: change gpte_addr header
  lguest: move changed bitmap to lg_cpu
  lguest: move last_pages to lg_cpu
  lguest: change last_guest to last_cpu
  lguest: change spte_addr header
  lguest: per-vcpu lguest pgdir management
  lguest: make pending notifications per-vcpu
  lguest: makes special fields be per-vcpu
  lguest: per-vcpu lguest task management
  lguest: replace lguest_arch with lg_cpu_arch.
  lguest: make registers per-vcpu
  lguest: make emulate_insn receive a vcpu struct.
  lguest: map_switcher_in_guest() per-vcpu
  lguest: per-vcpu interrupt processing.
  lguest: per-vcpu lguest timers
  lguest: make hypercalls use the vcpu struct
  lguest: make write() operation smp aware
  ...

Manual conflict resolved (maybe even correctly, who knows) in drivers/lguest/x86/core.c
Diffstat (limited to 'drivers')
-rw-r--r--   drivers/Makefile                          2
-rw-r--r--   drivers/lguest/core.c                    46
-rw-r--r--   drivers/lguest/hypercalls.c             106
-rw-r--r--   drivers/lguest/interrupts_and_traps.c   149
-rw-r--r--   drivers/lguest/lg.h                     154
-rw-r--r--   drivers/lguest/lguest_user.c            147
-rw-r--r--   drivers/lguest/page_tables.c            179
-rw-r--r--   drivers/lguest/segments.c                48
-rw-r--r--   drivers/lguest/x86/core.c               127
9 files changed, 513 insertions, 445 deletions
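Every hunk below performs the same mechanical conversion: state that each virtual CPU needs privately moves out of "struct lguest" into a new "struct lg_cpu", and routines that used to take a "struct lguest *lg" now take a "struct lg_cpu *cpu" and reach the still-shared state through a back-pointer. A minimal standalone sketch of that shape, with simplified and partly hypothetical fields (the real definitions are in the lg.h hunk further down):

/* Simplified mock-up of the new layout, for orientation only. */
struct lguest;

struct lg_cpu {
	unsigned int id;
	struct lguest *lg;	/* back-pointer to the shared guest */
	int halted;		/* now per-vcpu */
};

struct lguest {
	const char *dead;	/* still guest-wide: all vcpus die together */
	unsigned int nr_cpus;
	struct lg_cpu cpus[4];	/* NR_CPUS in the real header */
};

/* Routines that took "struct lguest *lg" now take a vcpu and reach
 * shared state through one extra hop: */
static int guest_runnable(const struct lg_cpu *cpu)
{
	return !cpu->lg->dead && !cpu->halted;
}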
diff --git a/drivers/Makefile b/drivers/Makefile
index 9e1f808e43cf..0ee9a8a4095e 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -72,7 +72,7 @@ obj-$(CONFIG_ISDN) += isdn/
 obj-$(CONFIG_EDAC)		+= edac/
 obj-$(CONFIG_MCA)		+= mca/
 obj-$(CONFIG_EISA)		+= eisa/
-obj-$(CONFIG_LGUEST_GUEST)	+= lguest/
+obj-y				+= lguest/
 obj-$(CONFIG_CPU_FREQ)		+= cpufreq/
 obj-$(CONFIG_CPU_IDLE)		+= cpuidle/
 obj-$(CONFIG_MMC)		+= mmc/
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index cb4c67025d52..7743d73768df 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -151,43 +151,43 @@ int lguest_address_ok(const struct lguest *lg,
 /* This routine copies memory from the Guest.  Here we can see how useful the
  * kill_lguest() routine we met in the Launcher can be: we return a random
  * value (all zeroes) instead of needing to return an error. */
-void __lgread(struct lguest *lg, void *b, unsigned long addr, unsigned bytes)
+void __lgread(struct lg_cpu *cpu, void *b, unsigned long addr, unsigned bytes)
 {
-	if (!lguest_address_ok(lg, addr, bytes)
-	    || copy_from_user(b, lg->mem_base + addr, bytes) != 0) {
+	if (!lguest_address_ok(cpu->lg, addr, bytes)
+	    || copy_from_user(b, cpu->lg->mem_base + addr, bytes) != 0) {
 		/* copy_from_user should do this, but as we rely on it... */
 		memset(b, 0, bytes);
-		kill_guest(lg, "bad read address %#lx len %u", addr, bytes);
+		kill_guest(cpu, "bad read address %#lx len %u", addr, bytes);
 	}
 }
 
 /* This is the write (copy into guest) version. */
-void __lgwrite(struct lguest *lg, unsigned long addr, const void *b,
+void __lgwrite(struct lg_cpu *cpu, unsigned long addr, const void *b,
 	       unsigned bytes)
 {
-	if (!lguest_address_ok(lg, addr, bytes)
-	    || copy_to_user(lg->mem_base + addr, b, bytes) != 0)
-		kill_guest(lg, "bad write address %#lx len %u", addr, bytes);
+	if (!lguest_address_ok(cpu->lg, addr, bytes)
+	    || copy_to_user(cpu->lg->mem_base + addr, b, bytes) != 0)
+		kill_guest(cpu, "bad write address %#lx len %u", addr, bytes);
 }
 /*:*/
 
 /*H:030 Let's jump straight to the the main loop which runs the Guest.
  * Remember, this is called by the Launcher reading /dev/lguest, and we keep
  * going around and around until something interesting happens. */
-int run_guest(struct lguest *lg, unsigned long __user *user)
+int run_guest(struct lg_cpu *cpu, unsigned long __user *user)
 {
 	/* We stop running once the Guest is dead. */
-	while (!lg->dead) {
+	while (!cpu->lg->dead) {
 		/* First we run any hypercalls the Guest wants done. */
-		if (lg->hcall)
-			do_hypercalls(lg);
+		if (cpu->hcall)
+			do_hypercalls(cpu);
 
 		/* It's possible the Guest did a NOTIFY hypercall to the
 		 * Launcher, in which case we return from the read() now. */
-		if (lg->pending_notify) {
-			if (put_user(lg->pending_notify, user))
+		if (cpu->pending_notify) {
+			if (put_user(cpu->pending_notify, user))
 				return -EFAULT;
-			return sizeof(lg->pending_notify);
+			return sizeof(cpu->pending_notify);
 		}
 
 		/* Check for signals */
@@ -195,13 +195,13 @@ int run_guest(struct lguest *lg, unsigned long __user *user)
 			return -ERESTARTSYS;
 
 		/* If Waker set break_out, return to Launcher. */
-		if (lg->break_out)
+		if (cpu->break_out)
 			return -EAGAIN;
 
 		/* Check if there are any interrupts which can be delivered
 		 * now: if so, this sets up the hander to be executed when we
 		 * next run the Guest. */
-		maybe_do_interrupt(lg);
+		maybe_do_interrupt(cpu);
 
 		/* All long-lived kernel loops need to check with this horrible
 		 * thing called the freezer.  If the Host is trying to suspend,
@@ -210,12 +210,12 @@ int run_guest(struct lguest *lg, unsigned long __user *user)
 
 		/* Just make absolutely sure the Guest is still alive.  One of
 		 * those hypercalls could have been fatal, for example. */
-		if (lg->dead)
+		if (cpu->lg->dead)
 			break;
 
 		/* If the Guest asked to be stopped, we sleep.  The Guest's
 		 * clock timer or LHCALL_BREAK from the Waker will wake us. */
-		if (lg->halted) {
+		if (cpu->halted) {
 			set_current_state(TASK_INTERRUPTIBLE);
 			schedule();
 			continue;
@@ -226,15 +226,17 @@ int run_guest(struct lguest *lg, unsigned long __user *user)
 		local_irq_disable();
 
 		/* Actually run the Guest until something happens. */
-		lguest_arch_run_guest(lg);
+		lguest_arch_run_guest(cpu);
 
 		/* Now we're ready to be interrupted or moved to other CPUs */
 		local_irq_enable();
 
 		/* Now we deal with whatever happened to the Guest. */
-		lguest_arch_handle_trap(lg);
+		lguest_arch_handle_trap(cpu);
 	}
 
+	if (cpu->lg->dead == ERR_PTR(-ERESTART))
+		return -ERESTART;
 	/* The Guest is dead => "No such file or directory" */
 	return -ENOENT;
 }
@@ -253,7 +255,7 @@ static int __init init(void)
 
 	/* Lguest can't run under Xen, VMI or itself.  It does Tricky Stuff. */
 	if (paravirt_enabled()) {
-		printk("lguest is afraid of %s\n", pv_info.name);
+		printk("lguest is afraid of being a guest\n");
 		return -EPERM;
 	}
 
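The two lines added to run_guest() are the Host half of a new reboot path: LHCALL_SHUTDOWN (see hypercalls.c below) can mark the Guest dead with ERR_PTR(-ERESTART), which run_guest() now propagates out of the Launcher's read(). A hedged sketch of how a Launcher loop might tell the exit reasons apart (hypothetical helper, not part of this patch):

#include <errno.h>
#include <sys/types.h>
#include <unistd.h>

/* Assumption: lguest_fd is an open /dev/lguest file descriptor. */
static int service_guest(int lguest_fd)
{
	unsigned long notify_addr;

	for (;;) {
		ssize_t r = read(lguest_fd, &notify_addr, sizeof(notify_addr));

		if (r == sizeof(notify_addr))
			continue;	/* LHCALL_NOTIFY: service it, loop */
		if (errno == ERESTART)
			return 1;	/* Guest asked to reboot: relaunch */
		return 0;		/* ENOENT etc.: the Guest is gone */
	}
}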
diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c
index b478affe8f91..0f2cb4fd7c69 100644
--- a/drivers/lguest/hypercalls.c
+++ b/drivers/lguest/hypercalls.c
@@ -23,13 +23,14 @@
 #include <linux/uaccess.h>
 #include <linux/syscalls.h>
 #include <linux/mm.h>
+#include <linux/ktime.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include "lg.h"
 
 /*H:120 This is the core hypercall routine: where the Guest gets what it wants.
  * Or gets killed.  Or, in the case of LHCALL_CRASH, both. */
-static void do_hcall(struct lguest *lg, struct hcall_args *args)
+static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args)
 {
 	switch (args->arg0) {
 	case LHCALL_FLUSH_ASYNC:
@@ -39,60 +40,62 @@ static void do_hcall(struct lguest *lg, struct hcall_args *args)
 	case LHCALL_LGUEST_INIT:
 		/* You can't get here unless you're already initialized.  Don't
 		 * do that. */
-		kill_guest(lg, "already have lguest_data");
+		kill_guest(cpu, "already have lguest_data");
 		break;
-	case LHCALL_CRASH: {
-		/* Crash is such a trivial hypercall that we do it in four
+	case LHCALL_SHUTDOWN: {
+		/* Shutdown is such a trivial hypercall that we do it in four
 		 * lines right here. */
 		char msg[128];
 		/* If the lgread fails, it will call kill_guest() itself; the
 		 * kill_guest() with the message will be ignored. */
-		__lgread(lg, msg, args->arg1, sizeof(msg));
+		__lgread(cpu, msg, args->arg1, sizeof(msg));
 		msg[sizeof(msg)-1] = '\0';
-		kill_guest(lg, "CRASH: %s", msg);
+		kill_guest(cpu, "CRASH: %s", msg);
+		if (args->arg2 == LGUEST_SHUTDOWN_RESTART)
+			cpu->lg->dead = ERR_PTR(-ERESTART);
 		break;
 	}
 	case LHCALL_FLUSH_TLB:
 		/* FLUSH_TLB comes in two flavors, depending on the
 		 * argument: */
 		if (args->arg1)
-			guest_pagetable_clear_all(lg);
+			guest_pagetable_clear_all(cpu);
 		else
-			guest_pagetable_flush_user(lg);
+			guest_pagetable_flush_user(cpu);
 		break;
 
 	/* All these calls simply pass the arguments through to the right
 	 * routines. */
 	case LHCALL_NEW_PGTABLE:
-		guest_new_pagetable(lg, args->arg1);
+		guest_new_pagetable(cpu, args->arg1);
 		break;
 	case LHCALL_SET_STACK:
-		guest_set_stack(lg, args->arg1, args->arg2, args->arg3);
+		guest_set_stack(cpu, args->arg1, args->arg2, args->arg3);
 		break;
 	case LHCALL_SET_PTE:
-		guest_set_pte(lg, args->arg1, args->arg2, __pte(args->arg3));
+		guest_set_pte(cpu, args->arg1, args->arg2, __pte(args->arg3));
 		break;
 	case LHCALL_SET_PMD:
-		guest_set_pmd(lg, args->arg1, args->arg2);
+		guest_set_pmd(cpu->lg, args->arg1, args->arg2);
 		break;
 	case LHCALL_SET_CLOCKEVENT:
-		guest_set_clockevent(lg, args->arg1);
+		guest_set_clockevent(cpu, args->arg1);
 		break;
 	case LHCALL_TS:
 		/* This sets the TS flag, as we saw used in run_guest(). */
-		lg->ts = args->arg1;
+		cpu->ts = args->arg1;
 		break;
 	case LHCALL_HALT:
 		/* Similarly, this sets the halted flag for run_guest(). */
-		lg->halted = 1;
+		cpu->halted = 1;
 		break;
 	case LHCALL_NOTIFY:
-		lg->pending_notify = args->arg1;
+		cpu->pending_notify = args->arg1;
 		break;
 	default:
 		/* It should be an architecture-specific hypercall. */
-		if (lguest_arch_do_hcall(lg, args))
-			kill_guest(lg, "Bad hypercall %li\n", args->arg0);
+		if (lguest_arch_do_hcall(cpu, args))
+			kill_guest(cpu, "Bad hypercall %li\n", args->arg0);
 	}
 }
 /*:*/
@@ -104,13 +107,13 @@ static void do_hcall(struct lguest *lg, struct hcall_args *args)
  * Guest put them in the ring, but we also promise the Guest that they will
  * happen before any normal hypercall (which is why we check this before
  * checking for a normal hcall). */
-static void do_async_hcalls(struct lguest *lg)
+static void do_async_hcalls(struct lg_cpu *cpu)
 {
 	unsigned int i;
 	u8 st[LHCALL_RING_SIZE];
 
 	/* For simplicity, we copy the entire call status array in at once. */
-	if (copy_from_user(&st, &lg->lguest_data->hcall_status, sizeof(st)))
+	if (copy_from_user(&st, &cpu->lg->lguest_data->hcall_status, sizeof(st)))
 		return;
 
 	/* We process "struct lguest_data"s hcalls[] ring once. */
@@ -119,7 +122,7 @@ static void do_async_hcalls(struct lguest *lg)
 		/* We remember where we were up to from last time.  This makes
 		 * sure that the hypercalls are done in the order the Guest
 		 * places them in the ring. */
-		unsigned int n = lg->next_hcall;
+		unsigned int n = cpu->next_hcall;
 
 		/* 0xFF means there's no call here (yet). */
 		if (st[n] == 0xFF)
@@ -127,65 +130,65 @@ static void do_async_hcalls(struct lguest *lg)
 
 		/* OK, we have hypercall.  Increment the "next_hcall" cursor,
 		 * and wrap back to 0 if we reach the end. */
-		if (++lg->next_hcall == LHCALL_RING_SIZE)
-			lg->next_hcall = 0;
+		if (++cpu->next_hcall == LHCALL_RING_SIZE)
+			cpu->next_hcall = 0;
 
 		/* Copy the hypercall arguments into a local copy of
 		 * the hcall_args struct. */
-		if (copy_from_user(&args, &lg->lguest_data->hcalls[n],
+		if (copy_from_user(&args, &cpu->lg->lguest_data->hcalls[n],
 				   sizeof(struct hcall_args))) {
-			kill_guest(lg, "Fetching async hypercalls");
+			kill_guest(cpu, "Fetching async hypercalls");
 			break;
 		}
 
 		/* Do the hypercall, same as a normal one. */
-		do_hcall(lg, &args);
+		do_hcall(cpu, &args);
 
 		/* Mark the hypercall done. */
-		if (put_user(0xFF, &lg->lguest_data->hcall_status[n])) {
-			kill_guest(lg, "Writing result for async hypercall");
+		if (put_user(0xFF, &cpu->lg->lguest_data->hcall_status[n])) {
+			kill_guest(cpu, "Writing result for async hypercall");
 			break;
 		}
 
 		/* Stop doing hypercalls if they want to notify the Launcher:
 		 * it needs to service this first. */
-		if (lg->pending_notify)
+		if (cpu->pending_notify)
 			break;
 	}
 }
 
 /* Last of all, we look at what happens first of all.  The very first time the
  * Guest makes a hypercall, we end up here to set things up: */
-static void initialize(struct lguest *lg)
+static void initialize(struct lg_cpu *cpu)
 {
 	/* You can't do anything until you're initialized.  The Guest knows the
 	 * rules, so we're unforgiving here. */
-	if (lg->hcall->arg0 != LHCALL_LGUEST_INIT) {
-		kill_guest(lg, "hypercall %li before INIT", lg->hcall->arg0);
+	if (cpu->hcall->arg0 != LHCALL_LGUEST_INIT) {
+		kill_guest(cpu, "hypercall %li before INIT", cpu->hcall->arg0);
 		return;
 	}
 
-	if (lguest_arch_init_hypercalls(lg))
-		kill_guest(lg, "bad guest page %p", lg->lguest_data);
+	if (lguest_arch_init_hypercalls(cpu))
+		kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data);
 
 	/* The Guest tells us where we're not to deliver interrupts by putting
 	 * the range of addresses into "struct lguest_data". */
-	if (get_user(lg->noirq_start, &lg->lguest_data->noirq_start)
-	    || get_user(lg->noirq_end, &lg->lguest_data->noirq_end))
-		kill_guest(lg, "bad guest page %p", lg->lguest_data);
+	if (get_user(cpu->lg->noirq_start, &cpu->lg->lguest_data->noirq_start)
+	    || get_user(cpu->lg->noirq_end, &cpu->lg->lguest_data->noirq_end))
+		kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data);
 
 	/* We write the current time into the Guest's data page once so it can
 	 * set its clock. */
-	write_timestamp(lg);
+	write_timestamp(cpu);
 
 	/* page_tables.c will also do some setup. */
-	page_table_guest_data_init(lg);
+	page_table_guest_data_init(cpu);
 
 	/* This is the one case where the above accesses might have been the
 	 * first write to a Guest page.  This may have caused a copy-on-write
 	 * fault, but the old page might be (read-only) in the Guest
 	 * pagetable. */
-	guest_pagetable_clear_all(lg);
+	guest_pagetable_clear_all(cpu);
 }
 
 /*H:100
@@ -194,27 +197,27 @@ static void initialize(struct lguest *lg)
  * Remember from the Guest, hypercalls come in two flavors: normal and
  * asynchronous.  This file handles both of types.
  */
-void do_hypercalls(struct lguest *lg)
+void do_hypercalls(struct lg_cpu *cpu)
 {
 	/* Not initialized yet? This hypercall must do it. */
-	if (unlikely(!lg->lguest_data)) {
+	if (unlikely(!cpu->lg->lguest_data)) {
 		/* Set up the "struct lguest_data" */
-		initialize(lg);
+		initialize(cpu);
 		/* Hcall is done. */
-		lg->hcall = NULL;
+		cpu->hcall = NULL;
 		return;
 	}
 
 	/* The Guest has initialized.
 	 *
 	 * Look in the hypercall ring for the async hypercalls: */
-	do_async_hcalls(lg);
+	do_async_hcalls(cpu);
 
 	/* If we stopped reading the hypercall ring because the Guest did a
 	 * NOTIFY to the Launcher, we want to return now.  Otherwise we do
 	 * the hypercall. */
-	if (!lg->pending_notify) {
-		do_hcall(lg, lg->hcall);
+	if (!cpu->pending_notify) {
+		do_hcall(cpu, cpu->hcall);
 		/* Tricky point: we reset the hcall pointer to mark the
 		 * hypercall as "done".  We use the hcall pointer rather than
 		 * the trap number to indicate a hypercall is pending.
@@ -225,16 +228,17 @@ void do_hypercalls(struct lguest *lg)
 		 * Launcher, the run_guest() loop will exit without running the
 		 * Guest.  When it comes back it would try to re-run the
 		 * hypercall. */
-		lg->hcall = NULL;
+		cpu->hcall = NULL;
 	}
 }
 
 /* This routine supplies the Guest with time: it's used for wallclock time at
  * initial boot and as a rough time source if the TSC isn't available. */
-void write_timestamp(struct lguest *lg)
+void write_timestamp(struct lg_cpu *cpu)
 {
 	struct timespec now;
 	ktime_get_real_ts(&now);
-	if (copy_to_user(&lg->lguest_data->time, &now, sizeof(struct timespec)))
-		kill_guest(lg, "Writing timestamp");
+	if (copy_to_user(&cpu->lg->lguest_data->time,
+			 &now, sizeof(struct timespec)))
+		kill_guest(cpu, "Writing timestamp");
 }
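For context on the ring that do_async_hcalls() consumes: the Guest fills a slot in lguest_data.hcalls[], then flips the matching hcall_status[] byte from 0xFF ("empty") to 0 ("ready"); the Host runs the call and writes 0xFF back. A sketch of the Guest-side producer, modelled on lguest's async_hcall() but simplified here (assumes the shared lguest_data page and LHCALL_RING_SIZE from the lguest headers):

#include <linux/lguest.h>	/* struct lguest_data, LHCALL_RING_SIZE */
#include <asm/processor.h>	/* cpu_relax() */
#include <asm/system.h>		/* wmb() */

static unsigned int next_call;	/* Guest's own cursor into the ring */

static void async_hcall(unsigned long call, unsigned long arg1,
			unsigned long arg2, unsigned long arg3)
{
	/* Spin until the Host has marked this slot empty (0xFF). */
	while (lguest_data.hcall_status[next_call] != 0xFF)
		cpu_relax();

	/* Fill in the arguments... */
	lguest_data.hcalls[next_call].arg0 = call;
	lguest_data.hcalls[next_call].arg1 = arg1;
	lguest_data.hcalls[next_call].arg2 = arg2;
	lguest_data.hcalls[next_call].arg3 = arg3;
	/* ...and only then publish the slot as ready (0 = pending). */
	wmb();
	lguest_data.hcall_status[next_call] = 0;

	if (++next_call == LHCALL_RING_SIZE)
		next_call = 0;
}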
diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c
index 2b66f79c208b..32e97c1858e5 100644
--- a/drivers/lguest/interrupts_and_traps.c
+++ b/drivers/lguest/interrupts_and_traps.c
@@ -41,11 +41,11 @@ static int idt_present(u32 lo, u32 hi)
 
 /* We need a helper to "push" a value onto the Guest's stack, since that's a
  * big part of what delivering an interrupt does. */
-static void push_guest_stack(struct lguest *lg, unsigned long *gstack, u32 val)
+static void push_guest_stack(struct lg_cpu *cpu, unsigned long *gstack, u32 val)
 {
 	/* Stack grows upwards: move stack then write value. */
 	*gstack -= 4;
-	lgwrite(lg, *gstack, u32, val);
+	lgwrite(cpu, *gstack, u32, val);
 }
 
 /*H:210 The set_guest_interrupt() routine actually delivers the interrupt or
@@ -60,7 +60,7 @@ static void push_guest_stack(struct lguest *lg, unsigned long *gstack, u32 val)
  * We set up the stack just like the CPU does for a real interrupt, so it's
  * identical for the Guest (and the standard "iret" instruction will undo
  * it). */
-static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err)
+static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, int has_err)
 {
 	unsigned long gstack, origstack;
 	u32 eflags, ss, irq_enable;
@@ -69,59 +69,59 @@ static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err)
 	/* There are two cases for interrupts: one where the Guest is already
 	 * in the kernel, and a more complex one where the Guest is in
 	 * userspace.  We check the privilege level to find out. */
-	if ((lg->regs->ss&0x3) != GUEST_PL) {
+	if ((cpu->regs->ss&0x3) != GUEST_PL) {
 		/* The Guest told us their kernel stack with the SET_STACK
 		 * hypercall: both the virtual address and the segment */
-		virtstack = lg->esp1;
-		ss = lg->ss1;
+		virtstack = cpu->esp1;
+		ss = cpu->ss1;
 
-		origstack = gstack = guest_pa(lg, virtstack);
+		origstack = gstack = guest_pa(cpu, virtstack);
 		/* We push the old stack segment and pointer onto the new
 		 * stack: when the Guest does an "iret" back from the interrupt
 		 * handler the CPU will notice they're dropping privilege
 		 * levels and expect these here. */
-		push_guest_stack(lg, &gstack, lg->regs->ss);
-		push_guest_stack(lg, &gstack, lg->regs->esp);
+		push_guest_stack(cpu, &gstack, cpu->regs->ss);
+		push_guest_stack(cpu, &gstack, cpu->regs->esp);
 	} else {
 		/* We're staying on the same Guest (kernel) stack. */
-		virtstack = lg->regs->esp;
-		ss = lg->regs->ss;
+		virtstack = cpu->regs->esp;
+		ss = cpu->regs->ss;
 
-		origstack = gstack = guest_pa(lg, virtstack);
+		origstack = gstack = guest_pa(cpu, virtstack);
 	}
 
 	/* Remember that we never let the Guest actually disable interrupts, so
 	 * the "Interrupt Flag" bit is always set.  We copy that bit from the
 	 * Guest's "irq_enabled" field into the eflags word: we saw the Guest
 	 * copy it back in "lguest_iret". */
-	eflags = lg->regs->eflags;
-	if (get_user(irq_enable, &lg->lguest_data->irq_enabled) == 0
+	eflags = cpu->regs->eflags;
+	if (get_user(irq_enable, &cpu->lg->lguest_data->irq_enabled) == 0
 	    && !(irq_enable & X86_EFLAGS_IF))
 		eflags &= ~X86_EFLAGS_IF;
 
 	/* An interrupt is expected to push three things on the stack: the old
 	 * "eflags" word, the old code segment, and the old instruction
 	 * pointer. */
-	push_guest_stack(lg, &gstack, eflags);
-	push_guest_stack(lg, &gstack, lg->regs->cs);
-	push_guest_stack(lg, &gstack, lg->regs->eip);
+	push_guest_stack(cpu, &gstack, eflags);
+	push_guest_stack(cpu, &gstack, cpu->regs->cs);
+	push_guest_stack(cpu, &gstack, cpu->regs->eip);
 
 	/* For the six traps which supply an error code, we push that, too. */
 	if (has_err)
-		push_guest_stack(lg, &gstack, lg->regs->errcode);
+		push_guest_stack(cpu, &gstack, cpu->regs->errcode);
 
 	/* Now we've pushed all the old state, we change the stack, the code
 	 * segment and the address to execute. */
-	lg->regs->ss = ss;
-	lg->regs->esp = virtstack + (gstack - origstack);
-	lg->regs->cs = (__KERNEL_CS|GUEST_PL);
-	lg->regs->eip = idt_address(lo, hi);
+	cpu->regs->ss = ss;
+	cpu->regs->esp = virtstack + (gstack - origstack);
+	cpu->regs->cs = (__KERNEL_CS|GUEST_PL);
+	cpu->regs->eip = idt_address(lo, hi);
 
 	/* There are two kinds of interrupt handlers: 0xE is an "interrupt
 	 * gate" which expects interrupts to be disabled on entry. */
 	if (idt_type(lo, hi) == 0xE)
-		if (put_user(0, &lg->lguest_data->irq_enabled))
-			kill_guest(lg, "Disabling interrupts");
+		if (put_user(0, &cpu->lg->lguest_data->irq_enabled))
+			kill_guest(cpu, "Disabling interrupts");
 }
 
 /*H:205
@@ -129,23 +129,23 @@ static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err)
  *
  * maybe_do_interrupt() gets called before every entry to the Guest, to see if
  * we should divert the Guest to running an interrupt handler. */
-void maybe_do_interrupt(struct lguest *lg)
+void maybe_do_interrupt(struct lg_cpu *cpu)
 {
 	unsigned int irq;
 	DECLARE_BITMAP(blk, LGUEST_IRQS);
 	struct desc_struct *idt;
 
 	/* If the Guest hasn't even initialized yet, we can do nothing. */
-	if (!lg->lguest_data)
+	if (!cpu->lg->lguest_data)
 		return;
 
 	/* Take our "irqs_pending" array and remove any interrupts the Guest
 	 * wants blocked: the result ends up in "blk". */
-	if (copy_from_user(&blk, lg->lguest_data->blocked_interrupts,
+	if (copy_from_user(&blk, cpu->lg->lguest_data->blocked_interrupts,
 			   sizeof(blk)))
 		return;
 
-	bitmap_andnot(blk, lg->irqs_pending, blk, LGUEST_IRQS);
+	bitmap_andnot(blk, cpu->irqs_pending, blk, LGUEST_IRQS);
 
 	/* Find the first interrupt. */
 	irq = find_first_bit(blk, LGUEST_IRQS);
@@ -155,19 +155,20 @@ void maybe_do_interrupt(struct lguest *lg)
 
 	/* They may be in the middle of an iret, where they asked us never to
 	 * deliver interrupts. */
-	if (lg->regs->eip >= lg->noirq_start && lg->regs->eip < lg->noirq_end)
+	if (cpu->regs->eip >= cpu->lg->noirq_start &&
+	    (cpu->regs->eip < cpu->lg->noirq_end))
 		return;
 
 	/* If they're halted, interrupts restart them. */
-	if (lg->halted) {
+	if (cpu->halted) {
 		/* Re-enable interrupts. */
-		if (put_user(X86_EFLAGS_IF, &lg->lguest_data->irq_enabled))
-			kill_guest(lg, "Re-enabling interrupts");
-		lg->halted = 0;
+		if (put_user(X86_EFLAGS_IF, &cpu->lg->lguest_data->irq_enabled))
+			kill_guest(cpu, "Re-enabling interrupts");
+		cpu->halted = 0;
 	} else {
 		/* Otherwise we check if they have interrupts disabled. */
 		u32 irq_enabled;
-		if (get_user(irq_enabled, &lg->lguest_data->irq_enabled))
+		if (get_user(irq_enabled, &cpu->lg->lguest_data->irq_enabled))
 			irq_enabled = 0;
 		if (!irq_enabled)
 			return;
@@ -176,15 +177,15 @@ void maybe_do_interrupt(struct lguest *lg)
 	/* Look at the IDT entry the Guest gave us for this interrupt.  The
 	 * first 32 (FIRST_EXTERNAL_VECTOR) entries are for traps, so we skip
 	 * over them. */
-	idt = &lg->arch.idt[FIRST_EXTERNAL_VECTOR+irq];
+	idt = &cpu->arch.idt[FIRST_EXTERNAL_VECTOR+irq];
 	/* If they don't have a handler (yet?), we just ignore it */
 	if (idt_present(idt->a, idt->b)) {
 		/* OK, mark it no longer pending and deliver it. */
-		clear_bit(irq, lg->irqs_pending);
+		clear_bit(irq, cpu->irqs_pending);
 		/* set_guest_interrupt() takes the interrupt descriptor and a
 		 * flag to say whether this interrupt pushes an error code onto
 		 * the stack as well: virtual interrupts never do. */
-		set_guest_interrupt(lg, idt->a, idt->b, 0);
+		set_guest_interrupt(cpu, idt->a, idt->b, 0);
 	}
 
 	/* Every time we deliver an interrupt, we update the timestamp in the
@@ -192,7 +193,7 @@ void maybe_do_interrupt(struct lguest *lg)
 	 * did this more often, but it can actually be quite slow: doing it
 	 * here is a compromise which means at least it gets updated every
 	 * timer interrupt. */
-	write_timestamp(lg);
+	write_timestamp(cpu);
 }
 /*:*/
 
@@ -245,19 +246,19 @@ static int has_err(unsigned int trap)
 }
 
 /* deliver_trap() returns true if it could deliver the trap. */
-int deliver_trap(struct lguest *lg, unsigned int num)
+int deliver_trap(struct lg_cpu *cpu, unsigned int num)
 {
 	/* Trap numbers are always 8 bit, but we set an impossible trap number
 	 * for traps inside the Switcher, so check that here. */
-	if (num >= ARRAY_SIZE(lg->arch.idt))
+	if (num >= ARRAY_SIZE(cpu->arch.idt))
 		return 0;
 
 	/* Early on the Guest hasn't set the IDT entries (or maybe it put a
 	 * bogus one in): if we fail here, the Guest will be killed. */
-	if (!idt_present(lg->arch.idt[num].a, lg->arch.idt[num].b))
+	if (!idt_present(cpu->arch.idt[num].a, cpu->arch.idt[num].b))
 		return 0;
-	set_guest_interrupt(lg, lg->arch.idt[num].a, lg->arch.idt[num].b,
-			    has_err(num));
+	set_guest_interrupt(cpu, cpu->arch.idt[num].a,
+			    cpu->arch.idt[num].b, has_err(num));
 	return 1;
 }
 
@@ -309,18 +310,18 @@ static int direct_trap(unsigned int num)
  * the Guest.
  *
  * Which is deeply unfair, because (literally!) it wasn't the Guests' fault. */
-void pin_stack_pages(struct lguest *lg)
+void pin_stack_pages(struct lg_cpu *cpu)
 {
 	unsigned int i;
 
 	/* Depending on the CONFIG_4KSTACKS option, the Guest can have one or
 	 * two pages of stack space. */
-	for (i = 0; i < lg->stack_pages; i++)
+	for (i = 0; i < cpu->lg->stack_pages; i++)
 		/* The stack grows *upwards*, so the address we're given is the
 		 * start of the page after the kernel stack.  Subtract one to
 		 * get back onto the first stack page, and keep subtracting to
 		 * get to the rest of the stack pages. */
-		pin_page(lg, lg->esp1 - 1 - i * PAGE_SIZE);
+		pin_page(cpu, cpu->esp1 - 1 - i * PAGE_SIZE);
 }
 
 /* Direct traps also mean that we need to know whenever the Guest wants to use
@@ -331,21 +332,21 @@ void pin_stack_pages(struct lguest *lg)
  *
  * In Linux each process has its own kernel stack, so this happens a lot: we
  * change stacks on each context switch. */
-void guest_set_stack(struct lguest *lg, u32 seg, u32 esp, unsigned int pages)
+void guest_set_stack(struct lg_cpu *cpu, u32 seg, u32 esp, unsigned int pages)
 {
 	/* You are not allowed have a stack segment with privilege level 0: bad
 	 * Guest! */
 	if ((seg & 0x3) != GUEST_PL)
-		kill_guest(lg, "bad stack segment %i", seg);
+		kill_guest(cpu, "bad stack segment %i", seg);
 	/* We only expect one or two stack pages. */
 	if (pages > 2)
-		kill_guest(lg, "bad stack pages %u", pages);
+		kill_guest(cpu, "bad stack pages %u", pages);
 	/* Save where the stack is, and how many pages */
-	lg->ss1 = seg;
-	lg->esp1 = esp;
-	lg->stack_pages = pages;
+	cpu->ss1 = seg;
+	cpu->esp1 = esp;
+	cpu->lg->stack_pages = pages;
 	/* Make sure the new stack pages are mapped */
-	pin_stack_pages(lg);
+	pin_stack_pages(cpu);
 }
 
 /* All this reference to mapping stacks leads us neatly into the other complex
@@ -353,7 +354,7 @@ void guest_set_stack(struct lguest *lg, u32 seg, u32 esp, unsigned int pages)
 
 /*H:235 This is the routine which actually checks the Guest's IDT entry and
  * transfers it into the entry in "struct lguest": */
-static void set_trap(struct lguest *lg, struct desc_struct *trap,
+static void set_trap(struct lg_cpu *cpu, struct desc_struct *trap,
 		     unsigned int num, u32 lo, u32 hi)
 {
 	u8 type = idt_type(lo, hi);
@@ -366,7 +367,7 @@ static void set_trap(struct lguest *lg, struct desc_struct *trap,
 
 	/* We only support interrupt and trap gates. */
 	if (type != 0xE && type != 0xF)
-		kill_guest(lg, "bad IDT type %i", type);
+		kill_guest(cpu, "bad IDT type %i", type);
 
 	/* We only copy the handler address, present bit, privilege level and
 	 * type.  The privilege level controls where the trap can be triggered
@@ -383,7 +384,7 @@ static void set_trap(struct lguest *lg, struct desc_struct *trap,
  *
  * We saw the Guest setting Interrupt Descriptor Table (IDT) entries with the
  * LHCALL_LOAD_IDT_ENTRY hypercall before: that comes here. */
-void load_guest_idt_entry(struct lguest *lg, unsigned int num, u32 lo, u32 hi)
+void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int num, u32 lo, u32 hi)
 {
 	/* Guest never handles: NMI, doublefault, spurious interrupt or
 	 * hypercall.  We ignore when it tries to set them. */
@@ -392,13 +393,13 @@ void load_guest_idt_entry(struct lguest *lg, unsigned int num, u32 lo, u32 hi)
 
 	/* Mark the IDT as changed: next time the Guest runs we'll know we have
 	 * to copy this again. */
-	lg->changed |= CHANGED_IDT;
+	cpu->changed |= CHANGED_IDT;
 
 	/* Check that the Guest doesn't try to step outside the bounds. */
-	if (num >= ARRAY_SIZE(lg->arch.idt))
-		kill_guest(lg, "Setting idt entry %u", num);
+	if (num >= ARRAY_SIZE(cpu->arch.idt))
+		kill_guest(cpu, "Setting idt entry %u", num);
 	else
-		set_trap(lg, &lg->arch.idt[num], num, lo, hi);
+		set_trap(cpu, &cpu->arch.idt[num], num, lo, hi);
 }
 
 /* The default entry for each interrupt points into the Switcher routines which
@@ -434,14 +435,14 @@ void setup_default_idt_entries(struct lguest_ro_state *state,
 /*H:240 We don't use the IDT entries in the "struct lguest" directly, instead
  * we copy them into the IDT which we've set up for Guests on this CPU, just
  * before we run the Guest.  This routine does that copy. */
-void copy_traps(const struct lguest *lg, struct desc_struct *idt,
+void copy_traps(const struct lg_cpu *cpu, struct desc_struct *idt,
 		const unsigned long *def)
 {
 	unsigned int i;
 
 	/* We can simply copy the direct traps, otherwise we use the default
 	 * ones in the Switcher: they will return to the Host. */
-	for (i = 0; i < ARRAY_SIZE(lg->arch.idt); i++) {
+	for (i = 0; i < ARRAY_SIZE(cpu->arch.idt); i++) {
 		/* If no Guest can ever override this trap, leave it alone. */
 		if (!direct_trap(i))
 			continue;
@@ -450,8 +451,8 @@ void copy_traps(const struct lguest *lg, struct desc_struct *idt,
 		 * Interrupt gates (type 14) disable interrupts as they are
 		 * entered, which we never let the Guest do.  Not present
 		 * entries (type 0x0) also can't go direct, of course. */
-		if (idt_type(lg->arch.idt[i].a, lg->arch.idt[i].b) == 0xF)
-			idt[i] = lg->arch.idt[i];
+		if (idt_type(cpu->arch.idt[i].a, cpu->arch.idt[i].b) == 0xF)
+			idt[i] = cpu->arch.idt[i];
 		else
 			/* Reset it to the default. */
 			default_idt_entry(&idt[i], i, def[i]);
@@ -470,13 +471,13 @@ void copy_traps(const struct lguest *lg, struct desc_struct *idt,
  * infrastructure to set a callback at that time.
  *
  * 0 means "turn off the clock". */
-void guest_set_clockevent(struct lguest *lg, unsigned long delta)
+void guest_set_clockevent(struct lg_cpu *cpu, unsigned long delta)
 {
 	ktime_t expires;
 
 	if (unlikely(delta == 0)) {
 		/* Clock event device is shutting down. */
-		hrtimer_cancel(&lg->hrt);
+		hrtimer_cancel(&cpu->hrt);
 		return;
 	}
 
@@ -484,25 +485,25 @@ void guest_set_clockevent(struct lguest *lg, unsigned long delta)
 	 * all the time between now and the timer interrupt it asked for.  This
 	 * is almost always the right thing to do. */
 	expires = ktime_add_ns(ktime_get_real(), delta);
-	hrtimer_start(&lg->hrt, expires, HRTIMER_MODE_ABS);
+	hrtimer_start(&cpu->hrt, expires, HRTIMER_MODE_ABS);
 }
 
 /* This is the function called when the Guest's timer expires. */
 static enum hrtimer_restart clockdev_fn(struct hrtimer *timer)
 {
-	struct lguest *lg = container_of(timer, struct lguest, hrt);
+	struct lg_cpu *cpu = container_of(timer, struct lg_cpu, hrt);
 
 	/* Remember the first interrupt is the timer interrupt. */
-	set_bit(0, lg->irqs_pending);
+	set_bit(0, cpu->irqs_pending);
 	/* If the Guest is actually stopped, we need to wake it up. */
-	if (lg->halted)
-		wake_up_process(lg->tsk);
+	if (cpu->halted)
+		wake_up_process(cpu->tsk);
 	return HRTIMER_NORESTART;
 }
 
 /* This sets up the timer for this Guest. */
-void init_clockdev(struct lguest *lg)
+void init_clockdev(struct lg_cpu *cpu)
 {
-	hrtimer_init(&lg->hrt, CLOCK_REALTIME, HRTIMER_MODE_ABS);
-	lg->hrt.function = clockdev_fn;
+	hrtimer_init(&cpu->hrt, CLOCK_REALTIME, HRTIMER_MODE_ABS);
+	cpu->hrt.function = clockdev_fn;
 }
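clockdev_fn() above relies on container_of(): because the hrtimer is embedded in struct lg_cpu, a pointer to the timer can be converted back into a pointer to its enclosing per-cpu structure, which is exactly why moving "hrt" into lg_cpu is enough to make the timer per-vcpu. A minimal userspace illustration of the same trick (simplified macro and stand-in types; the kernel's version adds a typeof() check):

#include <stddef.h>
#include <assert.h>

/* Simplified container_of(): member pointer minus member offset. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct fake_timer { int armed; };

struct fake_cpu {
	int id;
	struct fake_timer hrt;	/* embedded, like hrt in struct lg_cpu */
};

int main(void)
{
	struct fake_cpu cpu = { .id = 3 };
	struct fake_timer *t = &cpu.hrt;

	/* Recover the enclosing structure from the member pointer. */
	struct fake_cpu *back = container_of(t, struct fake_cpu, hrt);
	assert(back == &cpu && back->id == 3);
	return 0;
}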
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h
index 86924891b5eb..2337e1a06f02 100644
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -8,6 +8,7 @@
 #include <linux/lguest.h>
 #include <linux/lguest_launcher.h>
 #include <linux/wait.h>
+#include <linux/hrtimer.h>
 #include <linux/err.h>
 #include <asm/semaphore.h>
 
@@ -38,58 +39,72 @@ struct lguest_pages
 #define CHANGED_GDT_TLS		4 /* Actually a subset of CHANGED_GDT */
 #define CHANGED_ALL	        3
 
-/* The private info the thread maintains about the guest. */
-struct lguest
-{
-	/* At end of a page shared mapped over lguest_pages in guest. */
-	unsigned long regs_page;
-	struct lguest_regs *regs;
-	struct lguest_data __user *lguest_data;
+struct lguest;
+
+struct lg_cpu {
+	unsigned int id;
+	struct lguest *lg;
 	struct task_struct *tsk;
 	struct mm_struct *mm; 	/* == tsk->mm, but that becomes NULL on exit */
-	u32 pfn_limit;
-	/* This provides the offset to the base of guest-physical
-	 * memory in the Launcher. */
-	void __user *mem_base;
-	unsigned long kernel_address;
+
 	u32 cr2;
-	int halted;
 	int ts;
-	u32 next_hcall;
 	u32 esp1;
 	u8 ss1;
 
+	/* Bitmap of what has changed: see CHANGED_* above. */
+	int changed;
+
+	unsigned long pending_notify; /* pfn from LHCALL_NOTIFY */
+
+	/* At end of a page shared mapped over lguest_pages in guest. */
+	unsigned long regs_page;
+	struct lguest_regs *regs;
+
+	struct lguest_pages *last_pages;
+
+	int cpu_pgd; /* which pgd this cpu is currently using */
+
 	/* If a hypercall was asked for, this points to the arguments. */
 	struct hcall_args *hcall;
+	u32 next_hcall;
+
+	/* Virtual clock device */
+	struct hrtimer hrt;
 
 	/* Do we need to stop what we're doing and return to userspace? */
 	int break_out;
 	wait_queue_head_t break_wq;
+	int halted;
 
-	/* Bitmap of what has changed: see CHANGED_* above. */
-	int changed;
-	struct lguest_pages *last_pages;
+	/* Pending virtual interrupts */
+	DECLARE_BITMAP(irqs_pending, LGUEST_IRQS);
+
+	struct lg_cpu_arch arch;
+};
+
+/* The private info the thread maintains about the guest. */
+struct lguest
+{
+	struct lguest_data __user *lguest_data;
+	struct lg_cpu cpus[NR_CPUS];
+	unsigned int nr_cpus;
+
+	u32 pfn_limit;
+	/* This provides the offset to the base of guest-physical
+	 * memory in the Launcher. */
+	void __user *mem_base;
+	unsigned long kernel_address;
 
-	/* We keep a small number of these. */
-	u32 pgdidx;
 	struct pgdir pgdirs[4];
 
 	unsigned long noirq_start, noirq_end;
-	unsigned long pending_notify; /* pfn from LHCALL_NOTIFY */
 
 	unsigned int stack_pages;
 	u32 tsc_khz;
 
 	/* Dead? */
 	const char *dead;
-
-	struct lguest_arch arch;
-
-	/* Virtual clock device */
-	struct hrtimer hrt;
-
-	/* Pending virtual interrupts */
-	DECLARE_BITMAP(irqs_pending, LGUEST_IRQS);
 };
 
 extern struct mutex lguest_lock;
@@ -97,26 +112,26 @@ extern struct mutex lguest_lock;
 /* core.c: */
 int lguest_address_ok(const struct lguest *lg,
 		      unsigned long addr, unsigned long len);
-void __lgread(struct lguest *, void *, unsigned long, unsigned);
-void __lgwrite(struct lguest *, unsigned long, const void *, unsigned);
+void __lgread(struct lg_cpu *, void *, unsigned long, unsigned);
+void __lgwrite(struct lg_cpu *, unsigned long, const void *, unsigned);
 
 /*H:035 Using memory-copy operations like that is usually inconvient, so we
  * have the following helper macros which read and write a specific type (often
  * an unsigned long).
 *
  * This reads into a variable of the given type then returns that. */
-#define lgread(lg, addr, type)						\
-	({ type _v; __lgread((lg), &_v, (addr), sizeof(_v)); _v; })
+#define lgread(cpu, addr, type)						\
+	({ type _v; __lgread((cpu), &_v, (addr), sizeof(_v)); _v; })
 
 /* This checks that the variable is of the given type, then writes it out. */
-#define lgwrite(lg, addr, type, val)					\
+#define lgwrite(cpu, addr, type, val)					\
 	do {								\
 		typecheck(type, val);					\
-		__lgwrite((lg), (addr), &(val), sizeof(val));		\
+		__lgwrite((cpu), (addr), &(val), sizeof(val));		\
 	} while(0)
 /* (end of memory access helper routines) :*/
 
-int run_guest(struct lguest *lg, unsigned long __user *user);
+int run_guest(struct lg_cpu *cpu, unsigned long __user *user);
 
 /* Helper macros to obtain the first 12 or the last 20 bits, this is only the
  * first step in the migration to the kernel types.  pte_pfn is already defined
@@ -126,52 +141,53 @@ int run_guest(struct lguest *lg, unsigned long __user *user);
 #define pgd_pfn(x)	(pgd_val(x) >> PAGE_SHIFT)
 
 /* interrupts_and_traps.c: */
-void maybe_do_interrupt(struct lguest *lg);
-int deliver_trap(struct lguest *lg, unsigned int num);
-void load_guest_idt_entry(struct lguest *lg, unsigned int i, u32 low, u32 hi);
-void guest_set_stack(struct lguest *lg, u32 seg, u32 esp, unsigned int pages);
-void pin_stack_pages(struct lguest *lg);
+void maybe_do_interrupt(struct lg_cpu *cpu);
+int deliver_trap(struct lg_cpu *cpu, unsigned int num);
+void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int i,
+			  u32 low, u32 hi);
+void guest_set_stack(struct lg_cpu *cpu, u32 seg, u32 esp, unsigned int pages);
+void pin_stack_pages(struct lg_cpu *cpu);
 void setup_default_idt_entries(struct lguest_ro_state *state,
 			       const unsigned long *def);
-void copy_traps(const struct lguest *lg, struct desc_struct *idt,
+void copy_traps(const struct lg_cpu *cpu, struct desc_struct *idt,
 		const unsigned long *def);
-void guest_set_clockevent(struct lguest *lg, unsigned long delta);
-void init_clockdev(struct lguest *lg);
+void guest_set_clockevent(struct lg_cpu *cpu, unsigned long delta);
+void init_clockdev(struct lg_cpu *cpu);
 bool check_syscall_vector(struct lguest *lg);
 int init_interrupts(void);
 void free_interrupts(void);
 
 /* segments.c: */
 void setup_default_gdt_entries(struct lguest_ro_state *state);
-void setup_guest_gdt(struct lguest *lg);
-void load_guest_gdt(struct lguest *lg, unsigned long table, u32 num);
-void guest_load_tls(struct lguest *lg, unsigned long tls_array);
-void copy_gdt(const struct lguest *lg, struct desc_struct *gdt);
-void copy_gdt_tls(const struct lguest *lg, struct desc_struct *gdt);
+void setup_guest_gdt(struct lg_cpu *cpu);
+void load_guest_gdt(struct lg_cpu *cpu, unsigned long table, u32 num);
+void guest_load_tls(struct lg_cpu *cpu, unsigned long tls_array);
+void copy_gdt(const struct lg_cpu *cpu, struct desc_struct *gdt);
+void copy_gdt_tls(const struct lg_cpu *cpu, struct desc_struct *gdt);
 
 /* page_tables.c: */
 int init_guest_pagetable(struct lguest *lg, unsigned long pgtable);
 void free_guest_pagetable(struct lguest *lg);
-void guest_new_pagetable(struct lguest *lg, unsigned long pgtable);
+void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable);
 void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 i);
-void guest_pagetable_clear_all(struct lguest *lg);
-void guest_pagetable_flush_user(struct lguest *lg);
-void guest_set_pte(struct lguest *lg, unsigned long gpgdir,
+void guest_pagetable_clear_all(struct lg_cpu *cpu);
+void guest_pagetable_flush_user(struct lg_cpu *cpu);
+void guest_set_pte(struct lg_cpu *cpu, unsigned long gpgdir,
 		   unsigned long vaddr, pte_t val);
-void map_switcher_in_guest(struct lguest *lg, struct lguest_pages *pages);
-int demand_page(struct lguest *info, unsigned long cr2, int errcode);
-void pin_page(struct lguest *lg, unsigned long vaddr);
-unsigned long guest_pa(struct lguest *lg, unsigned long vaddr);
-void page_table_guest_data_init(struct lguest *lg);
+void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages);
+int demand_page(struct lg_cpu *cpu, unsigned long cr2, int errcode);
+void pin_page(struct lg_cpu *cpu, unsigned long vaddr);
+unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr);
+void page_table_guest_data_init(struct lg_cpu *cpu);
 
 /* <arch>/core.c: */
 void lguest_arch_host_init(void);
 void lguest_arch_host_fini(void);
-void lguest_arch_run_guest(struct lguest *lg);
-void lguest_arch_handle_trap(struct lguest *lg);
-int lguest_arch_init_hypercalls(struct lguest *lg);
-int lguest_arch_do_hcall(struct lguest *lg, struct hcall_args *args);
-void lguest_arch_setup_regs(struct lguest *lg, unsigned long start);
+void lguest_arch_run_guest(struct lg_cpu *cpu);
+void lguest_arch_handle_trap(struct lg_cpu *cpu);
+int lguest_arch_init_hypercalls(struct lg_cpu *cpu);
+int lguest_arch_do_hcall(struct lg_cpu *cpu, struct hcall_args *args);
+void lguest_arch_setup_regs(struct lg_cpu *cpu, unsigned long start);
 
 /* <arch>/switcher.S: */
 extern char start_switcher_text[], end_switcher_text[], switch_to_guest[];
@@ -181,8 +197,8 @@ int lguest_device_init(void);
181void lguest_device_remove(void); 197void lguest_device_remove(void);
182 198
183/* hypercalls.c: */ 199/* hypercalls.c: */
184void do_hypercalls(struct lguest *lg); 200void do_hypercalls(struct lg_cpu *cpu);
185void write_timestamp(struct lguest *lg); 201void write_timestamp(struct lg_cpu *cpu);
186 202
187/*L:035 203/*L:035
188 * Let's step aside for the moment, to study one important routine that's used 204 * Let's step aside for the moment, to study one important routine that's used
@@ -208,12 +224,12 @@ void write_timestamp(struct lguest *lg);
208 * Like any macro which uses an "if", it is safely wrapped in a run-once "do { 224 * Like any macro which uses an "if", it is safely wrapped in a run-once "do {
209 * } while(0)". 225 * } while(0)".
210 */ 226 */
211#define kill_guest(lg, fmt...) \ 227#define kill_guest(cpu, fmt...) \
212do { \ 228do { \
213 if (!(lg)->dead) { \ 229 if (!(cpu)->lg->dead) { \
214 (lg)->dead = kasprintf(GFP_ATOMIC, fmt); \ 230 (cpu)->lg->dead = kasprintf(GFP_ATOMIC, fmt); \
215 if (!(lg)->dead) \ 231 if (!(cpu)->lg->dead) \
216 (lg)->dead = ERR_PTR(-ENOMEM); \ 232 (cpu)->lg->dead = ERR_PTR(-ENOMEM); \
217 } \ 233 } \
218} while(0) 234} while(0)
219/* (End of aside) :*/ 235/* (End of aside) :*/
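
A typical caller has the shape check_gpte() uses in page_tables.c below; thanks to the do-while wrapper it is safe even as the sole body of an if:

        if (pte_pfn(gpte) >= cpu->lg->pfn_limit)
                kill_guest(cpu, "bad page table entry");

The first kill_guest() wins: later calls find lg->dead already set and do nothing, so the original cause of death is preserved.
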
diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c
index 3b92a61ba8d2..85d42d3d01a9 100644
--- a/drivers/lguest/lguest_user.c
+++ b/drivers/lguest/lguest_user.c
@@ -6,6 +6,7 @@
6#include <linux/uaccess.h> 6#include <linux/uaccess.h>
7#include <linux/miscdevice.h> 7#include <linux/miscdevice.h>
8#include <linux/fs.h> 8#include <linux/fs.h>
9#include <linux/sched.h>
9#include "lg.h" 10#include "lg.h"
10 11
11/*L:055 When something happens, the Waker process needs a way to stop the 12/*L:055 When something happens, the Waker process needs a way to stop the
@@ -13,7 +14,7 @@
13 * LHREQ_BREAK and the value "1" to /dev/lguest to do this. Once the Launcher 14 * LHREQ_BREAK and the value "1" to /dev/lguest to do this. Once the Launcher
14 * has done whatever needs attention, it writes LHREQ_BREAK and "0" to release 15 * has done whatever needs attention, it writes LHREQ_BREAK and "0" to release
15 * the Waker. */ 16 * the Waker. */
16static int break_guest_out(struct lguest *lg, const unsigned long __user *input) 17static int break_guest_out(struct lg_cpu *cpu, const unsigned long __user*input)
17{ 18{
18 unsigned long on; 19 unsigned long on;
19 20
@@ -22,21 +23,21 @@ static int break_guest_out(struct lguest *lg, const unsigned long __user *input)
22 return -EFAULT; 23 return -EFAULT;
23 24
24 if (on) { 25 if (on) {
25 lg->break_out = 1; 26 cpu->break_out = 1;
26 /* Pop it out of the Guest (may be running on different CPU) */ 27 /* Pop it out of the Guest (may be running on different CPU) */
27 wake_up_process(lg->tsk); 28 wake_up_process(cpu->tsk);
28 /* Wait for them to reset it */ 29 /* Wait for them to reset it */
29 return wait_event_interruptible(lg->break_wq, !lg->break_out); 30 return wait_event_interruptible(cpu->break_wq, !cpu->break_out);
30 } else { 31 } else {
31 lg->break_out = 0; 32 cpu->break_out = 0;
32 wake_up(&lg->break_wq); 33 wake_up(&cpu->break_wq);
33 return 0; 34 return 0;
34 } 35 }
35} 36}
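
Seen from userspace, that protocol is two plain write()s on the same file; a hypothetical Launcher-side sketch (fd being an open /dev/lguest descriptor is an assumption here):

        const unsigned long on[]  = { LHREQ_BREAK, 1 };
        const unsigned long off[] = { LHREQ_BREAK, 0 };

        write(fd, on, sizeof(on));      /* Waker: pops the Guest, then blocks */
        /* ... deal with whatever needed attention ... */
        write(fd, off, sizeof(off));    /* Launcher: releases the Waker */
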
36 37
37/*L:050 Sending an interrupt is done by writing LHREQ_IRQ and an interrupt 38/*L:050 Sending an interrupt is done by writing LHREQ_IRQ and an interrupt
38 * number to /dev/lguest. */ 39 * number to /dev/lguest. */
39static int user_send_irq(struct lguest *lg, const unsigned long __user *input) 40static int user_send_irq(struct lg_cpu *cpu, const unsigned long __user *input)
40{ 41{
41 unsigned long irq; 42 unsigned long irq;
42 43
@@ -46,7 +47,7 @@ static int user_send_irq(struct lguest *lg, const unsigned long __user *input)
46 return -EINVAL; 47 return -EINVAL;
47 /* Next time the Guest runs, the core code will see if it can deliver 48 /* Next time the Guest runs, the core code will see if it can deliver
48 * this interrupt. */ 49 * this interrupt. */
49 set_bit(irq, lg->irqs_pending); 50 set_bit(irq, cpu->irqs_pending);
50 return 0; 51 return 0;
51} 52}
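
Since write() below takes the vcpu index from the file offset, a Launcher can aim an interrupt at a particular virtual CPU with pwrite(); a hypothetical sketch:

        const unsigned long args[] = { LHREQ_IRQ, 13 };  /* any valid irq number */
        pwrite(fd, args, sizeof(args), 0);               /* offset 0 selects vcpu 0 */
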
52 53
@@ -55,13 +56,21 @@ static int user_send_irq(struct lguest *lg, const unsigned long __user *input)
55static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o) 56static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o)
56{ 57{
57 struct lguest *lg = file->private_data; 58 struct lguest *lg = file->private_data;
59 struct lg_cpu *cpu;
60 unsigned int cpu_id = *o;
58 61
59 /* You must write LHREQ_INITIALIZE first! */ 62 /* You must write LHREQ_INITIALIZE first! */
60 if (!lg) 63 if (!lg)
61 return -EINVAL; 64 return -EINVAL;
62 65
66 /* Watch out for arbitrary vcpu indexes! */
67 if (cpu_id >= lg->nr_cpus)
68 return -EINVAL;
69
70 cpu = &lg->cpus[cpu_id];
71
63 /* If you're not the task which owns the Guest, go away. */ 72 /* If you're not the task which owns the Guest, go away. */
64 if (current != lg->tsk) 73 if (current != cpu->tsk)
65 return -EPERM; 74 return -EPERM;
66 75
67 /* If the guest is already dead, we indicate why */ 76 /* If the guest is already dead, we indicate why */
@@ -81,11 +90,53 @@ static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o)
81 90
82 /* If we returned from read() last time because the Guest notified, 91 /* If we returned from read() last time because the Guest notified,
83 * clear the flag. */ 92 * clear the flag. */
84 if (lg->pending_notify) 93 if (cpu->pending_notify)
85 lg->pending_notify = 0; 94 cpu->pending_notify = 0;
86 95
87 /* Run the Guest until something interesting happens. */ 96 /* Run the Guest until something interesting happens. */
88 return run_guest(lg, (unsigned long __user *)user); 97 return run_guest(cpu, (unsigned long __user *)user);
98}
99
100static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip)
101{
102 if (id >= NR_CPUS)
103 return -EINVAL;
104
105 cpu->id = id;
106 cpu->lg = container_of((cpu - id), struct lguest, cpus[0]);
107 cpu->lg->nr_cpus++;
108 init_clockdev(cpu);
109
110 /* We need a complete page for the Guest registers: they are accessible
111 * to the Guest and we can only grant it access to whole pages. */
112 cpu->regs_page = get_zeroed_page(GFP_KERNEL);
113 if (!cpu->regs_page)
114 return -ENOMEM;
115
116 /* We actually put the registers at the bottom of the page. */
117 cpu->regs = (void *)cpu->regs_page + PAGE_SIZE - sizeof(*cpu->regs);
118
119 /* Now we initialize the Guest's registers, handing it the start
120 * address. */
121 lguest_arch_setup_regs(cpu, start_ip);
122
123 /* Initialize the queue for the waker to wait on */
124 init_waitqueue_head(&cpu->break_wq);
125
126 /* We keep a pointer to the Launcher task (ie. current task) for when
127 * other Guests want to wake this one (inter-Guest I/O). */
128 cpu->tsk = current;
129
130 /* We need to keep a pointer to the Launcher's memory map, because if
131 * the Launcher dies we need to clean it up. If we don't keep a
132 * reference, it is destroyed before close() is called. */
133 cpu->mm = get_task_mm(cpu->tsk);
134
135 /* We remember which CPU's pages this Guest used last, for optimization
136 * when the same Guest runs on the same CPU twice. */
137 cpu->last_pages = NULL;
138
139 return 0;
89} 140}
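
The container_of() line above deserves a second look: cpu arrives pointing at &lg->cpus[id], so stepping back id array slots lands on &lg->cpus[0], from which container_of() recovers the enclosing struct lguest. Spelled out:

        struct lg_cpu *first = cpu - id;        /* == &lg->cpus[0] */
        struct lguest *owner = container_of(first, struct lguest, cpus[0]);
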
90 141
91/*L:020 The initialization write supplies 4 pointer-sized (32 or 64 bit) 142/*L:020 The initialization write supplies 4 pointer-sized (32 or 64 bit)
@@ -134,15 +185,10 @@ static int initialize(struct file *file, const unsigned long __user *input)
134 lg->mem_base = (void __user *)(long)args[0]; 185 lg->mem_base = (void __user *)(long)args[0];
135 lg->pfn_limit = args[1]; 186 lg->pfn_limit = args[1];
136 187
137 /* We need a complete page for the Guest registers: they are accessible 188 /* This is the first cpu */
138 * to the Guest and we can only grant it access to whole pages. */ 189 err = lg_cpu_start(&lg->cpus[0], 0, args[3]);
139 lg->regs_page = get_zeroed_page(GFP_KERNEL); 190 if (err)
140 if (!lg->regs_page) {
141 err = -ENOMEM;
142 goto release_guest; 191 goto release_guest;
143 }
144 /* We actually put the registers at the bottom of the page. */
145 lg->regs = (void *)lg->regs_page + PAGE_SIZE - sizeof(*lg->regs);
146 192
147 /* Initialize the Guest's shadow page tables, using the toplevel 193 /* Initialize the Guest's shadow page tables, using the toplevel
148 * address the Launcher gave us. This allocates memory, so can 194 * address the Launcher gave us. This allocates memory, so can
@@ -151,28 +197,6 @@ static int initialize(struct file *file, const unsigned long __user *input)
151 if (err) 197 if (err)
152 goto free_regs; 198 goto free_regs;
153 199
154 /* Now we initialize the Guest's registers, handing it the start
155 * address. */
156 lguest_arch_setup_regs(lg, args[3]);
157
158 /* The timer for lguest's clock needs initialization. */
159 init_clockdev(lg);
160
161 /* We keep a pointer to the Launcher task (ie. current task) for when
162 * other Guests want to wake this one (inter-Guest I/O). */
163 lg->tsk = current;
164 /* We need to keep a pointer to the Launcher's memory map, because if
165 * the Launcher dies we need to clean it up. If we don't keep a
166 * reference, it is destroyed before close() is called. */
167 lg->mm = get_task_mm(lg->tsk);
168
169 /* Initialize the queue for the waker to wait on */
170 init_waitqueue_head(&lg->break_wq);
171
172 /* We remember which CPU's pages this Guest used last, for optimization
173 * when the same Guest runs on the same CPU twice. */
174 lg->last_pages = NULL;
175
176 /* We keep our "struct lguest" in the file's private_data. */ 200 /* We keep our "struct lguest" in the file's private_data. */
177 file->private_data = lg; 201 file->private_data = lg;
178 202
@@ -182,7 +206,8 @@ static int initialize(struct file *file, const unsigned long __user *input)
182 return sizeof(args); 206 return sizeof(args);
183 207
184free_regs: 208free_regs:
185 free_page(lg->regs_page); 209 /* FIXME: This should be in free_vcpu */
210 free_page(lg->cpus[0].regs_page);
186release_guest: 211release_guest:
187 kfree(lg); 212 kfree(lg);
188unlock: 213unlock:
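
Pulled together from the Launcher's side, initialize() consumes one request word plus the four pointer-sized values; a hypothetical sketch (reading args[2] as the Guest's toplevel pgtable is an inference from init_guest_pagetable() above, not something this hunk shows):

        const unsigned long args[] = {
                LHREQ_INITIALIZE,
                (unsigned long)guest_mem,       /* becomes lg->mem_base  */
                guest_page_limit,               /* becomes lg->pfn_limit */
                toplevel_pgtable,               /* Guest page table      */
                start_ip,                       /* handed to vcpu 0      */
        };

        write(fd, args, sizeof(args));
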
@@ -202,30 +227,37 @@ static ssize_t write(struct file *file, const char __user *in,
202 struct lguest *lg = file->private_data; 227 struct lguest *lg = file->private_data;
203 const unsigned long __user *input = (const unsigned long __user *)in; 228 const unsigned long __user *input = (const unsigned long __user *)in;
204 unsigned long req; 229 unsigned long req;
230 struct lg_cpu *uninitialized_var(cpu);
231 unsigned int cpu_id = *off;
205 232
206 if (get_user(req, input) != 0) 233 if (get_user(req, input) != 0)
207 return -EFAULT; 234 return -EFAULT;
208 input++; 235 input++;
209 236
210 /* If you haven't initialized, you must do that first. */ 237 /* If you haven't initialized, you must do that first. */
211 if (req != LHREQ_INITIALIZE && !lg) 238 if (req != LHREQ_INITIALIZE) {
212 return -EINVAL; 239 if (!lg || (cpu_id >= lg->nr_cpus))
240 return -EINVAL;
241 cpu = &lg->cpus[cpu_id];
242 if (!cpu)
243 return -EINVAL;
244 }
213 245
214 /* Once the Guest is dead, all you can do is read() why it died. */ 246 /* Once the Guest is dead, all you can do is read() why it died. */
215 if (lg && lg->dead) 247 if (lg && lg->dead)
216 return -ENOENT; 248 return -ENOENT;
217 249
218 /* If you're not the task which owns the Guest, you can only break */ 250 /* If you're not the task which owns the Guest, you can only break */
219 if (lg && current != lg->tsk && req != LHREQ_BREAK) 251 if (lg && current != cpu->tsk && req != LHREQ_BREAK)
220 return -EPERM; 252 return -EPERM;
221 253
222 switch (req) { 254 switch (req) {
223 case LHREQ_INITIALIZE: 255 case LHREQ_INITIALIZE:
224 return initialize(file, input); 256 return initialize(file, input);
225 case LHREQ_IRQ: 257 case LHREQ_IRQ:
226 return user_send_irq(lg, input); 258 return user_send_irq(cpu, input);
227 case LHREQ_BREAK: 259 case LHREQ_BREAK:
228 return break_guest_out(lg, input); 260 return break_guest_out(cpu, input);
229 default: 261 default:
230 return -EINVAL; 262 return -EINVAL;
231 } 263 }
@@ -241,6 +273,7 @@ static ssize_t write(struct file *file, const char __user *in,
241static int close(struct inode *inode, struct file *file) 273static int close(struct inode *inode, struct file *file)
242{ 274{
243 struct lguest *lg = file->private_data; 275 struct lguest *lg = file->private_data;
276 unsigned int i;
244 277
245 /* If we never successfully initialized, there's nothing to clean up */ 278 /* If we never successfully initialized, there's nothing to clean up */
246 if (!lg) 279 if (!lg)
@@ -249,19 +282,23 @@ static int close(struct inode *inode, struct file *file)
249 /* We need the big lock, to protect from inter-guest I/O and other 282 /* We need the big lock, to protect from inter-guest I/O and other
250 * Launchers initializing guests. */ 283 * Launchers initializing guests. */
251 mutex_lock(&lguest_lock); 284 mutex_lock(&lguest_lock);
252 /* Cancels the hrtimer set via LHCALL_SET_CLOCKEVENT. */ 285
253 hrtimer_cancel(&lg->hrt);
254 /* Free up the shadow page tables for the Guest. */ 286 /* Free up the shadow page tables for the Guest. */
255 free_guest_pagetable(lg); 287 free_guest_pagetable(lg);
256 /* Now all the memory cleanups are done, it's safe to release the 288
257 * Launcher's memory management structure. */ 289 for (i = 0; i < lg->nr_cpus; i++) {
258 mmput(lg->mm); 290 /* Cancels the hrtimer set via LHCALL_SET_CLOCKEVENT. */
291 hrtimer_cancel(&lg->cpus[i].hrt);
292 /* We can free up the register page we allocated. */
293 free_page(lg->cpus[i].regs_page);
294 /* Now all the memory cleanups are done, it's safe to release
295 * the Launcher's memory management structure. */
296 mmput(lg->cpus[i].mm);
297 }
259 /* If lg->dead doesn't contain an error code it will be NULL or a 298 /* If lg->dead doesn't contain an error code it will be NULL or a
260 * kmalloc()ed string, either of which is ok to hand to kfree(). */ 299 * kmalloc()ed string, either of which is ok to hand to kfree(). */
261 if (!IS_ERR(lg->dead)) 300 if (!IS_ERR(lg->dead))
262 kfree(lg->dead); 301 kfree(lg->dead);
263 /* We can free up the register page we allocated. */
264 free_page(lg->regs_page);
265 /* We clear the entire structure, which also marks it as free for the 302 /* We clear the entire structure, which also marks it as free for the
266 * next user. */ 303 * next user. */
267 memset(lg, 0, sizeof(*lg)); 304 memset(lg, 0, sizeof(*lg));
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index fffabb327157..74b4cf2a6c41 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -68,23 +68,23 @@ static DEFINE_PER_CPU(pte_t *, switcher_pte_pages);
68 * page directory entry (PGD) for that address. Since we keep track of several 68 * page directory entry (PGD) for that address. Since we keep track of several
69 * page tables, the "i" argument tells us which one we're interested in (it's 69 * page tables, the "i" argument tells us which one we're interested in (it's
70 * usually the current one). */ 70 * usually the current one). */
71static pgd_t *spgd_addr(struct lguest *lg, u32 i, unsigned long vaddr) 71static pgd_t *spgd_addr(struct lg_cpu *cpu, u32 i, unsigned long vaddr)
72{ 72{
73 unsigned int index = pgd_index(vaddr); 73 unsigned int index = pgd_index(vaddr);
74 74
75 /* We kill any Guest trying to touch the Switcher addresses. */ 75 /* We kill any Guest trying to touch the Switcher addresses. */
76 if (index >= SWITCHER_PGD_INDEX) { 76 if (index >= SWITCHER_PGD_INDEX) {
77 kill_guest(lg, "attempt to access switcher pages"); 77 kill_guest(cpu, "attempt to access switcher pages");
78 index = 0; 78 index = 0;
79 } 79 }
80 /* Return a pointer to the index'th pgd entry for the i'th page table. */ 80 /* Return a pointer to the index'th pgd entry for the i'th page table. */
81 return &lg->pgdirs[i].pgdir[index]; 81 return &cpu->lg->pgdirs[i].pgdir[index];
82} 82}
83 83
84/* This routine then takes the page directory entry returned above, which 84/* This routine then takes the page directory entry returned above, which
85 * contains the address of the page table entry (PTE) page. It then returns a 85 * contains the address of the page table entry (PTE) page. It then returns a
86 * pointer to the PTE entry for the given address. */ 86 * pointer to the PTE entry for the given address. */
87static pte_t *spte_addr(struct lguest *lg, pgd_t spgd, unsigned long vaddr) 87static pte_t *spte_addr(pgd_t spgd, unsigned long vaddr)
88{ 88{
89 pte_t *page = __va(pgd_pfn(spgd) << PAGE_SHIFT); 89 pte_t *page = __va(pgd_pfn(spgd) << PAGE_SHIFT);
90 /* You should never call this if the PGD entry wasn't valid */ 90 /* You should never call this if the PGD entry wasn't valid */
@@ -94,14 +94,13 @@ static pte_t *spte_addr(struct lguest *lg, pgd_t spgd, unsigned long vaddr)
94 94
95/* These two functions are just like the above two, except they access the Guest 95/* These two functions are just like the above two, except they access the Guest
96 * page tables. Hence they return a Guest address. */ 96 * page tables. Hence they return a Guest address. */
97static unsigned long gpgd_addr(struct lguest *lg, unsigned long vaddr) 97static unsigned long gpgd_addr(struct lg_cpu *cpu, unsigned long vaddr)
98{ 98{
99 unsigned int index = vaddr >> (PGDIR_SHIFT); 99 unsigned int index = vaddr >> (PGDIR_SHIFT);
100 return lg->pgdirs[lg->pgdidx].gpgdir + index * sizeof(pgd_t); 100 return cpu->lg->pgdirs[cpu->cpu_pgd].gpgdir + index * sizeof(pgd_t);
101} 101}
102 102
103static unsigned long gpte_addr(struct lguest *lg, 103static unsigned long gpte_addr(pgd_t gpgd, unsigned long vaddr)
104 pgd_t gpgd, unsigned long vaddr)
105{ 104{
106 unsigned long gpage = pgd_pfn(gpgd) << PAGE_SHIFT; 105 unsigned long gpage = pgd_pfn(gpgd) << PAGE_SHIFT;
107 BUG_ON(!(pgd_flags(gpgd) & _PAGE_PRESENT)); 106 BUG_ON(!(pgd_flags(gpgd) & _PAGE_PRESENT));
@@ -138,7 +137,7 @@ static unsigned long get_pfn(unsigned long virtpfn, int write)
138 * entry can be a little tricky. The flags are (almost) the same, but the 137 * entry can be a little tricky. The flags are (almost) the same, but the
139 * Guest PTE contains a virtual page number: the CPU needs the real page 138 * Guest PTE contains a virtual page number: the CPU needs the real page
140 * number. */ 139 * number. */
141static pte_t gpte_to_spte(struct lguest *lg, pte_t gpte, int write) 140static pte_t gpte_to_spte(struct lg_cpu *cpu, pte_t gpte, int write)
142{ 141{
143 unsigned long pfn, base, flags; 142 unsigned long pfn, base, flags;
144 143
@@ -149,7 +148,7 @@ static pte_t gpte_to_spte(struct lguest *lg, pte_t gpte, int write)
149 flags = (pte_flags(gpte) & ~_PAGE_GLOBAL); 148 flags = (pte_flags(gpte) & ~_PAGE_GLOBAL);
150 149
151 /* The Guest's pages are offset inside the Launcher. */ 150 /* The Guest's pages are offset inside the Launcher. */
152 base = (unsigned long)lg->mem_base / PAGE_SIZE; 151 base = (unsigned long)cpu->lg->mem_base / PAGE_SIZE;
153 152
154 /* We need a temporary "unsigned long" variable to hold the answer from 153 /* We need a temporary "unsigned long" variable to hold the answer from
155 * get_pfn(), because it returns 0xFFFFFFFF on failure, which wouldn't 154 * get_pfn(), because it returns 0xFFFFFFFF on failure, which wouldn't
@@ -157,7 +156,7 @@ static pte_t gpte_to_spte(struct lguest *lg, pte_t gpte, int write)
157 * page, given the virtual number. */ 156 * page, given the virtual number. */
158 pfn = get_pfn(base + pte_pfn(gpte), write); 157 pfn = get_pfn(base + pte_pfn(gpte), write);
159 if (pfn == -1UL) { 158 if (pfn == -1UL) {
160 kill_guest(lg, "failed to get page %lu", pte_pfn(gpte)); 159 kill_guest(cpu, "failed to get page %lu", pte_pfn(gpte));
161 /* When we destroy the Guest, we'll go through the shadow page 160 /* When we destroy the Guest, we'll go through the shadow page
162 * tables and release_pte() them. Make sure we don't think 161 * tables and release_pte() them. Make sure we don't think
163 * this one is valid! */ 162 * this one is valid! */
@@ -177,17 +176,18 @@ static void release_pte(pte_t pte)
177} 176}
178/*:*/ 177/*:*/
179 178
180static void check_gpte(struct lguest *lg, pte_t gpte) 179static void check_gpte(struct lg_cpu *cpu, pte_t gpte)
181{ 180{
182 if ((pte_flags(gpte) & (_PAGE_PWT|_PAGE_PSE)) 181 if ((pte_flags(gpte) & (_PAGE_PWT|_PAGE_PSE))
183 || pte_pfn(gpte) >= lg->pfn_limit) 182 || pte_pfn(gpte) >= cpu->lg->pfn_limit)
184 kill_guest(lg, "bad page table entry"); 183 kill_guest(cpu, "bad page table entry");
185} 184}
186 185
187static void check_gpgd(struct lguest *lg, pgd_t gpgd) 186static void check_gpgd(struct lg_cpu *cpu, pgd_t gpgd)
188{ 187{
189 if ((pgd_flags(gpgd) & ~_PAGE_TABLE) || pgd_pfn(gpgd) >= lg->pfn_limit) 188 if ((pgd_flags(gpgd) & ~_PAGE_TABLE) ||
190 kill_guest(lg, "bad page directory entry"); 189 (pgd_pfn(gpgd) >= cpu->lg->pfn_limit))
190 kill_guest(cpu, "bad page directory entry");
191} 191}
192 192
193/*H:330 193/*H:330
@@ -200,7 +200,7 @@ static void check_gpgd(struct lguest *lg, pgd_t gpgd)
200 * 200 *
201 * If we fixed up the fault (ie. we mapped the address), this routine returns 201 * If we fixed up the fault (ie. we mapped the address), this routine returns
202 * true. Otherwise, it was a real fault and we need to tell the Guest. */ 202 * true. Otherwise, it was a real fault and we need to tell the Guest. */
203int demand_page(struct lguest *lg, unsigned long vaddr, int errcode) 203int demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
204{ 204{
205 pgd_t gpgd; 205 pgd_t gpgd;
206 pgd_t *spgd; 206 pgd_t *spgd;
@@ -209,24 +209,24 @@ int demand_page(struct lguest *lg, unsigned long vaddr, int errcode)
209 pte_t *spte; 209 pte_t *spte;
210 210
211 /* First step: get the top-level Guest page table entry. */ 211 /* First step: get the top-level Guest page table entry. */
212 gpgd = lgread(lg, gpgd_addr(lg, vaddr), pgd_t); 212 gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t);
213 /* Toplevel not present? We can't map it in. */ 213 /* Toplevel not present? We can't map it in. */
214 if (!(pgd_flags(gpgd) & _PAGE_PRESENT)) 214 if (!(pgd_flags(gpgd) & _PAGE_PRESENT))
215 return 0; 215 return 0;
216 216
217 /* Now look at the matching shadow entry. */ 217 /* Now look at the matching shadow entry. */
218 spgd = spgd_addr(lg, lg->pgdidx, vaddr); 218 spgd = spgd_addr(cpu, cpu->cpu_pgd, vaddr);
219 if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) { 219 if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) {
220 /* No shadow entry: allocate a new shadow PTE page. */ 220 /* No shadow entry: allocate a new shadow PTE page. */
221 unsigned long ptepage = get_zeroed_page(GFP_KERNEL); 221 unsigned long ptepage = get_zeroed_page(GFP_KERNEL);
222 /* This is not really the Guest's fault, but killing it is 222 /* This is not really the Guest's fault, but killing it is
223 * simple for this corner case. */ 223 * simple for this corner case. */
224 if (!ptepage) { 224 if (!ptepage) {
225 kill_guest(lg, "out of memory allocating pte page"); 225 kill_guest(cpu, "out of memory allocating pte page");
226 return 0; 226 return 0;
227 } 227 }
228 /* We check that the Guest pgd is OK. */ 228 /* We check that the Guest pgd is OK. */
229 check_gpgd(lg, gpgd); 229 check_gpgd(cpu, gpgd);
230 /* And we copy the flags to the shadow PGD entry. The page 230 /* And we copy the flags to the shadow PGD entry. The page
231 * number in the shadow PGD is the page we just allocated. */ 231 * number in the shadow PGD is the page we just allocated. */
232 *spgd = __pgd(__pa(ptepage) | pgd_flags(gpgd)); 232 *spgd = __pgd(__pa(ptepage) | pgd_flags(gpgd));
@@ -234,8 +234,8 @@ int demand_page(struct lguest *lg, unsigned long vaddr, int errcode)
234 234
235 /* OK, now we look at the lower level in the Guest page table: keep its 235 /* OK, now we look at the lower level in the Guest page table: keep its
236 * address, because we might update it later. */ 236 * address, because we might update it later. */
237 gpte_ptr = gpte_addr(lg, gpgd, vaddr); 237 gpte_ptr = gpte_addr(gpgd, vaddr);
238 gpte = lgread(lg, gpte_ptr, pte_t); 238 gpte = lgread(cpu, gpte_ptr, pte_t);
239 239
240 /* If this page isn't in the Guest page tables, we can't page it in. */ 240 /* If this page isn't in the Guest page tables, we can't page it in. */
241 if (!(pte_flags(gpte) & _PAGE_PRESENT)) 241 if (!(pte_flags(gpte) & _PAGE_PRESENT))
@@ -252,7 +252,7 @@ int demand_page(struct lguest *lg, unsigned long vaddr, int errcode)
252 252
253 /* Check that the Guest PTE flags are OK, and the page number is below 253 /* Check that the Guest PTE flags are OK, and the page number is below
254 * the pfn_limit (ie. not mapping the Launcher binary). */ 254 * the pfn_limit (ie. not mapping the Launcher binary). */
255 check_gpte(lg, gpte); 255 check_gpte(cpu, gpte);
256 256
257 /* Add the _PAGE_ACCESSED and (for a write) _PAGE_DIRTY flag */ 257 /* Add the _PAGE_ACCESSED and (for a write) _PAGE_DIRTY flag */
258 gpte = pte_mkyoung(gpte); 258 gpte = pte_mkyoung(gpte);
@@ -260,7 +260,7 @@ int demand_page(struct lguest *lg, unsigned long vaddr, int errcode)
260 gpte = pte_mkdirty(gpte); 260 gpte = pte_mkdirty(gpte);
261 261
262 /* Get the pointer to the shadow PTE entry we're going to set. */ 262 /* Get the pointer to the shadow PTE entry we're going to set. */
263 spte = spte_addr(lg, *spgd, vaddr); 263 spte = spte_addr(*spgd, vaddr);
264 /* If there was a valid shadow PTE entry here before, we release it. 264 /* If there was a valid shadow PTE entry here before, we release it.
265 * This can happen with a write to a previously read-only entry. */ 265 * This can happen with a write to a previously read-only entry. */
266 release_pte(*spte); 266 release_pte(*spte);
@@ -268,17 +268,17 @@ int demand_page(struct lguest *lg, unsigned long vaddr, int errcode)
268 /* If this is a write, we insist that the Guest page is writable (the 268 /* If this is a write, we insist that the Guest page is writable (the
269 * final arg to gpte_to_spte()). */ 269 * final arg to gpte_to_spte()). */
270 if (pte_dirty(gpte)) 270 if (pte_dirty(gpte))
271 *spte = gpte_to_spte(lg, gpte, 1); 271 *spte = gpte_to_spte(cpu, gpte, 1);
272 else 272 else
273 /* If this is a read, don't set the "writable" bit in the page 273 /* If this is a read, don't set the "writable" bit in the page
274 * table entry, even if the Guest says it's writable. That way 274 * table entry, even if the Guest says it's writable. That way
275 * we will come back here when a write does actually occur, so 275 * we will come back here when a write does actually occur, so
276 * we can update the Guest's _PAGE_DIRTY flag. */ 276 * we can update the Guest's _PAGE_DIRTY flag. */
277 *spte = gpte_to_spte(lg, pte_wrprotect(gpte), 0); 277 *spte = gpte_to_spte(cpu, pte_wrprotect(gpte), 0);
278 278
279 /* Finally, we write the Guest PTE entry back: we've set the 279 /* Finally, we write the Guest PTE entry back: we've set the
280 * _PAGE_ACCESSED and maybe the _PAGE_DIRTY flags. */ 280 * _PAGE_ACCESSED and maybe the _PAGE_DIRTY flags. */
281 lgwrite(lg, gpte_ptr, pte_t, gpte); 281 lgwrite(cpu, gpte_ptr, pte_t, gpte);
282 282
283 /* The fault is fixed, the page table is populated, the mapping 283 /* The fault is fixed, the page table is populated, the mapping
284 * manipulated, the result returned and the code complete. A small 284 * manipulated, the result returned and the code complete. A small
@@ -297,19 +297,19 @@ int demand_page(struct lguest *lg, unsigned long vaddr, int errcode)
297 * 297 *
298 * This is a quick version which answers the question: is this virtual address 298 * This is a quick version which answers the question: is this virtual address
299 * mapped by the shadow page tables, and is it writable? */ 299 * mapped by the shadow page tables, and is it writable? */
300static int page_writable(struct lguest *lg, unsigned long vaddr) 300static int page_writable(struct lg_cpu *cpu, unsigned long vaddr)
301{ 301{
302 pgd_t *spgd; 302 pgd_t *spgd;
303 unsigned long flags; 303 unsigned long flags;
304 304
305 /* Look at the current top level entry: is it present? */ 305 /* Look at the current top level entry: is it present? */
306 spgd = spgd_addr(lg, lg->pgdidx, vaddr); 306 spgd = spgd_addr(cpu, cpu->cpu_pgd, vaddr);
307 if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) 307 if (!(pgd_flags(*spgd) & _PAGE_PRESENT))
308 return 0; 308 return 0;
309 309
310 /* Check the flags on the pte entry itself: it must be present and 310 /* Check the flags on the pte entry itself: it must be present and
311 * writable. */ 311 * writable. */
312 flags = pte_flags(*(spte_addr(lg, *spgd, vaddr))); 312 flags = pte_flags(*(spte_addr(*spgd, vaddr)));
313 313
314 return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW); 314 return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW);
315} 315}
@@ -317,10 +317,10 @@ static int page_writable(struct lguest *lg, unsigned long vaddr)
317/* So, when pin_stack_pages() asks us to pin a page, we check if it's already 317/* So, when pin_stack_pages() asks us to pin a page, we check if it's already
318 * in the page tables, and if not, we call demand_page() with error code 2 318 * in the page tables, and if not, we call demand_page() with error code 2
319 * (meaning "write"). */ 319 * (meaning "write"). */
320void pin_page(struct lguest *lg, unsigned long vaddr) 320void pin_page(struct lg_cpu *cpu, unsigned long vaddr)
321{ 321{
322 if (!page_writable(lg, vaddr) && !demand_page(lg, vaddr, 2)) 322 if (!page_writable(cpu, vaddr) && !demand_page(cpu, vaddr, 2))
323 kill_guest(lg, "bad stack page %#lx", vaddr); 323 kill_guest(cpu, "bad stack page %#lx", vaddr);
324} 324}
325 325
326/*H:450 If we chase down the release_pgd() code, it looks like this: */ 326/*H:450 If we chase down the release_pgd() code, it looks like this: */
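
The body itself lies outside this hunk; roughly, assembled from the helpers above (a sketch, not the verbatim source), it drops every pte on the shadow page behind a present PGD entry, frees that page, and clears the entry:

        static void release_pgd_sketch(pgd_t *spgd)
        {
                if (pgd_flags(*spgd) & _PAGE_PRESENT) {
                        unsigned int i;
                        pte_t *ptepage = __va(pgd_pfn(*spgd) << PAGE_SHIFT);

                        for (i = 0; i < PTRS_PER_PTE; i++)
                                release_pte(ptepage[i]);
                        free_page((long)ptepage);
                        *spgd = __pgd(0);
                }
        }
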
@@ -358,28 +358,28 @@ static void flush_user_mappings(struct lguest *lg, int idx)
358 * 358 *
359 * The Guest has a hypercall to throw away the page tables: it's used when a 359 * The Guest has a hypercall to throw away the page tables: it's used when a
360 * large number of mappings have been changed. */ 360 * large number of mappings have been changed. */
361void guest_pagetable_flush_user(struct lguest *lg) 361void guest_pagetable_flush_user(struct lg_cpu *cpu)
362{ 362{
363 /* Drop the userspace part of the current page table. */ 363 /* Drop the userspace part of the current page table. */
364 flush_user_mappings(lg, lg->pgdidx); 364 flush_user_mappings(cpu->lg, cpu->cpu_pgd);
365} 365}
366/*:*/ 366/*:*/
367 367
368/* We walk down the guest page tables to get a guest-physical address */ 368/* We walk down the guest page tables to get a guest-physical address */
369unsigned long guest_pa(struct lguest *lg, unsigned long vaddr) 369unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr)
370{ 370{
371 pgd_t gpgd; 371 pgd_t gpgd;
372 pte_t gpte; 372 pte_t gpte;
373 373
374 /* First step: get the top-level Guest page table entry. */ 374 /* First step: get the top-level Guest page table entry. */
375 gpgd = lgread(lg, gpgd_addr(lg, vaddr), pgd_t); 375 gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t);
376 /* Toplevel not present? We can't map it in. */ 376 /* Toplevel not present? We can't map it in. */
377 if (!(pgd_flags(gpgd) & _PAGE_PRESENT)) 377 if (!(pgd_flags(gpgd) & _PAGE_PRESENT))
378 kill_guest(lg, "Bad address %#lx", vaddr); 378 kill_guest(cpu, "Bad address %#lx", vaddr);
379 379
380 gpte = lgread(lg, gpte_addr(lg, gpgd, vaddr), pte_t); 380 gpte = lgread(cpu, gpte_addr(gpgd, vaddr), pte_t);
381 if (!(pte_flags(gpte) & _PAGE_PRESENT)) 381 if (!(pte_flags(gpte) & _PAGE_PRESENT))
382 kill_guest(lg, "Bad address %#lx", vaddr); 382 kill_guest(cpu, "Bad address %#lx", vaddr);
383 383
384 return pte_pfn(gpte) * PAGE_SIZE | (vaddr & ~PAGE_MASK); 384 return pte_pfn(gpte) * PAGE_SIZE | (vaddr & ~PAGE_MASK);
385} 385}
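
A worked example of that last line, assuming 4k pages (so ~PAGE_MASK is 0xfff): with pte_pfn(gpte) == 0x345 and vaddr == 0xc0101234, the result is 0x345 * 4096 | 0x234 == 0x345234, the Guest-physical address of that byte.
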
@@ -399,7 +399,7 @@ static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable)
399/*H:435 And this is us, creating the new page directory. If we really do 399/*H:435 And this is us, creating the new page directory. If we really do
400 * allocate a new one (and so the kernel parts are not there), we set 400 * allocate a new one (and so the kernel parts are not there), we set
401 * blank_pgdir. */ 401 * blank_pgdir. */
402static unsigned int new_pgdir(struct lguest *lg, 402static unsigned int new_pgdir(struct lg_cpu *cpu,
403 unsigned long gpgdir, 403 unsigned long gpgdir,
404 int *blank_pgdir) 404 int *blank_pgdir)
405{ 405{
@@ -407,22 +407,23 @@ static unsigned int new_pgdir(struct lguest *lg,
407 407
408 /* We pick one entry at random to throw out. Choosing the Least 408 /* We pick one entry at random to throw out. Choosing the Least
409 * Recently Used might be better, but this is easy. */ 409 * Recently Used might be better, but this is easy. */
410 next = random32() % ARRAY_SIZE(lg->pgdirs); 410 next = random32() % ARRAY_SIZE(cpu->lg->pgdirs);
411 /* If it's never been allocated at all before, try now. */ 411 /* If it's never been allocated at all before, try now. */
412 if (!lg->pgdirs[next].pgdir) { 412 if (!cpu->lg->pgdirs[next].pgdir) {
413 lg->pgdirs[next].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL); 413 cpu->lg->pgdirs[next].pgdir =
414 (pgd_t *)get_zeroed_page(GFP_KERNEL);
414 /* If the allocation fails, just keep using the one we have */ 415 /* If the allocation fails, just keep using the one we have */
415 if (!lg->pgdirs[next].pgdir) 416 if (!cpu->lg->pgdirs[next].pgdir)
416 next = lg->pgdidx; 417 next = cpu->cpu_pgd;
417 else 418 else
418 /* This is a blank page, so there are no kernel 419 /* This is a blank page, so there are no kernel
419 * mappings: caller must map the stack! */ 420 * mappings: caller must map the stack! */
420 *blank_pgdir = 1; 421 *blank_pgdir = 1;
421 } 422 }
422 /* Record which Guest toplevel this shadows. */ 423 /* Record which Guest toplevel this shadows. */
423 lg->pgdirs[next].gpgdir = gpgdir; 424 cpu->lg->pgdirs[next].gpgdir = gpgdir;
424 /* Release all the non-kernel mappings. */ 425 /* Release all the non-kernel mappings. */
425 flush_user_mappings(lg, next); 426 flush_user_mappings(cpu->lg, next);
426 427
427 return next; 428 return next;
428} 429}
@@ -432,21 +433,21 @@ static unsigned int new_pgdir(struct lguest *lg,
432 * Now we've seen all the page table setting and manipulation, let's see what 433 * Now we've seen all the page table setting and manipulation, let's see what
433 * happens when the Guest changes page tables (ie. changes the top-level 434 * happens when the Guest changes page tables (ie. changes the top-level
434 * pgdir). This occurs on almost every context switch. */ 435 * pgdir). This occurs on almost every context switch. */
435void guest_new_pagetable(struct lguest *lg, unsigned long pgtable) 436void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable)
436{ 437{
437 int newpgdir, repin = 0; 438 int newpgdir, repin = 0;
438 439
439 /* Look to see if we have this one already. */ 440 /* Look to see if we have this one already. */
440 newpgdir = find_pgdir(lg, pgtable); 441 newpgdir = find_pgdir(cpu->lg, pgtable);
441 /* If not, we allocate or mug an existing one: if it's a fresh one, 442 /* If not, we allocate or mug an existing one: if it's a fresh one,
442 * repin gets set to 1. */ 443 * repin gets set to 1. */
443 if (newpgdir == ARRAY_SIZE(lg->pgdirs)) 444 if (newpgdir == ARRAY_SIZE(cpu->lg->pgdirs))
444 newpgdir = new_pgdir(lg, pgtable, &repin); 445 newpgdir = new_pgdir(cpu, pgtable, &repin);
445 /* Change the current pgd index to the new one. */ 446 /* Change the current pgd index to the new one. */
446 lg->pgdidx = newpgdir; 447 cpu->cpu_pgd = newpgdir;
447 /* If it was completely blank, we map in the Guest kernel stack */ 448 /* If it was completely blank, we map in the Guest kernel stack */
448 if (repin) 449 if (repin)
449 pin_stack_pages(lg); 450 pin_stack_pages(cpu);
450} 451}
451 452
452/*H:470 Finally, a routine which throws away everything: all PGD entries in all 453/*H:470 Finally, a routine which throws away everything: all PGD entries in all
@@ -468,11 +469,11 @@ static void release_all_pagetables(struct lguest *lg)
468 * mapping. Since kernel mappings are in every page table, it's easiest to 469 * mapping. Since kernel mappings are in every page table, it's easiest to
469 * throw them all away. This traps the Guest in amber for a while as 470 * throw them all away. This traps the Guest in amber for a while as
470 * everything faults back in, but it's rare. */ 471 * everything faults back in, but it's rare. */
471void guest_pagetable_clear_all(struct lguest *lg) 472void guest_pagetable_clear_all(struct lg_cpu *cpu)
472{ 473{
473 release_all_pagetables(lg); 474 release_all_pagetables(cpu->lg);
474 /* We need the Guest kernel stack mapped again. */ 475 /* We need the Guest kernel stack mapped again. */
475 pin_stack_pages(lg); 476 pin_stack_pages(cpu);
476} 477}
477/*:*/ 478/*:*/
478/*M:009 Since we throw away all mappings when a kernel mapping changes, our 479/*M:009 Since we throw away all mappings when a kernel mapping changes, our
@@ -497,24 +498,24 @@ void guest_pagetable_clear_all(struct lguest *lg)
497 * _PAGE_ACCESSED then we can put a read-only PTE entry in immediately, and if 498 * _PAGE_ACCESSED then we can put a read-only PTE entry in immediately, and if
498 * they set _PAGE_DIRTY then we can put a writable PTE entry in immediately. 499 * they set _PAGE_DIRTY then we can put a writable PTE entry in immediately.
499 */ 500 */
500static void do_set_pte(struct lguest *lg, int idx, 501static void do_set_pte(struct lg_cpu *cpu, int idx,
501 unsigned long vaddr, pte_t gpte) 502 unsigned long vaddr, pte_t gpte)
502{ 503{
503 /* Look up the matching shadow page directory entry. */ 504 /* Look up the matching shadow page directory entry. */
504 pgd_t *spgd = spgd_addr(lg, idx, vaddr); 505 pgd_t *spgd = spgd_addr(cpu, idx, vaddr);
505 506
506 /* If the top level isn't present, there's no entry to update. */ 507 /* If the top level isn't present, there's no entry to update. */
507 if (pgd_flags(*spgd) & _PAGE_PRESENT) { 508 if (pgd_flags(*spgd) & _PAGE_PRESENT) {
508 /* Otherwise, we start by releasing the existing entry. */ 509 /* Otherwise, we start by releasing the existing entry. */
509 pte_t *spte = spte_addr(lg, *spgd, vaddr); 510 pte_t *spte = spte_addr(*spgd, vaddr);
510 release_pte(*spte); 511 release_pte(*spte);
511 512
512 /* If they're setting this entry as dirty or accessed, we might 513 /* If they're setting this entry as dirty or accessed, we might
513 * as well put that entry they've given us in now. This shaves 514 * as well put that entry they've given us in now. This shaves
514 * 10% off a copy-on-write micro-benchmark. */ 515 * 10% off a copy-on-write micro-benchmark. */
515 if (pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED)) { 516 if (pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED)) {
516 check_gpte(lg, gpte); 517 check_gpte(cpu, gpte);
517 *spte = gpte_to_spte(lg, gpte, 518 *spte = gpte_to_spte(cpu, gpte,
518 pte_flags(gpte) & _PAGE_DIRTY); 519 pte_flags(gpte) & _PAGE_DIRTY);
519 } else 520 } else
520 /* Otherwise kill it and we can demand_page() it in 521 /* Otherwise kill it and we can demand_page() it in
@@ -533,22 +534,22 @@ static void do_set_pte(struct lguest *lg, int idx,
533 * 534 *
534 * The benefit is that when we have to track a new page table, we can keep 535 * The benefit is that when we have to track a new page table, we can keep
535 * all the kernel mappings. This speeds up context switches immensely. */ 536 * all the kernel mappings. This speeds up context switches immensely. */
536void guest_set_pte(struct lguest *lg, 537void guest_set_pte(struct lg_cpu *cpu,
537 unsigned long gpgdir, unsigned long vaddr, pte_t gpte) 538 unsigned long gpgdir, unsigned long vaddr, pte_t gpte)
538{ 539{
539 /* Kernel mappings must be changed on all top levels. Slow, but 540 /* Kernel mappings must be changed on all top levels. Slow, but
540 * doesn't happen often. */ 541 * doesn't happen often. */
541 if (vaddr >= lg->kernel_address) { 542 if (vaddr >= cpu->lg->kernel_address) {
542 unsigned int i; 543 unsigned int i;
543 for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++) 544 for (i = 0; i < ARRAY_SIZE(cpu->lg->pgdirs); i++)
544 if (lg->pgdirs[i].pgdir) 545 if (cpu->lg->pgdirs[i].pgdir)
545 do_set_pte(lg, i, vaddr, gpte); 546 do_set_pte(cpu, i, vaddr, gpte);
546 } else { 547 } else {
547 /* Is this page table one we have a shadow for? */ 548 /* Is this page table one we have a shadow for? */
548 int pgdir = find_pgdir(lg, gpgdir); 549 int pgdir = find_pgdir(cpu->lg, gpgdir);
549 if (pgdir != ARRAY_SIZE(lg->pgdirs)) 550 if (pgdir != ARRAY_SIZE(cpu->lg->pgdirs))
550 /* If so, do the update. */ 551 /* If so, do the update. */
551 do_set_pte(lg, pgdir, vaddr, gpte); 552 do_set_pte(cpu, pgdir, vaddr, gpte);
552 } 553 }
553} 554}
554 555
@@ -590,30 +591,32 @@ int init_guest_pagetable(struct lguest *lg, unsigned long pgtable)
590{ 591{
591 /* We start on the first shadow page table, and give it a blank PGD 592 /* We start on the first shadow page table, and give it a blank PGD
592 * page. */ 593 * page. */
593 lg->pgdidx = 0; 594 lg->pgdirs[0].gpgdir = pgtable;
594 lg->pgdirs[lg->pgdidx].gpgdir = pgtable; 595 lg->pgdirs[0].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL);
595 lg->pgdirs[lg->pgdidx].pgdir = (pgd_t*)get_zeroed_page(GFP_KERNEL); 596 if (!lg->pgdirs[0].pgdir)
596 if (!lg->pgdirs[lg->pgdidx].pgdir)
597 return -ENOMEM; 597 return -ENOMEM;
598 lg->cpus[0].cpu_pgd = 0;
598 return 0; 599 return 0;
599} 600}
600 601
601/* When the Guest calls LHCALL_LGUEST_INIT we do more setup. */ 602/* When the Guest calls LHCALL_LGUEST_INIT we do more setup. */
602void page_table_guest_data_init(struct lguest *lg) 603void page_table_guest_data_init(struct lg_cpu *cpu)
603{ 604{
604 /* We get the kernel address: above this is all kernel memory. */ 605 /* We get the kernel address: above this is all kernel memory. */
605 if (get_user(lg->kernel_address, &lg->lguest_data->kernel_address) 606 if (get_user(cpu->lg->kernel_address,
607 &cpu->lg->lguest_data->kernel_address)
606 /* We tell the Guest that it can't use the top 4MB of virtual 608 /* We tell the Guest that it can't use the top 4MB of virtual
607 * addresses used by the Switcher. */ 609 * addresses used by the Switcher. */
608 || put_user(4U*1024*1024, &lg->lguest_data->reserve_mem) 610 || put_user(4U*1024*1024, &cpu->lg->lguest_data->reserve_mem)
609 || put_user(lg->pgdirs[lg->pgdidx].gpgdir,&lg->lguest_data->pgdir)) 611 || put_user(cpu->lg->pgdirs[0].gpgdir, &cpu->lg->lguest_data->pgdir))
610 kill_guest(lg, "bad guest page %p", lg->lguest_data); 612 kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data);
611 613
612 /* In flush_user_mappings() we loop from 0 to 614 /* In flush_user_mappings() we loop from 0 to
613 * "pgd_index(lg->kernel_address)". This assumes it won't hit the 615 * "pgd_index(lg->kernel_address)". This assumes it won't hit the
614 * Switcher mappings, so check that now. */ 616 * Switcher mappings, so check that now. */
615 if (pgd_index(lg->kernel_address) >= SWITCHER_PGD_INDEX) 617 if (pgd_index(cpu->lg->kernel_address) >= SWITCHER_PGD_INDEX)
616 kill_guest(lg, "bad kernel address %#lx", lg->kernel_address); 618 kill_guest(cpu, "bad kernel address %#lx",
619 cpu->lg->kernel_address);
617} 620}
618 621
619/* When a Guest dies, our cleanup is fairly simple. */ 622/* When a Guest dies, our cleanup is fairly simple. */
@@ -634,17 +637,18 @@ void free_guest_pagetable(struct lguest *lg)
634 * Guest (and not the pages for other CPUs). We have the appropriate PTE pages 637 * Guest (and not the pages for other CPUs). We have the appropriate PTE pages
635 * for each CPU already set up, we just need to hook them in now we know which 638 * for each CPU already set up, we just need to hook them in now we know which
636 * Guest is about to run on this CPU. */ 639 * Guest is about to run on this CPU. */
637void map_switcher_in_guest(struct lguest *lg, struct lguest_pages *pages) 640void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages)
638{ 641{
639 pte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages); 642 pte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages);
640 pgd_t switcher_pgd; 643 pgd_t switcher_pgd;
641 pte_t regs_pte; 644 pte_t regs_pte;
645 unsigned long pfn;
642 646
643 /* Make the last PGD entry for this Guest point to the Switcher's PTE 647 /* Make the last PGD entry for this Guest point to the Switcher's PTE
644 * page for this CPU (with appropriate flags). */ 648 * page for this CPU (with appropriate flags). */
645 switcher_pgd = __pgd(__pa(switcher_pte_page) | _PAGE_KERNEL); 649 switcher_pgd = __pgd(__pa(switcher_pte_page) | __PAGE_KERNEL);
646 650
647 lg->pgdirs[lg->pgdidx].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd; 651 cpu->lg->pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd;
648 652
649 /* We also change the Switcher PTE page. When we're running the Guest, 653 /* We also change the Switcher PTE page. When we're running the Guest,
650 * we want the Guest's "regs" page to appear where the first Switcher 654 * we want the Guest's "regs" page to appear where the first Switcher
@@ -653,7 +657,8 @@ void map_switcher_in_guest(struct lguest *lg, struct lguest_pages *pages)
653 * CPU's "struct lguest_pages": if we make sure the Guest's register 657 * CPU's "struct lguest_pages": if we make sure the Guest's register
654 * page is already mapped there, we don't have to copy them out 658 * page is already mapped there, we don't have to copy them out
655 * again. */ 659 * again. */
656 regs_pte = pfn_pte (__pa(lg->regs_page) >> PAGE_SHIFT, __pgprot(_PAGE_KERNEL)); 660 pfn = __pa(cpu->regs_page) >> PAGE_SHIFT;
661 regs_pte = pfn_pte(pfn, __pgprot(__PAGE_KERNEL));
657 switcher_pte_page[(unsigned long)pages/PAGE_SIZE%PTRS_PER_PTE] = regs_pte; 662 switcher_pte_page[(unsigned long)pages/PAGE_SIZE%PTRS_PER_PTE] = regs_pte;
658} 663}
659/*:*/ 664/*:*/
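
The index arithmetic in that last assignment rewards a worked example, assuming 4k pages and PTRS_PER_PTE == 1024: if this CPU's struct lguest_pages sat at, say, 0xffc02000 (somewhere in the Switcher's top 4MB), then 0xffc02000 / 4096 % 1024 == 2, so the third pte of the Switcher pte page is the one remapped to the Guest's register page.
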
diff --git a/drivers/lguest/segments.c b/drivers/lguest/segments.c
index 9e189cbec7dd..ec6aa3f1c36b 100644
--- a/drivers/lguest/segments.c
+++ b/drivers/lguest/segments.c
@@ -58,7 +58,7 @@ static int ignored_gdt(unsigned int num)
58 * Protection Fault in the Switcher when it restores a Guest segment register 58 * Protection Fault in the Switcher when it restores a Guest segment register
59 * which tries to use that entry. Then we kill the Guest for causing such a 59 * which tries to use that entry. Then we kill the Guest for causing such a
60 * mess: the message will be "unhandled trap 256". */ 60 * mess: the message will be "unhandled trap 256". */
61static void fixup_gdt_table(struct lguest *lg, unsigned start, unsigned end) 61static void fixup_gdt_table(struct lg_cpu *cpu, unsigned start, unsigned end)
62{ 62{
63 unsigned int i; 63 unsigned int i;
64 64
@@ -71,14 +71,14 @@ static void fixup_gdt_table(struct lguest *lg, unsigned start, unsigned end)
71 /* Segment descriptors contain a privilege level: the Guest is 71 /* Segment descriptors contain a privilege level: the Guest is
72 * sometimes careless and leaves this as 0, even though it's 72 * sometimes careless and leaves this as 0, even though it's
73 * running at privilege level 1. If so, we fix it here. */ 73 * running at privilege level 1. If so, we fix it here. */
74 if ((lg->arch.gdt[i].b & 0x00006000) == 0) 74 if ((cpu->arch.gdt[i].b & 0x00006000) == 0)
75 lg->arch.gdt[i].b |= (GUEST_PL << 13); 75 cpu->arch.gdt[i].b |= (GUEST_PL << 13);
76 76
77 /* Each descriptor has an "accessed" bit. If we don't set it 77 /* Each descriptor has an "accessed" bit. If we don't set it
78 * now, the CPU will try to set it when the Guest first loads 78 * now, the CPU will try to set it when the Guest first loads
79 * that entry into a segment register. But the GDT isn't 79 * that entry into a segment register. But the GDT isn't
80 * writable by the Guest, so bad things can happen. */ 80 * writable by the Guest, so bad things can happen. */
81 lg->arch.gdt[i].b |= 0x00000100; 81 cpu->arch.gdt[i].b |= 0x00000100;
82 } 82 }
83} 83}
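
Bit-level, the two fixups above amount to this (GUEST_PL is 1, since as noted the Guest runs at privilege level 1):

        /* 0x00006000 masks bits 13-14 of the descriptor's high word, the
         * DPL field, so (GUEST_PL << 13) == 0x00002000 rewrites DPL 0 as
         * DPL 1; 0x00000100 is bit 8, the segment's "accessed" bit,
         * pre-set so the CPU never tries to write the Guest's read-only
         * GDT. */
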
84 84
@@ -109,31 +109,31 @@ void setup_default_gdt_entries(struct lguest_ro_state *state)
109 109
110/* This routine sets up the initial Guest GDT for booting. All entries start 110/* This routine sets up the initial Guest GDT for booting. All entries start
111 * as 0 (unusable). */ 111 * as 0 (unusable). */
112void setup_guest_gdt(struct lguest *lg) 112void setup_guest_gdt(struct lg_cpu *cpu)
113{ 113{
114 /* Start with full 0-4G segments... */ 114 /* Start with full 0-4G segments... */
115 lg->arch.gdt[GDT_ENTRY_KERNEL_CS] = FULL_EXEC_SEGMENT; 115 cpu->arch.gdt[GDT_ENTRY_KERNEL_CS] = FULL_EXEC_SEGMENT;
116 lg->arch.gdt[GDT_ENTRY_KERNEL_DS] = FULL_SEGMENT; 116 cpu->arch.gdt[GDT_ENTRY_KERNEL_DS] = FULL_SEGMENT;
117 /* ...except the Guest is allowed to use them, so set the privilege 117 /* ...except the Guest is allowed to use them, so set the privilege
118 * level appropriately in the flags. */ 118 * level appropriately in the flags. */
119 lg->arch.gdt[GDT_ENTRY_KERNEL_CS].b |= (GUEST_PL << 13); 119 cpu->arch.gdt[GDT_ENTRY_KERNEL_CS].b |= (GUEST_PL << 13);
120 lg->arch.gdt[GDT_ENTRY_KERNEL_DS].b |= (GUEST_PL << 13); 120 cpu->arch.gdt[GDT_ENTRY_KERNEL_DS].b |= (GUEST_PL << 13);
121} 121}
122 122
123/*H:650 An optimization of copy_gdt(), for just the three "thread-local storage" 123/*H:650 An optimization of copy_gdt(), for just the three "thread-local storage"
124 * entries. */ 124 * entries. */
125void copy_gdt_tls(const struct lguest *lg, struct desc_struct *gdt) 125void copy_gdt_tls(const struct lg_cpu *cpu, struct desc_struct *gdt)
126{ 126{
127 unsigned int i; 127 unsigned int i;
128 128
129 for (i = GDT_ENTRY_TLS_MIN; i <= GDT_ENTRY_TLS_MAX; i++) 129 for (i = GDT_ENTRY_TLS_MIN; i <= GDT_ENTRY_TLS_MAX; i++)
130 gdt[i] = lg->arch.gdt[i]; 130 gdt[i] = cpu->arch.gdt[i];
131} 131}
132 132
133/*H:640 When the Guest is run on a different CPU, or the GDT entries have 133/*H:640 When the Guest is run on a different CPU, or the GDT entries have
134 * changed, copy_gdt() is called to copy the Guest's GDT entries across to this 134 * changed, copy_gdt() is called to copy the Guest's GDT entries across to this
135 * CPU's GDT. */ 135 * CPU's GDT. */
136void copy_gdt(const struct lguest *lg, struct desc_struct *gdt) 136void copy_gdt(const struct lg_cpu *cpu, struct desc_struct *gdt)
137{ 137{
138 unsigned int i; 138 unsigned int i;
139 139
@@ -141,38 +141,38 @@ void copy_gdt(const struct lguest *lg, struct desc_struct *gdt)
141 * replaced. See ignored_gdt() above. */ 141 * replaced. See ignored_gdt() above. */
142 for (i = 0; i < GDT_ENTRIES; i++) 142 for (i = 0; i < GDT_ENTRIES; i++)
143 if (!ignored_gdt(i)) 143 if (!ignored_gdt(i))
144 gdt[i] = lg->arch.gdt[i]; 144 gdt[i] = cpu->arch.gdt[i];
145} 145}
146 146
147/*H:620 This is where the Guest asks us to load a new GDT (LHCALL_LOAD_GDT). 147/*H:620 This is where the Guest asks us to load a new GDT (LHCALL_LOAD_GDT).
148 * We copy it from the Guest and tweak the entries. */ 148 * We copy it from the Guest and tweak the entries. */
149void load_guest_gdt(struct lguest *lg, unsigned long table, u32 num) 149void load_guest_gdt(struct lg_cpu *cpu, unsigned long table, u32 num)
150{ 150{
151 /* We assume the Guest has the same number of GDT entries as the 151 /* We assume the Guest has the same number of GDT entries as the
152 * Host, otherwise we'd have to dynamically allocate the Guest GDT. */ 152 * Host, otherwise we'd have to dynamically allocate the Guest GDT. */
153 if (num > ARRAY_SIZE(lg->arch.gdt)) 153 if (num > ARRAY_SIZE(cpu->arch.gdt))
154 kill_guest(lg, "too many gdt entries %i", num); 154 kill_guest(cpu, "too many gdt entries %i", num);
155 155
156 /* We read the whole thing in, then fix it up. */ 156 /* We read the whole thing in, then fix it up. */
157 __lgread(lg, lg->arch.gdt, table, num * sizeof(lg->arch.gdt[0])); 157 __lgread(cpu, cpu->arch.gdt, table, num * sizeof(cpu->arch.gdt[0]));
158 fixup_gdt_table(lg, 0, ARRAY_SIZE(lg->arch.gdt)); 158 fixup_gdt_table(cpu, 0, ARRAY_SIZE(cpu->arch.gdt));
159 /* Mark that the GDT changed so the core knows it has to copy it again, 159 /* Mark that the GDT changed so the core knows it has to copy it again,
160 * even if the Guest is run on the same CPU. */ 160 * even if the Guest is run on the same CPU. */
161 lg->changed |= CHANGED_GDT; 161 cpu->changed |= CHANGED_GDT;
162} 162}
163 163
164/* This is the fast-track version for just changing the three TLS entries. 164/* This is the fast-track version for just changing the three TLS entries.
165 * Remember that this happens on every context switch, so it's worth 165 * Remember that this happens on every context switch, so it's worth
166 * optimizing. But wouldn't it be neater to have a single hypercall to cover 166 * optimizing. But wouldn't it be neater to have a single hypercall to cover
167 * both cases? */ 167 * both cases? */
168void guest_load_tls(struct lguest *lg, unsigned long gtls) 168void guest_load_tls(struct lg_cpu *cpu, unsigned long gtls)
169{ 169{
170 struct desc_struct *tls = &lg->arch.gdt[GDT_ENTRY_TLS_MIN]; 170 struct desc_struct *tls = &cpu->arch.gdt[GDT_ENTRY_TLS_MIN];
171 171
172 __lgread(lg, tls, gtls, sizeof(*tls)*GDT_ENTRY_TLS_ENTRIES); 172 __lgread(cpu, tls, gtls, sizeof(*tls)*GDT_ENTRY_TLS_ENTRIES);
173 fixup_gdt_table(lg, GDT_ENTRY_TLS_MIN, GDT_ENTRY_TLS_MAX+1); 173 fixup_gdt_table(cpu, GDT_ENTRY_TLS_MIN, GDT_ENTRY_TLS_MAX+1);
174 /* Note that just the TLS entries have changed. */ 174 /* Note that just the TLS entries have changed. */
175 lg->changed |= CHANGED_GDT_TLS; 175 cpu->changed |= CHANGED_GDT_TLS;
176} 176}
177/*:*/ 177/*:*/
178 178
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index 44adb00e1490..61f2f8eb8cad 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
@@ -60,7 +60,7 @@ static struct lguest_pages *lguest_pages(unsigned int cpu)
                   (SWITCHER_ADDR + SHARED_SWITCHER_PAGES*PAGE_SIZE))[cpu]);
 }
 
-static DEFINE_PER_CPU(struct lguest *, last_guest);
+static DEFINE_PER_CPU(struct lg_cpu *, last_cpu);
 
 /*S:010
  * We approach the Switcher.
@@ -73,16 +73,16 @@ static DEFINE_PER_CPU(struct lguest *, last_guest);
  * since it last ran. We saw this set in interrupts_and_traps.c and
  * segments.c.
  */
-static void copy_in_guest_info(struct lguest *lg, struct lguest_pages *pages)
+static void copy_in_guest_info(struct lg_cpu *cpu, struct lguest_pages *pages)
 {
         /* Copying all this data can be quite expensive. We usually run the
          * same Guest we ran last time (and that Guest hasn't run anywhere else
          * meanwhile). If that's not the case, we pretend everything in the
          * Guest has changed. */
-        if (__get_cpu_var(last_guest) != lg || lg->last_pages != pages) {
-                __get_cpu_var(last_guest) = lg;
-                lg->last_pages = pages;
-                lg->changed = CHANGED_ALL;
+        if (__get_cpu_var(last_cpu) != cpu || cpu->last_pages != pages) {
+                __get_cpu_var(last_cpu) = cpu;
+                cpu->last_pages = pages;
+                cpu->changed = CHANGED_ALL;
         }
 
         /* These copies are pretty cheap, so we do them unconditionally: */
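The changed bitmap set here is a small collection of dirty flags declared in lg.h. A sketch of their shape (the flag names are the ones used in this diff; the values are illustrative, not checked against this tree):

/* Which Guest state must be re-copied into the Switcher pages: */
#define CHANGED_IDT             1
#define CHANGED_GDT             2
#define CHANGED_GDT_TLS         4       /* implied by a full GDT copy */
#define CHANGED_ALL             3       /* IDT plus full GDT covers everything */
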
@@ -90,42 +90,42 @@ static void copy_in_guest_info(struct lguest *lg, struct lguest_pages *pages)
         pages->state.host_cr3 = __pa(current->mm->pgd);
         /* Set up the Guest's page tables to see this CPU's pages (and no
          * other CPU's pages). */
-        map_switcher_in_guest(lg, pages);
+        map_switcher_in_guest(cpu, pages);
         /* Set up the two "TSS" members which tell the CPU what stack to use
          * for traps which go directly into the Guest (ie. traps at privilege
          * level 1). */
-        pages->state.guest_tss.sp1 = lg->esp1;
-        pages->state.guest_tss.ss1 = lg->ss1;
+        pages->state.guest_tss.esp1 = cpu->esp1;
+        pages->state.guest_tss.ss1 = cpu->ss1;
 
         /* Copy direct-to-Guest trap entries. */
-        if (lg->changed & CHANGED_IDT)
-                copy_traps(lg, pages->state.guest_idt, default_idt_entries);
+        if (cpu->changed & CHANGED_IDT)
+                copy_traps(cpu, pages->state.guest_idt, default_idt_entries);
 
         /* Copy all GDT entries which the Guest can change. */
-        if (lg->changed & CHANGED_GDT)
-                copy_gdt(lg, pages->state.guest_gdt);
+        if (cpu->changed & CHANGED_GDT)
+                copy_gdt(cpu, pages->state.guest_gdt);
         /* If only the TLS entries have changed, copy them. */
-        else if (lg->changed & CHANGED_GDT_TLS)
-                copy_gdt_tls(lg, pages->state.guest_gdt);
+        else if (cpu->changed & CHANGED_GDT_TLS)
+                copy_gdt_tls(cpu, pages->state.guest_gdt);
 
         /* Mark the Guest as unchanged for next time. */
-        lg->changed = 0;
+        cpu->changed = 0;
 }
 
 /* Finally: the code to actually call into the Switcher to run the Guest. */
-static void run_guest_once(struct lguest *lg, struct lguest_pages *pages)
+static void run_guest_once(struct lg_cpu *cpu, struct lguest_pages *pages)
 {
         /* This is a dummy value we need for GCC's sake. */
         unsigned int clobber;
 
         /* Copy the guest-specific information into this CPU's "struct
          * lguest_pages". */
-        copy_in_guest_info(lg, pages);
+        copy_in_guest_info(cpu, pages);
 
         /* Set the trap number to 256 (impossible value). If we fault while
          * switching to the Guest (bad segment registers or bug), this will
          * cause us to abort the Guest. */
-        lg->regs->trapnum = 256;
+        cpu->regs->trapnum = 256;
 
         /* Now: we push the "eflags" register on the stack, then do an "lcall".
          * This is how we change from using the kernel code segment to using
@@ -143,7 +143,7 @@ static void run_guest_once(struct lguest *lg, struct lguest_pages *pages)
                      * 0-th argument above, ie "a"). %ebx contains the
                      * physical address of the Guest's top-level page
                      * directory. */
-                    : "0"(pages), "1"(__pa(lg->pgdirs[lg->pgdidx].pgdir))
+                    : "0"(pages), "1"(__pa(cpu->lg->pgdirs[cpu->cpu_pgd].pgdir))
                     /* We tell gcc that all these registers could change,
                      * which means we don't have to save and restore them in
                      * the Switcher. */
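Stitching this hunk to the elided lines around it, the call into the Switcher reads approximately as below. This is a reconstruction assuming the era's "lguest_entry" far pointer, not the verbatim source:

        asm volatile("pushf; lcall *lguest_entry"
                     /* The Switcher returns through here clobbering %eax
                      * and %ebx; the dummy "clobber" soaks them up. */
                     : "=a"(clobber), "=b"(clobber)
                     /* %eax carries this CPU's "struct lguest_pages";
                      * %ebx the Guest's top-level page directory, as the
                      * constraint comment above says. */
                     : "0"(pages), "1"(__pa(cpu->lg->pgdirs[cpu->cpu_pgd].pgdir))
                     : "memory", "%edx", "%ecx", "%edi", "%esi");
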
@@ -161,12 +161,12 @@ static void run_guest_once(struct lguest *lg, struct lguest_pages *pages)
 
 /*H:040 This is the i386-specific code to setup and run the Guest. Interrupts
  * are disabled: we own the CPU. */
-void lguest_arch_run_guest(struct lguest *lg)
+void lguest_arch_run_guest(struct lg_cpu *cpu)
 {
         /* Remember the awfully-named TS bit? If the Guest has asked to set it
          * we set it now, so we can trap and pass that trap to the Guest if it
          * uses the FPU. */
-        if (lg->ts)
+        if (cpu->ts)
                 lguest_set_ts();
 
         /* SYSENTER is an optimized way of doing system calls. We can't allow
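The elided lines that follow carry out the threat in that comment. Roughly, as a sketch assuming the standard MSR helpers:

        /* Zeroing the SYSENTER code-segment MSR makes the instruction
         * fault, so a malicious Guest can't use it to enter the Host at
         * privilege level 0. */
        if (boot_cpu_has(X86_FEATURE_SEP))
                wrmsr(MSR_IA32_SYSENTER_CS, 0, 0);
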
@@ -180,7 +180,7 @@ void lguest_arch_run_guest(struct lguest *lg)
         /* Now we actually run the Guest. It will return when something
          * interesting happens, and we can examine its registers to see what it
          * was doing. */
-        run_guest_once(lg, lguest_pages(raw_smp_processor_id()));
+        run_guest_once(cpu, lguest_pages(raw_smp_processor_id()));
 
         /* Note that the "regs" pointer contains two extra entries which are
          * not really registers: a trap number which says what interrupt or
@@ -191,11 +191,11 @@ void lguest_arch_run_guest(struct lguest *lg)
          * bad virtual address. We have to grab this now, because once we
          * re-enable interrupts an interrupt could fault and thus overwrite
          * cr2, or we could even move off to a different CPU. */
-        if (lg->regs->trapnum == 14)
-                lg->arch.last_pagefault = read_cr2();
+        if (cpu->regs->trapnum == 14)
+                cpu->arch.last_pagefault = read_cr2();
         /* Similarly, if we took a trap because the Guest used the FPU,
          * we have to restore the FPU it expects to see. */
-        else if (lg->regs->trapnum == 7)
+        else if (cpu->regs->trapnum == 7)
                 math_state_restore();
 
         /* Restore SYSENTER if it's supposed to be on. */
@@ -214,22 +214,22 @@ void lguest_arch_run_guest(struct lguest *lg)
  * When the Guest uses one of these instructions, we get a trap (General
  * Protection Fault) and come here. We see if it's one of those troublesome
  * instructions and skip over it. We return true if we did. */
-static int emulate_insn(struct lguest *lg)
+static int emulate_insn(struct lg_cpu *cpu)
 {
         u8 insn;
         unsigned int insnlen = 0, in = 0, shift = 0;
         /* The eip contains the *virtual* address of the Guest's instruction:
          * guest_pa just subtracts the Guest's page_offset. */
-        unsigned long physaddr = guest_pa(lg, lg->regs->eip);
+        unsigned long physaddr = guest_pa(cpu, cpu->regs->eip);
 
         /* This must be the Guest kernel trying to do something, not userspace!
          * The bottom two bits of the CS segment register are the privilege
          * level. */
-        if ((lg->regs->cs & 3) != GUEST_PL)
+        if ((cpu->regs->cs & 3) != GUEST_PL)
                 return 0;
 
         /* Decoding x86 instructions is icky. */
-        insn = lgread(lg, physaddr, u8);
+        insn = lgread(cpu, physaddr, u8);
 
         /* 0x66 is an "operand prefix". It means it's using the upper 16 bits
            of the eax register. */
@@ -237,7 +237,7 @@ static int emulate_insn(struct lguest *lg)
                 shift = 16;
                 /* The instruction is 1 byte so far, read the next byte. */
                 insnlen = 1;
-                insn = lgread(lg, physaddr + insnlen, u8);
+                insn = lgread(cpu, physaddr + insnlen, u8);
         }
 
         /* We can ignore the lower bit for the moment and decode the 4 opcodes
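The decode itself sits in the elided lines which follow. A sketch, reconstructed from the architectural IN/OUT opcode pairs the comment refers to (the real switch may differ in detail):

        switch (insn & 0xFE) {  /* mask off the 8-vs-16/32 bit width bit */
        case 0xE4:              /* in  <next byte>,%al */
                insnlen += 2;   /* opcode plus the immediate port byte */
                in = 1;
                break;
        case 0xEC:              /* in  (%dx),%al */
                insnlen += 1;
                in = 1;
                break;
        case 0xE6:              /* out %al,<next byte> */
                insnlen += 2;
                break;
        case 0xEE:              /* out %al,(%dx) */
                insnlen += 1;
                break;
        default:
                /* Anything else we can't emulate. */
                return 0;
        }
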
@@ -268,26 +268,26 @@ static int emulate_insn(struct lguest *lg)
         if (in) {
                 /* Lower bit tells us whether it's a 16 or 32 bit access */
                 if (insn & 0x1)
-                        lg->regs->eax = 0xFFFFFFFF;
+                        cpu->regs->eax = 0xFFFFFFFF;
                 else
-                        lg->regs->eax |= (0xFFFF << shift);
+                        cpu->regs->eax |= (0xFFFF << shift);
         }
         /* Finally, we've "done" the instruction, so move past it. */
-        lg->regs->eip += insnlen;
+        cpu->regs->eip += insnlen;
         /* Success! */
         return 1;
 }
 
 /*H:050 Once we've re-enabled interrupts, we look at why the Guest exited. */
-void lguest_arch_handle_trap(struct lguest *lg)
+void lguest_arch_handle_trap(struct lg_cpu *cpu)
 {
-        switch (lg->regs->trapnum) {
+        switch (cpu->regs->trapnum) {
         case 13: /* We've intercepted a General Protection Fault. */
                 /* Check if this was one of those annoying IN or OUT
                  * instructions which we need to emulate. If so, we just go
                  * back into the Guest after we've done it. */
-                if (lg->regs->errcode == 0) {
-                        if (emulate_insn(lg))
+                if (cpu->regs->errcode == 0) {
+                        if (emulate_insn(cpu))
                                 return;
                 }
                 break;
@@ -301,7 +301,8 @@ void lguest_arch_handle_trap(struct lguest *lg)
          *
          * The errcode tells whether this was a read or a write, and
          * whether kernel or userspace code. */
-        if (demand_page(lg, lg->arch.last_pagefault, lg->regs->errcode))
+        if (demand_page(cpu, cpu->arch.last_pagefault,
+                        cpu->regs->errcode))
                 return;
 
         /* OK, it's really not there (or not OK): the Guest needs to
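For reference, the errcode bits demand_page() picks apart are the architectural x86 page-fault bits (the macro names here are illustrative, not from lg.h):

#define PF_PROT         (1 << 0)        /* 0: page not present, 1: protection fault */
#define PF_WRITE        (1 << 1)        /* set on a write access */
#define PF_USER         (1 << 2)        /* set when the fault came from user mode */
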
@@ -311,15 +312,16 @@ void lguest_arch_handle_trap(struct lguest *lg)
          * Note that if the Guest were really messed up, this could
          * happen before it's done the LHCALL_LGUEST_INIT hypercall, so
          * lg->lguest_data could be NULL */
-        if (lg->lguest_data &&
-            put_user(lg->arch.last_pagefault, &lg->lguest_data->cr2))
-                kill_guest(lg, "Writing cr2");
+        if (cpu->lg->lguest_data &&
+            put_user(cpu->arch.last_pagefault,
+                     &cpu->lg->lguest_data->cr2))
+                kill_guest(cpu, "Writing cr2");
         break;
         case 7: /* We've intercepted a Device Not Available fault. */
                 /* If the Guest doesn't want to know, we already restored the
                  * Floating Point Unit, so we just continue without telling
                  * it. */
-                if (!lg->ts)
+                if (!cpu->ts)
                         return;
                 break;
         case 32 ... 255:
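On the far side of that put_user(), the Guest's paravirtualized cr2 read simply picks the stashed value back up. A sketch of the Guest-side counterpart (boot code, quoted from memory):

static unsigned long lguest_read_cr2(void)
{
        /* The Host wrote the faulting address into our shared page. */
        return lguest_data.cr2;
}
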
@@ -332,19 +334,19 @@ void lguest_arch_handle_trap(struct lguest *lg)
         case LGUEST_TRAP_ENTRY:
                 /* Our 'struct hcall_args' maps directly over our regs: we set
                  * up the pointer now to indicate a hypercall is pending. */
-                lg->hcall = (struct hcall_args *)lg->regs;
+                cpu->hcall = (struct hcall_args *)cpu->regs;
                 return;
         }
 
         /* We didn't handle the trap, so it needs to go to the Guest. */
-        if (!deliver_trap(lg, lg->regs->trapnum))
+        if (!deliver_trap(cpu, cpu->regs->trapnum))
                 /* If the Guest doesn't have a handler (either it hasn't
                  * registered any yet, or it's one of the faults we don't let
                  * it handle), it dies with a cryptic error message. */
-                kill_guest(lg, "unhandled trap %li at %#lx (%#lx)",
-                           lg->regs->trapnum, lg->regs->eip,
-                           lg->regs->trapnum == 14 ? lg->arch.last_pagefault
-                           : lg->regs->errcode);
+                kill_guest(cpu, "unhandled trap %li at %#lx (%#lx)",
+                           cpu->regs->trapnum, cpu->regs->eip,
+                           cpu->regs->trapnum == 14 ? cpu->arch.last_pagefault
+                           : cpu->regs->errcode);
 }
 
 /* Now we can look at each of the routines this calls, in increasing order of
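The "maps directly over our regs" trick relies on field order. A sketch of the layout it assumes (the i386 hcall convention puts arg0 in %eax, arg1 in %edx, arg2 in %ebx, arg3 in %ecx, and "struct lguest_regs" begins eax, ebx, ecx, edx):

struct hcall_args {
        /* Declared in register order so it aliases the head of
         * "struct lguest_regs" exactly. */
        unsigned long arg0, arg2, arg3, arg1;
};
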
@@ -487,17 +489,17 @@ void __exit lguest_arch_host_fini(void)
 
 
 /*H:122 The i386-specific hypercalls simply farm out to the right functions. */
-int lguest_arch_do_hcall(struct lguest *lg, struct hcall_args *args)
+int lguest_arch_do_hcall(struct lg_cpu *cpu, struct hcall_args *args)
 {
         switch (args->arg0) {
         case LHCALL_LOAD_GDT:
-                load_guest_gdt(lg, args->arg1, args->arg2);
+                load_guest_gdt(cpu, args->arg1, args->arg2);
                 break;
         case LHCALL_LOAD_IDT_ENTRY:
-                load_guest_idt_entry(lg, args->arg1, args->arg2, args->arg3);
+                load_guest_idt_entry(cpu, args->arg1, args->arg2, args->arg3);
                 break;
         case LHCALL_LOAD_TLS:
-                guest_load_tls(lg, args->arg1);
+                guest_load_tls(cpu, args->arg1);
                 break;
         default:
                 /* Bad Guest. Bad! */
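All of these arrive via the same Guest-side stub: a software interrupt on LGUEST_TRAP_ENTRY with arguments preloaded into the agreed registers. A sketch from memory of the era's Guest code:

static unsigned long hcall(unsigned long call,
                           unsigned long arg1, unsigned long arg2,
                           unsigned long arg3)
{
        /* "int" raises the trap; the Host leaves its result in %eax. */
        asm volatile("int $" __stringify(LGUEST_TRAP_ENTRY)
                     : "=a"(call)
                     : "a"(call), "d"(arg1), "b"(arg2), "c"(arg3)
                     : "memory");
        return call;
}
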
@@ -507,13 +509,14 @@ int lguest_arch_do_hcall(struct lguest *lg, struct hcall_args *args)
 }
 
 /*H:126 i386-specific hypercall initialization: */
-int lguest_arch_init_hypercalls(struct lguest *lg)
+int lguest_arch_init_hypercalls(struct lg_cpu *cpu)
 {
         u32 tsc_speed;
 
         /* The pointer to the Guest's "struct lguest_data" is the only
          * argument. We check that address now. */
-        if (!lguest_address_ok(lg, lg->hcall->arg1, sizeof(*lg->lguest_data)))
+        if (!lguest_address_ok(cpu->lg, cpu->hcall->arg1,
+                               sizeof(*cpu->lg->lguest_data)))
                 return -EFAULT;
 
         /* Having checked it, we simply set lg->lguest_data to point straight
@@ -521,7 +524,7 @@ int lguest_arch_init_hypercalls(struct lguest *lg)
          * copy_to_user/from_user from now on, instead of lgread/write. I put
          * this in to show that I'm not immune to writing stupid
          * optimizations. */
-        lg->lguest_data = lg->mem_base + lg->hcall->arg1;
+        cpu->lg->lguest_data = cpu->lg->mem_base + cpu->hcall->arg1;
 
         /* We insist that the Time Stamp Counter exist and doesn't change with
          * cpu frequency. Some devious chip manufacturers decided that TSC
@@ -534,12 +537,12 @@ int lguest_arch_init_hypercalls(struct lguest *lg)
                 tsc_speed = tsc_khz;
         else
                 tsc_speed = 0;
-        if (put_user(tsc_speed, &lg->lguest_data->tsc_khz))
+        if (put_user(tsc_speed, &cpu->lg->lguest_data->tsc_khz))
                 return -EFAULT;
 
         /* The interrupt code might not like the system call vector. */
-        if (!check_syscall_vector(lg))
-                kill_guest(lg, "bad syscall vector");
+        if (!check_syscall_vector(cpu->lg))
+                kill_guest(cpu, "bad syscall vector");
 
         return 0;
 }
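On the Guest side, a nonzero tsc_khz lets the Guest derive its clocksource multiplier straight from the TSC instead of falling back to counting timer ticks. Roughly, as a sketch ("lguest_clock" being the Guest's clocksource):

        if (lguest_data.tsc_khz)
                lguest_clock.mult = clocksource_khz2mult(lguest_data.tsc_khz,
                                                         lguest_clock.shift);
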
@@ -548,9 +551,9 @@ int lguest_arch_init_hypercalls(struct lguest *lg)
  *
  * Most of the Guest's registers are left alone: we used get_zeroed_page() to
  * allocate the structure, so they will be 0. */
-void lguest_arch_setup_regs(struct lguest *lg, unsigned long start)
+void lguest_arch_setup_regs(struct lg_cpu *cpu, unsigned long start)
 {
-        struct lguest_regs *regs = lg->regs;
+        struct lguest_regs *regs = cpu->regs;
 
         /* There are four "segment" registers which the Guest needs to boot:
          * The "code segment" register (cs) refers to the kernel code segment
@@ -577,5 +580,5 @@ void lguest_arch_setup_regs(struct lguest *lg, unsigned long start)
 
         /* There are a couple of GDT entries the Guest expects when first
          * booting. */
-        setup_guest_gdt(lg);
+        setup_guest_gdt(cpu);
 }
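The elided middle of lguest_arch_setup_regs() sets the segment registers described in that comment. Its tail, reconstructed approximately (values quoted from memory of the era, so treat this as a sketch):

        regs->ds = regs->es = regs->ss = __KERNEL_DS|GUEST_PL;
        regs->cs = __KERNEL_CS|GUEST_PL;
        regs->eflags = X86_EFLAGS_IF | 0x2;     /* IF on; bit 1 is always set */
        regs->eip = start;                      /* where the Launcher said to begin */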