aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/lguest/page_tables.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/lguest/page_tables.c')
-rw-r--r--drivers/lguest/page_tables.c179
1 files changed, 92 insertions, 87 deletions
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index fffabb327157..74b4cf2a6c41 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -68,23 +68,23 @@ static DEFINE_PER_CPU(pte_t *, switcher_pte_pages);
68 * page directory entry (PGD) for that address. Since we keep track of several 68 * page directory entry (PGD) for that address. Since we keep track of several
69 * page tables, the "i" argument tells us which one we're interested in (it's 69 * page tables, the "i" argument tells us which one we're interested in (it's
70 * usually the current one). */ 70 * usually the current one). */
71static pgd_t *spgd_addr(struct lguest *lg, u32 i, unsigned long vaddr) 71static pgd_t *spgd_addr(struct lg_cpu *cpu, u32 i, unsigned long vaddr)
72{ 72{
73 unsigned int index = pgd_index(vaddr); 73 unsigned int index = pgd_index(vaddr);
74 74
75 /* We kill any Guest trying to touch the Switcher addresses. */ 75 /* We kill any Guest trying to touch the Switcher addresses. */
76 if (index >= SWITCHER_PGD_INDEX) { 76 if (index >= SWITCHER_PGD_INDEX) {
77 kill_guest(lg, "attempt to access switcher pages"); 77 kill_guest(cpu, "attempt to access switcher pages");
78 index = 0; 78 index = 0;
79 } 79 }
80 /* Return a pointer index'th pgd entry for the i'th page table. */ 80 /* Return a pointer index'th pgd entry for the i'th page table. */
81 return &lg->pgdirs[i].pgdir[index]; 81 return &cpu->lg->pgdirs[i].pgdir[index];
82} 82}
83 83
84/* This routine then takes the page directory entry returned above, which 84/* This routine then takes the page directory entry returned above, which
85 * contains the address of the page table entry (PTE) page. It then returns a 85 * contains the address of the page table entry (PTE) page. It then returns a
86 * pointer to the PTE entry for the given address. */ 86 * pointer to the PTE entry for the given address. */
87static pte_t *spte_addr(struct lguest *lg, pgd_t spgd, unsigned long vaddr) 87static pte_t *spte_addr(pgd_t spgd, unsigned long vaddr)
88{ 88{
89 pte_t *page = __va(pgd_pfn(spgd) << PAGE_SHIFT); 89 pte_t *page = __va(pgd_pfn(spgd) << PAGE_SHIFT);
90 /* You should never call this if the PGD entry wasn't valid */ 90 /* You should never call this if the PGD entry wasn't valid */
@@ -94,14 +94,13 @@ static pte_t *spte_addr(struct lguest *lg, pgd_t spgd, unsigned long vaddr)
94 94
95/* These two functions just like the above two, except they access the Guest 95/* These two functions just like the above two, except they access the Guest
96 * page tables. Hence they return a Guest address. */ 96 * page tables. Hence they return a Guest address. */
97static unsigned long gpgd_addr(struct lguest *lg, unsigned long vaddr) 97static unsigned long gpgd_addr(struct lg_cpu *cpu, unsigned long vaddr)
98{ 98{
99 unsigned int index = vaddr >> (PGDIR_SHIFT); 99 unsigned int index = vaddr >> (PGDIR_SHIFT);
100 return lg->pgdirs[lg->pgdidx].gpgdir + index * sizeof(pgd_t); 100 return cpu->lg->pgdirs[cpu->cpu_pgd].gpgdir + index * sizeof(pgd_t);
101} 101}
102 102
103static unsigned long gpte_addr(struct lguest *lg, 103static unsigned long gpte_addr(pgd_t gpgd, unsigned long vaddr)
104 pgd_t gpgd, unsigned long vaddr)
105{ 104{
106 unsigned long gpage = pgd_pfn(gpgd) << PAGE_SHIFT; 105 unsigned long gpage = pgd_pfn(gpgd) << PAGE_SHIFT;
107 BUG_ON(!(pgd_flags(gpgd) & _PAGE_PRESENT)); 106 BUG_ON(!(pgd_flags(gpgd) & _PAGE_PRESENT));
@@ -138,7 +137,7 @@ static unsigned long get_pfn(unsigned long virtpfn, int write)
138 * entry can be a little tricky. The flags are (almost) the same, but the 137 * entry can be a little tricky. The flags are (almost) the same, but the
139 * Guest PTE contains a virtual page number: the CPU needs the real page 138 * Guest PTE contains a virtual page number: the CPU needs the real page
140 * number. */ 139 * number. */
141static pte_t gpte_to_spte(struct lguest *lg, pte_t gpte, int write) 140static pte_t gpte_to_spte(struct lg_cpu *cpu, pte_t gpte, int write)
142{ 141{
143 unsigned long pfn, base, flags; 142 unsigned long pfn, base, flags;
144 143
@@ -149,7 +148,7 @@ static pte_t gpte_to_spte(struct lguest *lg, pte_t gpte, int write)
149 flags = (pte_flags(gpte) & ~_PAGE_GLOBAL); 148 flags = (pte_flags(gpte) & ~_PAGE_GLOBAL);
150 149
151 /* The Guest's pages are offset inside the Launcher. */ 150 /* The Guest's pages are offset inside the Launcher. */
152 base = (unsigned long)lg->mem_base / PAGE_SIZE; 151 base = (unsigned long)cpu->lg->mem_base / PAGE_SIZE;
153 152
154 /* We need a temporary "unsigned long" variable to hold the answer from 153 /* We need a temporary "unsigned long" variable to hold the answer from
155 * get_pfn(), because it returns 0xFFFFFFFF on failure, which wouldn't 154 * get_pfn(), because it returns 0xFFFFFFFF on failure, which wouldn't
@@ -157,7 +156,7 @@ static pte_t gpte_to_spte(struct lguest *lg, pte_t gpte, int write)
157 * page, given the virtual number. */ 156 * page, given the virtual number. */
158 pfn = get_pfn(base + pte_pfn(gpte), write); 157 pfn = get_pfn(base + pte_pfn(gpte), write);
159 if (pfn == -1UL) { 158 if (pfn == -1UL) {
160 kill_guest(lg, "failed to get page %lu", pte_pfn(gpte)); 159 kill_guest(cpu, "failed to get page %lu", pte_pfn(gpte));
161 /* When we destroy the Guest, we'll go through the shadow page 160 /* When we destroy the Guest, we'll go through the shadow page
162 * tables and release_pte() them. Make sure we don't think 161 * tables and release_pte() them. Make sure we don't think
163 * this one is valid! */ 162 * this one is valid! */
@@ -177,17 +176,18 @@ static void release_pte(pte_t pte)
177} 176}
178/*:*/ 177/*:*/
179 178
180static void check_gpte(struct lguest *lg, pte_t gpte) 179static void check_gpte(struct lg_cpu *cpu, pte_t gpte)
181{ 180{
182 if ((pte_flags(gpte) & (_PAGE_PWT|_PAGE_PSE)) 181 if ((pte_flags(gpte) & (_PAGE_PWT|_PAGE_PSE))
183 || pte_pfn(gpte) >= lg->pfn_limit) 182 || pte_pfn(gpte) >= cpu->lg->pfn_limit)
184 kill_guest(lg, "bad page table entry"); 183 kill_guest(cpu, "bad page table entry");
185} 184}
186 185
187static void check_gpgd(struct lguest *lg, pgd_t gpgd) 186static void check_gpgd(struct lg_cpu *cpu, pgd_t gpgd)
188{ 187{
189 if ((pgd_flags(gpgd) & ~_PAGE_TABLE) || pgd_pfn(gpgd) >= lg->pfn_limit) 188 if ((pgd_flags(gpgd) & ~_PAGE_TABLE) ||
190 kill_guest(lg, "bad page directory entry"); 189 (pgd_pfn(gpgd) >= cpu->lg->pfn_limit))
190 kill_guest(cpu, "bad page directory entry");
191} 191}
192 192
193/*H:330 193/*H:330
@@ -200,7 +200,7 @@ static void check_gpgd(struct lguest *lg, pgd_t gpgd)
200 * 200 *
201 * If we fixed up the fault (ie. we mapped the address), this routine returns 201 * If we fixed up the fault (ie. we mapped the address), this routine returns
202 * true. Otherwise, it was a real fault and we need to tell the Guest. */ 202 * true. Otherwise, it was a real fault and we need to tell the Guest. */
203int demand_page(struct lguest *lg, unsigned long vaddr, int errcode) 203int demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
204{ 204{
205 pgd_t gpgd; 205 pgd_t gpgd;
206 pgd_t *spgd; 206 pgd_t *spgd;
@@ -209,24 +209,24 @@ int demand_page(struct lguest *lg, unsigned long vaddr, int errcode)
209 pte_t *spte; 209 pte_t *spte;
210 210
211 /* First step: get the top-level Guest page table entry. */ 211 /* First step: get the top-level Guest page table entry. */
212 gpgd = lgread(lg, gpgd_addr(lg, vaddr), pgd_t); 212 gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t);
213 /* Toplevel not present? We can't map it in. */ 213 /* Toplevel not present? We can't map it in. */
214 if (!(pgd_flags(gpgd) & _PAGE_PRESENT)) 214 if (!(pgd_flags(gpgd) & _PAGE_PRESENT))
215 return 0; 215 return 0;
216 216
217 /* Now look at the matching shadow entry. */ 217 /* Now look at the matching shadow entry. */
218 spgd = spgd_addr(lg, lg->pgdidx, vaddr); 218 spgd = spgd_addr(cpu, cpu->cpu_pgd, vaddr);
219 if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) { 219 if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) {
220 /* No shadow entry: allocate a new shadow PTE page. */ 220 /* No shadow entry: allocate a new shadow PTE page. */
221 unsigned long ptepage = get_zeroed_page(GFP_KERNEL); 221 unsigned long ptepage = get_zeroed_page(GFP_KERNEL);
222 /* This is not really the Guest's fault, but killing it is 222 /* This is not really the Guest's fault, but killing it is
223 * simple for this corner case. */ 223 * simple for this corner case. */
224 if (!ptepage) { 224 if (!ptepage) {
225 kill_guest(lg, "out of memory allocating pte page"); 225 kill_guest(cpu, "out of memory allocating pte page");
226 return 0; 226 return 0;
227 } 227 }
228 /* We check that the Guest pgd is OK. */ 228 /* We check that the Guest pgd is OK. */
229 check_gpgd(lg, gpgd); 229 check_gpgd(cpu, gpgd);
230 /* And we copy the flags to the shadow PGD entry. The page 230 /* And we copy the flags to the shadow PGD entry. The page
231 * number in the shadow PGD is the page we just allocated. */ 231 * number in the shadow PGD is the page we just allocated. */
232 *spgd = __pgd(__pa(ptepage) | pgd_flags(gpgd)); 232 *spgd = __pgd(__pa(ptepage) | pgd_flags(gpgd));
@@ -234,8 +234,8 @@ int demand_page(struct lguest *lg, unsigned long vaddr, int errcode)
234 234
235 /* OK, now we look at the lower level in the Guest page table: keep its 235 /* OK, now we look at the lower level in the Guest page table: keep its
236 * address, because we might update it later. */ 236 * address, because we might update it later. */
237 gpte_ptr = gpte_addr(lg, gpgd, vaddr); 237 gpte_ptr = gpte_addr(gpgd, vaddr);
238 gpte = lgread(lg, gpte_ptr, pte_t); 238 gpte = lgread(cpu, gpte_ptr, pte_t);
239 239
240 /* If this page isn't in the Guest page tables, we can't page it in. */ 240 /* If this page isn't in the Guest page tables, we can't page it in. */
241 if (!(pte_flags(gpte) & _PAGE_PRESENT)) 241 if (!(pte_flags(gpte) & _PAGE_PRESENT))
@@ -252,7 +252,7 @@ int demand_page(struct lguest *lg, unsigned long vaddr, int errcode)
252 252
253 /* Check that the Guest PTE flags are OK, and the page number is below 253 /* Check that the Guest PTE flags are OK, and the page number is below
254 * the pfn_limit (ie. not mapping the Launcher binary). */ 254 * the pfn_limit (ie. not mapping the Launcher binary). */
255 check_gpte(lg, gpte); 255 check_gpte(cpu, gpte);
256 256
257 /* Add the _PAGE_ACCESSED and (for a write) _PAGE_DIRTY flag */ 257 /* Add the _PAGE_ACCESSED and (for a write) _PAGE_DIRTY flag */
258 gpte = pte_mkyoung(gpte); 258 gpte = pte_mkyoung(gpte);
@@ -260,7 +260,7 @@ int demand_page(struct lguest *lg, unsigned long vaddr, int errcode)
260 gpte = pte_mkdirty(gpte); 260 gpte = pte_mkdirty(gpte);
261 261
262 /* Get the pointer to the shadow PTE entry we're going to set. */ 262 /* Get the pointer to the shadow PTE entry we're going to set. */
263 spte = spte_addr(lg, *spgd, vaddr); 263 spte = spte_addr(*spgd, vaddr);
264 /* If there was a valid shadow PTE entry here before, we release it. 264 /* If there was a valid shadow PTE entry here before, we release it.
265 * This can happen with a write to a previously read-only entry. */ 265 * This can happen with a write to a previously read-only entry. */
266 release_pte(*spte); 266 release_pte(*spte);
@@ -268,17 +268,17 @@ int demand_page(struct lguest *lg, unsigned long vaddr, int errcode)
268 /* If this is a write, we insist that the Guest page is writable (the 268 /* If this is a write, we insist that the Guest page is writable (the
269 * final arg to gpte_to_spte()). */ 269 * final arg to gpte_to_spte()). */
270 if (pte_dirty(gpte)) 270 if (pte_dirty(gpte))
271 *spte = gpte_to_spte(lg, gpte, 1); 271 *spte = gpte_to_spte(cpu, gpte, 1);
272 else 272 else
273 /* If this is a read, don't set the "writable" bit in the page 273 /* If this is a read, don't set the "writable" bit in the page
274 * table entry, even if the Guest says it's writable. That way 274 * table entry, even if the Guest says it's writable. That way
275 * we will come back here when a write does actually occur, so 275 * we will come back here when a write does actually occur, so
276 * we can update the Guest's _PAGE_DIRTY flag. */ 276 * we can update the Guest's _PAGE_DIRTY flag. */
277 *spte = gpte_to_spte(lg, pte_wrprotect(gpte), 0); 277 *spte = gpte_to_spte(cpu, pte_wrprotect(gpte), 0);
278 278
279 /* Finally, we write the Guest PTE entry back: we've set the 279 /* Finally, we write the Guest PTE entry back: we've set the
280 * _PAGE_ACCESSED and maybe the _PAGE_DIRTY flags. */ 280 * _PAGE_ACCESSED and maybe the _PAGE_DIRTY flags. */
281 lgwrite(lg, gpte_ptr, pte_t, gpte); 281 lgwrite(cpu, gpte_ptr, pte_t, gpte);
282 282
283 /* The fault is fixed, the page table is populated, the mapping 283 /* The fault is fixed, the page table is populated, the mapping
284 * manipulated, the result returned and the code complete. A small 284 * manipulated, the result returned and the code complete. A small
@@ -297,19 +297,19 @@ int demand_page(struct lguest *lg, unsigned long vaddr, int errcode)
297 * 297 *
298 * This is a quick version which answers the question: is this virtual address 298 * This is a quick version which answers the question: is this virtual address
299 * mapped by the shadow page tables, and is it writable? */ 299 * mapped by the shadow page tables, and is it writable? */
300static int page_writable(struct lguest *lg, unsigned long vaddr) 300static int page_writable(struct lg_cpu *cpu, unsigned long vaddr)
301{ 301{
302 pgd_t *spgd; 302 pgd_t *spgd;
303 unsigned long flags; 303 unsigned long flags;
304 304
305 /* Look at the current top level entry: is it present? */ 305 /* Look at the current top level entry: is it present? */
306 spgd = spgd_addr(lg, lg->pgdidx, vaddr); 306 spgd = spgd_addr(cpu, cpu->cpu_pgd, vaddr);
307 if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) 307 if (!(pgd_flags(*spgd) & _PAGE_PRESENT))
308 return 0; 308 return 0;
309 309
310 /* Check the flags on the pte entry itself: it must be present and 310 /* Check the flags on the pte entry itself: it must be present and
311 * writable. */ 311 * writable. */
312 flags = pte_flags(*(spte_addr(lg, *spgd, vaddr))); 312 flags = pte_flags(*(spte_addr(*spgd, vaddr)));
313 313
314 return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW); 314 return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW);
315} 315}
@@ -317,10 +317,10 @@ static int page_writable(struct lguest *lg, unsigned long vaddr)
317/* So, when pin_stack_pages() asks us to pin a page, we check if it's already 317/* So, when pin_stack_pages() asks us to pin a page, we check if it's already
318 * in the page tables, and if not, we call demand_page() with error code 2 318 * in the page tables, and if not, we call demand_page() with error code 2
319 * (meaning "write"). */ 319 * (meaning "write"). */
320void pin_page(struct lguest *lg, unsigned long vaddr) 320void pin_page(struct lg_cpu *cpu, unsigned long vaddr)
321{ 321{
322 if (!page_writable(lg, vaddr) && !demand_page(lg, vaddr, 2)) 322 if (!page_writable(cpu, vaddr) && !demand_page(cpu, vaddr, 2))
323 kill_guest(lg, "bad stack page %#lx", vaddr); 323 kill_guest(cpu, "bad stack page %#lx", vaddr);
324} 324}
325 325
326/*H:450 If we chase down the release_pgd() code, it looks like this: */ 326/*H:450 If we chase down the release_pgd() code, it looks like this: */
@@ -358,28 +358,28 @@ static void flush_user_mappings(struct lguest *lg, int idx)
358 * 358 *
359 * The Guest has a hypercall to throw away the page tables: it's used when a 359 * The Guest has a hypercall to throw away the page tables: it's used when a
360 * large number of mappings have been changed. */ 360 * large number of mappings have been changed. */
361void guest_pagetable_flush_user(struct lguest *lg) 361void guest_pagetable_flush_user(struct lg_cpu *cpu)
362{ 362{
363 /* Drop the userspace part of the current page table. */ 363 /* Drop the userspace part of the current page table. */
364 flush_user_mappings(lg, lg->pgdidx); 364 flush_user_mappings(cpu->lg, cpu->cpu_pgd);
365} 365}
366/*:*/ 366/*:*/
367 367
368/* We walk down the guest page tables to get a guest-physical address */ 368/* We walk down the guest page tables to get a guest-physical address */
369unsigned long guest_pa(struct lguest *lg, unsigned long vaddr) 369unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr)
370{ 370{
371 pgd_t gpgd; 371 pgd_t gpgd;
372 pte_t gpte; 372 pte_t gpte;
373 373
374 /* First step: get the top-level Guest page table entry. */ 374 /* First step: get the top-level Guest page table entry. */
375 gpgd = lgread(lg, gpgd_addr(lg, vaddr), pgd_t); 375 gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t);
376 /* Toplevel not present? We can't map it in. */ 376 /* Toplevel not present? We can't map it in. */
377 if (!(pgd_flags(gpgd) & _PAGE_PRESENT)) 377 if (!(pgd_flags(gpgd) & _PAGE_PRESENT))
378 kill_guest(lg, "Bad address %#lx", vaddr); 378 kill_guest(cpu, "Bad address %#lx", vaddr);
379 379
380 gpte = lgread(lg, gpte_addr(lg, gpgd, vaddr), pte_t); 380 gpte = lgread(cpu, gpte_addr(gpgd, vaddr), pte_t);
381 if (!(pte_flags(gpte) & _PAGE_PRESENT)) 381 if (!(pte_flags(gpte) & _PAGE_PRESENT))
382 kill_guest(lg, "Bad address %#lx", vaddr); 382 kill_guest(cpu, "Bad address %#lx", vaddr);
383 383
384 return pte_pfn(gpte) * PAGE_SIZE | (vaddr & ~PAGE_MASK); 384 return pte_pfn(gpte) * PAGE_SIZE | (vaddr & ~PAGE_MASK);
385} 385}
@@ -399,7 +399,7 @@ static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable)
399/*H:435 And this is us, creating the new page directory. If we really do 399/*H:435 And this is us, creating the new page directory. If we really do
400 * allocate a new one (and so the kernel parts are not there), we set 400 * allocate a new one (and so the kernel parts are not there), we set
401 * blank_pgdir. */ 401 * blank_pgdir. */
402static unsigned int new_pgdir(struct lguest *lg, 402static unsigned int new_pgdir(struct lg_cpu *cpu,
403 unsigned long gpgdir, 403 unsigned long gpgdir,
404 int *blank_pgdir) 404 int *blank_pgdir)
405{ 405{
@@ -407,22 +407,23 @@ static unsigned int new_pgdir(struct lguest *lg,
407 407
408 /* We pick one entry at random to throw out. Choosing the Least 408 /* We pick one entry at random to throw out. Choosing the Least
409 * Recently Used might be better, but this is easy. */ 409 * Recently Used might be better, but this is easy. */
410 next = random32() % ARRAY_SIZE(lg->pgdirs); 410 next = random32() % ARRAY_SIZE(cpu->lg->pgdirs);
411 /* If it's never been allocated at all before, try now. */ 411 /* If it's never been allocated at all before, try now. */
412 if (!lg->pgdirs[next].pgdir) { 412 if (!cpu->lg->pgdirs[next].pgdir) {
413 lg->pgdirs[next].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL); 413 cpu->lg->pgdirs[next].pgdir =
414 (pgd_t *)get_zeroed_page(GFP_KERNEL);
414 /* If the allocation fails, just keep using the one we have */ 415 /* If the allocation fails, just keep using the one we have */
415 if (!lg->pgdirs[next].pgdir) 416 if (!cpu->lg->pgdirs[next].pgdir)
416 next = lg->pgdidx; 417 next = cpu->cpu_pgd;
417 else 418 else
418 /* This is a blank page, so there are no kernel 419 /* This is a blank page, so there are no kernel
419 * mappings: caller must map the stack! */ 420 * mappings: caller must map the stack! */
420 *blank_pgdir = 1; 421 *blank_pgdir = 1;
421 } 422 }
422 /* Record which Guest toplevel this shadows. */ 423 /* Record which Guest toplevel this shadows. */
423 lg->pgdirs[next].gpgdir = gpgdir; 424 cpu->lg->pgdirs[next].gpgdir = gpgdir;
424 /* Release all the non-kernel mappings. */ 425 /* Release all the non-kernel mappings. */
425 flush_user_mappings(lg, next); 426 flush_user_mappings(cpu->lg, next);
426 427
427 return next; 428 return next;
428} 429}
@@ -432,21 +433,21 @@ static unsigned int new_pgdir(struct lguest *lg,
432 * Now we've seen all the page table setting and manipulation, let's see what 433 * Now we've seen all the page table setting and manipulation, let's see what
433 * what happens when the Guest changes page tables (ie. changes the top-level 434 * what happens when the Guest changes page tables (ie. changes the top-level
434 * pgdir). This occurs on almost every context switch. */ 435 * pgdir). This occurs on almost every context switch. */
435void guest_new_pagetable(struct lguest *lg, unsigned long pgtable) 436void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable)
436{ 437{
437 int newpgdir, repin = 0; 438 int newpgdir, repin = 0;
438 439
439 /* Look to see if we have this one already. */ 440 /* Look to see if we have this one already. */
440 newpgdir = find_pgdir(lg, pgtable); 441 newpgdir = find_pgdir(cpu->lg, pgtable);
441 /* If not, we allocate or mug an existing one: if it's a fresh one, 442 /* If not, we allocate or mug an existing one: if it's a fresh one,
442 * repin gets set to 1. */ 443 * repin gets set to 1. */
443 if (newpgdir == ARRAY_SIZE(lg->pgdirs)) 444 if (newpgdir == ARRAY_SIZE(cpu->lg->pgdirs))
444 newpgdir = new_pgdir(lg, pgtable, &repin); 445 newpgdir = new_pgdir(cpu, pgtable, &repin);
445 /* Change the current pgd index to the new one. */ 446 /* Change the current pgd index to the new one. */
446 lg->pgdidx = newpgdir; 447 cpu->cpu_pgd = newpgdir;
447 /* If it was completely blank, we map in the Guest kernel stack */ 448 /* If it was completely blank, we map in the Guest kernel stack */
448 if (repin) 449 if (repin)
449 pin_stack_pages(lg); 450 pin_stack_pages(cpu);
450} 451}
451 452
452/*H:470 Finally, a routine which throws away everything: all PGD entries in all 453/*H:470 Finally, a routine which throws away everything: all PGD entries in all
@@ -468,11 +469,11 @@ static void release_all_pagetables(struct lguest *lg)
468 * mapping. Since kernel mappings are in every page table, it's easiest to 469 * mapping. Since kernel mappings are in every page table, it's easiest to
469 * throw them all away. This traps the Guest in amber for a while as 470 * throw them all away. This traps the Guest in amber for a while as
470 * everything faults back in, but it's rare. */ 471 * everything faults back in, but it's rare. */
471void guest_pagetable_clear_all(struct lguest *lg) 472void guest_pagetable_clear_all(struct lg_cpu *cpu)
472{ 473{
473 release_all_pagetables(lg); 474 release_all_pagetables(cpu->lg);
474 /* We need the Guest kernel stack mapped again. */ 475 /* We need the Guest kernel stack mapped again. */
475 pin_stack_pages(lg); 476 pin_stack_pages(cpu);
476} 477}
477/*:*/ 478/*:*/
478/*M:009 Since we throw away all mappings when a kernel mapping changes, our 479/*M:009 Since we throw away all mappings when a kernel mapping changes, our
@@ -497,24 +498,24 @@ void guest_pagetable_clear_all(struct lguest *lg)
497 * _PAGE_ACCESSED then we can put a read-only PTE entry in immediately, and if 498 * _PAGE_ACCESSED then we can put a read-only PTE entry in immediately, and if
498 * they set _PAGE_DIRTY then we can put a writable PTE entry in immediately. 499 * they set _PAGE_DIRTY then we can put a writable PTE entry in immediately.
499 */ 500 */
500static void do_set_pte(struct lguest *lg, int idx, 501static void do_set_pte(struct lg_cpu *cpu, int idx,
501 unsigned long vaddr, pte_t gpte) 502 unsigned long vaddr, pte_t gpte)
502{ 503{
503 /* Look up the matching shadow page directory entry. */ 504 /* Look up the matching shadow page directory entry. */
504 pgd_t *spgd = spgd_addr(lg, idx, vaddr); 505 pgd_t *spgd = spgd_addr(cpu, idx, vaddr);
505 506
506 /* If the top level isn't present, there's no entry to update. */ 507 /* If the top level isn't present, there's no entry to update. */
507 if (pgd_flags(*spgd) & _PAGE_PRESENT) { 508 if (pgd_flags(*spgd) & _PAGE_PRESENT) {
508 /* Otherwise, we start by releasing the existing entry. */ 509 /* Otherwise, we start by releasing the existing entry. */
509 pte_t *spte = spte_addr(lg, *spgd, vaddr); 510 pte_t *spte = spte_addr(*spgd, vaddr);
510 release_pte(*spte); 511 release_pte(*spte);
511 512
512 /* If they're setting this entry as dirty or accessed, we might 513 /* If they're setting this entry as dirty or accessed, we might
513 * as well put that entry they've given us in now. This shaves 514 * as well put that entry they've given us in now. This shaves
514 * 10% off a copy-on-write micro-benchmark. */ 515 * 10% off a copy-on-write micro-benchmark. */
515 if (pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED)) { 516 if (pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED)) {
516 check_gpte(lg, gpte); 517 check_gpte(cpu, gpte);
517 *spte = gpte_to_spte(lg, gpte, 518 *spte = gpte_to_spte(cpu, gpte,
518 pte_flags(gpte) & _PAGE_DIRTY); 519 pte_flags(gpte) & _PAGE_DIRTY);
519 } else 520 } else
520 /* Otherwise kill it and we can demand_page() it in 521 /* Otherwise kill it and we can demand_page() it in
@@ -533,22 +534,22 @@ static void do_set_pte(struct lguest *lg, int idx,
533 * 534 *
534 * The benefit is that when we have to track a new page table, we can copy keep 535 * The benefit is that when we have to track a new page table, we can copy keep
535 * all the kernel mappings. This speeds up context switch immensely. */ 536 * all the kernel mappings. This speeds up context switch immensely. */
536void guest_set_pte(struct lguest *lg, 537void guest_set_pte(struct lg_cpu *cpu,
537 unsigned long gpgdir, unsigned long vaddr, pte_t gpte) 538 unsigned long gpgdir, unsigned long vaddr, pte_t gpte)
538{ 539{
539 /* Kernel mappings must be changed on all top levels. Slow, but 540 /* Kernel mappings must be changed on all top levels. Slow, but
540 * doesn't happen often. */ 541 * doesn't happen often. */
541 if (vaddr >= lg->kernel_address) { 542 if (vaddr >= cpu->lg->kernel_address) {
542 unsigned int i; 543 unsigned int i;
543 for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++) 544 for (i = 0; i < ARRAY_SIZE(cpu->lg->pgdirs); i++)
544 if (lg->pgdirs[i].pgdir) 545 if (cpu->lg->pgdirs[i].pgdir)
545 do_set_pte(lg, i, vaddr, gpte); 546 do_set_pte(cpu, i, vaddr, gpte);
546 } else { 547 } else {
547 /* Is this page table one we have a shadow for? */ 548 /* Is this page table one we have a shadow for? */
548 int pgdir = find_pgdir(lg, gpgdir); 549 int pgdir = find_pgdir(cpu->lg, gpgdir);
549 if (pgdir != ARRAY_SIZE(lg->pgdirs)) 550 if (pgdir != ARRAY_SIZE(cpu->lg->pgdirs))
550 /* If so, do the update. */ 551 /* If so, do the update. */
551 do_set_pte(lg, pgdir, vaddr, gpte); 552 do_set_pte(cpu, pgdir, vaddr, gpte);
552 } 553 }
553} 554}
554 555
@@ -590,30 +591,32 @@ int init_guest_pagetable(struct lguest *lg, unsigned long pgtable)
590{ 591{
591 /* We start on the first shadow page table, and give it a blank PGD 592 /* We start on the first shadow page table, and give it a blank PGD
592 * page. */ 593 * page. */
593 lg->pgdidx = 0; 594 lg->pgdirs[0].gpgdir = pgtable;
594 lg->pgdirs[lg->pgdidx].gpgdir = pgtable; 595 lg->pgdirs[0].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL);
595 lg->pgdirs[lg->pgdidx].pgdir = (pgd_t*)get_zeroed_page(GFP_KERNEL); 596 if (!lg->pgdirs[0].pgdir)
596 if (!lg->pgdirs[lg->pgdidx].pgdir)
597 return -ENOMEM; 597 return -ENOMEM;
598 lg->cpus[0].cpu_pgd = 0;
598 return 0; 599 return 0;
599} 600}
600 601
601/* When the Guest calls LHCALL_LGUEST_INIT we do more setup. */ 602/* When the Guest calls LHCALL_LGUEST_INIT we do more setup. */
602void page_table_guest_data_init(struct lguest *lg) 603void page_table_guest_data_init(struct lg_cpu *cpu)
603{ 604{
604 /* We get the kernel address: above this is all kernel memory. */ 605 /* We get the kernel address: above this is all kernel memory. */
605 if (get_user(lg->kernel_address, &lg->lguest_data->kernel_address) 606 if (get_user(cpu->lg->kernel_address,
607 &cpu->lg->lguest_data->kernel_address)
606 /* We tell the Guest that it can't use the top 4MB of virtual 608 /* We tell the Guest that it can't use the top 4MB of virtual
607 * addresses used by the Switcher. */ 609 * addresses used by the Switcher. */
608 || put_user(4U*1024*1024, &lg->lguest_data->reserve_mem) 610 || put_user(4U*1024*1024, &cpu->lg->lguest_data->reserve_mem)
609 || put_user(lg->pgdirs[lg->pgdidx].gpgdir,&lg->lguest_data->pgdir)) 611 || put_user(cpu->lg->pgdirs[0].gpgdir, &cpu->lg->lguest_data->pgdir))
610 kill_guest(lg, "bad guest page %p", lg->lguest_data); 612 kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data);
611 613
612 /* In flush_user_mappings() we loop from 0 to 614 /* In flush_user_mappings() we loop from 0 to
613 * "pgd_index(lg->kernel_address)". This assumes it won't hit the 615 * "pgd_index(lg->kernel_address)". This assumes it won't hit the
614 * Switcher mappings, so check that now. */ 616 * Switcher mappings, so check that now. */
615 if (pgd_index(lg->kernel_address) >= SWITCHER_PGD_INDEX) 617 if (pgd_index(cpu->lg->kernel_address) >= SWITCHER_PGD_INDEX)
616 kill_guest(lg, "bad kernel address %#lx", lg->kernel_address); 618 kill_guest(cpu, "bad kernel address %#lx",
619 cpu->lg->kernel_address);
617} 620}
618 621
619/* When a Guest dies, our cleanup is fairly simple. */ 622/* When a Guest dies, our cleanup is fairly simple. */
@@ -634,17 +637,18 @@ void free_guest_pagetable(struct lguest *lg)
634 * Guest (and not the pages for other CPUs). We have the appropriate PTE pages 637 * Guest (and not the pages for other CPUs). We have the appropriate PTE pages
635 * for each CPU already set up, we just need to hook them in now we know which 638 * for each CPU already set up, we just need to hook them in now we know which
636 * Guest is about to run on this CPU. */ 639 * Guest is about to run on this CPU. */
637void map_switcher_in_guest(struct lguest *lg, struct lguest_pages *pages) 640void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages)
638{ 641{
639 pte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages); 642 pte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages);
640 pgd_t switcher_pgd; 643 pgd_t switcher_pgd;
641 pte_t regs_pte; 644 pte_t regs_pte;
645 unsigned long pfn;
642 646
643 /* Make the last PGD entry for this Guest point to the Switcher's PTE 647 /* Make the last PGD entry for this Guest point to the Switcher's PTE
644 * page for this CPU (with appropriate flags). */ 648 * page for this CPU (with appropriate flags). */
645 switcher_pgd = __pgd(__pa(switcher_pte_page) | _PAGE_KERNEL); 649 switcher_pgd = __pgd(__pa(switcher_pte_page) | __PAGE_KERNEL);
646 650
647 lg->pgdirs[lg->pgdidx].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd; 651 cpu->lg->pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd;
648 652
649 /* We also change the Switcher PTE page. When we're running the Guest, 653 /* We also change the Switcher PTE page. When we're running the Guest,
650 * we want the Guest's "regs" page to appear where the first Switcher 654 * we want the Guest's "regs" page to appear where the first Switcher
@@ -653,7 +657,8 @@ void map_switcher_in_guest(struct lguest *lg, struct lguest_pages *pages)
653 * CPU's "struct lguest_pages": if we make sure the Guest's register 657 * CPU's "struct lguest_pages": if we make sure the Guest's register
654 * page is already mapped there, we don't have to copy them out 658 * page is already mapped there, we don't have to copy them out
655 * again. */ 659 * again. */
656 regs_pte = pfn_pte (__pa(lg->regs_page) >> PAGE_SHIFT, __pgprot(_PAGE_KERNEL)); 660 pfn = __pa(cpu->regs_page) >> PAGE_SHIFT;
661 regs_pte = pfn_pte(pfn, __pgprot(__PAGE_KERNEL));
657 switcher_pte_page[(unsigned long)pages/PAGE_SIZE%PTRS_PER_PTE] = regs_pte; 662 switcher_pte_page[(unsigned long)pages/PAGE_SIZE%PTRS_PER_PTE] = regs_pte;
658} 663}
659/*:*/ 664/*:*/