aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/lguest/page_tables.c
diff options
context:
space:
mode:
authorGlauber de Oliveira Costa <gcosta@redhat.com>2008-01-07 08:05:37 -0500
committerRusty Russell <rusty@rustcorp.com.au>2008-01-30 06:50:14 -0500
commit1713608f280002d9ffc6de89d7de5cf367072d63 (patch)
tree332e7bdbe7ccccad408b309a4dd00b706b04082f /drivers/lguest/page_tables.c
parent5e232f4f428c4266ba5cdae9f23ba19a0913dcf9 (diff)
lguest: per-vcpu lguest pgdir management
This patch makes the pgdir management per-vcpu. The pgdirs pool is still guest-wide (although it will probably need to grow once we are really executing more vcpus), but the pgdidx index is gone, since it no longer makes sense. Instead, we use a per-vcpu index. Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Diffstat (limited to 'drivers/lguest/page_tables.c')
-rw-r--r--drivers/lguest/page_tables.c59
1 files changed, 30 insertions, 29 deletions
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index e34c81636a8..fb665611ccc 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -94,10 +94,10 @@ static pte_t *spte_addr(struct lguest *lg, pgd_t spgd, unsigned long vaddr)
94 94
95/* These two functions just like the above two, except they access the Guest 95/* These two functions just like the above two, except they access the Guest
96 * page tables. Hence they return a Guest address. */ 96 * page tables. Hence they return a Guest address. */
97static unsigned long gpgd_addr(struct lguest *lg, unsigned long vaddr) 97static unsigned long gpgd_addr(struct lg_cpu *cpu, unsigned long vaddr)
98{ 98{
99 unsigned int index = vaddr >> (PGDIR_SHIFT); 99 unsigned int index = vaddr >> (PGDIR_SHIFT);
100 return lg->pgdirs[lg->pgdidx].gpgdir + index * sizeof(pgd_t); 100 return cpu->lg->pgdirs[cpu->cpu_pgd].gpgdir + index * sizeof(pgd_t);
101} 101}
102 102
103static unsigned long gpte_addr(struct lguest *lg, 103static unsigned long gpte_addr(struct lguest *lg,
@@ -200,22 +200,23 @@ static void check_gpgd(struct lguest *lg, pgd_t gpgd)
200 * 200 *
201 * If we fixed up the fault (ie. we mapped the address), this routine returns 201 * If we fixed up the fault (ie. we mapped the address), this routine returns
202 * true. Otherwise, it was a real fault and we need to tell the Guest. */ 202 * true. Otherwise, it was a real fault and we need to tell the Guest. */
203int demand_page(struct lguest *lg, unsigned long vaddr, int errcode) 203int demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
204{ 204{
205 pgd_t gpgd; 205 pgd_t gpgd;
206 pgd_t *spgd; 206 pgd_t *spgd;
207 unsigned long gpte_ptr; 207 unsigned long gpte_ptr;
208 pte_t gpte; 208 pte_t gpte;
209 pte_t *spte; 209 pte_t *spte;
210 struct lguest *lg = cpu->lg;
210 211
211 /* First step: get the top-level Guest page table entry. */ 212 /* First step: get the top-level Guest page table entry. */
212 gpgd = lgread(lg, gpgd_addr(lg, vaddr), pgd_t); 213 gpgd = lgread(lg, gpgd_addr(cpu, vaddr), pgd_t);
213 /* Toplevel not present? We can't map it in. */ 214 /* Toplevel not present? We can't map it in. */
214 if (!(pgd_flags(gpgd) & _PAGE_PRESENT)) 215 if (!(pgd_flags(gpgd) & _PAGE_PRESENT))
215 return 0; 216 return 0;
216 217
217 /* Now look at the matching shadow entry. */ 218 /* Now look at the matching shadow entry. */
218 spgd = spgd_addr(lg, lg->pgdidx, vaddr); 219 spgd = spgd_addr(lg, cpu->cpu_pgd, vaddr);
219 if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) { 220 if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) {
220 /* No shadow entry: allocate a new shadow PTE page. */ 221 /* No shadow entry: allocate a new shadow PTE page. */
221 unsigned long ptepage = get_zeroed_page(GFP_KERNEL); 222 unsigned long ptepage = get_zeroed_page(GFP_KERNEL);
@@ -297,19 +298,19 @@ int demand_page(struct lguest *lg, unsigned long vaddr, int errcode)
297 * 298 *
298 * This is a quick version which answers the question: is this virtual address 299 * This is a quick version which answers the question: is this virtual address
299 * mapped by the shadow page tables, and is it writable? */ 300 * mapped by the shadow page tables, and is it writable? */
300static int page_writable(struct lguest *lg, unsigned long vaddr) 301static int page_writable(struct lg_cpu *cpu, unsigned long vaddr)
301{ 302{
302 pgd_t *spgd; 303 pgd_t *spgd;
303 unsigned long flags; 304 unsigned long flags;
304 305
305 /* Look at the current top level entry: is it present? */ 306 /* Look at the current top level entry: is it present? */
306 spgd = spgd_addr(lg, lg->pgdidx, vaddr); 307 spgd = spgd_addr(cpu->lg, cpu->cpu_pgd, vaddr);
307 if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) 308 if (!(pgd_flags(*spgd) & _PAGE_PRESENT))
308 return 0; 309 return 0;
309 310
310 /* Check the flags on the pte entry itself: it must be present and 311 /* Check the flags on the pte entry itself: it must be present and
311 * writable. */ 312 * writable. */
312 flags = pte_flags(*(spte_addr(lg, *spgd, vaddr))); 313 flags = pte_flags(*(spte_addr(cpu->lg, *spgd, vaddr)));
313 314
314 return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW); 315 return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW);
315} 316}
@@ -317,10 +318,10 @@ static int page_writable(struct lguest *lg, unsigned long vaddr)
317/* So, when pin_stack_pages() asks us to pin a page, we check if it's already 318/* So, when pin_stack_pages() asks us to pin a page, we check if it's already
318 * in the page tables, and if not, we call demand_page() with error code 2 319 * in the page tables, and if not, we call demand_page() with error code 2
319 * (meaning "write"). */ 320 * (meaning "write"). */
320void pin_page(struct lguest *lg, unsigned long vaddr) 321void pin_page(struct lg_cpu *cpu, unsigned long vaddr)
321{ 322{
322 if (!page_writable(lg, vaddr) && !demand_page(lg, vaddr, 2)) 323 if (!page_writable(cpu, vaddr) && !demand_page(cpu, vaddr, 2))
323 kill_guest(lg, "bad stack page %#lx", vaddr); 324 kill_guest(cpu->lg, "bad stack page %#lx", vaddr);
324} 325}
325 326
326/*H:450 If we chase down the release_pgd() code, it looks like this: */ 327/*H:450 If we chase down the release_pgd() code, it looks like this: */
@@ -358,28 +359,28 @@ static void flush_user_mappings(struct lguest *lg, int idx)
358 * 359 *
359 * The Guest has a hypercall to throw away the page tables: it's used when a 360 * The Guest has a hypercall to throw away the page tables: it's used when a
360 * large number of mappings have been changed. */ 361 * large number of mappings have been changed. */
361void guest_pagetable_flush_user(struct lguest *lg) 362void guest_pagetable_flush_user(struct lg_cpu *cpu)
362{ 363{
363 /* Drop the userspace part of the current page table. */ 364 /* Drop the userspace part of the current page table. */
364 flush_user_mappings(lg, lg->pgdidx); 365 flush_user_mappings(cpu->lg, cpu->cpu_pgd);
365} 366}
366/*:*/ 367/*:*/
367 368
368/* We walk down the guest page tables to get a guest-physical address */ 369/* We walk down the guest page tables to get a guest-physical address */
369unsigned long guest_pa(struct lguest *lg, unsigned long vaddr) 370unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr)
370{ 371{
371 pgd_t gpgd; 372 pgd_t gpgd;
372 pte_t gpte; 373 pte_t gpte;
373 374
374 /* First step: get the top-level Guest page table entry. */ 375 /* First step: get the top-level Guest page table entry. */
375 gpgd = lgread(lg, gpgd_addr(lg, vaddr), pgd_t); 376 gpgd = lgread(cpu->lg, gpgd_addr(cpu, vaddr), pgd_t);
376 /* Toplevel not present? We can't map it in. */ 377 /* Toplevel not present? We can't map it in. */
377 if (!(pgd_flags(gpgd) & _PAGE_PRESENT)) 378 if (!(pgd_flags(gpgd) & _PAGE_PRESENT))
378 kill_guest(lg, "Bad address %#lx", vaddr); 379 kill_guest(cpu->lg, "Bad address %#lx", vaddr);
379 380
380 gpte = lgread(lg, gpte_addr(lg, gpgd, vaddr), pte_t); 381 gpte = lgread(cpu->lg, gpte_addr(cpu->lg, gpgd, vaddr), pte_t);
381 if (!(pte_flags(gpte) & _PAGE_PRESENT)) 382 if (!(pte_flags(gpte) & _PAGE_PRESENT))
382 kill_guest(lg, "Bad address %#lx", vaddr); 383 kill_guest(cpu->lg, "Bad address %#lx", vaddr);
383 384
384 return pte_pfn(gpte) * PAGE_SIZE | (vaddr & ~PAGE_MASK); 385 return pte_pfn(gpte) * PAGE_SIZE | (vaddr & ~PAGE_MASK);
385} 386}
@@ -399,11 +400,12 @@ static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable)
399/*H:435 And this is us, creating the new page directory. If we really do 400/*H:435 And this is us, creating the new page directory. If we really do
400 * allocate a new one (and so the kernel parts are not there), we set 401 * allocate a new one (and so the kernel parts are not there), we set
401 * blank_pgdir. */ 402 * blank_pgdir. */
402static unsigned int new_pgdir(struct lguest *lg, 403static unsigned int new_pgdir(struct lg_cpu *cpu,
403 unsigned long gpgdir, 404 unsigned long gpgdir,
404 int *blank_pgdir) 405 int *blank_pgdir)
405{ 406{
406 unsigned int next; 407 unsigned int next;
408 struct lguest *lg = cpu->lg;
407 409
408 /* We pick one entry at random to throw out. Choosing the Least 410 /* We pick one entry at random to throw out. Choosing the Least
409 * Recently Used might be better, but this is easy. */ 411 * Recently Used might be better, but this is easy. */
@@ -413,7 +415,7 @@ static unsigned int new_pgdir(struct lguest *lg,
413 lg->pgdirs[next].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL); 415 lg->pgdirs[next].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL);
414 /* If the allocation fails, just keep using the one we have */ 416 /* If the allocation fails, just keep using the one we have */
415 if (!lg->pgdirs[next].pgdir) 417 if (!lg->pgdirs[next].pgdir)
416 next = lg->pgdidx; 418 next = cpu->cpu_pgd;
417 else 419 else
418 /* This is a blank page, so there are no kernel 420 /* This is a blank page, so there are no kernel
419 * mappings: caller must map the stack! */ 421 * mappings: caller must map the stack! */
@@ -442,9 +444,9 @@ void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable)
442 /* If not, we allocate or mug an existing one: if it's a fresh one, 444 /* If not, we allocate or mug an existing one: if it's a fresh one,
443 * repin gets set to 1. */ 445 * repin gets set to 1. */
444 if (newpgdir == ARRAY_SIZE(lg->pgdirs)) 446 if (newpgdir == ARRAY_SIZE(lg->pgdirs))
445 newpgdir = new_pgdir(lg, pgtable, &repin); 447 newpgdir = new_pgdir(cpu, pgtable, &repin);
446 /* Change the current pgd index to the new one. */ 448 /* Change the current pgd index to the new one. */
447 lg->pgdidx = newpgdir; 449 cpu->cpu_pgd = newpgdir;
448 /* If it was completely blank, we map in the Guest kernel stack */ 450 /* If it was completely blank, we map in the Guest kernel stack */
449 if (repin) 451 if (repin)
450 pin_stack_pages(cpu); 452 pin_stack_pages(cpu);
@@ -591,11 +593,11 @@ int init_guest_pagetable(struct lguest *lg, unsigned long pgtable)
591{ 593{
592 /* We start on the first shadow page table, and give it a blank PGD 594 /* We start on the first shadow page table, and give it a blank PGD
593 * page. */ 595 * page. */
594 lg->pgdidx = 0; 596 lg->pgdirs[0].gpgdir = pgtable;
595 lg->pgdirs[lg->pgdidx].gpgdir = pgtable; 597 lg->pgdirs[0].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL);
596 lg->pgdirs[lg->pgdidx].pgdir = (pgd_t*)get_zeroed_page(GFP_KERNEL); 598 if (!lg->pgdirs[0].pgdir)
597 if (!lg->pgdirs[lg->pgdidx].pgdir)
598 return -ENOMEM; 599 return -ENOMEM;
600 lg->cpus[0].cpu_pgd = 0;
599 return 0; 601 return 0;
600} 602}
601 603
@@ -607,7 +609,7 @@ void page_table_guest_data_init(struct lguest *lg)
607 /* We tell the Guest that it can't use the top 4MB of virtual 609 /* We tell the Guest that it can't use the top 4MB of virtual
608 * addresses used by the Switcher. */ 610 * addresses used by the Switcher. */
609 || put_user(4U*1024*1024, &lg->lguest_data->reserve_mem) 611 || put_user(4U*1024*1024, &lg->lguest_data->reserve_mem)
610 || put_user(lg->pgdirs[lg->pgdidx].gpgdir,&lg->lguest_data->pgdir)) 612 || put_user(lg->pgdirs[0].gpgdir, &lg->lguest_data->pgdir))
611 kill_guest(lg, "bad guest page %p", lg->lguest_data); 613 kill_guest(lg, "bad guest page %p", lg->lguest_data);
612 614
613 /* In flush_user_mappings() we loop from 0 to 615 /* In flush_user_mappings() we loop from 0 to
@@ -637,7 +639,6 @@ void free_guest_pagetable(struct lguest *lg)
637 * Guest is about to run on this CPU. */ 639 * Guest is about to run on this CPU. */
638void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages) 640void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages)
639{ 641{
640 struct lguest *lg = cpu->lg;
641 pte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages); 642 pte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages);
642 pgd_t switcher_pgd; 643 pgd_t switcher_pgd;
643 pte_t regs_pte; 644 pte_t regs_pte;
@@ -647,7 +648,7 @@ void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages)
647 * page for this CPU (with appropriate flags). */ 648 * page for this CPU (with appropriate flags). */
648 switcher_pgd = __pgd(__pa(switcher_pte_page) | _PAGE_KERNEL); 649 switcher_pgd = __pgd(__pa(switcher_pte_page) | _PAGE_KERNEL);
649 650
650 lg->pgdirs[lg->pgdidx].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd; 651 cpu->lg->pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd;
651 652
652 /* We also change the Switcher PTE page. When we're running the Guest, 653 /* We also change the Switcher PTE page. When we're running the Guest,
653 * we want the Guest's "regs" page to appear where the first Switcher 654 * we want the Guest's "regs" page to appear where the first Switcher