diff options
author | Glauber de Oliveira Costa <gcosta@redhat.com> | 2008-01-07 08:05:37 -0500 |
---|---|---|
committer | Rusty Russell <rusty@rustcorp.com.au> | 2008-01-30 06:50:14 -0500 |
commit | 1713608f280002d9ffc6de89d7de5cf367072d63 (patch) | |
tree | 332e7bdbe7ccccad408b309a4dd00b706b04082f /drivers/lguest/page_tables.c | |
parent | 5e232f4f428c4266ba5cdae9f23ba19a0913dcf9 (diff) |
lguest: per-vcpu lguest pgdir management
This patch makes the pgdir management per-vcpu. The pgdirs pool
is still guest-wide (although it will probably need to grow once we
are really executing more vcpus), but the guest-wide pgdidx index is
gone, since it makes no sense anymore. Instead, each vcpu tracks its
current pgdir with its own per-vcpu index (cpu_pgd).
Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Diffstat (limited to 'drivers/lguest/page_tables.c')
-rw-r--r-- | drivers/lguest/page_tables.c | 59 |
1 files changed, 30 insertions, 29 deletions
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index e34c81636a8..fb665611ccc 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c | |||
@@ -94,10 +94,10 @@ static pte_t *spte_addr(struct lguest *lg, pgd_t spgd, unsigned long vaddr) | |||
94 | 94 | ||
95 | /* These two functions just like the above two, except they access the Guest | 95 | /* These two functions just like the above two, except they access the Guest |
96 | * page tables. Hence they return a Guest address. */ | 96 | * page tables. Hence they return a Guest address. */ |
97 | static unsigned long gpgd_addr(struct lguest *lg, unsigned long vaddr) | 97 | static unsigned long gpgd_addr(struct lg_cpu *cpu, unsigned long vaddr) |
98 | { | 98 | { |
99 | unsigned int index = vaddr >> (PGDIR_SHIFT); | 99 | unsigned int index = vaddr >> (PGDIR_SHIFT); |
100 | return lg->pgdirs[lg->pgdidx].gpgdir + index * sizeof(pgd_t); | 100 | return cpu->lg->pgdirs[cpu->cpu_pgd].gpgdir + index * sizeof(pgd_t); |
101 | } | 101 | } |
102 | 102 | ||
103 | static unsigned long gpte_addr(struct lguest *lg, | 103 | static unsigned long gpte_addr(struct lguest *lg, |
@@ -200,22 +200,23 @@ static void check_gpgd(struct lguest *lg, pgd_t gpgd) | |||
200 | * | 200 | * |
201 | * If we fixed up the fault (ie. we mapped the address), this routine returns | 201 | * If we fixed up the fault (ie. we mapped the address), this routine returns |
202 | * true. Otherwise, it was a real fault and we need to tell the Guest. */ | 202 | * true. Otherwise, it was a real fault and we need to tell the Guest. */ |
203 | int demand_page(struct lguest *lg, unsigned long vaddr, int errcode) | 203 | int demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) |
204 | { | 204 | { |
205 | pgd_t gpgd; | 205 | pgd_t gpgd; |
206 | pgd_t *spgd; | 206 | pgd_t *spgd; |
207 | unsigned long gpte_ptr; | 207 | unsigned long gpte_ptr; |
208 | pte_t gpte; | 208 | pte_t gpte; |
209 | pte_t *spte; | 209 | pte_t *spte; |
210 | struct lguest *lg = cpu->lg; | ||
210 | 211 | ||
211 | /* First step: get the top-level Guest page table entry. */ | 212 | /* First step: get the top-level Guest page table entry. */ |
212 | gpgd = lgread(lg, gpgd_addr(lg, vaddr), pgd_t); | 213 | gpgd = lgread(lg, gpgd_addr(cpu, vaddr), pgd_t); |
213 | /* Toplevel not present? We can't map it in. */ | 214 | /* Toplevel not present? We can't map it in. */ |
214 | if (!(pgd_flags(gpgd) & _PAGE_PRESENT)) | 215 | if (!(pgd_flags(gpgd) & _PAGE_PRESENT)) |
215 | return 0; | 216 | return 0; |
216 | 217 | ||
217 | /* Now look at the matching shadow entry. */ | 218 | /* Now look at the matching shadow entry. */ |
218 | spgd = spgd_addr(lg, lg->pgdidx, vaddr); | 219 | spgd = spgd_addr(lg, cpu->cpu_pgd, vaddr); |
219 | if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) { | 220 | if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) { |
220 | /* No shadow entry: allocate a new shadow PTE page. */ | 221 | /* No shadow entry: allocate a new shadow PTE page. */ |
221 | unsigned long ptepage = get_zeroed_page(GFP_KERNEL); | 222 | unsigned long ptepage = get_zeroed_page(GFP_KERNEL); |
@@ -297,19 +298,19 @@ int demand_page(struct lguest *lg, unsigned long vaddr, int errcode) | |||
297 | * | 298 | * |
298 | * This is a quick version which answers the question: is this virtual address | 299 | * This is a quick version which answers the question: is this virtual address |
299 | * mapped by the shadow page tables, and is it writable? */ | 300 | * mapped by the shadow page tables, and is it writable? */ |
300 | static int page_writable(struct lguest *lg, unsigned long vaddr) | 301 | static int page_writable(struct lg_cpu *cpu, unsigned long vaddr) |
301 | { | 302 | { |
302 | pgd_t *spgd; | 303 | pgd_t *spgd; |
303 | unsigned long flags; | 304 | unsigned long flags; |
304 | 305 | ||
305 | /* Look at the current top level entry: is it present? */ | 306 | /* Look at the current top level entry: is it present? */ |
306 | spgd = spgd_addr(lg, lg->pgdidx, vaddr); | 307 | spgd = spgd_addr(cpu->lg, cpu->cpu_pgd, vaddr); |
307 | if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) | 308 | if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) |
308 | return 0; | 309 | return 0; |
309 | 310 | ||
310 | /* Check the flags on the pte entry itself: it must be present and | 311 | /* Check the flags on the pte entry itself: it must be present and |
311 | * writable. */ | 312 | * writable. */ |
312 | flags = pte_flags(*(spte_addr(lg, *spgd, vaddr))); | 313 | flags = pte_flags(*(spte_addr(cpu->lg, *spgd, vaddr))); |
313 | 314 | ||
314 | return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW); | 315 | return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW); |
315 | } | 316 | } |
@@ -317,10 +318,10 @@ static int page_writable(struct lguest *lg, unsigned long vaddr) | |||
317 | /* So, when pin_stack_pages() asks us to pin a page, we check if it's already | 318 | /* So, when pin_stack_pages() asks us to pin a page, we check if it's already |
318 | * in the page tables, and if not, we call demand_page() with error code 2 | 319 | * in the page tables, and if not, we call demand_page() with error code 2 |
319 | * (meaning "write"). */ | 320 | * (meaning "write"). */ |
320 | void pin_page(struct lguest *lg, unsigned long vaddr) | 321 | void pin_page(struct lg_cpu *cpu, unsigned long vaddr) |
321 | { | 322 | { |
322 | if (!page_writable(lg, vaddr) && !demand_page(lg, vaddr, 2)) | 323 | if (!page_writable(cpu, vaddr) && !demand_page(cpu, vaddr, 2)) |
323 | kill_guest(lg, "bad stack page %#lx", vaddr); | 324 | kill_guest(cpu->lg, "bad stack page %#lx", vaddr); |
324 | } | 325 | } |
325 | 326 | ||
326 | /*H:450 If we chase down the release_pgd() code, it looks like this: */ | 327 | /*H:450 If we chase down the release_pgd() code, it looks like this: */ |
@@ -358,28 +359,28 @@ static void flush_user_mappings(struct lguest *lg, int idx) | |||
358 | * | 359 | * |
359 | * The Guest has a hypercall to throw away the page tables: it's used when a | 360 | * The Guest has a hypercall to throw away the page tables: it's used when a |
360 | * large number of mappings have been changed. */ | 361 | * large number of mappings have been changed. */ |
361 | void guest_pagetable_flush_user(struct lguest *lg) | 362 | void guest_pagetable_flush_user(struct lg_cpu *cpu) |
362 | { | 363 | { |
363 | /* Drop the userspace part of the current page table. */ | 364 | /* Drop the userspace part of the current page table. */ |
364 | flush_user_mappings(lg, lg->pgdidx); | 365 | flush_user_mappings(cpu->lg, cpu->cpu_pgd); |
365 | } | 366 | } |
366 | /*:*/ | 367 | /*:*/ |
367 | 368 | ||
368 | /* We walk down the guest page tables to get a guest-physical address */ | 369 | /* We walk down the guest page tables to get a guest-physical address */ |
369 | unsigned long guest_pa(struct lguest *lg, unsigned long vaddr) | 370 | unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr) |
370 | { | 371 | { |
371 | pgd_t gpgd; | 372 | pgd_t gpgd; |
372 | pte_t gpte; | 373 | pte_t gpte; |
373 | 374 | ||
374 | /* First step: get the top-level Guest page table entry. */ | 375 | /* First step: get the top-level Guest page table entry. */ |
375 | gpgd = lgread(lg, gpgd_addr(lg, vaddr), pgd_t); | 376 | gpgd = lgread(cpu->lg, gpgd_addr(cpu, vaddr), pgd_t); |
376 | /* Toplevel not present? We can't map it in. */ | 377 | /* Toplevel not present? We can't map it in. */ |
377 | if (!(pgd_flags(gpgd) & _PAGE_PRESENT)) | 378 | if (!(pgd_flags(gpgd) & _PAGE_PRESENT)) |
378 | kill_guest(lg, "Bad address %#lx", vaddr); | 379 | kill_guest(cpu->lg, "Bad address %#lx", vaddr); |
379 | 380 | ||
380 | gpte = lgread(lg, gpte_addr(lg, gpgd, vaddr), pte_t); | 381 | gpte = lgread(cpu->lg, gpte_addr(cpu->lg, gpgd, vaddr), pte_t); |
381 | if (!(pte_flags(gpte) & _PAGE_PRESENT)) | 382 | if (!(pte_flags(gpte) & _PAGE_PRESENT)) |
382 | kill_guest(lg, "Bad address %#lx", vaddr); | 383 | kill_guest(cpu->lg, "Bad address %#lx", vaddr); |
383 | 384 | ||
384 | return pte_pfn(gpte) * PAGE_SIZE | (vaddr & ~PAGE_MASK); | 385 | return pte_pfn(gpte) * PAGE_SIZE | (vaddr & ~PAGE_MASK); |
385 | } | 386 | } |
@@ -399,11 +400,12 @@ static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable) | |||
399 | /*H:435 And this is us, creating the new page directory. If we really do | 400 | /*H:435 And this is us, creating the new page directory. If we really do |
400 | * allocate a new one (and so the kernel parts are not there), we set | 401 | * allocate a new one (and so the kernel parts are not there), we set |
401 | * blank_pgdir. */ | 402 | * blank_pgdir. */ |
402 | static unsigned int new_pgdir(struct lguest *lg, | 403 | static unsigned int new_pgdir(struct lg_cpu *cpu, |
403 | unsigned long gpgdir, | 404 | unsigned long gpgdir, |
404 | int *blank_pgdir) | 405 | int *blank_pgdir) |
405 | { | 406 | { |
406 | unsigned int next; | 407 | unsigned int next; |
408 | struct lguest *lg = cpu->lg; | ||
407 | 409 | ||
408 | /* We pick one entry at random to throw out. Choosing the Least | 410 | /* We pick one entry at random to throw out. Choosing the Least |
409 | * Recently Used might be better, but this is easy. */ | 411 | * Recently Used might be better, but this is easy. */ |
@@ -413,7 +415,7 @@ static unsigned int new_pgdir(struct lguest *lg, | |||
413 | lg->pgdirs[next].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL); | 415 | lg->pgdirs[next].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL); |
414 | /* If the allocation fails, just keep using the one we have */ | 416 | /* If the allocation fails, just keep using the one we have */ |
415 | if (!lg->pgdirs[next].pgdir) | 417 | if (!lg->pgdirs[next].pgdir) |
416 | next = lg->pgdidx; | 418 | next = cpu->cpu_pgd; |
417 | else | 419 | else |
418 | /* This is a blank page, so there are no kernel | 420 | /* This is a blank page, so there are no kernel |
419 | * mappings: caller must map the stack! */ | 421 | * mappings: caller must map the stack! */ |
@@ -442,9 +444,9 @@ void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable) | |||
442 | /* If not, we allocate or mug an existing one: if it's a fresh one, | 444 | /* If not, we allocate or mug an existing one: if it's a fresh one, |
443 | * repin gets set to 1. */ | 445 | * repin gets set to 1. */ |
444 | if (newpgdir == ARRAY_SIZE(lg->pgdirs)) | 446 | if (newpgdir == ARRAY_SIZE(lg->pgdirs)) |
445 | newpgdir = new_pgdir(lg, pgtable, &repin); | 447 | newpgdir = new_pgdir(cpu, pgtable, &repin); |
446 | /* Change the current pgd index to the new one. */ | 448 | /* Change the current pgd index to the new one. */ |
447 | lg->pgdidx = newpgdir; | 449 | cpu->cpu_pgd = newpgdir; |
448 | /* If it was completely blank, we map in the Guest kernel stack */ | 450 | /* If it was completely blank, we map in the Guest kernel stack */ |
449 | if (repin) | 451 | if (repin) |
450 | pin_stack_pages(cpu); | 452 | pin_stack_pages(cpu); |
@@ -591,11 +593,11 @@ int init_guest_pagetable(struct lguest *lg, unsigned long pgtable) | |||
591 | { | 593 | { |
592 | /* We start on the first shadow page table, and give it a blank PGD | 594 | /* We start on the first shadow page table, and give it a blank PGD |
593 | * page. */ | 595 | * page. */ |
594 | lg->pgdidx = 0; | 596 | lg->pgdirs[0].gpgdir = pgtable; |
595 | lg->pgdirs[lg->pgdidx].gpgdir = pgtable; | 597 | lg->pgdirs[0].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL); |
596 | lg->pgdirs[lg->pgdidx].pgdir = (pgd_t*)get_zeroed_page(GFP_KERNEL); | 598 | if (!lg->pgdirs[0].pgdir) |
597 | if (!lg->pgdirs[lg->pgdidx].pgdir) | ||
598 | return -ENOMEM; | 599 | return -ENOMEM; |
600 | lg->cpus[0].cpu_pgd = 0; | ||
599 | return 0; | 601 | return 0; |
600 | } | 602 | } |
601 | 603 | ||
@@ -607,7 +609,7 @@ void page_table_guest_data_init(struct lguest *lg) | |||
607 | /* We tell the Guest that it can't use the top 4MB of virtual | 609 | /* We tell the Guest that it can't use the top 4MB of virtual |
608 | * addresses used by the Switcher. */ | 610 | * addresses used by the Switcher. */ |
609 | || put_user(4U*1024*1024, &lg->lguest_data->reserve_mem) | 611 | || put_user(4U*1024*1024, &lg->lguest_data->reserve_mem) |
610 | || put_user(lg->pgdirs[lg->pgdidx].gpgdir,&lg->lguest_data->pgdir)) | 612 | || put_user(lg->pgdirs[0].gpgdir, &lg->lguest_data->pgdir)) |
611 | kill_guest(lg, "bad guest page %p", lg->lguest_data); | 613 | kill_guest(lg, "bad guest page %p", lg->lguest_data); |
612 | 614 | ||
613 | /* In flush_user_mappings() we loop from 0 to | 615 | /* In flush_user_mappings() we loop from 0 to |
@@ -637,7 +639,6 @@ void free_guest_pagetable(struct lguest *lg) | |||
637 | * Guest is about to run on this CPU. */ | 639 | * Guest is about to run on this CPU. */ |
638 | void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages) | 640 | void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages) |
639 | { | 641 | { |
640 | struct lguest *lg = cpu->lg; | ||
641 | pte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages); | 642 | pte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages); |
642 | pgd_t switcher_pgd; | 643 | pgd_t switcher_pgd; |
643 | pte_t regs_pte; | 644 | pte_t regs_pte; |
@@ -647,7 +648,7 @@ void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages) | |||
647 | * page for this CPU (with appropriate flags). */ | 648 | * page for this CPU (with appropriate flags). */ |
648 | switcher_pgd = __pgd(__pa(switcher_pte_page) | _PAGE_KERNEL); | 649 | switcher_pgd = __pgd(__pa(switcher_pte_page) | _PAGE_KERNEL); |
649 | 650 | ||
650 | lg->pgdirs[lg->pgdidx].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd; | 651 | cpu->lg->pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd; |
651 | 652 | ||
652 | /* We also change the Switcher PTE page. When we're running the Guest, | 653 | /* We also change the Switcher PTE page. When we're running the Guest, |
653 | * we want the Guest's "regs" page to appear where the first Switcher | 654 | * we want the Guest's "regs" page to appear where the first Switcher |