diff options
Diffstat (limited to 'drivers/lguest/page_tables.c')
| -rw-r--r-- | drivers/lguest/page_tables.c | 179 |
1 files changed, 92 insertions, 87 deletions
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index fffabb327157..74b4cf2a6c41 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c | |||
| @@ -68,23 +68,23 @@ static DEFINE_PER_CPU(pte_t *, switcher_pte_pages); | |||
| 68 | * page directory entry (PGD) for that address. Since we keep track of several | 68 | * page directory entry (PGD) for that address. Since we keep track of several |
| 69 | * page tables, the "i" argument tells us which one we're interested in (it's | 69 | * page tables, the "i" argument tells us which one we're interested in (it's |
| 70 | * usually the current one). */ | 70 | * usually the current one). */ |
| 71 | static pgd_t *spgd_addr(struct lguest *lg, u32 i, unsigned long vaddr) | 71 | static pgd_t *spgd_addr(struct lg_cpu *cpu, u32 i, unsigned long vaddr) |
| 72 | { | 72 | { |
| 73 | unsigned int index = pgd_index(vaddr); | 73 | unsigned int index = pgd_index(vaddr); |
| 74 | 74 | ||
| 75 | /* We kill any Guest trying to touch the Switcher addresses. */ | 75 | /* We kill any Guest trying to touch the Switcher addresses. */ |
| 76 | if (index >= SWITCHER_PGD_INDEX) { | 76 | if (index >= SWITCHER_PGD_INDEX) { |
| 77 | kill_guest(lg, "attempt to access switcher pages"); | 77 | kill_guest(cpu, "attempt to access switcher pages"); |
| 78 | index = 0; | 78 | index = 0; |
| 79 | } | 79 | } |
| 80 | /* Return a pointer to the index'th pgd entry for the i'th page table. */ | 80 | /* Return a pointer to the index'th pgd entry for the i'th page table. */ |
| 81 | return &lg->pgdirs[i].pgdir[index]; | 81 | return &cpu->lg->pgdirs[i].pgdir[index]; |
| 82 | } | 82 | } |
| 83 | 83 | ||
| 84 | /* This routine then takes the page directory entry returned above, which | 84 | /* This routine then takes the page directory entry returned above, which |
| 85 | * contains the address of the page table entry (PTE) page. It then returns a | 85 | * contains the address of the page table entry (PTE) page. It then returns a |
| 86 | * pointer to the PTE entry for the given address. */ | 86 | * pointer to the PTE entry for the given address. */ |
| 87 | static pte_t *spte_addr(struct lguest *lg, pgd_t spgd, unsigned long vaddr) | 87 | static pte_t *spte_addr(pgd_t spgd, unsigned long vaddr) |
| 88 | { | 88 | { |
| 89 | pte_t *page = __va(pgd_pfn(spgd) << PAGE_SHIFT); | 89 | pte_t *page = __va(pgd_pfn(spgd) << PAGE_SHIFT); |
| 90 | /* You should never call this if the PGD entry wasn't valid */ | 90 | /* You should never call this if the PGD entry wasn't valid */ |
| @@ -94,14 +94,13 @@ static pte_t *spte_addr(struct lguest *lg, pgd_t spgd, unsigned long vaddr) | |||
| 94 | 94 | ||
| 95 | /* These two functions are just like the above two, except they access the Guest | 95 | /* These two functions are just like the above two, except they access the Guest |
| 96 | * page tables. Hence they return a Guest address. */ | 96 | * page tables. Hence they return a Guest address. */ |
| 97 | static unsigned long gpgd_addr(struct lguest *lg, unsigned long vaddr) | 97 | static unsigned long gpgd_addr(struct lg_cpu *cpu, unsigned long vaddr) |
| 98 | { | 98 | { |
| 99 | unsigned int index = vaddr >> (PGDIR_SHIFT); | 99 | unsigned int index = vaddr >> (PGDIR_SHIFT); |
| 100 | return lg->pgdirs[lg->pgdidx].gpgdir + index * sizeof(pgd_t); | 100 | return cpu->lg->pgdirs[cpu->cpu_pgd].gpgdir + index * sizeof(pgd_t); |
| 101 | } | 101 | } |
| 102 | 102 | ||
| 103 | static unsigned long gpte_addr(struct lguest *lg, | 103 | static unsigned long gpte_addr(pgd_t gpgd, unsigned long vaddr) |
| 104 | pgd_t gpgd, unsigned long vaddr) | ||
| 105 | { | 104 | { |
| 106 | unsigned long gpage = pgd_pfn(gpgd) << PAGE_SHIFT; | 105 | unsigned long gpage = pgd_pfn(gpgd) << PAGE_SHIFT; |
| 107 | BUG_ON(!(pgd_flags(gpgd) & _PAGE_PRESENT)); | 106 | BUG_ON(!(pgd_flags(gpgd) & _PAGE_PRESENT)); |
| @@ -138,7 +137,7 @@ static unsigned long get_pfn(unsigned long virtpfn, int write) | |||
| 138 | * entry can be a little tricky. The flags are (almost) the same, but the | 137 | * entry can be a little tricky. The flags are (almost) the same, but the |
| 139 | * Guest PTE contains a virtual page number: the CPU needs the real page | 138 | * Guest PTE contains a virtual page number: the CPU needs the real page |
| 140 | * number. */ | 139 | * number. */ |
| 141 | static pte_t gpte_to_spte(struct lguest *lg, pte_t gpte, int write) | 140 | static pte_t gpte_to_spte(struct lg_cpu *cpu, pte_t gpte, int write) |
| 142 | { | 141 | { |
| 143 | unsigned long pfn, base, flags; | 142 | unsigned long pfn, base, flags; |
| 144 | 143 | ||
| @@ -149,7 +148,7 @@ static pte_t gpte_to_spte(struct lguest *lg, pte_t gpte, int write) | |||
| 149 | flags = (pte_flags(gpte) & ~_PAGE_GLOBAL); | 148 | flags = (pte_flags(gpte) & ~_PAGE_GLOBAL); |
| 150 | 149 | ||
| 151 | /* The Guest's pages are offset inside the Launcher. */ | 150 | /* The Guest's pages are offset inside the Launcher. */ |
| 152 | base = (unsigned long)lg->mem_base / PAGE_SIZE; | 151 | base = (unsigned long)cpu->lg->mem_base / PAGE_SIZE; |
| 153 | 152 | ||
| 154 | /* We need a temporary "unsigned long" variable to hold the answer from | 153 | /* We need a temporary "unsigned long" variable to hold the answer from |
| 155 | * get_pfn(), because it returns 0xFFFFFFFF on failure, which wouldn't | 154 | * get_pfn(), because it returns 0xFFFFFFFF on failure, which wouldn't |
| @@ -157,7 +156,7 @@ static pte_t gpte_to_spte(struct lguest *lg, pte_t gpte, int write) | |||
| 157 | * page, given the virtual number. */ | 156 | * page, given the virtual number. */ |
| 158 | pfn = get_pfn(base + pte_pfn(gpte), write); | 157 | pfn = get_pfn(base + pte_pfn(gpte), write); |
| 159 | if (pfn == -1UL) { | 158 | if (pfn == -1UL) { |
| 160 | kill_guest(lg, "failed to get page %lu", pte_pfn(gpte)); | 159 | kill_guest(cpu, "failed to get page %lu", pte_pfn(gpte)); |
| 161 | /* When we destroy the Guest, we'll go through the shadow page | 160 | /* When we destroy the Guest, we'll go through the shadow page |
| 162 | * tables and release_pte() them. Make sure we don't think | 161 | * tables and release_pte() them. Make sure we don't think |
| 163 | * this one is valid! */ | 162 | * this one is valid! */ |
| @@ -177,17 +176,18 @@ static void release_pte(pte_t pte) | |||
| 177 | } | 176 | } |
| 178 | /*:*/ | 177 | /*:*/ |
| 179 | 178 | ||
| 180 | static void check_gpte(struct lguest *lg, pte_t gpte) | 179 | static void check_gpte(struct lg_cpu *cpu, pte_t gpte) |
| 181 | { | 180 | { |
| 182 | if ((pte_flags(gpte) & (_PAGE_PWT|_PAGE_PSE)) | 181 | if ((pte_flags(gpte) & (_PAGE_PWT|_PAGE_PSE)) |
| 183 | || pte_pfn(gpte) >= lg->pfn_limit) | 182 | || pte_pfn(gpte) >= cpu->lg->pfn_limit) |
| 184 | kill_guest(lg, "bad page table entry"); | 183 | kill_guest(cpu, "bad page table entry"); |
| 185 | } | 184 | } |
| 186 | 185 | ||
| 187 | static void check_gpgd(struct lguest *lg, pgd_t gpgd) | 186 | static void check_gpgd(struct lg_cpu *cpu, pgd_t gpgd) |
| 188 | { | 187 | { |
| 189 | if ((pgd_flags(gpgd) & ~_PAGE_TABLE) || pgd_pfn(gpgd) >= lg->pfn_limit) | 188 | if ((pgd_flags(gpgd) & ~_PAGE_TABLE) || |
| 190 | kill_guest(lg, "bad page directory entry"); | 189 | (pgd_pfn(gpgd) >= cpu->lg->pfn_limit)) |
| 190 | kill_guest(cpu, "bad page directory entry"); | ||
| 191 | } | 191 | } |
| 192 | 192 | ||
| 193 | /*H:330 | 193 | /*H:330 |
| @@ -200,7 +200,7 @@ static void check_gpgd(struct lguest *lg, pgd_t gpgd) | |||
| 200 | * | 200 | * |
| 201 | * If we fixed up the fault (ie. we mapped the address), this routine returns | 201 | * If we fixed up the fault (ie. we mapped the address), this routine returns |
| 202 | * true. Otherwise, it was a real fault and we need to tell the Guest. */ | 202 | * true. Otherwise, it was a real fault and we need to tell the Guest. */ |
| 203 | int demand_page(struct lguest *lg, unsigned long vaddr, int errcode) | 203 | int demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) |
| 204 | { | 204 | { |
| 205 | pgd_t gpgd; | 205 | pgd_t gpgd; |
| 206 | pgd_t *spgd; | 206 | pgd_t *spgd; |
| @@ -209,24 +209,24 @@ int demand_page(struct lguest *lg, unsigned long vaddr, int errcode) | |||
| 209 | pte_t *spte; | 209 | pte_t *spte; |
| 210 | 210 | ||
| 211 | /* First step: get the top-level Guest page table entry. */ | 211 | /* First step: get the top-level Guest page table entry. */ |
| 212 | gpgd = lgread(lg, gpgd_addr(lg, vaddr), pgd_t); | 212 | gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t); |
| 213 | /* Toplevel not present? We can't map it in. */ | 213 | /* Toplevel not present? We can't map it in. */ |
| 214 | if (!(pgd_flags(gpgd) & _PAGE_PRESENT)) | 214 | if (!(pgd_flags(gpgd) & _PAGE_PRESENT)) |
| 215 | return 0; | 215 | return 0; |
| 216 | 216 | ||
| 217 | /* Now look at the matching shadow entry. */ | 217 | /* Now look at the matching shadow entry. */ |
| 218 | spgd = spgd_addr(lg, lg->pgdidx, vaddr); | 218 | spgd = spgd_addr(cpu, cpu->cpu_pgd, vaddr); |
| 219 | if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) { | 219 | if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) { |
| 220 | /* No shadow entry: allocate a new shadow PTE page. */ | 220 | /* No shadow entry: allocate a new shadow PTE page. */ |
| 221 | unsigned long ptepage = get_zeroed_page(GFP_KERNEL); | 221 | unsigned long ptepage = get_zeroed_page(GFP_KERNEL); |
| 222 | /* This is not really the Guest's fault, but killing it is | 222 | /* This is not really the Guest's fault, but killing it is |
| 223 | * simple for this corner case. */ | 223 | * simple for this corner case. */ |
| 224 | if (!ptepage) { | 224 | if (!ptepage) { |
| 225 | kill_guest(lg, "out of memory allocating pte page"); | 225 | kill_guest(cpu, "out of memory allocating pte page"); |
| 226 | return 0; | 226 | return 0; |
| 227 | } | 227 | } |
| 228 | /* We check that the Guest pgd is OK. */ | 228 | /* We check that the Guest pgd is OK. */ |
| 229 | check_gpgd(lg, gpgd); | 229 | check_gpgd(cpu, gpgd); |
| 230 | /* And we copy the flags to the shadow PGD entry. The page | 230 | /* And we copy the flags to the shadow PGD entry. The page |
| 231 | * number in the shadow PGD is the page we just allocated. */ | 231 | * number in the shadow PGD is the page we just allocated. */ |
| 232 | *spgd = __pgd(__pa(ptepage) | pgd_flags(gpgd)); | 232 | *spgd = __pgd(__pa(ptepage) | pgd_flags(gpgd)); |
| @@ -234,8 +234,8 @@ int demand_page(struct lguest *lg, unsigned long vaddr, int errcode) | |||
| 234 | 234 | ||
| 235 | /* OK, now we look at the lower level in the Guest page table: keep its | 235 | /* OK, now we look at the lower level in the Guest page table: keep its |
| 236 | * address, because we might update it later. */ | 236 | * address, because we might update it later. */ |
| 237 | gpte_ptr = gpte_addr(lg, gpgd, vaddr); | 237 | gpte_ptr = gpte_addr(gpgd, vaddr); |
| 238 | gpte = lgread(lg, gpte_ptr, pte_t); | 238 | gpte = lgread(cpu, gpte_ptr, pte_t); |
| 239 | 239 | ||
| 240 | /* If this page isn't in the Guest page tables, we can't page it in. */ | 240 | /* If this page isn't in the Guest page tables, we can't page it in. */ |
| 241 | if (!(pte_flags(gpte) & _PAGE_PRESENT)) | 241 | if (!(pte_flags(gpte) & _PAGE_PRESENT)) |
| @@ -252,7 +252,7 @@ int demand_page(struct lguest *lg, unsigned long vaddr, int errcode) | |||
| 252 | 252 | ||
| 253 | /* Check that the Guest PTE flags are OK, and the page number is below | 253 | /* Check that the Guest PTE flags are OK, and the page number is below |
| 254 | * the pfn_limit (ie. not mapping the Launcher binary). */ | 254 | * the pfn_limit (ie. not mapping the Launcher binary). */ |
| 255 | check_gpte(lg, gpte); | 255 | check_gpte(cpu, gpte); |
| 256 | 256 | ||
| 257 | /* Add the _PAGE_ACCESSED and (for a write) _PAGE_DIRTY flag */ | 257 | /* Add the _PAGE_ACCESSED and (for a write) _PAGE_DIRTY flag */ |
| 258 | gpte = pte_mkyoung(gpte); | 258 | gpte = pte_mkyoung(gpte); |
| @@ -260,7 +260,7 @@ int demand_page(struct lguest *lg, unsigned long vaddr, int errcode) | |||
| 260 | gpte = pte_mkdirty(gpte); | 260 | gpte = pte_mkdirty(gpte); |
| 261 | 261 | ||
| 262 | /* Get the pointer to the shadow PTE entry we're going to set. */ | 262 | /* Get the pointer to the shadow PTE entry we're going to set. */ |
| 263 | spte = spte_addr(lg, *spgd, vaddr); | 263 | spte = spte_addr(*spgd, vaddr); |
| 264 | /* If there was a valid shadow PTE entry here before, we release it. | 264 | /* If there was a valid shadow PTE entry here before, we release it. |
| 265 | * This can happen with a write to a previously read-only entry. */ | 265 | * This can happen with a write to a previously read-only entry. */ |
| 266 | release_pte(*spte); | 266 | release_pte(*spte); |
| @@ -268,17 +268,17 @@ int demand_page(struct lguest *lg, unsigned long vaddr, int errcode) | |||
| 268 | /* If this is a write, we insist that the Guest page is writable (the | 268 | /* If this is a write, we insist that the Guest page is writable (the |
| 269 | * final arg to gpte_to_spte()). */ | 269 | * final arg to gpte_to_spte()). */ |
| 270 | if (pte_dirty(gpte)) | 270 | if (pte_dirty(gpte)) |
| 271 | *spte = gpte_to_spte(lg, gpte, 1); | 271 | *spte = gpte_to_spte(cpu, gpte, 1); |
| 272 | else | 272 | else |
| 273 | /* If this is a read, don't set the "writable" bit in the page | 273 | /* If this is a read, don't set the "writable" bit in the page |
| 274 | * table entry, even if the Guest says it's writable. That way | 274 | * table entry, even if the Guest says it's writable. That way |
| 275 | * we will come back here when a write does actually occur, so | 275 | * we will come back here when a write does actually occur, so |
| 276 | * we can update the Guest's _PAGE_DIRTY flag. */ | 276 | * we can update the Guest's _PAGE_DIRTY flag. */ |
| 277 | *spte = gpte_to_spte(lg, pte_wrprotect(gpte), 0); | 277 | *spte = gpte_to_spte(cpu, pte_wrprotect(gpte), 0); |
| 278 | 278 | ||
| 279 | /* Finally, we write the Guest PTE entry back: we've set the | 279 | /* Finally, we write the Guest PTE entry back: we've set the |
| 280 | * _PAGE_ACCESSED and maybe the _PAGE_DIRTY flags. */ | 280 | * _PAGE_ACCESSED and maybe the _PAGE_DIRTY flags. */ |
| 281 | lgwrite(lg, gpte_ptr, pte_t, gpte); | 281 | lgwrite(cpu, gpte_ptr, pte_t, gpte); |
| 282 | 282 | ||
| 283 | /* The fault is fixed, the page table is populated, the mapping | 283 | /* The fault is fixed, the page table is populated, the mapping |
| 284 | * manipulated, the result returned and the code complete. A small | 284 | * manipulated, the result returned and the code complete. A small |
| @@ -297,19 +297,19 @@ int demand_page(struct lguest *lg, unsigned long vaddr, int errcode) | |||
| 297 | * | 297 | * |
| 298 | * This is a quick version which answers the question: is this virtual address | 298 | * This is a quick version which answers the question: is this virtual address |
| 299 | * mapped by the shadow page tables, and is it writable? */ | 299 | * mapped by the shadow page tables, and is it writable? */ |
| 300 | static int page_writable(struct lguest *lg, unsigned long vaddr) | 300 | static int page_writable(struct lg_cpu *cpu, unsigned long vaddr) |
| 301 | { | 301 | { |
| 302 | pgd_t *spgd; | 302 | pgd_t *spgd; |
| 303 | unsigned long flags; | 303 | unsigned long flags; |
| 304 | 304 | ||
| 305 | /* Look at the current top level entry: is it present? */ | 305 | /* Look at the current top level entry: is it present? */ |
| 306 | spgd = spgd_addr(lg, lg->pgdidx, vaddr); | 306 | spgd = spgd_addr(cpu, cpu->cpu_pgd, vaddr); |
| 307 | if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) | 307 | if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) |
| 308 | return 0; | 308 | return 0; |
| 309 | 309 | ||
| 310 | /* Check the flags on the pte entry itself: it must be present and | 310 | /* Check the flags on the pte entry itself: it must be present and |
| 311 | * writable. */ | 311 | * writable. */ |
| 312 | flags = pte_flags(*(spte_addr(lg, *spgd, vaddr))); | 312 | flags = pte_flags(*(spte_addr(*spgd, vaddr))); |
| 313 | 313 | ||
| 314 | return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW); | 314 | return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW); |
| 315 | } | 315 | } |
| @@ -317,10 +317,10 @@ static int page_writable(struct lguest *lg, unsigned long vaddr) | |||
| 317 | /* So, when pin_stack_pages() asks us to pin a page, we check if it's already | 317 | /* So, when pin_stack_pages() asks us to pin a page, we check if it's already |
| 318 | * in the page tables, and if not, we call demand_page() with error code 2 | 318 | * in the page tables, and if not, we call demand_page() with error code 2 |
| 319 | * (meaning "write"). */ | 319 | * (meaning "write"). */ |
| 320 | void pin_page(struct lguest *lg, unsigned long vaddr) | 320 | void pin_page(struct lg_cpu *cpu, unsigned long vaddr) |
| 321 | { | 321 | { |
| 322 | if (!page_writable(lg, vaddr) && !demand_page(lg, vaddr, 2)) | 322 | if (!page_writable(cpu, vaddr) && !demand_page(cpu, vaddr, 2)) |
| 323 | kill_guest(lg, "bad stack page %#lx", vaddr); | 323 | kill_guest(cpu, "bad stack page %#lx", vaddr); |
| 324 | } | 324 | } |
| 325 | 325 | ||
| 326 | /*H:450 If we chase down the release_pgd() code, it looks like this: */ | 326 | /*H:450 If we chase down the release_pgd() code, it looks like this: */ |
| @@ -358,28 +358,28 @@ static void flush_user_mappings(struct lguest *lg, int idx) | |||
| 358 | * | 358 | * |
| 359 | * The Guest has a hypercall to throw away the page tables: it's used when a | 359 | * The Guest has a hypercall to throw away the page tables: it's used when a |
| 360 | * large number of mappings have been changed. */ | 360 | * large number of mappings have been changed. */ |
| 361 | void guest_pagetable_flush_user(struct lguest *lg) | 361 | void guest_pagetable_flush_user(struct lg_cpu *cpu) |
| 362 | { | 362 | { |
| 363 | /* Drop the userspace part of the current page table. */ | 363 | /* Drop the userspace part of the current page table. */ |
| 364 | flush_user_mappings(lg, lg->pgdidx); | 364 | flush_user_mappings(cpu->lg, cpu->cpu_pgd); |
| 365 | } | 365 | } |
| 366 | /*:*/ | 366 | /*:*/ |
| 367 | 367 | ||
| 368 | /* We walk down the guest page tables to get a guest-physical address */ | 368 | /* We walk down the guest page tables to get a guest-physical address */ |
| 369 | unsigned long guest_pa(struct lguest *lg, unsigned long vaddr) | 369 | unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr) |
| 370 | { | 370 | { |
| 371 | pgd_t gpgd; | 371 | pgd_t gpgd; |
| 372 | pte_t gpte; | 372 | pte_t gpte; |
| 373 | 373 | ||
| 374 | /* First step: get the top-level Guest page table entry. */ | 374 | /* First step: get the top-level Guest page table entry. */ |
| 375 | gpgd = lgread(lg, gpgd_addr(lg, vaddr), pgd_t); | 375 | gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t); |
| 376 | /* Toplevel not present? We can't map it in. */ | 376 | /* Toplevel not present? We can't map it in. */ |
| 377 | if (!(pgd_flags(gpgd) & _PAGE_PRESENT)) | 377 | if (!(pgd_flags(gpgd) & _PAGE_PRESENT)) |
| 378 | kill_guest(lg, "Bad address %#lx", vaddr); | 378 | kill_guest(cpu, "Bad address %#lx", vaddr); |
| 379 | 379 | ||
| 380 | gpte = lgread(lg, gpte_addr(lg, gpgd, vaddr), pte_t); | 380 | gpte = lgread(cpu, gpte_addr(gpgd, vaddr), pte_t); |
| 381 | if (!(pte_flags(gpte) & _PAGE_PRESENT)) | 381 | if (!(pte_flags(gpte) & _PAGE_PRESENT)) |
| 382 | kill_guest(lg, "Bad address %#lx", vaddr); | 382 | kill_guest(cpu, "Bad address %#lx", vaddr); |
| 383 | 383 | ||
| 384 | return pte_pfn(gpte) * PAGE_SIZE | (vaddr & ~PAGE_MASK); | 384 | return pte_pfn(gpte) * PAGE_SIZE | (vaddr & ~PAGE_MASK); |
| 385 | } | 385 | } |
| @@ -399,7 +399,7 @@ static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable) | |||
| 399 | /*H:435 And this is us, creating the new page directory. If we really do | 399 | /*H:435 And this is us, creating the new page directory. If we really do |
| 400 | * allocate a new one (and so the kernel parts are not there), we set | 400 | * allocate a new one (and so the kernel parts are not there), we set |
| 401 | * blank_pgdir. */ | 401 | * blank_pgdir. */ |
| 402 | static unsigned int new_pgdir(struct lguest *lg, | 402 | static unsigned int new_pgdir(struct lg_cpu *cpu, |
| 403 | unsigned long gpgdir, | 403 | unsigned long gpgdir, |
| 404 | int *blank_pgdir) | 404 | int *blank_pgdir) |
| 405 | { | 405 | { |
| @@ -407,22 +407,23 @@ static unsigned int new_pgdir(struct lguest *lg, | |||
| 407 | 407 | ||
| 408 | /* We pick one entry at random to throw out. Choosing the Least | 408 | /* We pick one entry at random to throw out. Choosing the Least |
| 409 | * Recently Used might be better, but this is easy. */ | 409 | * Recently Used might be better, but this is easy. */ |
| 410 | next = random32() % ARRAY_SIZE(lg->pgdirs); | 410 | next = random32() % ARRAY_SIZE(cpu->lg->pgdirs); |
| 411 | /* If it's never been allocated at all before, try now. */ | 411 | /* If it's never been allocated at all before, try now. */ |
| 412 | if (!lg->pgdirs[next].pgdir) { | 412 | if (!cpu->lg->pgdirs[next].pgdir) { |
| 413 | lg->pgdirs[next].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL); | 413 | cpu->lg->pgdirs[next].pgdir = |
| 414 | (pgd_t *)get_zeroed_page(GFP_KERNEL); | ||
| 414 | /* If the allocation fails, just keep using the one we have */ | 415 | /* If the allocation fails, just keep using the one we have */ |
| 415 | if (!lg->pgdirs[next].pgdir) | 416 | if (!cpu->lg->pgdirs[next].pgdir) |
| 416 | next = lg->pgdidx; | 417 | next = cpu->cpu_pgd; |
| 417 | else | 418 | else |
| 418 | /* This is a blank page, so there are no kernel | 419 | /* This is a blank page, so there are no kernel |
| 419 | * mappings: caller must map the stack! */ | 420 | * mappings: caller must map the stack! */ |
| 420 | *blank_pgdir = 1; | 421 | *blank_pgdir = 1; |
| 421 | } | 422 | } |
| 422 | /* Record which Guest toplevel this shadows. */ | 423 | /* Record which Guest toplevel this shadows. */ |
| 423 | lg->pgdirs[next].gpgdir = gpgdir; | 424 | cpu->lg->pgdirs[next].gpgdir = gpgdir; |
| 424 | /* Release all the non-kernel mappings. */ | 425 | /* Release all the non-kernel mappings. */ |
| 425 | flush_user_mappings(lg, next); | 426 | flush_user_mappings(cpu->lg, next); |
| 426 | 427 | ||
| 427 | return next; | 428 | return next; |
| 428 | } | 429 | } |
| @@ -432,21 +433,21 @@ static unsigned int new_pgdir(struct lguest *lg, | |||
| 432 | * Now we've seen all the page table setting and manipulation, let's see what | 433 | * Now we've seen all the page table setting and manipulation, let's see what |
| 433 | * happens when the Guest changes page tables (ie. changes the top-level | 434 | * happens when the Guest changes page tables (ie. changes the top-level |
| 434 | * pgdir). This occurs on almost every context switch. */ | 435 | * pgdir). This occurs on almost every context switch. */ |
| 435 | void guest_new_pagetable(struct lguest *lg, unsigned long pgtable) | 436 | void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable) |
| 436 | { | 437 | { |
| 437 | int newpgdir, repin = 0; | 438 | int newpgdir, repin = 0; |
| 438 | 439 | ||
| 439 | /* Look to see if we have this one already. */ | 440 | /* Look to see if we have this one already. */ |
| 440 | newpgdir = find_pgdir(lg, pgtable); | 441 | newpgdir = find_pgdir(cpu->lg, pgtable); |
| 441 | /* If not, we allocate or mug an existing one: if it's a fresh one, | 442 | /* If not, we allocate or mug an existing one: if it's a fresh one, |
| 442 | * repin gets set to 1. */ | 443 | * repin gets set to 1. */ |
| 443 | if (newpgdir == ARRAY_SIZE(lg->pgdirs)) | 444 | if (newpgdir == ARRAY_SIZE(cpu->lg->pgdirs)) |
| 444 | newpgdir = new_pgdir(lg, pgtable, &repin); | 445 | newpgdir = new_pgdir(cpu, pgtable, &repin); |
| 445 | /* Change the current pgd index to the new one. */ | 446 | /* Change the current pgd index to the new one. */ |
| 446 | lg->pgdidx = newpgdir; | 447 | cpu->cpu_pgd = newpgdir; |
| 447 | /* If it was completely blank, we map in the Guest kernel stack */ | 448 | /* If it was completely blank, we map in the Guest kernel stack */ |
| 448 | if (repin) | 449 | if (repin) |
| 449 | pin_stack_pages(lg); | 450 | pin_stack_pages(cpu); |
| 450 | } | 451 | } |
| 451 | 452 | ||
| 452 | /*H:470 Finally, a routine which throws away everything: all PGD entries in all | 453 | /*H:470 Finally, a routine which throws away everything: all PGD entries in all |
| @@ -468,11 +469,11 @@ static void release_all_pagetables(struct lguest *lg) | |||
| 468 | * mapping. Since kernel mappings are in every page table, it's easiest to | 469 | * mapping. Since kernel mappings are in every page table, it's easiest to |
| 469 | * throw them all away. This traps the Guest in amber for a while as | 470 | * throw them all away. This traps the Guest in amber for a while as |
| 470 | * everything faults back in, but it's rare. */ | 471 | * everything faults back in, but it's rare. */ |
| 471 | void guest_pagetable_clear_all(struct lguest *lg) | 472 | void guest_pagetable_clear_all(struct lg_cpu *cpu) |
| 472 | { | 473 | { |
| 473 | release_all_pagetables(lg); | 474 | release_all_pagetables(cpu->lg); |
| 474 | /* We need the Guest kernel stack mapped again. */ | 475 | /* We need the Guest kernel stack mapped again. */ |
| 475 | pin_stack_pages(lg); | 476 | pin_stack_pages(cpu); |
| 476 | } | 477 | } |
| 477 | /*:*/ | 478 | /*:*/ |
| 478 | /*M:009 Since we throw away all mappings when a kernel mapping changes, our | 479 | /*M:009 Since we throw away all mappings when a kernel mapping changes, our |
| @@ -497,24 +498,24 @@ void guest_pagetable_clear_all(struct lguest *lg) | |||
| 497 | * _PAGE_ACCESSED then we can put a read-only PTE entry in immediately, and if | 498 | * _PAGE_ACCESSED then we can put a read-only PTE entry in immediately, and if |
| 498 | * they set _PAGE_DIRTY then we can put a writable PTE entry in immediately. | 499 | * they set _PAGE_DIRTY then we can put a writable PTE entry in immediately. |
| 499 | */ | 500 | */ |
| 500 | static void do_set_pte(struct lguest *lg, int idx, | 501 | static void do_set_pte(struct lg_cpu *cpu, int idx, |
| 501 | unsigned long vaddr, pte_t gpte) | 502 | unsigned long vaddr, pte_t gpte) |
| 502 | { | 503 | { |
| 503 | /* Look up the matching shadow page directory entry. */ | 504 | /* Look up the matching shadow page directory entry. */ |
| 504 | pgd_t *spgd = spgd_addr(lg, idx, vaddr); | 505 | pgd_t *spgd = spgd_addr(cpu, idx, vaddr); |
| 505 | 506 | ||
| 506 | /* If the top level isn't present, there's no entry to update. */ | 507 | /* If the top level isn't present, there's no entry to update. */ |
| 507 | if (pgd_flags(*spgd) & _PAGE_PRESENT) { | 508 | if (pgd_flags(*spgd) & _PAGE_PRESENT) { |
| 508 | /* Otherwise, we start by releasing the existing entry. */ | 509 | /* Otherwise, we start by releasing the existing entry. */ |
| 509 | pte_t *spte = spte_addr(lg, *spgd, vaddr); | 510 | pte_t *spte = spte_addr(*spgd, vaddr); |
| 510 | release_pte(*spte); | 511 | release_pte(*spte); |
| 511 | 512 | ||
| 512 | /* If they're setting this entry as dirty or accessed, we might | 513 | /* If they're setting this entry as dirty or accessed, we might |
| 513 | * as well put that entry they've given us in now. This shaves | 514 | * as well put that entry they've given us in now. This shaves |
| 514 | * 10% off a copy-on-write micro-benchmark. */ | 515 | * 10% off a copy-on-write micro-benchmark. */ |
| 515 | if (pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED)) { | 516 | if (pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED)) { |
| 516 | check_gpte(lg, gpte); | 517 | check_gpte(cpu, gpte); |
| 517 | *spte = gpte_to_spte(lg, gpte, | 518 | *spte = gpte_to_spte(cpu, gpte, |
| 518 | pte_flags(gpte) & _PAGE_DIRTY); | 519 | pte_flags(gpte) & _PAGE_DIRTY); |
| 519 | } else | 520 | } else |
| 520 | /* Otherwise kill it and we can demand_page() it in | 521 | /* Otherwise kill it and we can demand_page() it in |
| @@ -533,22 +534,22 @@ static void do_set_pte(struct lguest *lg, int idx, | |||
| 533 | * | 534 | * |
| 534 | * The benefit is that when we have to track a new page table, we can keep | 535 | * The benefit is that when we have to track a new page table, we can keep |
| 535 | * all the kernel mappings. This speeds up context switch immensely. */ | 536 | * all the kernel mappings. This speeds up context switch immensely. */ |
| 536 | void guest_set_pte(struct lguest *lg, | 537 | void guest_set_pte(struct lg_cpu *cpu, |
| 537 | unsigned long gpgdir, unsigned long vaddr, pte_t gpte) | 538 | unsigned long gpgdir, unsigned long vaddr, pte_t gpte) |
| 538 | { | 539 | { |
| 539 | /* Kernel mappings must be changed on all top levels. Slow, but | 540 | /* Kernel mappings must be changed on all top levels. Slow, but |
| 540 | * doesn't happen often. */ | 541 | * doesn't happen often. */ |
| 541 | if (vaddr >= lg->kernel_address) { | 542 | if (vaddr >= cpu->lg->kernel_address) { |
| 542 | unsigned int i; | 543 | unsigned int i; |
| 543 | for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++) | 544 | for (i = 0; i < ARRAY_SIZE(cpu->lg->pgdirs); i++) |
| 544 | if (lg->pgdirs[i].pgdir) | 545 | if (cpu->lg->pgdirs[i].pgdir) |
| 545 | do_set_pte(lg, i, vaddr, gpte); | 546 | do_set_pte(cpu, i, vaddr, gpte); |
| 546 | } else { | 547 | } else { |
| 547 | /* Is this page table one we have a shadow for? */ | 548 | /* Is this page table one we have a shadow for? */ |
| 548 | int pgdir = find_pgdir(lg, gpgdir); | 549 | int pgdir = find_pgdir(cpu->lg, gpgdir); |
| 549 | if (pgdir != ARRAY_SIZE(lg->pgdirs)) | 550 | if (pgdir != ARRAY_SIZE(cpu->lg->pgdirs)) |
| 550 | /* If so, do the update. */ | 551 | /* If so, do the update. */ |
| 551 | do_set_pte(lg, pgdir, vaddr, gpte); | 552 | do_set_pte(cpu, pgdir, vaddr, gpte); |
| 552 | } | 553 | } |
| 553 | } | 554 | } |
| 554 | 555 | ||
| @@ -590,30 +591,32 @@ int init_guest_pagetable(struct lguest *lg, unsigned long pgtable) | |||
| 590 | { | 591 | { |
| 591 | /* We start on the first shadow page table, and give it a blank PGD | 592 | /* We start on the first shadow page table, and give it a blank PGD |
| 592 | * page. */ | 593 | * page. */ |
| 593 | lg->pgdidx = 0; | 594 | lg->pgdirs[0].gpgdir = pgtable; |
| 594 | lg->pgdirs[lg->pgdidx].gpgdir = pgtable; | 595 | lg->pgdirs[0].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL); |
| 595 | lg->pgdirs[lg->pgdidx].pgdir = (pgd_t*)get_zeroed_page(GFP_KERNEL); | 596 | if (!lg->pgdirs[0].pgdir) |
| 596 | if (!lg->pgdirs[lg->pgdidx].pgdir) | ||
| 597 | return -ENOMEM; | 597 | return -ENOMEM; |
| 598 | lg->cpus[0].cpu_pgd = 0; | ||
| 598 | return 0; | 599 | return 0; |
| 599 | } | 600 | } |
| 600 | 601 | ||
| 601 | /* When the Guest calls LHCALL_LGUEST_INIT we do more setup. */ | 602 | /* When the Guest calls LHCALL_LGUEST_INIT we do more setup. */ |
| 602 | void page_table_guest_data_init(struct lguest *lg) | 603 | void page_table_guest_data_init(struct lg_cpu *cpu) |
| 603 | { | 604 | { |
| 604 | /* We get the kernel address: above this is all kernel memory. */ | 605 | /* We get the kernel address: above this is all kernel memory. */ |
| 605 | if (get_user(lg->kernel_address, &lg->lguest_data->kernel_address) | 606 | if (get_user(cpu->lg->kernel_address, |
| 607 | &cpu->lg->lguest_data->kernel_address) | ||
| 606 | /* We tell the Guest that it can't use the top 4MB of virtual | 608 | /* We tell the Guest that it can't use the top 4MB of virtual |
| 607 | * addresses used by the Switcher. */ | 609 | * addresses used by the Switcher. */ |
| 608 | || put_user(4U*1024*1024, &lg->lguest_data->reserve_mem) | 610 | || put_user(4U*1024*1024, &cpu->lg->lguest_data->reserve_mem) |
| 609 | || put_user(lg->pgdirs[lg->pgdidx].gpgdir,&lg->lguest_data->pgdir)) | 611 | || put_user(cpu->lg->pgdirs[0].gpgdir, &cpu->lg->lguest_data->pgdir)) |
| 610 | kill_guest(lg, "bad guest page %p", lg->lguest_data); | 612 | kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data); |
| 611 | 613 | ||
| 612 | /* In flush_user_mappings() we loop from 0 to | 614 | /* In flush_user_mappings() we loop from 0 to |
| 613 | * "pgd_index(lg->kernel_address)". This assumes it won't hit the | 615 | * "pgd_index(lg->kernel_address)". This assumes it won't hit the |
| 614 | * Switcher mappings, so check that now. */ | 616 | * Switcher mappings, so check that now. */ |
| 615 | if (pgd_index(lg->kernel_address) >= SWITCHER_PGD_INDEX) | 617 | if (pgd_index(cpu->lg->kernel_address) >= SWITCHER_PGD_INDEX) |
| 616 | kill_guest(lg, "bad kernel address %#lx", lg->kernel_address); | 618 | kill_guest(cpu, "bad kernel address %#lx", |
| 619 | cpu->lg->kernel_address); | ||
| 617 | } | 620 | } |
| 618 | 621 | ||
| 619 | /* When a Guest dies, our cleanup is fairly simple. */ | 622 | /* When a Guest dies, our cleanup is fairly simple. */ |
| @@ -634,17 +637,18 @@ void free_guest_pagetable(struct lguest *lg) | |||
| 634 | * Guest (and not the pages for other CPUs). We have the appropriate PTE pages | 637 | * Guest (and not the pages for other CPUs). We have the appropriate PTE pages |
| 635 | * for each CPU already set up, we just need to hook them in now we know which | 638 | * for each CPU already set up, we just need to hook them in now we know which |
| 636 | * Guest is about to run on this CPU. */ | 639 | * Guest is about to run on this CPU. */ |
| 637 | void map_switcher_in_guest(struct lguest *lg, struct lguest_pages *pages) | 640 | void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages) |
| 638 | { | 641 | { |
| 639 | pte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages); | 642 | pte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages); |
| 640 | pgd_t switcher_pgd; | 643 | pgd_t switcher_pgd; |
| 641 | pte_t regs_pte; | 644 | pte_t regs_pte; |
| 645 | unsigned long pfn; | ||
| 642 | 646 | ||
| 643 | /* Make the last PGD entry for this Guest point to the Switcher's PTE | 647 | /* Make the last PGD entry for this Guest point to the Switcher's PTE |
| 644 | * page for this CPU (with appropriate flags). */ | 648 | * page for this CPU (with appropriate flags). */ |
| 645 | switcher_pgd = __pgd(__pa(switcher_pte_page) | _PAGE_KERNEL); | 649 | switcher_pgd = __pgd(__pa(switcher_pte_page) | __PAGE_KERNEL); |
| 646 | 650 | ||
| 647 | lg->pgdirs[lg->pgdidx].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd; | 651 | cpu->lg->pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd; |
| 648 | 652 | ||
| 649 | /* We also change the Switcher PTE page. When we're running the Guest, | 653 | /* We also change the Switcher PTE page. When we're running the Guest, |
| 650 | * we want the Guest's "regs" page to appear where the first Switcher | 654 | * we want the Guest's "regs" page to appear where the first Switcher |
| @@ -653,7 +657,8 @@ void map_switcher_in_guest(struct lguest *lg, struct lguest_pages *pages) | |||
| 653 | * CPU's "struct lguest_pages": if we make sure the Guest's register | 657 | * CPU's "struct lguest_pages": if we make sure the Guest's register |
| 654 | * page is already mapped there, we don't have to copy them out | 658 | * page is already mapped there, we don't have to copy them out |
| 655 | * again. */ | 659 | * again. */ |
| 656 | regs_pte = pfn_pte (__pa(lg->regs_page) >> PAGE_SHIFT, __pgprot(_PAGE_KERNEL)); | 660 | pfn = __pa(cpu->regs_page) >> PAGE_SHIFT; |
| 661 | regs_pte = pfn_pte(pfn, __pgprot(__PAGE_KERNEL)); | ||
| 657 | switcher_pte_page[(unsigned long)pages/PAGE_SIZE%PTRS_PER_PTE] = regs_pte; | 662 | switcher_pte_page[(unsigned long)pages/PAGE_SIZE%PTRS_PER_PTE] = regs_pte; |
| 658 | } | 663 | } |
| 659 | /*:*/ | 664 | /*:*/ |
