diff options
Diffstat (limited to 'drivers/lguest/page_tables.c')
-rw-r--r-- | drivers/lguest/page_tables.c | 179 |
1 files changed, 92 insertions, 87 deletions
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index fffabb327157..74b4cf2a6c41 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c | |||
@@ -68,23 +68,23 @@ static DEFINE_PER_CPU(pte_t *, switcher_pte_pages); | |||
68 | * page directory entry (PGD) for that address. Since we keep track of several | 68 | * page directory entry (PGD) for that address. Since we keep track of several |
69 | * page tables, the "i" argument tells us which one we're interested in (it's | 69 | * page tables, the "i" argument tells us which one we're interested in (it's |
70 | * usually the current one). */ | 70 | * usually the current one). */ |
71 | static pgd_t *spgd_addr(struct lguest *lg, u32 i, unsigned long vaddr) | 71 | static pgd_t *spgd_addr(struct lg_cpu *cpu, u32 i, unsigned long vaddr) |
72 | { | 72 | { |
73 | unsigned int index = pgd_index(vaddr); | 73 | unsigned int index = pgd_index(vaddr); |
74 | 74 | ||
75 | /* We kill any Guest trying to touch the Switcher addresses. */ | 75 | /* We kill any Guest trying to touch the Switcher addresses. */ |
76 | if (index >= SWITCHER_PGD_INDEX) { | 76 | if (index >= SWITCHER_PGD_INDEX) { |
77 | kill_guest(lg, "attempt to access switcher pages"); | 77 | kill_guest(cpu, "attempt to access switcher pages"); |
78 | index = 0; | 78 | index = 0; |
79 | } | 79 | } |
80 | /* Return a pointer index'th pgd entry for the i'th page table. */ | 80 | /* Return a pointer index'th pgd entry for the i'th page table. */ |
81 | return &lg->pgdirs[i].pgdir[index]; | 81 | return &cpu->lg->pgdirs[i].pgdir[index]; |
82 | } | 82 | } |
83 | 83 | ||
84 | /* This routine then takes the page directory entry returned above, which | 84 | /* This routine then takes the page directory entry returned above, which |
85 | * contains the address of the page table entry (PTE) page. It then returns a | 85 | * contains the address of the page table entry (PTE) page. It then returns a |
86 | * pointer to the PTE entry for the given address. */ | 86 | * pointer to the PTE entry for the given address. */ |
87 | static pte_t *spte_addr(struct lguest *lg, pgd_t spgd, unsigned long vaddr) | 87 | static pte_t *spte_addr(pgd_t spgd, unsigned long vaddr) |
88 | { | 88 | { |
89 | pte_t *page = __va(pgd_pfn(spgd) << PAGE_SHIFT); | 89 | pte_t *page = __va(pgd_pfn(spgd) << PAGE_SHIFT); |
90 | /* You should never call this if the PGD entry wasn't valid */ | 90 | /* You should never call this if the PGD entry wasn't valid */ |
@@ -94,14 +94,13 @@ static pte_t *spte_addr(struct lguest *lg, pgd_t spgd, unsigned long vaddr) | |||
94 | 94 | ||
95 | /* These two functions just like the above two, except they access the Guest | 95 | /* These two functions just like the above two, except they access the Guest |
96 | * page tables. Hence they return a Guest address. */ | 96 | * page tables. Hence they return a Guest address. */ |
97 | static unsigned long gpgd_addr(struct lguest *lg, unsigned long vaddr) | 97 | static unsigned long gpgd_addr(struct lg_cpu *cpu, unsigned long vaddr) |
98 | { | 98 | { |
99 | unsigned int index = vaddr >> (PGDIR_SHIFT); | 99 | unsigned int index = vaddr >> (PGDIR_SHIFT); |
100 | return lg->pgdirs[lg->pgdidx].gpgdir + index * sizeof(pgd_t); | 100 | return cpu->lg->pgdirs[cpu->cpu_pgd].gpgdir + index * sizeof(pgd_t); |
101 | } | 101 | } |
102 | 102 | ||
103 | static unsigned long gpte_addr(struct lguest *lg, | 103 | static unsigned long gpte_addr(pgd_t gpgd, unsigned long vaddr) |
104 | pgd_t gpgd, unsigned long vaddr) | ||
105 | { | 104 | { |
106 | unsigned long gpage = pgd_pfn(gpgd) << PAGE_SHIFT; | 105 | unsigned long gpage = pgd_pfn(gpgd) << PAGE_SHIFT; |
107 | BUG_ON(!(pgd_flags(gpgd) & _PAGE_PRESENT)); | 106 | BUG_ON(!(pgd_flags(gpgd) & _PAGE_PRESENT)); |
@@ -138,7 +137,7 @@ static unsigned long get_pfn(unsigned long virtpfn, int write) | |||
138 | * entry can be a little tricky. The flags are (almost) the same, but the | 137 | * entry can be a little tricky. The flags are (almost) the same, but the |
139 | * Guest PTE contains a virtual page number: the CPU needs the real page | 138 | * Guest PTE contains a virtual page number: the CPU needs the real page |
140 | * number. */ | 139 | * number. */ |
141 | static pte_t gpte_to_spte(struct lguest *lg, pte_t gpte, int write) | 140 | static pte_t gpte_to_spte(struct lg_cpu *cpu, pte_t gpte, int write) |
142 | { | 141 | { |
143 | unsigned long pfn, base, flags; | 142 | unsigned long pfn, base, flags; |
144 | 143 | ||
@@ -149,7 +148,7 @@ static pte_t gpte_to_spte(struct lguest *lg, pte_t gpte, int write) | |||
149 | flags = (pte_flags(gpte) & ~_PAGE_GLOBAL); | 148 | flags = (pte_flags(gpte) & ~_PAGE_GLOBAL); |
150 | 149 | ||
151 | /* The Guest's pages are offset inside the Launcher. */ | 150 | /* The Guest's pages are offset inside the Launcher. */ |
152 | base = (unsigned long)lg->mem_base / PAGE_SIZE; | 151 | base = (unsigned long)cpu->lg->mem_base / PAGE_SIZE; |
153 | 152 | ||
154 | /* We need a temporary "unsigned long" variable to hold the answer from | 153 | /* We need a temporary "unsigned long" variable to hold the answer from |
155 | * get_pfn(), because it returns 0xFFFFFFFF on failure, which wouldn't | 154 | * get_pfn(), because it returns 0xFFFFFFFF on failure, which wouldn't |
@@ -157,7 +156,7 @@ static pte_t gpte_to_spte(struct lguest *lg, pte_t gpte, int write) | |||
157 | * page, given the virtual number. */ | 156 | * page, given the virtual number. */ |
158 | pfn = get_pfn(base + pte_pfn(gpte), write); | 157 | pfn = get_pfn(base + pte_pfn(gpte), write); |
159 | if (pfn == -1UL) { | 158 | if (pfn == -1UL) { |
160 | kill_guest(lg, "failed to get page %lu", pte_pfn(gpte)); | 159 | kill_guest(cpu, "failed to get page %lu", pte_pfn(gpte)); |
161 | /* When we destroy the Guest, we'll go through the shadow page | 160 | /* When we destroy the Guest, we'll go through the shadow page |
162 | * tables and release_pte() them. Make sure we don't think | 161 | * tables and release_pte() them. Make sure we don't think |
163 | * this one is valid! */ | 162 | * this one is valid! */ |
@@ -177,17 +176,18 @@ static void release_pte(pte_t pte) | |||
177 | } | 176 | } |
178 | /*:*/ | 177 | /*:*/ |
179 | 178 | ||
180 | static void check_gpte(struct lguest *lg, pte_t gpte) | 179 | static void check_gpte(struct lg_cpu *cpu, pte_t gpte) |
181 | { | 180 | { |
182 | if ((pte_flags(gpte) & (_PAGE_PWT|_PAGE_PSE)) | 181 | if ((pte_flags(gpte) & (_PAGE_PWT|_PAGE_PSE)) |
183 | || pte_pfn(gpte) >= lg->pfn_limit) | 182 | || pte_pfn(gpte) >= cpu->lg->pfn_limit) |
184 | kill_guest(lg, "bad page table entry"); | 183 | kill_guest(cpu, "bad page table entry"); |
185 | } | 184 | } |
186 | 185 | ||
187 | static void check_gpgd(struct lguest *lg, pgd_t gpgd) | 186 | static void check_gpgd(struct lg_cpu *cpu, pgd_t gpgd) |
188 | { | 187 | { |
189 | if ((pgd_flags(gpgd) & ~_PAGE_TABLE) || pgd_pfn(gpgd) >= lg->pfn_limit) | 188 | if ((pgd_flags(gpgd) & ~_PAGE_TABLE) || |
190 | kill_guest(lg, "bad page directory entry"); | 189 | (pgd_pfn(gpgd) >= cpu->lg->pfn_limit)) |
190 | kill_guest(cpu, "bad page directory entry"); | ||
191 | } | 191 | } |
192 | 192 | ||
193 | /*H:330 | 193 | /*H:330 |
@@ -200,7 +200,7 @@ static void check_gpgd(struct lguest *lg, pgd_t gpgd) | |||
200 | * | 200 | * |
201 | * If we fixed up the fault (ie. we mapped the address), this routine returns | 201 | * If we fixed up the fault (ie. we mapped the address), this routine returns |
202 | * true. Otherwise, it was a real fault and we need to tell the Guest. */ | 202 | * true. Otherwise, it was a real fault and we need to tell the Guest. */ |
203 | int demand_page(struct lguest *lg, unsigned long vaddr, int errcode) | 203 | int demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) |
204 | { | 204 | { |
205 | pgd_t gpgd; | 205 | pgd_t gpgd; |
206 | pgd_t *spgd; | 206 | pgd_t *spgd; |
@@ -209,24 +209,24 @@ int demand_page(struct lguest *lg, unsigned long vaddr, int errcode) | |||
209 | pte_t *spte; | 209 | pte_t *spte; |
210 | 210 | ||
211 | /* First step: get the top-level Guest page table entry. */ | 211 | /* First step: get the top-level Guest page table entry. */ |
212 | gpgd = lgread(lg, gpgd_addr(lg, vaddr), pgd_t); | 212 | gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t); |
213 | /* Toplevel not present? We can't map it in. */ | 213 | /* Toplevel not present? We can't map it in. */ |
214 | if (!(pgd_flags(gpgd) & _PAGE_PRESENT)) | 214 | if (!(pgd_flags(gpgd) & _PAGE_PRESENT)) |
215 | return 0; | 215 | return 0; |
216 | 216 | ||
217 | /* Now look at the matching shadow entry. */ | 217 | /* Now look at the matching shadow entry. */ |
218 | spgd = spgd_addr(lg, lg->pgdidx, vaddr); | 218 | spgd = spgd_addr(cpu, cpu->cpu_pgd, vaddr); |
219 | if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) { | 219 | if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) { |
220 | /* No shadow entry: allocate a new shadow PTE page. */ | 220 | /* No shadow entry: allocate a new shadow PTE page. */ |
221 | unsigned long ptepage = get_zeroed_page(GFP_KERNEL); | 221 | unsigned long ptepage = get_zeroed_page(GFP_KERNEL); |
222 | /* This is not really the Guest's fault, but killing it is | 222 | /* This is not really the Guest's fault, but killing it is |
223 | * simple for this corner case. */ | 223 | * simple for this corner case. */ |
224 | if (!ptepage) { | 224 | if (!ptepage) { |
225 | kill_guest(lg, "out of memory allocating pte page"); | 225 | kill_guest(cpu, "out of memory allocating pte page"); |
226 | return 0; | 226 | return 0; |
227 | } | 227 | } |
228 | /* We check that the Guest pgd is OK. */ | 228 | /* We check that the Guest pgd is OK. */ |
229 | check_gpgd(lg, gpgd); | 229 | check_gpgd(cpu, gpgd); |
230 | /* And we copy the flags to the shadow PGD entry. The page | 230 | /* And we copy the flags to the shadow PGD entry. The page |
231 | * number in the shadow PGD is the page we just allocated. */ | 231 | * number in the shadow PGD is the page we just allocated. */ |
232 | *spgd = __pgd(__pa(ptepage) | pgd_flags(gpgd)); | 232 | *spgd = __pgd(__pa(ptepage) | pgd_flags(gpgd)); |
@@ -234,8 +234,8 @@ int demand_page(struct lguest *lg, unsigned long vaddr, int errcode) | |||
234 | 234 | ||
235 | /* OK, now we look at the lower level in the Guest page table: keep its | 235 | /* OK, now we look at the lower level in the Guest page table: keep its |
236 | * address, because we might update it later. */ | 236 | * address, because we might update it later. */ |
237 | gpte_ptr = gpte_addr(lg, gpgd, vaddr); | 237 | gpte_ptr = gpte_addr(gpgd, vaddr); |
238 | gpte = lgread(lg, gpte_ptr, pte_t); | 238 | gpte = lgread(cpu, gpte_ptr, pte_t); |
239 | 239 | ||
240 | /* If this page isn't in the Guest page tables, we can't page it in. */ | 240 | /* If this page isn't in the Guest page tables, we can't page it in. */ |
241 | if (!(pte_flags(gpte) & _PAGE_PRESENT)) | 241 | if (!(pte_flags(gpte) & _PAGE_PRESENT)) |
@@ -252,7 +252,7 @@ int demand_page(struct lguest *lg, unsigned long vaddr, int errcode) | |||
252 | 252 | ||
253 | /* Check that the Guest PTE flags are OK, and the page number is below | 253 | /* Check that the Guest PTE flags are OK, and the page number is below |
254 | * the pfn_limit (ie. not mapping the Launcher binary). */ | 254 | * the pfn_limit (ie. not mapping the Launcher binary). */ |
255 | check_gpte(lg, gpte); | 255 | check_gpte(cpu, gpte); |
256 | 256 | ||
257 | /* Add the _PAGE_ACCESSED and (for a write) _PAGE_DIRTY flag */ | 257 | /* Add the _PAGE_ACCESSED and (for a write) _PAGE_DIRTY flag */ |
258 | gpte = pte_mkyoung(gpte); | 258 | gpte = pte_mkyoung(gpte); |
@@ -260,7 +260,7 @@ int demand_page(struct lguest *lg, unsigned long vaddr, int errcode) | |||
260 | gpte = pte_mkdirty(gpte); | 260 | gpte = pte_mkdirty(gpte); |
261 | 261 | ||
262 | /* Get the pointer to the shadow PTE entry we're going to set. */ | 262 | /* Get the pointer to the shadow PTE entry we're going to set. */ |
263 | spte = spte_addr(lg, *spgd, vaddr); | 263 | spte = spte_addr(*spgd, vaddr); |
264 | /* If there was a valid shadow PTE entry here before, we release it. | 264 | /* If there was a valid shadow PTE entry here before, we release it. |
265 | * This can happen with a write to a previously read-only entry. */ | 265 | * This can happen with a write to a previously read-only entry. */ |
266 | release_pte(*spte); | 266 | release_pte(*spte); |
@@ -268,17 +268,17 @@ int demand_page(struct lguest *lg, unsigned long vaddr, int errcode) | |||
268 | /* If this is a write, we insist that the Guest page is writable (the | 268 | /* If this is a write, we insist that the Guest page is writable (the |
269 | * final arg to gpte_to_spte()). */ | 269 | * final arg to gpte_to_spte()). */ |
270 | if (pte_dirty(gpte)) | 270 | if (pte_dirty(gpte)) |
271 | *spte = gpte_to_spte(lg, gpte, 1); | 271 | *spte = gpte_to_spte(cpu, gpte, 1); |
272 | else | 272 | else |
273 | /* If this is a read, don't set the "writable" bit in the page | 273 | /* If this is a read, don't set the "writable" bit in the page |
274 | * table entry, even if the Guest says it's writable. That way | 274 | * table entry, even if the Guest says it's writable. That way |
275 | * we will come back here when a write does actually occur, so | 275 | * we will come back here when a write does actually occur, so |
276 | * we can update the Guest's _PAGE_DIRTY flag. */ | 276 | * we can update the Guest's _PAGE_DIRTY flag. */ |
277 | *spte = gpte_to_spte(lg, pte_wrprotect(gpte), 0); | 277 | *spte = gpte_to_spte(cpu, pte_wrprotect(gpte), 0); |
278 | 278 | ||
279 | /* Finally, we write the Guest PTE entry back: we've set the | 279 | /* Finally, we write the Guest PTE entry back: we've set the |
280 | * _PAGE_ACCESSED and maybe the _PAGE_DIRTY flags. */ | 280 | * _PAGE_ACCESSED and maybe the _PAGE_DIRTY flags. */ |
281 | lgwrite(lg, gpte_ptr, pte_t, gpte); | 281 | lgwrite(cpu, gpte_ptr, pte_t, gpte); |
282 | 282 | ||
283 | /* The fault is fixed, the page table is populated, the mapping | 283 | /* The fault is fixed, the page table is populated, the mapping |
284 | * manipulated, the result returned and the code complete. A small | 284 | * manipulated, the result returned and the code complete. A small |
@@ -297,19 +297,19 @@ int demand_page(struct lguest *lg, unsigned long vaddr, int errcode) | |||
297 | * | 297 | * |
298 | * This is a quick version which answers the question: is this virtual address | 298 | * This is a quick version which answers the question: is this virtual address |
299 | * mapped by the shadow page tables, and is it writable? */ | 299 | * mapped by the shadow page tables, and is it writable? */ |
300 | static int page_writable(struct lguest *lg, unsigned long vaddr) | 300 | static int page_writable(struct lg_cpu *cpu, unsigned long vaddr) |
301 | { | 301 | { |
302 | pgd_t *spgd; | 302 | pgd_t *spgd; |
303 | unsigned long flags; | 303 | unsigned long flags; |
304 | 304 | ||
305 | /* Look at the current top level entry: is it present? */ | 305 | /* Look at the current top level entry: is it present? */ |
306 | spgd = spgd_addr(lg, lg->pgdidx, vaddr); | 306 | spgd = spgd_addr(cpu, cpu->cpu_pgd, vaddr); |
307 | if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) | 307 | if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) |
308 | return 0; | 308 | return 0; |
309 | 309 | ||
310 | /* Check the flags on the pte entry itself: it must be present and | 310 | /* Check the flags on the pte entry itself: it must be present and |
311 | * writable. */ | 311 | * writable. */ |
312 | flags = pte_flags(*(spte_addr(lg, *spgd, vaddr))); | 312 | flags = pte_flags(*(spte_addr(*spgd, vaddr))); |
313 | 313 | ||
314 | return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW); | 314 | return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW); |
315 | } | 315 | } |
@@ -317,10 +317,10 @@ static int page_writable(struct lguest *lg, unsigned long vaddr) | |||
317 | /* So, when pin_stack_pages() asks us to pin a page, we check if it's already | 317 | /* So, when pin_stack_pages() asks us to pin a page, we check if it's already |
318 | * in the page tables, and if not, we call demand_page() with error code 2 | 318 | * in the page tables, and if not, we call demand_page() with error code 2 |
319 | * (meaning "write"). */ | 319 | * (meaning "write"). */ |
320 | void pin_page(struct lguest *lg, unsigned long vaddr) | 320 | void pin_page(struct lg_cpu *cpu, unsigned long vaddr) |
321 | { | 321 | { |
322 | if (!page_writable(lg, vaddr) && !demand_page(lg, vaddr, 2)) | 322 | if (!page_writable(cpu, vaddr) && !demand_page(cpu, vaddr, 2)) |
323 | kill_guest(lg, "bad stack page %#lx", vaddr); | 323 | kill_guest(cpu, "bad stack page %#lx", vaddr); |
324 | } | 324 | } |
325 | 325 | ||
326 | /*H:450 If we chase down the release_pgd() code, it looks like this: */ | 326 | /*H:450 If we chase down the release_pgd() code, it looks like this: */ |
@@ -358,28 +358,28 @@ static void flush_user_mappings(struct lguest *lg, int idx) | |||
358 | * | 358 | * |
359 | * The Guest has a hypercall to throw away the page tables: it's used when a | 359 | * The Guest has a hypercall to throw away the page tables: it's used when a |
360 | * large number of mappings have been changed. */ | 360 | * large number of mappings have been changed. */ |
361 | void guest_pagetable_flush_user(struct lguest *lg) | 361 | void guest_pagetable_flush_user(struct lg_cpu *cpu) |
362 | { | 362 | { |
363 | /* Drop the userspace part of the current page table. */ | 363 | /* Drop the userspace part of the current page table. */ |
364 | flush_user_mappings(lg, lg->pgdidx); | 364 | flush_user_mappings(cpu->lg, cpu->cpu_pgd); |
365 | } | 365 | } |
366 | /*:*/ | 366 | /*:*/ |
367 | 367 | ||
368 | /* We walk down the guest page tables to get a guest-physical address */ | 368 | /* We walk down the guest page tables to get a guest-physical address */ |
369 | unsigned long guest_pa(struct lguest *lg, unsigned long vaddr) | 369 | unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr) |
370 | { | 370 | { |
371 | pgd_t gpgd; | 371 | pgd_t gpgd; |
372 | pte_t gpte; | 372 | pte_t gpte; |
373 | 373 | ||
374 | /* First step: get the top-level Guest page table entry. */ | 374 | /* First step: get the top-level Guest page table entry. */ |
375 | gpgd = lgread(lg, gpgd_addr(lg, vaddr), pgd_t); | 375 | gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t); |
376 | /* Toplevel not present? We can't map it in. */ | 376 | /* Toplevel not present? We can't map it in. */ |
377 | if (!(pgd_flags(gpgd) & _PAGE_PRESENT)) | 377 | if (!(pgd_flags(gpgd) & _PAGE_PRESENT)) |
378 | kill_guest(lg, "Bad address %#lx", vaddr); | 378 | kill_guest(cpu, "Bad address %#lx", vaddr); |
379 | 379 | ||
380 | gpte = lgread(lg, gpte_addr(lg, gpgd, vaddr), pte_t); | 380 | gpte = lgread(cpu, gpte_addr(gpgd, vaddr), pte_t); |
381 | if (!(pte_flags(gpte) & _PAGE_PRESENT)) | 381 | if (!(pte_flags(gpte) & _PAGE_PRESENT)) |
382 | kill_guest(lg, "Bad address %#lx", vaddr); | 382 | kill_guest(cpu, "Bad address %#lx", vaddr); |
383 | 383 | ||
384 | return pte_pfn(gpte) * PAGE_SIZE | (vaddr & ~PAGE_MASK); | 384 | return pte_pfn(gpte) * PAGE_SIZE | (vaddr & ~PAGE_MASK); |
385 | } | 385 | } |
@@ -399,7 +399,7 @@ static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable) | |||
399 | /*H:435 And this is us, creating the new page directory. If we really do | 399 | /*H:435 And this is us, creating the new page directory. If we really do |
400 | * allocate a new one (and so the kernel parts are not there), we set | 400 | * allocate a new one (and so the kernel parts are not there), we set |
401 | * blank_pgdir. */ | 401 | * blank_pgdir. */ |
402 | static unsigned int new_pgdir(struct lguest *lg, | 402 | static unsigned int new_pgdir(struct lg_cpu *cpu, |
403 | unsigned long gpgdir, | 403 | unsigned long gpgdir, |
404 | int *blank_pgdir) | 404 | int *blank_pgdir) |
405 | { | 405 | { |
@@ -407,22 +407,23 @@ static unsigned int new_pgdir(struct lguest *lg, | |||
407 | 407 | ||
408 | /* We pick one entry at random to throw out. Choosing the Least | 408 | /* We pick one entry at random to throw out. Choosing the Least |
409 | * Recently Used might be better, but this is easy. */ | 409 | * Recently Used might be better, but this is easy. */ |
410 | next = random32() % ARRAY_SIZE(lg->pgdirs); | 410 | next = random32() % ARRAY_SIZE(cpu->lg->pgdirs); |
411 | /* If it's never been allocated at all before, try now. */ | 411 | /* If it's never been allocated at all before, try now. */ |
412 | if (!lg->pgdirs[next].pgdir) { | 412 | if (!cpu->lg->pgdirs[next].pgdir) { |
413 | lg->pgdirs[next].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL); | 413 | cpu->lg->pgdirs[next].pgdir = |
414 | (pgd_t *)get_zeroed_page(GFP_KERNEL); | ||
414 | /* If the allocation fails, just keep using the one we have */ | 415 | /* If the allocation fails, just keep using the one we have */ |
415 | if (!lg->pgdirs[next].pgdir) | 416 | if (!cpu->lg->pgdirs[next].pgdir) |
416 | next = lg->pgdidx; | 417 | next = cpu->cpu_pgd; |
417 | else | 418 | else |
418 | /* This is a blank page, so there are no kernel | 419 | /* This is a blank page, so there are no kernel |
419 | * mappings: caller must map the stack! */ | 420 | * mappings: caller must map the stack! */ |
420 | *blank_pgdir = 1; | 421 | *blank_pgdir = 1; |
421 | } | 422 | } |
422 | /* Record which Guest toplevel this shadows. */ | 423 | /* Record which Guest toplevel this shadows. */ |
423 | lg->pgdirs[next].gpgdir = gpgdir; | 424 | cpu->lg->pgdirs[next].gpgdir = gpgdir; |
424 | /* Release all the non-kernel mappings. */ | 425 | /* Release all the non-kernel mappings. */ |
425 | flush_user_mappings(lg, next); | 426 | flush_user_mappings(cpu->lg, next); |
426 | 427 | ||
427 | return next; | 428 | return next; |
428 | } | 429 | } |
@@ -432,21 +433,21 @@ static unsigned int new_pgdir(struct lguest *lg, | |||
432 | * Now we've seen all the page table setting and manipulation, let's see what | 433 | * Now we've seen all the page table setting and manipulation, let's see what |
433 | * what happens when the Guest changes page tables (ie. changes the top-level | 434 | * what happens when the Guest changes page tables (ie. changes the top-level |
434 | * pgdir). This occurs on almost every context switch. */ | 435 | * pgdir). This occurs on almost every context switch. */ |
435 | void guest_new_pagetable(struct lguest *lg, unsigned long pgtable) | 436 | void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable) |
436 | { | 437 | { |
437 | int newpgdir, repin = 0; | 438 | int newpgdir, repin = 0; |
438 | 439 | ||
439 | /* Look to see if we have this one already. */ | 440 | /* Look to see if we have this one already. */ |
440 | newpgdir = find_pgdir(lg, pgtable); | 441 | newpgdir = find_pgdir(cpu->lg, pgtable); |
441 | /* If not, we allocate or mug an existing one: if it's a fresh one, | 442 | /* If not, we allocate or mug an existing one: if it's a fresh one, |
442 | * repin gets set to 1. */ | 443 | * repin gets set to 1. */ |
443 | if (newpgdir == ARRAY_SIZE(lg->pgdirs)) | 444 | if (newpgdir == ARRAY_SIZE(cpu->lg->pgdirs)) |
444 | newpgdir = new_pgdir(lg, pgtable, &repin); | 445 | newpgdir = new_pgdir(cpu, pgtable, &repin); |
445 | /* Change the current pgd index to the new one. */ | 446 | /* Change the current pgd index to the new one. */ |
446 | lg->pgdidx = newpgdir; | 447 | cpu->cpu_pgd = newpgdir; |
447 | /* If it was completely blank, we map in the Guest kernel stack */ | 448 | /* If it was completely blank, we map in the Guest kernel stack */ |
448 | if (repin) | 449 | if (repin) |
449 | pin_stack_pages(lg); | 450 | pin_stack_pages(cpu); |
450 | } | 451 | } |
451 | 452 | ||
452 | /*H:470 Finally, a routine which throws away everything: all PGD entries in all | 453 | /*H:470 Finally, a routine which throws away everything: all PGD entries in all |
@@ -468,11 +469,11 @@ static void release_all_pagetables(struct lguest *lg) | |||
468 | * mapping. Since kernel mappings are in every page table, it's easiest to | 469 | * mapping. Since kernel mappings are in every page table, it's easiest to |
469 | * throw them all away. This traps the Guest in amber for a while as | 470 | * throw them all away. This traps the Guest in amber for a while as |
470 | * everything faults back in, but it's rare. */ | 471 | * everything faults back in, but it's rare. */ |
471 | void guest_pagetable_clear_all(struct lguest *lg) | 472 | void guest_pagetable_clear_all(struct lg_cpu *cpu) |
472 | { | 473 | { |
473 | release_all_pagetables(lg); | 474 | release_all_pagetables(cpu->lg); |
474 | /* We need the Guest kernel stack mapped again. */ | 475 | /* We need the Guest kernel stack mapped again. */ |
475 | pin_stack_pages(lg); | 476 | pin_stack_pages(cpu); |
476 | } | 477 | } |
477 | /*:*/ | 478 | /*:*/ |
478 | /*M:009 Since we throw away all mappings when a kernel mapping changes, our | 479 | /*M:009 Since we throw away all mappings when a kernel mapping changes, our |
@@ -497,24 +498,24 @@ void guest_pagetable_clear_all(struct lguest *lg) | |||
497 | * _PAGE_ACCESSED then we can put a read-only PTE entry in immediately, and if | 498 | * _PAGE_ACCESSED then we can put a read-only PTE entry in immediately, and if |
498 | * they set _PAGE_DIRTY then we can put a writable PTE entry in immediately. | 499 | * they set _PAGE_DIRTY then we can put a writable PTE entry in immediately. |
499 | */ | 500 | */ |
500 | static void do_set_pte(struct lguest *lg, int idx, | 501 | static void do_set_pte(struct lg_cpu *cpu, int idx, |
501 | unsigned long vaddr, pte_t gpte) | 502 | unsigned long vaddr, pte_t gpte) |
502 | { | 503 | { |
503 | /* Look up the matching shadow page directory entry. */ | 504 | /* Look up the matching shadow page directory entry. */ |
504 | pgd_t *spgd = spgd_addr(lg, idx, vaddr); | 505 | pgd_t *spgd = spgd_addr(cpu, idx, vaddr); |
505 | 506 | ||
506 | /* If the top level isn't present, there's no entry to update. */ | 507 | /* If the top level isn't present, there's no entry to update. */ |
507 | if (pgd_flags(*spgd) & _PAGE_PRESENT) { | 508 | if (pgd_flags(*spgd) & _PAGE_PRESENT) { |
508 | /* Otherwise, we start by releasing the existing entry. */ | 509 | /* Otherwise, we start by releasing the existing entry. */ |
509 | pte_t *spte = spte_addr(lg, *spgd, vaddr); | 510 | pte_t *spte = spte_addr(*spgd, vaddr); |
510 | release_pte(*spte); | 511 | release_pte(*spte); |
511 | 512 | ||
512 | /* If they're setting this entry as dirty or accessed, we might | 513 | /* If they're setting this entry as dirty or accessed, we might |
513 | * as well put that entry they've given us in now. This shaves | 514 | * as well put that entry they've given us in now. This shaves |
514 | * 10% off a copy-on-write micro-benchmark. */ | 515 | * 10% off a copy-on-write micro-benchmark. */ |
515 | if (pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED)) { | 516 | if (pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED)) { |
516 | check_gpte(lg, gpte); | 517 | check_gpte(cpu, gpte); |
517 | *spte = gpte_to_spte(lg, gpte, | 518 | *spte = gpte_to_spte(cpu, gpte, |
518 | pte_flags(gpte) & _PAGE_DIRTY); | 519 | pte_flags(gpte) & _PAGE_DIRTY); |
519 | } else | 520 | } else |
520 | /* Otherwise kill it and we can demand_page() it in | 521 | /* Otherwise kill it and we can demand_page() it in |
@@ -533,22 +534,22 @@ static void do_set_pte(struct lguest *lg, int idx, | |||
533 | * | 534 | * |
534 | * The benefit is that when we have to track a new page table, we can copy keep | 535 | * The benefit is that when we have to track a new page table, we can copy keep |
535 | * all the kernel mappings. This speeds up context switch immensely. */ | 536 | * all the kernel mappings. This speeds up context switch immensely. */ |
536 | void guest_set_pte(struct lguest *lg, | 537 | void guest_set_pte(struct lg_cpu *cpu, |
537 | unsigned long gpgdir, unsigned long vaddr, pte_t gpte) | 538 | unsigned long gpgdir, unsigned long vaddr, pte_t gpte) |
538 | { | 539 | { |
539 | /* Kernel mappings must be changed on all top levels. Slow, but | 540 | /* Kernel mappings must be changed on all top levels. Slow, but |
540 | * doesn't happen often. */ | 541 | * doesn't happen often. */ |
541 | if (vaddr >= lg->kernel_address) { | 542 | if (vaddr >= cpu->lg->kernel_address) { |
542 | unsigned int i; | 543 | unsigned int i; |
543 | for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++) | 544 | for (i = 0; i < ARRAY_SIZE(cpu->lg->pgdirs); i++) |
544 | if (lg->pgdirs[i].pgdir) | 545 | if (cpu->lg->pgdirs[i].pgdir) |
545 | do_set_pte(lg, i, vaddr, gpte); | 546 | do_set_pte(cpu, i, vaddr, gpte); |
546 | } else { | 547 | } else { |
547 | /* Is this page table one we have a shadow for? */ | 548 | /* Is this page table one we have a shadow for? */ |
548 | int pgdir = find_pgdir(lg, gpgdir); | 549 | int pgdir = find_pgdir(cpu->lg, gpgdir); |
549 | if (pgdir != ARRAY_SIZE(lg->pgdirs)) | 550 | if (pgdir != ARRAY_SIZE(cpu->lg->pgdirs)) |
550 | /* If so, do the update. */ | 551 | /* If so, do the update. */ |
551 | do_set_pte(lg, pgdir, vaddr, gpte); | 552 | do_set_pte(cpu, pgdir, vaddr, gpte); |
552 | } | 553 | } |
553 | } | 554 | } |
554 | 555 | ||
@@ -590,30 +591,32 @@ int init_guest_pagetable(struct lguest *lg, unsigned long pgtable) | |||
590 | { | 591 | { |
591 | /* We start on the first shadow page table, and give it a blank PGD | 592 | /* We start on the first shadow page table, and give it a blank PGD |
592 | * page. */ | 593 | * page. */ |
593 | lg->pgdidx = 0; | 594 | lg->pgdirs[0].gpgdir = pgtable; |
594 | lg->pgdirs[lg->pgdidx].gpgdir = pgtable; | 595 | lg->pgdirs[0].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL); |
595 | lg->pgdirs[lg->pgdidx].pgdir = (pgd_t*)get_zeroed_page(GFP_KERNEL); | 596 | if (!lg->pgdirs[0].pgdir) |
596 | if (!lg->pgdirs[lg->pgdidx].pgdir) | ||
597 | return -ENOMEM; | 597 | return -ENOMEM; |
598 | lg->cpus[0].cpu_pgd = 0; | ||
598 | return 0; | 599 | return 0; |
599 | } | 600 | } |
600 | 601 | ||
601 | /* When the Guest calls LHCALL_LGUEST_INIT we do more setup. */ | 602 | /* When the Guest calls LHCALL_LGUEST_INIT we do more setup. */ |
602 | void page_table_guest_data_init(struct lguest *lg) | 603 | void page_table_guest_data_init(struct lg_cpu *cpu) |
603 | { | 604 | { |
604 | /* We get the kernel address: above this is all kernel memory. */ | 605 | /* We get the kernel address: above this is all kernel memory. */ |
605 | if (get_user(lg->kernel_address, &lg->lguest_data->kernel_address) | 606 | if (get_user(cpu->lg->kernel_address, |
607 | &cpu->lg->lguest_data->kernel_address) | ||
606 | /* We tell the Guest that it can't use the top 4MB of virtual | 608 | /* We tell the Guest that it can't use the top 4MB of virtual |
607 | * addresses used by the Switcher. */ | 609 | * addresses used by the Switcher. */ |
608 | || put_user(4U*1024*1024, &lg->lguest_data->reserve_mem) | 610 | || put_user(4U*1024*1024, &cpu->lg->lguest_data->reserve_mem) |
609 | || put_user(lg->pgdirs[lg->pgdidx].gpgdir,&lg->lguest_data->pgdir)) | 611 | || put_user(cpu->lg->pgdirs[0].gpgdir, &cpu->lg->lguest_data->pgdir)) |
610 | kill_guest(lg, "bad guest page %p", lg->lguest_data); | 612 | kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data); |
611 | 613 | ||
612 | /* In flush_user_mappings() we loop from 0 to | 614 | /* In flush_user_mappings() we loop from 0 to |
613 | * "pgd_index(lg->kernel_address)". This assumes it won't hit the | 615 | * "pgd_index(lg->kernel_address)". This assumes it won't hit the |
614 | * Switcher mappings, so check that now. */ | 616 | * Switcher mappings, so check that now. */ |
615 | if (pgd_index(lg->kernel_address) >= SWITCHER_PGD_INDEX) | 617 | if (pgd_index(cpu->lg->kernel_address) >= SWITCHER_PGD_INDEX) |
616 | kill_guest(lg, "bad kernel address %#lx", lg->kernel_address); | 618 | kill_guest(cpu, "bad kernel address %#lx", |
619 | cpu->lg->kernel_address); | ||
617 | } | 620 | } |
618 | 621 | ||
619 | /* When a Guest dies, our cleanup is fairly simple. */ | 622 | /* When a Guest dies, our cleanup is fairly simple. */ |
@@ -634,17 +637,18 @@ void free_guest_pagetable(struct lguest *lg) | |||
634 | * Guest (and not the pages for other CPUs). We have the appropriate PTE pages | 637 | * Guest (and not the pages for other CPUs). We have the appropriate PTE pages |
635 | * for each CPU already set up, we just need to hook them in now we know which | 638 | * for each CPU already set up, we just need to hook them in now we know which |
636 | * Guest is about to run on this CPU. */ | 639 | * Guest is about to run on this CPU. */ |
637 | void map_switcher_in_guest(struct lguest *lg, struct lguest_pages *pages) | 640 | void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages) |
638 | { | 641 | { |
639 | pte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages); | 642 | pte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages); |
640 | pgd_t switcher_pgd; | 643 | pgd_t switcher_pgd; |
641 | pte_t regs_pte; | 644 | pte_t regs_pte; |
645 | unsigned long pfn; | ||
642 | 646 | ||
643 | /* Make the last PGD entry for this Guest point to the Switcher's PTE | 647 | /* Make the last PGD entry for this Guest point to the Switcher's PTE |
644 | * page for this CPU (with appropriate flags). */ | 648 | * page for this CPU (with appropriate flags). */ |
645 | switcher_pgd = __pgd(__pa(switcher_pte_page) | _PAGE_KERNEL); | 649 | switcher_pgd = __pgd(__pa(switcher_pte_page) | __PAGE_KERNEL); |
646 | 650 | ||
647 | lg->pgdirs[lg->pgdidx].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd; | 651 | cpu->lg->pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd; |
648 | 652 | ||
649 | /* We also change the Switcher PTE page. When we're running the Guest, | 653 | /* We also change the Switcher PTE page. When we're running the Guest, |
650 | * we want the Guest's "regs" page to appear where the first Switcher | 654 | * we want the Guest's "regs" page to appear where the first Switcher |
@@ -653,7 +657,8 @@ void map_switcher_in_guest(struct lguest *lg, struct lguest_pages *pages) | |||
653 | * CPU's "struct lguest_pages": if we make sure the Guest's register | 657 | * CPU's "struct lguest_pages": if we make sure the Guest's register |
654 | * page is already mapped there, we don't have to copy them out | 658 | * page is already mapped there, we don't have to copy them out |
655 | * again. */ | 659 | * again. */ |
656 | regs_pte = pfn_pte (__pa(lg->regs_page) >> PAGE_SHIFT, __pgprot(_PAGE_KERNEL)); | 660 | pfn = __pa(cpu->regs_page) >> PAGE_SHIFT; |
661 | regs_pte = pfn_pte(pfn, __pgprot(__PAGE_KERNEL)); | ||
657 | switcher_pte_page[(unsigned long)pages/PAGE_SIZE%PTRS_PER_PTE] = regs_pte; | 662 | switcher_pte_page[(unsigned long)pages/PAGE_SIZE%PTRS_PER_PTE] = regs_pte; |
658 | } | 663 | } |
659 | /*:*/ | 664 | /*:*/ |