diff options
author | Zachary Amsden <zach@vmware.com> | 2007-02-13 07:26:21 -0500 |
---|---|---|
committer | Andi Kleen <andi@basil.nowhere.org> | 2007-02-13 07:26:21 -0500 |
commit | c119ecce894120790903ef535dac3e105f3d6cde (patch) | |
tree | b9a60fe46b03d396ba396912c237e6ee2e9ef873 /arch/i386/mm/pgtable.c | |
parent | 90611fe923aa3ac7ffb9e5df45c83860b0f00227 (diff) |
[PATCH] MM: page allocation hooks for VMI backend
The VMI backend uses explicit page type notification to track shadow page
tables. The allocation of page table roots is especially tricky. We need to
clone the root for non-PAE mode while it is protected under the pgd lock to
correctly copy the shadow.
We don't need to allocate pgds in PAE mode, (PDPs in Intel terminology) as
they only have 4 entries, and are cached entirely by the processor, which
makes shadowing them rather simple.
For base page table level allocation, pmd_populate provides the exact hook
point we need. Also, we need to allocate pages when splitting a large page,
and we must release pages before returning the page to any free pool.
Despite being required with these slightly odd semantics for VMI, Xen also
uses these hooks to determine the exact moment when page tables are created or
released.
AK: All nops for other architectures
Signed-off-by: Zachary Amsden <zach@vmware.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Andi Kleen <ak@suse.de>
Cc: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Chris Wright <chrisw@sous-sol.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Diffstat (limited to 'arch/i386/mm/pgtable.c')
-rw-r--r-- | arch/i386/mm/pgtable.c | 24 |
1 files changed, 20 insertions, 4 deletions
diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c index f349eaf450b0..b5f538f52272 100644 --- a/arch/i386/mm/pgtable.c +++ b/arch/i386/mm/pgtable.c | |||
@@ -248,9 +248,15 @@ void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused) | |||
248 | clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, | 248 | clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, |
249 | swapper_pg_dir + USER_PTRS_PER_PGD, | 249 | swapper_pg_dir + USER_PTRS_PER_PGD, |
250 | KERNEL_PGD_PTRS); | 250 | KERNEL_PGD_PTRS); |
251 | |||
251 | if (PTRS_PER_PMD > 1) | 252 | if (PTRS_PER_PMD > 1) |
252 | return; | 253 | return; |
253 | 254 | ||
255 | /* must happen under lock */ | ||
256 | paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT, | ||
257 | __pa(swapper_pg_dir) >> PAGE_SHIFT, | ||
258 | USER_PTRS_PER_PGD, PTRS_PER_PGD - USER_PTRS_PER_PGD); | ||
259 | |||
254 | pgd_list_add(pgd); | 260 | pgd_list_add(pgd); |
255 | spin_unlock_irqrestore(&pgd_lock, flags); | 261 | spin_unlock_irqrestore(&pgd_lock, flags); |
256 | } | 262 | } |
@@ -260,6 +266,7 @@ void pgd_dtor(void *pgd, struct kmem_cache *cache, unsigned long unused) | |||
260 | { | 266 | { |
261 | unsigned long flags; /* can be called from interrupt context */ | 267 | unsigned long flags; /* can be called from interrupt context */ |
262 | 268 | ||
269 | paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT); | ||
263 | spin_lock_irqsave(&pgd_lock, flags); | 270 | spin_lock_irqsave(&pgd_lock, flags); |
264 | pgd_list_del(pgd); | 271 | pgd_list_del(pgd); |
265 | spin_unlock_irqrestore(&pgd_lock, flags); | 272 | spin_unlock_irqrestore(&pgd_lock, flags); |
@@ -277,13 +284,18 @@ pgd_t *pgd_alloc(struct mm_struct *mm) | |||
277 | pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); | 284 | pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); |
278 | if (!pmd) | 285 | if (!pmd) |
279 | goto out_oom; | 286 | goto out_oom; |
287 | paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT); | ||
280 | set_pgd(&pgd[i], __pgd(1 + __pa(pmd))); | 288 | set_pgd(&pgd[i], __pgd(1 + __pa(pmd))); |
281 | } | 289 | } |
282 | return pgd; | 290 | return pgd; |
283 | 291 | ||
284 | out_oom: | 292 | out_oom: |
285 | for (i--; i >= 0; i--) | 293 | for (i--; i >= 0; i--) { |
286 | kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1)); | 294 | pgd_t pgdent = pgd[i]; |
295 | void* pmd = (void *)__va(pgd_val(pgdent)-1); | ||
296 | paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); | ||
297 | kmem_cache_free(pmd_cache, pmd); | ||
298 | } | ||
287 | kmem_cache_free(pgd_cache, pgd); | 299 | kmem_cache_free(pgd_cache, pgd); |
288 | return NULL; | 300 | return NULL; |
289 | } | 301 | } |
@@ -294,8 +306,12 @@ void pgd_free(pgd_t *pgd) | |||
294 | 306 | ||
295 | /* in the PAE case user pgd entries are overwritten before usage */ | 307 | /* in the PAE case user pgd entries are overwritten before usage */ |
296 | if (PTRS_PER_PMD > 1) | 308 | if (PTRS_PER_PMD > 1) |
297 | for (i = 0; i < USER_PTRS_PER_PGD; ++i) | 309 | for (i = 0; i < USER_PTRS_PER_PGD; ++i) { |
298 | kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1)); | 310 | pgd_t pgdent = pgd[i]; |
311 | void* pmd = (void *)__va(pgd_val(pgdent)-1); | ||
312 | paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); | ||
313 | kmem_cache_free(pmd_cache, pmd); | ||
314 | } | ||
299 | /* in the non-PAE case, free_pgtables() clears user pgd entries */ | 315 | /* in the non-PAE case, free_pgtables() clears user pgd entries */ |
300 | kmem_cache_free(pgd_cache, pgd); | 316 | kmem_cache_free(pgd_cache, pgd); |
301 | } | 317 | } |