aboutsummaryrefslogtreecommitdiffstats
path: root/arch/i386
diff options
context:
space:
mode:
authorZachary Amsden <zach@vmware.com>2007-02-13 07:26:21 -0500
committerAndi Kleen <andi@basil.nowhere.org>2007-02-13 07:26:21 -0500
commitc119ecce894120790903ef535dac3e105f3d6cde (patch)
treeb9a60fe46b03d396ba396912c237e6ee2e9ef873 /arch/i386
parent90611fe923aa3ac7ffb9e5df45c83860b0f00227 (diff)
[PATCH] MM: page allocation hooks for VMI backend
The VMI backend uses explicit page type notification to track shadow page tables. The allocation of page table roots is especially tricky. We need to clone the root for non-PAE mode while it is protected under the pgd lock to correctly copy the shadow. We don't need to allocate pgds in PAE mode, (PDPs in Intel terminology) as they only have 4 entries, and are cached entirely by the processor, which makes shadowing them rather simple. For base page table level allocation, pmd_populate provides the exact hook point we need. Also, we need to allocate pages when splitting a large page, and we must release pages before returning the page to any free pool. Despite being required with these slightly odd semantics for VMI, Xen also uses these hooks to determine the exact moment when page tables are created or released. AK: All nops for other architectures Signed-off-by: Zachary Amsden <zach@vmware.com> Signed-off-by: Andi Kleen <ak@suse.de> Cc: Andi Kleen <ak@suse.de> Cc: Jeremy Fitzhardinge <jeremy@xensource.com> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Chris Wright <chrisw@sous-sol.org> Signed-off-by: Andrew Morton <akpm@osdl.org>
Diffstat (limited to 'arch/i386')
-rw-r--r--arch/i386/kernel/paravirt.c6
-rw-r--r--arch/i386/mm/init.c4
-rw-r--r--arch/i386/mm/pageattr.c2
-rw-r--r--arch/i386/mm/pgtable.c24
4 files changed, 32 insertions, 4 deletions
diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c
index e55fd05da0f5..7329ec9fcc99 100644
--- a/arch/i386/kernel/paravirt.c
+++ b/arch/i386/kernel/paravirt.c
@@ -550,6 +550,12 @@ struct paravirt_ops paravirt_ops = {
550 .flush_tlb_kernel = native_flush_tlb_global, 550 .flush_tlb_kernel = native_flush_tlb_global,
551 .flush_tlb_single = native_flush_tlb_single, 551 .flush_tlb_single = native_flush_tlb_single,
552 552
553 .alloc_pt = (void *)native_nop,
554 .alloc_pd = (void *)native_nop,
555 .alloc_pd_clone = (void *)native_nop,
556 .release_pt = (void *)native_nop,
557 .release_pd = (void *)native_nop,
558
553 .set_pte = native_set_pte, 559 .set_pte = native_set_pte,
554 .set_pte_at = native_set_pte_at, 560 .set_pte_at = native_set_pte_at,
555 .set_pmd = native_set_pmd, 561 .set_pmd = native_set_pmd,
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c
index c5c5ea700cc7..ae436882af7a 100644
--- a/arch/i386/mm/init.c
+++ b/arch/i386/mm/init.c
@@ -62,6 +62,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
62 62
63#ifdef CONFIG_X86_PAE 63#ifdef CONFIG_X86_PAE
64 pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); 64 pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
65 paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT);
65 set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); 66 set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
66 pud = pud_offset(pgd, 0); 67 pud = pud_offset(pgd, 0);
67 if (pmd_table != pmd_offset(pud, 0)) 68 if (pmd_table != pmd_offset(pud, 0))
@@ -82,6 +83,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
82{ 83{
83 if (pmd_none(*pmd)) { 84 if (pmd_none(*pmd)) {
84 pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); 85 pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
86 paravirt_alloc_pt(__pa(page_table) >> PAGE_SHIFT);
85 set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); 87 set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
86 if (page_table != pte_offset_kernel(pmd, 0)) 88 if (page_table != pte_offset_kernel(pmd, 0))
87 BUG(); 89 BUG();
@@ -345,6 +347,8 @@ static void __init pagetable_init (void)
345 /* Init entries of the first-level page table to the zero page */ 347 /* Init entries of the first-level page table to the zero page */
346 for (i = 0; i < PTRS_PER_PGD; i++) 348 for (i = 0; i < PTRS_PER_PGD; i++)
347 set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT)); 349 set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT));
350#else
351 paravirt_alloc_pd(__pa(swapper_pg_dir) >> PAGE_SHIFT);
348#endif 352#endif
349 353
350 /* Enable PSE if available */ 354 /* Enable PSE if available */
diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c
index e223b1d4981c..412ebbd8adb0 100644
--- a/arch/i386/mm/pageattr.c
+++ b/arch/i386/mm/pageattr.c
@@ -60,6 +60,7 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot,
60 address = __pa(address); 60 address = __pa(address);
61 addr = address & LARGE_PAGE_MASK; 61 addr = address & LARGE_PAGE_MASK;
62 pbase = (pte_t *)page_address(base); 62 pbase = (pte_t *)page_address(base);
63 paravirt_alloc_pt(page_to_pfn(base));
63 for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) { 64 for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
64 set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, 65 set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT,
65 addr == address ? prot : ref_prot)); 66 addr == address ? prot : ref_prot));
@@ -172,6 +173,7 @@ __change_page_attr(struct page *page, pgprot_t prot)
172 if (!PageReserved(kpte_page)) { 173 if (!PageReserved(kpte_page)) {
173 if (cpu_has_pse && (page_private(kpte_page) == 0)) { 174 if (cpu_has_pse && (page_private(kpte_page) == 0)) {
174 ClearPagePrivate(kpte_page); 175 ClearPagePrivate(kpte_page);
176 paravirt_release_pt(page_to_pfn(kpte_page));
175 list_add(&kpte_page->lru, &df_list); 177 list_add(&kpte_page->lru, &df_list);
176 revert_page(kpte_page, address); 178 revert_page(kpte_page, address);
177 } 179 }
diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c
index f349eaf450b0..b5f538f52272 100644
--- a/arch/i386/mm/pgtable.c
+++ b/arch/i386/mm/pgtable.c
@@ -248,9 +248,15 @@ void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused)
248 clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, 248 clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
249 swapper_pg_dir + USER_PTRS_PER_PGD, 249 swapper_pg_dir + USER_PTRS_PER_PGD,
250 KERNEL_PGD_PTRS); 250 KERNEL_PGD_PTRS);
251
251 if (PTRS_PER_PMD > 1) 252 if (PTRS_PER_PMD > 1)
252 return; 253 return;
253 254
255 /* must happen under lock */
256 paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
257 __pa(swapper_pg_dir) >> PAGE_SHIFT,
258 USER_PTRS_PER_PGD, PTRS_PER_PGD - USER_PTRS_PER_PGD);
259
254 pgd_list_add(pgd); 260 pgd_list_add(pgd);
255 spin_unlock_irqrestore(&pgd_lock, flags); 261 spin_unlock_irqrestore(&pgd_lock, flags);
256} 262}
@@ -260,6 +266,7 @@ void pgd_dtor(void *pgd, struct kmem_cache *cache, unsigned long unused)
260{ 266{
261 unsigned long flags; /* can be called from interrupt context */ 267 unsigned long flags; /* can be called from interrupt context */
262 268
269 paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT);
263 spin_lock_irqsave(&pgd_lock, flags); 270 spin_lock_irqsave(&pgd_lock, flags);
264 pgd_list_del(pgd); 271 pgd_list_del(pgd);
265 spin_unlock_irqrestore(&pgd_lock, flags); 272 spin_unlock_irqrestore(&pgd_lock, flags);
@@ -277,13 +284,18 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
277 pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); 284 pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
278 if (!pmd) 285 if (!pmd)
279 goto out_oom; 286 goto out_oom;
287 paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT);
280 set_pgd(&pgd[i], __pgd(1 + __pa(pmd))); 288 set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
281 } 289 }
282 return pgd; 290 return pgd;
283 291
284out_oom: 292out_oom:
285 for (i--; i >= 0; i--) 293 for (i--; i >= 0; i--) {
286 kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1)); 294 pgd_t pgdent = pgd[i];
295 void* pmd = (void *)__va(pgd_val(pgdent)-1);
296 paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
297 kmem_cache_free(pmd_cache, pmd);
298 }
287 kmem_cache_free(pgd_cache, pgd); 299 kmem_cache_free(pgd_cache, pgd);
288 return NULL; 300 return NULL;
289} 301}
@@ -294,8 +306,12 @@ void pgd_free(pgd_t *pgd)
294 306
295 /* in the PAE case user pgd entries are overwritten before usage */ 307 /* in the PAE case user pgd entries are overwritten before usage */
296 if (PTRS_PER_PMD > 1) 308 if (PTRS_PER_PMD > 1)
297 for (i = 0; i < USER_PTRS_PER_PGD; ++i) 309 for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
298 kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1)); 310 pgd_t pgdent = pgd[i];
311 void* pmd = (void *)__va(pgd_val(pgdent)-1);
312 paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
313 kmem_cache_free(pmd_cache, pmd);
314 }
299 /* in the non-PAE case, free_pgtables() clears user pgd entries */ 315 /* in the non-PAE case, free_pgtables() clears user pgd entries */
300 kmem_cache_free(pgd_cache, pgd); 316 kmem_cache_free(pgd_cache, pgd);
301} 317}