From d01ad8dd56527be72947b4b9997bb2c05783c3ed Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 2 May 2007 19:27:10 +0200 Subject: [PATCH] x86: Improve handling of kernel mappings in change_page_attr Fix various broken corner cases in i386 and x86-64 change_page_attr. AK: split off from tighten kernel image access rights Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen --- include/asm-i386/pgtable.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/asm-i386/pgtable.h') diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index c3b58d473a55..143ddc42b86f 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -159,6 +159,7 @@ void paging_init(void); extern unsigned long long __PAGE_KERNEL, __PAGE_KERNEL_EXEC; #define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW) +#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW) #define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD) #define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) #define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) @@ -166,6 +167,7 @@ extern unsigned long long __PAGE_KERNEL, __PAGE_KERNEL_EXEC; #define PAGE_KERNEL __pgprot(__PAGE_KERNEL) #define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO) #define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC) +#define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX) #define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE) #define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE) #define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC) -- cgit v1.2.2 From 3dc494e86d1c93afd4c66385f270899dbfae483d Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:13 +0200 Subject: [PATCH] i386: PARAVIRT: Add pagetable accessors to pack and unpack pagetable entries Add a set of accessors to pack, unpack and modify page table entries (at all levels). This allows a paravirt implementation to control the contents of pgd/pmd/pte entries. For example, Xen uses this to convert the (pseudo-)physical address into a machine address when populating a pagetable entry, and converting back to pphys address when an entry is read. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Acked-by: Ingo Molnar --- include/asm-i386/pgtable.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/asm-i386/pgtable.h') diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index 143ddc42b86f..147f2553784d 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -266,6 +266,8 @@ static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PSE; return p #define pte_update(mm, addr, ptep) do { } while (0) #define pte_update_defer(mm, addr, ptep) do { } while (0) #define paravirt_map_pt_hook(slot, va, pfn) do { } while (0) + +#define raw_ptep_get_and_clear(xp) native_ptep_get_and_clear(xp) #endif /* -- cgit v1.2.2 From b239fb2501117bf3aeb4dd6926edd855be92333d Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:13 +0200 Subject: [PATCH] i386: PARAVIRT: Hooks to set up initial pagetable This patch introduces paravirt_ops hooks to control how the kernel's initial pagetable is set up. In the case of a native boot, the very early bootstrap code creates a simple non-PAE pagetable to map the kernel and physical memory. When the VM subsystem is initialized, it creates a proper pagetable which respects the PAE mode, large pages, etc. When booting under a hypervisor, there are many possibilities for what paging environment the hypervisor establishes for the guest kernel, so the constructon of the kernel's pagetable depends on the hypervisor. In the case of Xen, the hypervisor boots the kernel with a fully constructed pagetable, which is already using PAE if necessary. Also, Xen requires particular care when constructing pagetables to make sure all pagetables are always mapped read-only. In order to make this easier, kernel's initial pagetable construction has been changed to only allocate and initialize a pagetable page if there's no page already present in the pagetable. This allows the Xen paravirt backend to make a copy of the hypervisor-provided pagetable, allowing the kernel to establish any more mappings it needs while keeping the existing ones. A slightly subtle point which is worth highlighting here is that Xen requires all kernel mappings to share the same pte_t pages between all pagetables, so that updating a kernel page's mapping in one pagetable is reflected in all other pagetables. This makes it possible to allocate a page and attach it to a pagetable without having to explicitly enumerate that page's mapping in all pagetables. And: +From: "Eric W. Biederman" If we don't set the leaf page table entries it is quite possible that will inherit and incorrect page table entry from the initial boot page table setup in head.S. So we need to redo the effort here, so we pick up PSE, PGE and the like. Hypervisors like Xen require that their page tables be read-only, which is slightly incompatible with our low identity mappings, however I discussed this with Jeremy he has modified the Xen early set_pte function to avoid problems in this area. Signed-off-by: Eric W. Biederman Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Acked-by: William Irwin Cc: Ingo Molnar --- include/asm-i386/pgtable.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/asm-i386/pgtable.h') diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index 147f2553784d..0790ad6ed440 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -514,6 +514,22 @@ do { \ * tables contain all the necessary information. */ #define update_mmu_cache(vma,address,pte) do { } while (0) + +void native_pagetable_setup_start(pgd_t *base); +void native_pagetable_setup_done(pgd_t *base); + +#ifndef CONFIG_PARAVIRT +static inline void paravirt_pagetable_setup_start(pgd_t *base) +{ + native_pagetable_setup_start(base); +} + +static inline void paravirt_pagetable_setup_done(pgd_t *base) +{ + native_pagetable_setup_done(base); +} +#endif /* !CONFIG_PARAVIRT */ + #endif /* !__ASSEMBLY__ */ #ifdef CONFIG_FLATMEM -- cgit v1.2.2 From 5311ab62cdc7788784971ed816ce85e926f3e994 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:13 +0200 Subject: [PATCH] i386: PARAVIRT: Allow paravirt backend to choose kernel PMD sharing Normally when running in PAE mode, the 4th PMD maps the kernel address space, which can be shared among all processes (since they all need the same kernel mappings). Xen, however, does not allow guests to have the kernel pmd shared between page tables, so parameterize pgtable.c to allow both modes of operation. There are several side-effects of this. One is that vmalloc will update the kernel address space mappings, and those updates need to be propagated into all processes if the kernel mappings are not intrinsically shared. In the non-PAE case, this is done by maintaining a pgd_list of all processes; this list is used when all process pagetables must be updated. pgd_list is threaded via otherwise unused entries in the page structure for the pgd, which means that the pgd must be page-sized for this to work. Normally the PAE pgd is only 4x64 byte entries large, but Xen requires the PAE pgd to page aligned anyway, so this patch forces the pgd to be page aligned+sized when the kernel pmd is unshared, to accomodate both these requirements. Also, since there may be several distinct kernel pmds (if the user/kernel split is below 3G), there's no point in allocating them from a slab cache; they're just allocated with get_free_page and initialized appropriately. (Of course the could be cached if there is just a single kernel pmd - which is the default with a 3G user/kernel split - but it doesn't seem worthwhile to add yet another case into this code). [ Many thanks to wli for review comments. ] Signed-off-by: Jeremy Fitzhardinge Signed-off-by: William Lee Irwin III Signed-off-by: Andi Kleen Cc: Zachary Amsden Cc: Christoph Lameter Acked-by: Ingo Molnar Signed-off-by: Andrew Morton --- include/asm-i386/pgtable.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/asm-i386/pgtable.h') diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index 0790ad6ed440..5b88a6a1278e 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -243,6 +243,8 @@ static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte_low |= _PAGE_ACCESSED; re static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; } static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PSE; return pte; } +extern void vmalloc_sync_all(void); + #ifdef CONFIG_X86_PAE # include #else -- cgit v1.2.2 From a27fe809b82c5e18932fcceded28d0d1481ce7bb Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:15 +0200 Subject: [PATCH] i386: PARAVIRT: revert map_pt_hook. Back out the map_pt_hook to clear the way for kmap_atomic_pte. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Zachary Amsden --- include/asm-i386/pgtable.h | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) (limited to 'include/asm-i386/pgtable.h') diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index 5b88a6a1278e..6599f2aa91b7 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -267,7 +267,6 @@ extern void vmalloc_sync_all(void); */ #define pte_update(mm, addr, ptep) do { } while (0) #define pte_update_defer(mm, addr, ptep) do { } while (0) -#define paravirt_map_pt_hook(slot, va, pfn) do { } while (0) #define raw_ptep_get_and_clear(xp) native_ptep_get_and_clear(xp) #endif @@ -476,24 +475,10 @@ extern pte_t *lookup_address(unsigned long address); #endif #if defined(CONFIG_HIGHPTE) -#define pte_offset_map(dir, address) \ -({ \ - pte_t *__ptep; \ - unsigned pfn = pmd_val(*(dir)) >> PAGE_SHIFT; \ - __ptep = (pte_t *)kmap_atomic(pfn_to_page(pfn),KM_PTE0);\ - paravirt_map_pt_hook(KM_PTE0,__ptep, pfn); \ - __ptep = __ptep + pte_index(address); \ - __ptep; \ -}) -#define pte_offset_map_nested(dir, address) \ -({ \ - pte_t *__ptep; \ - unsigned pfn = pmd_val(*(dir)) >> PAGE_SHIFT; \ - __ptep = (pte_t *)kmap_atomic(pfn_to_page(pfn),KM_PTE1);\ - paravirt_map_pt_hook(KM_PTE1,__ptep, pfn); \ - __ptep = __ptep + pte_index(address); \ - __ptep; \ -}) +#define pte_offset_map(dir, address) \ + ((pte_t *)kmap_atomic(pmd_page(*(dir)),KM_PTE0) + pte_index(address)) +#define pte_offset_map_nested(dir, address) \ + ((pte_t *)kmap_atomic(pmd_page(*(dir)),KM_PTE1) + pte_index(address)) #define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0) #define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1) #else -- cgit v1.2.2 From ce6234b5298902aaec831a67d5f8d9bd2ef5a488 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:15 +0200 Subject: [PATCH] i386: PARAVIRT: add kmap_atomic_pte for mapping highpte pages Xen and VMI both have special requirements when mapping a highmem pte page into the kernel address space. These can be dealt with by adding a new kmap_atomic_pte() function for mapping highptes, and hooking it into the paravirt_ops infrastructure. Xen specifically wants to map the pte page RO, so this patch exposes a helper function, kmap_atomic_prot, which maps the page with the specified page protections. This also adds a kmap_flush_unused() function to clear out the cached kmap mappings. Xen needs this to clear out any potential stray RW mappings of pages which will become part of a pagetable. [ Zach - vmi.c will need some attention after this patch. It wasn't immediately obvious to me what needs to be done. ] Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Zachary Amsden --- include/asm-i386/pgtable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/asm-i386/pgtable.h') diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index 6599f2aa91b7..befc697821e5 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -476,9 +476,9 @@ extern pte_t *lookup_address(unsigned long address); #if defined(CONFIG_HIGHPTE) #define pte_offset_map(dir, address) \ - ((pte_t *)kmap_atomic(pmd_page(*(dir)),KM_PTE0) + pte_index(address)) + ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + pte_index(address)) #define pte_offset_map_nested(dir, address) \ - ((pte_t *)kmap_atomic(pmd_page(*(dir)),KM_PTE1) + pte_index(address)) + ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE1) + pte_index(address)) #define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0) #define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1) #else -- cgit v1.2.2 From 4cdd9c8931767e1c56a51a1078d33a8c340f4405 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:15 +0200 Subject: [PATCH] i386: PARAVIRT: drop unused ptep_get_and_clear In shadow mode hypervisors, ptep_get_and_clear achieves the desired purpose of keeping the shadows in sync by issuing a native_get_and_clear, followed by a call to pte_update, which indicates the PTE has been modified. Direct mode hypervisors (Xen) have no need for this anyway, and will trap the update using writable pagetables. This means no hypervisor makes use of ptep_get_and_clear; there is no reason to have it in the paravirt-ops structure. Change confusing terminology about raw vs. native functions into consistent use of native_pte_xxx for operations which do not invoke paravirt-ops. Signed-off-by: Zachary Amsden Signed-off-by: Andi Kleen --- include/asm-i386/pgtable.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/asm-i386/pgtable.h') diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index befc697821e5..e7ddd2341309 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -267,8 +267,6 @@ extern void vmalloc_sync_all(void); */ #define pte_update(mm, addr, ptep) do { } while (0) #define pte_update_defer(mm, addr, ptep) do { } while (0) - -#define raw_ptep_get_and_clear(xp) native_ptep_get_and_clear(xp) #endif /* @@ -335,7 +333,7 @@ do { \ #define __HAVE_ARCH_PTEP_GET_AND_CLEAR static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - pte_t pte = raw_ptep_get_and_clear(ptep); + pte_t pte = native_ptep_get_and_clear(ptep); pte_update(mm, addr, ptep); return pte; } -- cgit v1.2.2 From c2c1accd4b2f9c82fb89d40611c7f581948db255 Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Wed, 2 May 2007 19:27:19 +0200 Subject: [PATCH] i386: pte clear optimization When exiting from an address space, no special hypervisor notification of page table updates needs to occur; direct page table hypervisors, such as Xen, switch to another address space first (init_mm) and unprotects the page tables to avoid the cost of trapping to the hypervisor for each pte_clear. Shadow mode hypervisors, such as VMI and lhype don't need to do the extra work of calling through paravirt-ops, and can just directly clear the page table entries without notifiying the hypervisor, since all the page tables are about to be freed. So introduce native_pte_clear functions which bypass any paravirt-ops notification. This results in a significant performance win for VMI and removes some indirect calls from zap_pte_range. Note the 3-level paging already had a native_pte_clear function, thus demanding argument conformance and extra args for the 2-level definition. Signed-off-by: Zachary Amsden Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen --- include/asm-i386/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/asm-i386/pgtable.h') diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index e7ddd2341309..00e97a9d367e 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -344,7 +344,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long pte_t pte; if (full) { pte = *ptep; - pte_clear(mm, addr, ptep); + native_pte_clear(mm, addr, ptep); } else { pte = ptep_get_and_clear(mm, addr, ptep); } -- cgit v1.2.2 From 9e5e3162b2d5e4466187ecd63c9eec2de33cb7bc Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Wed, 2 May 2007 19:27:19 +0200 Subject: [PATCH] i386: pte simplify ops Add comment and condense code to make use of native_local_ptep_get_and_clear function. Also, it turns out the 2-level and 3-level paging definitions were identical, so move the common definition into pgtable.h Signed-off-by: Zachary Amsden Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen --- include/asm-i386/pgtable.h | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'include/asm-i386/pgtable.h') diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index 00e97a9d367e..c6b8b944120c 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -269,6 +269,16 @@ extern void vmalloc_sync_all(void); #define pte_update_defer(mm, addr, ptep) do { } while (0) #endif +/* local pte updates need not use xchg for locking */ +static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) +{ + pte_t res = *ptep; + + /* Pure native function needs no input for mm, addr */ + native_pte_clear(NULL, 0, ptep); + return res; +} + /* * We only update the dirty/accessed state if we set * the dirty bit by hand in the kernel, since the hardware @@ -343,8 +353,11 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long { pte_t pte; if (full) { - pte = *ptep; - native_pte_clear(mm, addr, ptep); + /* + * Full address destruction in progress; paravirt does not + * care about updates and native needs no locking + */ + pte = native_local_ptep_get_and_clear(ptep); } else { pte = ptep_get_and_clear(mm, addr, ptep); } -- cgit v1.2.2