author		Zhang, Yanmin <yanmin_zhang@linux.intel.com>	2006-03-22 03:08:50 -0500
committer	Linus Torvalds <torvalds@g5.osdl.org>	2006-03-22 10:54:03 -0500
commit		8f860591ffb29738cf5539b6fbf27f50dcdeb380 (patch)
tree		4265e45c4a79d86a16cd5175a836e8c531be8117
parent		aed75ff3caafce404d9be7f0c088716375be5279 (diff)
[PATCH] Enable mprotect on huge pages
2.6.16-rc3 uses hugetlb on-demand paging, but it doesn't support hugetlb mprotect.

From: David Gibson <david@gibson.dropbear.id.au>

Remove a test from the mprotect() path which checks that the mprotect()ed range on a hugepage VMA is hugepage aligned (yes, really, the sense of is_aligned_hugepage_range() is the opposite of what you'd guess :-/).

In fact, we don't need this test.  If the given addresses match the beginning/end of a hugepage VMA they must already be suitably aligned.  If they don't, then mprotect_fixup() will attempt to split the VMA.  The very first test in split_vma() will check for a badly aligned address on a hugepage VMA and return -EINVAL if necessary.

From: "Chen, Kenneth W" <kenneth.w.chen@intel.com>

On i386 and x86-64, the pte flag _PAGE_PSE collides with _PAGE_PROTNONE, so the identity of a hugetlb pte is lost when changing page protection via mprotect.  A page fault that occurs later will then trigger a bug check in huge_pte_alloc().

The fix is to always make the new pte a hugetlb pte, and also to clean up legacy code where _PAGE_PRESENT was forced on in the pre-demand-faulting days.

Signed-off-by: Zhang Yanmin <yanmin.zhang@intel.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: William Lee Irwin III <wli@holomorphy.com>
Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>
Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
Cc: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
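[Editor's note: for context, a minimal user-space sketch of what this patch enables.  It assumes hugetlbfs is mounted at /mnt/huge (a hypothetical mount point), 2 MB huge pages, and at least one page in the pool (/proc/sys/vm/nr_hugepages).  Before this change the mprotect() call below failed with EACCES on a hugetlb VMA; this is an illustration, not part of the patch.]

#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

#define HPAGE_SIZE (2 * 1024 * 1024)	/* assumes 2 MB huge pages */

int main(void)
{
	/* hugetlbfs must already be mounted at this (hypothetical) path */
	int fd = open("/mnt/huge/test", O_CREAT | O_RDWR, 0600);
	if (fd < 0) { perror("open"); return 1; }

	char *p = mmap(NULL, HPAGE_SIZE, PROT_READ | PROT_WRITE,
		       MAP_SHARED, fd, 0);
	if (p == MAP_FAILED) { perror("mmap"); return 1; }

	p[0] = 1;	/* fault the huge page in (on-demand paging) */

	/* pre-patch: fails with EACCES; post-patch: succeeds and the
	 * huge pte keeps its identity across the protection change */
	if (mprotect(p, HPAGE_SIZE, PROT_READ) < 0)
		perror("mprotect");

	munmap(p, HPAGE_SIZE);
	close(fd);
	unlink("/mnt/huge/test");
	return 0;
}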
-rw-r--r--	include/asm-i386/pgtable.h	5
-rw-r--r--	include/asm-ia64/pgtable.h	2
-rw-r--r--	include/asm-x86_64/pgtable.h	4
-rw-r--r--	include/linux/hugetlb.h		4
-rw-r--r--	mm/hugetlb.c			29
-rw-r--r--	mm/mprotect.c			12
6 files changed, 43 insertions, 13 deletions
diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h
index 088a945bf26b..ee056c41a9fb 100644
--- a/include/asm-i386/pgtable.h
+++ b/include/asm-i386/pgtable.h
@@ -219,13 +219,12 @@ extern unsigned long pg0[];
  * The following only work if pte_present() is true.
  * Undefined behaviour if not..
  */
-#define __LARGE_PTE (_PAGE_PSE | _PAGE_PRESENT)
 static inline int pte_user(pte_t pte)		{ return (pte).pte_low & _PAGE_USER; }
 static inline int pte_read(pte_t pte)		{ return (pte).pte_low & _PAGE_USER; }
 static inline int pte_dirty(pte_t pte)		{ return (pte).pte_low & _PAGE_DIRTY; }
 static inline int pte_young(pte_t pte)		{ return (pte).pte_low & _PAGE_ACCESSED; }
 static inline int pte_write(pte_t pte)		{ return (pte).pte_low & _PAGE_RW; }
-static inline int pte_huge(pte_t pte)		{ return ((pte).pte_low & __LARGE_PTE) == __LARGE_PTE; }
+static inline int pte_huge(pte_t pte)		{ return (pte).pte_low & _PAGE_PSE; }
 
 /*
  * The following only works if pte_present() is not true.
@@ -242,7 +241,7 @@ static inline pte_t pte_mkexec(pte_t pte) { (pte).pte_low |= _PAGE_USER; return
 static inline pte_t pte_mkdirty(pte_t pte)	{ (pte).pte_low |= _PAGE_DIRTY; return pte; }
 static inline pte_t pte_mkyoung(pte_t pte)	{ (pte).pte_low |= _PAGE_ACCESSED; return pte; }
 static inline pte_t pte_mkwrite(pte_t pte)	{ (pte).pte_low |= _PAGE_RW; return pte; }
-static inline pte_t pte_mkhuge(pte_t pte)	{ (pte).pte_low |= __LARGE_PTE; return pte; }
+static inline pte_t pte_mkhuge(pte_t pte)	{ (pte).pte_low |= _PAGE_PSE; return pte; }
 
 #ifdef CONFIG_X86_PAE
 # include <asm/pgtable-3level.h>
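[Editor's note: the collision the commit message describes can be demonstrated in isolation.  In the 2.6.16-era i386 headers, _PAGE_PROTNONE shares bit 7 with _PAGE_PSE, and bit 7 is all that remains once mprotect(PROT_NONE) clears _PAGE_PRESENT.  A standalone sketch, with flag values copied from those headers and helpers renamed to avoid clashing with the kernel's:]

#include <stdio.h>

#define _PAGE_PRESENT	0x001
#define _PAGE_PSE	0x080	/* huge page, meaningful when present */
#define _PAGE_PROTNONE	0x080	/* same bit, meaningful when not present */

#define __LARGE_PTE (_PAGE_PSE | _PAGE_PRESENT)

/* old test: demanded _PAGE_PRESENT as well as _PAGE_PSE */
static int old_pte_huge(unsigned long pte)
{
	return (pte & __LARGE_PTE) == __LARGE_PTE;
}

/* new test: _PAGE_PSE alone identifies a huge pte */
static int new_pte_huge(unsigned long pte)
{
	return !!(pte & _PAGE_PSE);
}

int main(void)
{
	unsigned long mapped = _PAGE_PRESENT | _PAGE_PSE;
	unsigned long prot_none = _PAGE_PROTNONE;  /* present bit cleared */

	/* prints old=1 new=1: both agree while the pte is present */
	printf("mapped:    old=%d new=%d\n",
	       old_pte_huge(mapped), new_pte_huge(mapped));
	/* prints old=0 new=1: the old test loses the huge identity,
	 * which is what later tripped the bug check in huge_pte_alloc() */
	printf("PROT_NONE: old=%d new=%d\n",
	       old_pte_huge(prot_none), new_pte_huge(prot_none));
	return 0;
}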
diff --git a/include/asm-ia64/pgtable.h b/include/asm-ia64/pgtable.h
index e2560c58384b..5890972a69bf 100644
--- a/include/asm-ia64/pgtable.h
+++ b/include/asm-ia64/pgtable.h
@@ -314,7 +314,7 @@ ia64_phys_addr_valid (unsigned long addr)
 #define pte_mkyoung(pte)	(__pte(pte_val(pte) | _PAGE_A))
 #define pte_mkclean(pte)	(__pte(pte_val(pte) & ~_PAGE_D))
 #define pte_mkdirty(pte)	(__pte(pte_val(pte) | _PAGE_D))
-#define pte_mkhuge(pte)	(__pte(pte_val(pte) | _PAGE_P))
+#define pte_mkhuge(pte)	(__pte(pte_val(pte)))
 
 /*
  * Macro to a page protection value as "uncacheable".  Note that "protection" is really a
diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h
index 715fd94cf577..a617d364d08d 100644
--- a/include/asm-x86_64/pgtable.h
+++ b/include/asm-x86_64/pgtable.h
@@ -273,7 +273,7 @@ static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
 static inline int pte_young(pte_t pte)		{ return pte_val(pte) & _PAGE_ACCESSED; }
 static inline int pte_write(pte_t pte)		{ return pte_val(pte) & _PAGE_RW; }
 static inline int pte_file(pte_t pte)		{ return pte_val(pte) & _PAGE_FILE; }
-static inline int pte_huge(pte_t pte)		{ return (pte_val(pte) & __LARGE_PTE) == __LARGE_PTE; }
+static inline int pte_huge(pte_t pte)		{ return pte_val(pte) & _PAGE_PSE; }
 
 static inline pte_t pte_rdprotect(pte_t pte)	{ set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_USER)); return pte; }
 static inline pte_t pte_exprotect(pte_t pte)	{ set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_USER)); return pte; }
@@ -285,7 +285,7 @@ static inline pte_t pte_mkexec(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _
 static inline pte_t pte_mkdirty(pte_t pte)	{ set_pte(&pte, __pte(pte_val(pte) | _PAGE_DIRTY)); return pte; }
 static inline pte_t pte_mkyoung(pte_t pte)	{ set_pte(&pte, __pte(pte_val(pte) | _PAGE_ACCESSED)); return pte; }
 static inline pte_t pte_mkwrite(pte_t pte)	{ set_pte(&pte, __pte(pte_val(pte) | _PAGE_RW)); return pte; }
-static inline pte_t pte_mkhuge(pte_t pte)	{ set_pte(&pte, __pte(pte_val(pte) | __LARGE_PTE)); return pte; }
+static inline pte_t pte_mkhuge(pte_t pte)	{ set_pte(&pte, __pte(pte_val(pte) | _PAGE_PSE)); return pte; }
 
 struct vm_area_struct;
 
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 68d82ad6b17c..fa83836b63d2 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -41,6 +41,8 @@ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 			pmd_t *pmd, int write);
 int is_aligned_hugepage_range(unsigned long addr, unsigned long len);
 int pmd_huge(pmd_t pmd);
+void hugetlb_change_protection(struct vm_area_struct *vma,
+		unsigned long address, unsigned long end, pgprot_t newprot);
 
 #ifndef ARCH_HAS_HUGEPAGE_ONLY_RANGE
 #define is_hugepage_only_range(mm, addr, len)	0
@@ -101,6 +103,8 @@ static inline unsigned long hugetlb_total_pages(void)
 #define free_huge_page(p)			({ (void)(p); BUG(); })
 #define hugetlb_fault(mm, vma, addr, write)	({ BUG(); 0; })
 
+#define hugetlb_change_protection(vma, address, end, newprot)
+
 #ifndef HPAGE_MASK
 #define HPAGE_MASK	PAGE_MASK		/* Keep the compiler happy */
 #define HPAGE_SIZE	PAGE_SIZE
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 20117a4b8ab6..783098f6cf8e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -565,3 +565,32 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	return i;
 }
+
+void hugetlb_change_protection(struct vm_area_struct *vma,
+		unsigned long address, unsigned long end, pgprot_t newprot)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	unsigned long start = address;
+	pte_t *ptep;
+	pte_t pte;
+
+	BUG_ON(address >= end);
+	flush_cache_range(vma, address, end);
+
+	spin_lock(&mm->page_table_lock);
+	for (; address < end; address += HPAGE_SIZE) {
+		ptep = huge_pte_offset(mm, address);
+		if (!ptep)
+			continue;
+		if (!pte_none(*ptep)) {
+			pte = huge_ptep_get_and_clear(mm, address, ptep);
+			pte = pte_mkhuge(pte_modify(pte, newprot));
+			set_huge_pte_at(mm, address, ptep, pte);
+			lazy_mmu_prot_update(pte);
+		}
+	}
+	spin_unlock(&mm->page_table_lock);
+
+	flush_tlb_range(vma, start, end);
+}
+
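[Editor's note: the pte_mkhuge() wrapped around pte_modify() above is the heart of the fix.  pte_modify() keeps only the page frame number and the accessed/dirty bits and replaces everything else with the new protection, so on i386/x86-64 it wipes bit 7 (_PAGE_PSE).  A sketch of that behaviour, with values along the lines of the 2.6.16 i386 headers (simplified, illustration only):]

#include <stdio.h>

#define PTE_MASK	0xfffff000UL	/* page frame number bits */
#define _PAGE_PRESENT	0x001UL
#define _PAGE_RW	0x002UL
#define _PAGE_ACCESSED	0x020UL
#define _PAGE_DIRTY	0x040UL
#define _PAGE_PSE	0x080UL
#define _PAGE_CHG_MASK	(PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)

/* mirrors the i386 pte_modify(): keep pfn + A/D, take the rest from newprot */
static unsigned long pte_modify(unsigned long pte, unsigned long newprot)
{
	return (pte & _PAGE_CHG_MASK) | newprot;
}

int main(void)
{
	unsigned long huge = 0x12345000UL | _PAGE_PRESENT | _PAGE_RW |
			     _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE;
	unsigned long ro = _PAGE_PRESENT;  /* e.g. a PROT_READ vm_page_prot */

	unsigned long pte = pte_modify(huge, ro);
	/* _PAGE_PSE is gone: prints PSE=0, the pte looks like a small page */
	printf("after pte_modify: PSE=%lu\n", pte & _PAGE_PSE);

	pte |= _PAGE_PSE;	/* what pte_mkhuge() re-applies */
	/* prints PSE=128: the huge identity is restored */
	printf("after pte_mkhuge: PSE=%lu\n", pte & _PAGE_PSE);
	return 0;
}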
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 653b8571c1ed..4c14d4289b61 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -124,7 +124,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
 	 * a MAP_NORESERVE private mapping to writable will now reserve.
 	 */
 	if (newflags & VM_WRITE) {
-		if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_SHARED|VM_HUGETLB))) {
+		if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_SHARED))) {
 			charged = nrpages;
 			if (security_vm_enough_memory(charged))
 				return -ENOMEM;
@@ -166,7 +166,10 @@ success:
 	 */
 	vma->vm_flags = newflags;
 	vma->vm_page_prot = newprot;
-	change_protection(vma, start, end, newprot);
+	if (is_vm_hugetlb_page(vma))
+		hugetlb_change_protection(vma, start, end, newprot);
+	else
+		change_protection(vma, start, end, newprot);
 	vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
 	vm_stat_account(mm, newflags, vma->vm_file, nrpages);
 	return 0;
@@ -240,11 +243,6 @@ sys_mprotect(unsigned long start, size_t len, unsigned long prot)
 
 		/* Here we know that vma->vm_start <= nstart < vma->vm_end. */
 
-		if (is_vm_hugetlb_page(vma)) {
-			error = -EACCES;
-			goto out;
-		}
-
 		newflags = vm_flags | (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC));
 
 		/* newflags >> 4 shift VM_MAY% in place of VM_% */