aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristian Borntraeger <borntraeger@de.ibm.com>2008-10-28 06:10:15 -0400
committerMartin Schwidefsky <schwidefsky@de.ibm.com>2008-10-28 06:12:03 -0400
commit250cf776f74b5932a1977d0489cae9206e2351dd (patch)
tree71bb6778e65e8efbdd0cac09a64e4e1194b98c3f
parent2c78091405d6f54748b1fac78c45f2a799e3073a (diff)
[S390] pgtables: Fix race in enable_sie vs. page table ops
The current enable_sie code sets the mm->context.pgstes bit to tell dup_mm that the new mm should have extended page tables. This bit is also used by the s390-specific page table primitives to decide on the page table layout, which means context.pgstes has two meanings. This can cause all kinds of bugs. For example, shrink_zone can call ptep_clear_flush_young while enable_sie is running. ptep_clear_flush_young will test context.pgstes. Since enable_sie changed that value in the old struct mm without changing the page table layout, ptep_clear_flush_young will do the wrong thing. The solution is to split pgstes into two bits: one for the allocation (alloc_pgste) and one for the current state (has_pgste). Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
-rw-r--r--arch/s390/include/asm/mmu.h3
-rw-r--r--arch/s390/include/asm/mmu_context.h19
-rw-r--r--arch/s390/include/asm/pgtable.h8
-rw-r--r--arch/s390/mm/pgtable.c16
4 files changed, 30 insertions, 16 deletions
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index 5dd5e7b3476f..d2b4ff831477 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -7,7 +7,8 @@ typedef struct {
7 unsigned long asce_bits; 7 unsigned long asce_bits;
8 unsigned long asce_limit; 8 unsigned long asce_limit;
9 int noexec; 9 int noexec;
10 int pgstes; 10 int has_pgste; /* The mmu context has extended page tables */
11 int alloc_pgste; /* cloned contexts will have extended page tables */
11} mm_context_t; 12} mm_context_t;
12 13
13#endif 14#endif
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 4c2fbf48c9c4..28ec870655af 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -20,12 +20,25 @@ static inline int init_new_context(struct task_struct *tsk,
20#ifdef CONFIG_64BIT 20#ifdef CONFIG_64BIT
21 mm->context.asce_bits |= _ASCE_TYPE_REGION3; 21 mm->context.asce_bits |= _ASCE_TYPE_REGION3;
22#endif 22#endif
23 if (current->mm->context.pgstes) { 23 if (current->mm->context.alloc_pgste) {
24 /*
25 * alloc_pgste indicates, that any NEW context will be created
26 * with extended page tables. The old context is unchanged. The
27 * page table allocation and the page table operations will
28 * look at has_pgste to distinguish normal and extended page
29 * tables. The only way to create extended page tables is to
30 * set alloc_pgste and then create a new context (e.g. dup_mm).
31 * The page table allocation is called after init_new_context
32 * and if has_pgste is set, it will create extended page
33 * tables.
34 */
24 mm->context.noexec = 0; 35 mm->context.noexec = 0;
25 mm->context.pgstes = 1; 36 mm->context.has_pgste = 1;
37 mm->context.alloc_pgste = 1;
26 } else { 38 } else {
27 mm->context.noexec = s390_noexec; 39 mm->context.noexec = s390_noexec;
28 mm->context.pgstes = 0; 40 mm->context.has_pgste = 0;
41 mm->context.alloc_pgste = 0;
29 } 42 }
30 mm->context.asce_limit = STACK_TOP_MAX; 43 mm->context.asce_limit = STACK_TOP_MAX;
31 crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); 44 crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 1a928f84afd6..7fc76133b3e4 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -679,7 +679,7 @@ static inline void pmd_clear(pmd_t *pmd)
679 679
680static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 680static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
681{ 681{
682 if (mm->context.pgstes) 682 if (mm->context.has_pgste)
683 ptep_rcp_copy(ptep); 683 ptep_rcp_copy(ptep);
684 pte_val(*ptep) = _PAGE_TYPE_EMPTY; 684 pte_val(*ptep) = _PAGE_TYPE_EMPTY;
685 if (mm->context.noexec) 685 if (mm->context.noexec)
@@ -763,7 +763,7 @@ static inline int kvm_s390_test_and_clear_page_dirty(struct mm_struct *mm,
763 struct page *page; 763 struct page *page;
764 unsigned int skey; 764 unsigned int skey;
765 765
766 if (!mm->context.pgstes) 766 if (!mm->context.has_pgste)
767 return -EINVAL; 767 return -EINVAL;
768 rcp_lock(ptep); 768 rcp_lock(ptep);
769 pgste = (unsigned long *) (ptep + PTRS_PER_PTE); 769 pgste = (unsigned long *) (ptep + PTRS_PER_PTE);
@@ -794,7 +794,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
794 int young; 794 int young;
795 unsigned long *pgste; 795 unsigned long *pgste;
796 796
797 if (!vma->vm_mm->context.pgstes) 797 if (!vma->vm_mm->context.has_pgste)
798 return 0; 798 return 0;
799 physpage = pte_val(*ptep) & PAGE_MASK; 799 physpage = pte_val(*ptep) & PAGE_MASK;
800 pgste = (unsigned long *) (ptep + PTRS_PER_PTE); 800 pgste = (unsigned long *) (ptep + PTRS_PER_PTE);
@@ -844,7 +844,7 @@ static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
844static inline void ptep_invalidate(struct mm_struct *mm, 844static inline void ptep_invalidate(struct mm_struct *mm,
845 unsigned long address, pte_t *ptep) 845 unsigned long address, pte_t *ptep)
846{ 846{
847 if (mm->context.pgstes) { 847 if (mm->context.has_pgste) {
848 rcp_lock(ptep); 848 rcp_lock(ptep);
849 __ptep_ipte(address, ptep); 849 __ptep_ipte(address, ptep);
850 ptep_rcp_copy(ptep); 850 ptep_rcp_copy(ptep);
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 3d98ba82ea67..ef3635b52fc0 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -169,7 +169,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
169 unsigned long *table; 169 unsigned long *table;
170 unsigned long bits; 170 unsigned long bits;
171 171
172 bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL; 172 bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL;
173 spin_lock(&mm->page_table_lock); 173 spin_lock(&mm->page_table_lock);
174 page = NULL; 174 page = NULL;
175 if (!list_empty(&mm->context.pgtable_list)) { 175 if (!list_empty(&mm->context.pgtable_list)) {
@@ -186,7 +186,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
186 pgtable_page_ctor(page); 186 pgtable_page_ctor(page);
187 page->flags &= ~FRAG_MASK; 187 page->flags &= ~FRAG_MASK;
188 table = (unsigned long *) page_to_phys(page); 188 table = (unsigned long *) page_to_phys(page);
189 if (mm->context.pgstes) 189 if (mm->context.has_pgste)
190 clear_table_pgstes(table); 190 clear_table_pgstes(table);
191 else 191 else
192 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); 192 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
@@ -210,7 +210,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
210 struct page *page; 210 struct page *page;
211 unsigned long bits; 211 unsigned long bits;
212 212
213 bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL; 213 bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL;
214 bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); 214 bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
215 page = pfn_to_page(__pa(table) >> PAGE_SHIFT); 215 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
216 spin_lock(&mm->page_table_lock); 216 spin_lock(&mm->page_table_lock);
@@ -257,7 +257,7 @@ int s390_enable_sie(void)
257 struct mm_struct *mm, *old_mm; 257 struct mm_struct *mm, *old_mm;
258 258
259 /* Do we have pgstes? if yes, we are done */ 259 /* Do we have pgstes? if yes, we are done */
260 if (tsk->mm->context.pgstes) 260 if (tsk->mm->context.has_pgste)
261 return 0; 261 return 0;
262 262
263 /* lets check if we are allowed to replace the mm */ 263 /* lets check if we are allowed to replace the mm */
@@ -269,14 +269,14 @@ int s390_enable_sie(void)
269 } 269 }
270 task_unlock(tsk); 270 task_unlock(tsk);
271 271
272 /* we copy the mm with pgstes enabled */ 272 /* we copy the mm and let dup_mm create the page tables with_pgstes */
273 tsk->mm->context.pgstes = 1; 273 tsk->mm->context.alloc_pgste = 1;
274 mm = dup_mm(tsk); 274 mm = dup_mm(tsk);
275 tsk->mm->context.pgstes = 0; 275 tsk->mm->context.alloc_pgste = 0;
276 if (!mm) 276 if (!mm)
277 return -ENOMEM; 277 return -ENOMEM;
278 278
279 /* Now lets check again if somebody attached ptrace etc */ 279 /* Now lets check again if something happened */
280 task_lock(tsk); 280 task_lock(tsk);
281 if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || 281 if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
282 tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) { 282 tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) {