diff options
author | Christian Borntraeger <borntraeger@de.ibm.com> | 2008-10-28 06:10:15 -0400 |
---|---|---|
committer | Martin Schwidefsky <schwidefsky@de.ibm.com> | 2008-10-28 06:12:03 -0400 |
commit | 250cf776f74b5932a1977d0489cae9206e2351dd (patch) | |
tree | 71bb6778e65e8efbdd0cac09a64e4e1194b98c3f /arch/s390 | |
parent | 2c78091405d6f54748b1fac78c45f2a799e3073a (diff) |
[S390] pgtables: Fix race in enable_sie vs. page table ops
The current enable_sie code sets the mm->context.pgstes bit to tell
dup_mm that the new mm should have extended page tables. This bit is also
used by the s390 specific page table primitives to decide about the page
table layout - which means context.pgstes has two meanings. This can cause
any kind of bugs. For example - e.g. shrink_zone can call
ptep_clear_flush_young while enable_sie is running. ptep_clear_flush_young
will test for context.pgstes. Since enable_sie changed that value of the old
struct mm without changing the page table layout ptep_clear_flush_young will
do the wrong thing.
The solution is to split pgstes into two bits
- one for the allocation
- one for the current state
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Diffstat (limited to 'arch/s390')
-rw-r--r-- | arch/s390/include/asm/mmu.h | 3 | ||||
-rw-r--r-- | arch/s390/include/asm/mmu_context.h | 19 | ||||
-rw-r--r-- | arch/s390/include/asm/pgtable.h | 8 | ||||
-rw-r--r-- | arch/s390/mm/pgtable.c | 16 |
4 files changed, 30 insertions, 16 deletions
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index 5dd5e7b3476f..d2b4ff831477 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h | |||
@@ -7,7 +7,8 @@ typedef struct { | |||
7 | unsigned long asce_bits; | 7 | unsigned long asce_bits; |
8 | unsigned long asce_limit; | 8 | unsigned long asce_limit; |
9 | int noexec; | 9 | int noexec; |
10 | int pgstes; | 10 | int has_pgste; /* The mmu context has extended page tables */ |
11 | int alloc_pgste; /* cloned contexts will have extended page tables */ | ||
11 | } mm_context_t; | 12 | } mm_context_t; |
12 | 13 | ||
13 | #endif | 14 | #endif |
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 4c2fbf48c9c4..28ec870655af 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h | |||
@@ -20,12 +20,25 @@ static inline int init_new_context(struct task_struct *tsk, | |||
20 | #ifdef CONFIG_64BIT | 20 | #ifdef CONFIG_64BIT |
21 | mm->context.asce_bits |= _ASCE_TYPE_REGION3; | 21 | mm->context.asce_bits |= _ASCE_TYPE_REGION3; |
22 | #endif | 22 | #endif |
23 | if (current->mm->context.pgstes) { | 23 | if (current->mm->context.alloc_pgste) { |
24 | /* | ||
25 | * alloc_pgste indicates, that any NEW context will be created | ||
26 | * with extended page tables. The old context is unchanged. The | ||
27 | * page table allocation and the page table operations will | ||
28 | * look at has_pgste to distinguish normal and extended page | ||
29 | * tables. The only way to create extended page tables is to | ||
30 | * set alloc_pgste and then create a new context (e.g. dup_mm). | ||
31 | * The page table allocation is called after init_new_context | ||
32 | * and if has_pgste is set, it will create extended page | ||
33 | * tables. | ||
34 | */ | ||
24 | mm->context.noexec = 0; | 35 | mm->context.noexec = 0; |
25 | mm->context.pgstes = 1; | 36 | mm->context.has_pgste = 1; |
37 | mm->context.alloc_pgste = 1; | ||
26 | } else { | 38 | } else { |
27 | mm->context.noexec = s390_noexec; | 39 | mm->context.noexec = s390_noexec; |
28 | mm->context.pgstes = 0; | 40 | mm->context.has_pgste = 0; |
41 | mm->context.alloc_pgste = 0; | ||
29 | } | 42 | } |
30 | mm->context.asce_limit = STACK_TOP_MAX; | 43 | mm->context.asce_limit = STACK_TOP_MAX; |
31 | crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); | 44 | crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); |
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 1a928f84afd6..7fc76133b3e4 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h | |||
@@ -679,7 +679,7 @@ static inline void pmd_clear(pmd_t *pmd) | |||
679 | 679 | ||
680 | static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | 680 | static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
681 | { | 681 | { |
682 | if (mm->context.pgstes) | 682 | if (mm->context.has_pgste) |
683 | ptep_rcp_copy(ptep); | 683 | ptep_rcp_copy(ptep); |
684 | pte_val(*ptep) = _PAGE_TYPE_EMPTY; | 684 | pte_val(*ptep) = _PAGE_TYPE_EMPTY; |
685 | if (mm->context.noexec) | 685 | if (mm->context.noexec) |
@@ -763,7 +763,7 @@ static inline int kvm_s390_test_and_clear_page_dirty(struct mm_struct *mm, | |||
763 | struct page *page; | 763 | struct page *page; |
764 | unsigned int skey; | 764 | unsigned int skey; |
765 | 765 | ||
766 | if (!mm->context.pgstes) | 766 | if (!mm->context.has_pgste) |
767 | return -EINVAL; | 767 | return -EINVAL; |
768 | rcp_lock(ptep); | 768 | rcp_lock(ptep); |
769 | pgste = (unsigned long *) (ptep + PTRS_PER_PTE); | 769 | pgste = (unsigned long *) (ptep + PTRS_PER_PTE); |
@@ -794,7 +794,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, | |||
794 | int young; | 794 | int young; |
795 | unsigned long *pgste; | 795 | unsigned long *pgste; |
796 | 796 | ||
797 | if (!vma->vm_mm->context.pgstes) | 797 | if (!vma->vm_mm->context.has_pgste) |
798 | return 0; | 798 | return 0; |
799 | physpage = pte_val(*ptep) & PAGE_MASK; | 799 | physpage = pte_val(*ptep) & PAGE_MASK; |
800 | pgste = (unsigned long *) (ptep + PTRS_PER_PTE); | 800 | pgste = (unsigned long *) (ptep + PTRS_PER_PTE); |
@@ -844,7 +844,7 @@ static inline void __ptep_ipte(unsigned long address, pte_t *ptep) | |||
844 | static inline void ptep_invalidate(struct mm_struct *mm, | 844 | static inline void ptep_invalidate(struct mm_struct *mm, |
845 | unsigned long address, pte_t *ptep) | 845 | unsigned long address, pte_t *ptep) |
846 | { | 846 | { |
847 | if (mm->context.pgstes) { | 847 | if (mm->context.has_pgste) { |
848 | rcp_lock(ptep); | 848 | rcp_lock(ptep); |
849 | __ptep_ipte(address, ptep); | 849 | __ptep_ipte(address, ptep); |
850 | ptep_rcp_copy(ptep); | 850 | ptep_rcp_copy(ptep); |
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 3d98ba82ea67..ef3635b52fc0 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c | |||
@@ -169,7 +169,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) | |||
169 | unsigned long *table; | 169 | unsigned long *table; |
170 | unsigned long bits; | 170 | unsigned long bits; |
171 | 171 | ||
172 | bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL; | 172 | bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL; |
173 | spin_lock(&mm->page_table_lock); | 173 | spin_lock(&mm->page_table_lock); |
174 | page = NULL; | 174 | page = NULL; |
175 | if (!list_empty(&mm->context.pgtable_list)) { | 175 | if (!list_empty(&mm->context.pgtable_list)) { |
@@ -186,7 +186,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) | |||
186 | pgtable_page_ctor(page); | 186 | pgtable_page_ctor(page); |
187 | page->flags &= ~FRAG_MASK; | 187 | page->flags &= ~FRAG_MASK; |
188 | table = (unsigned long *) page_to_phys(page); | 188 | table = (unsigned long *) page_to_phys(page); |
189 | if (mm->context.pgstes) | 189 | if (mm->context.has_pgste) |
190 | clear_table_pgstes(table); | 190 | clear_table_pgstes(table); |
191 | else | 191 | else |
192 | clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); | 192 | clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); |
@@ -210,7 +210,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) | |||
210 | struct page *page; | 210 | struct page *page; |
211 | unsigned long bits; | 211 | unsigned long bits; |
212 | 212 | ||
213 | bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL; | 213 | bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL; |
214 | bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); | 214 | bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); |
215 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); | 215 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); |
216 | spin_lock(&mm->page_table_lock); | 216 | spin_lock(&mm->page_table_lock); |
@@ -257,7 +257,7 @@ int s390_enable_sie(void) | |||
257 | struct mm_struct *mm, *old_mm; | 257 | struct mm_struct *mm, *old_mm; |
258 | 258 | ||
259 | /* Do we have pgstes? if yes, we are done */ | 259 | /* Do we have pgstes? if yes, we are done */ |
260 | if (tsk->mm->context.pgstes) | 260 | if (tsk->mm->context.has_pgste) |
261 | return 0; | 261 | return 0; |
262 | 262 | ||
263 | /* lets check if we are allowed to replace the mm */ | 263 | /* lets check if we are allowed to replace the mm */ |
@@ -269,14 +269,14 @@ int s390_enable_sie(void) | |||
269 | } | 269 | } |
270 | task_unlock(tsk); | 270 | task_unlock(tsk); |
271 | 271 | ||
272 | /* we copy the mm with pgstes enabled */ | 272 | /* we copy the mm and let dup_mm create the page tables with_pgstes */ |
273 | tsk->mm->context.pgstes = 1; | 273 | tsk->mm->context.alloc_pgste = 1; |
274 | mm = dup_mm(tsk); | 274 | mm = dup_mm(tsk); |
275 | tsk->mm->context.pgstes = 0; | 275 | tsk->mm->context.alloc_pgste = 0; |
276 | if (!mm) | 276 | if (!mm) |
277 | return -ENOMEM; | 277 | return -ENOMEM; |
278 | 278 | ||
279 | /* Now lets check again if somebody attached ptrace etc */ | 279 | /* Now lets check again if something happened */ |
280 | task_lock(tsk); | 280 | task_lock(tsk); |
281 | if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || | 281 | if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || |
282 | tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) { | 282 | tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) { |