diff options
author | Martin Schwidefsky <schwidefsky@de.ibm.com> | 2008-02-09 12:24:37 -0500 |
---|---|---|
committer | Martin Schwidefsky <schwidefsky@de.ibm.com> | 2008-02-09 12:24:41 -0500 |
commit | 6252d702c5311ce916caf75ed82e5c8245171c92 (patch) | |
tree | 3490f27b5f888ff2c1ec915d4e7201000f37a771 /include | |
parent | 5a216a20837c5f5fa1ca4b8ae8991ffd96b08e6f (diff) |
[S390] dynamic page tables.
Add support for a different number of page table levels depending
on the highest address used by a process. This will cause a 31 bit
process to use a two level page table instead of the four level page
table that is the default after the pud has been introduced. Likewise
a normal 64 bit process will use three levels instead of four. Only
if a process runs out of the 4 terabytes which can be addressed with
a three level page table is the fourth level dynamically added. Then
the process can use up to 8 petabytes.
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Diffstat (limited to 'include')
-rw-r--r-- | include/asm-s390/elf.h | 2 | ||||
-rw-r--r-- | include/asm-s390/mmu.h | 1 | ||||
-rw-r--r-- | include/asm-s390/mmu_context.h | 8 | ||||
-rw-r--r-- | include/asm-s390/pgalloc.h | 24 | ||||
-rw-r--r-- | include/asm-s390/pgtable.h | 38 | ||||
-rw-r--r-- | include/asm-s390/processor.h | 25 | ||||
-rw-r--r-- | include/asm-s390/tlb.h | 10 |
7 files changed, 63 insertions, 45 deletions
diff --git a/include/asm-s390/elf.h b/include/asm-s390/elf.h index b760cd4de385..b3ac262c4582 100644 --- a/include/asm-s390/elf.h +++ b/include/asm-s390/elf.h | |||
@@ -138,7 +138,7 @@ typedef s390_regs elf_gregset_t; | |||
138 | use of this is to invoke "./ld.so someprog" to test out a new version of | 138 | use of this is to invoke "./ld.so someprog" to test out a new version of |
139 | the loader. We need to make sure that it is out of the way of the program | 139 | the loader. We need to make sure that it is out of the way of the program |
140 | that it will "exec", and that there is sufficient room for the brk. */ | 140 | that it will "exec", and that there is sufficient room for the brk. */ |
141 | #define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) | 141 | #define ELF_ET_DYN_BASE (STACK_TOP / 3 * 2) |
142 | 142 | ||
143 | /* Wow, the "main" arch needs arch dependent functions too.. :) */ | 143 | /* Wow, the "main" arch needs arch dependent functions too.. :) */ |
144 | 144 | ||
diff --git a/include/asm-s390/mmu.h b/include/asm-s390/mmu.h index 13ec4215f437..1698e29c5b20 100644 --- a/include/asm-s390/mmu.h +++ b/include/asm-s390/mmu.h | |||
@@ -5,6 +5,7 @@ typedef struct { | |||
5 | struct list_head crst_list; | 5 | struct list_head crst_list; |
6 | struct list_head pgtable_list; | 6 | struct list_head pgtable_list; |
7 | unsigned long asce_bits; | 7 | unsigned long asce_bits; |
8 | unsigned long asce_limit; | ||
8 | int noexec; | 9 | int noexec; |
9 | } mm_context_t; | 10 | } mm_context_t; |
10 | 11 | ||
diff --git a/include/asm-s390/mmu_context.h b/include/asm-s390/mmu_context.h index b3ea3e199921..b5a34c6f91a9 100644 --- a/include/asm-s390/mmu_context.h +++ b/include/asm-s390/mmu_context.h | |||
@@ -18,9 +18,11 @@ static inline int init_new_context(struct task_struct *tsk, | |||
18 | { | 18 | { |
19 | mm->context.asce_bits = _ASCE_TABLE_LENGTH | _ASCE_USER_BITS; | 19 | mm->context.asce_bits = _ASCE_TABLE_LENGTH | _ASCE_USER_BITS; |
20 | #ifdef CONFIG_64BIT | 20 | #ifdef CONFIG_64BIT |
21 | mm->context.asce_bits |= _ASCE_TYPE_REGION2; | 21 | mm->context.asce_bits |= _ASCE_TYPE_REGION3; |
22 | #endif | 22 | #endif |
23 | mm->context.noexec = s390_noexec; | 23 | mm->context.noexec = s390_noexec; |
24 | mm->context.asce_limit = STACK_TOP_MAX; | ||
25 | crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); | ||
24 | return 0; | 26 | return 0; |
25 | } | 27 | } |
26 | 28 | ||
@@ -47,13 +49,12 @@ static inline void update_mm(struct mm_struct *mm, struct task_struct *tsk) | |||
47 | /* Load home space page table origin. */ | 49 | /* Load home space page table origin. */ |
48 | asm volatile(LCTL_OPCODE" 13,13,%0" | 50 | asm volatile(LCTL_OPCODE" 13,13,%0" |
49 | : : "m" (S390_lowcore.user_asce) ); | 51 | : : "m" (S390_lowcore.user_asce) ); |
52 | set_fs(current->thread.mm_segment); | ||
50 | } | 53 | } |
51 | 54 | ||
52 | static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, | 55 | static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, |
53 | struct task_struct *tsk) | 56 | struct task_struct *tsk) |
54 | { | 57 | { |
55 | if (unlikely(prev == next)) | ||
56 | return; | ||
57 | cpu_set(smp_processor_id(), next->cpu_vm_mask); | 58 | cpu_set(smp_processor_id(), next->cpu_vm_mask); |
58 | update_mm(next, tsk); | 59 | update_mm(next, tsk); |
59 | } | 60 | } |
@@ -65,7 +66,6 @@ static inline void activate_mm(struct mm_struct *prev, | |||
65 | struct mm_struct *next) | 66 | struct mm_struct *next) |
66 | { | 67 | { |
67 | switch_mm(prev, next, current); | 68 | switch_mm(prev, next, current); |
68 | set_fs(current->thread.mm_segment); | ||
69 | } | 69 | } |
70 | 70 | ||
71 | #endif /* __S390_MMU_CONTEXT_H */ | 71 | #endif /* __S390_MMU_CONTEXT_H */ |
diff --git a/include/asm-s390/pgalloc.h b/include/asm-s390/pgalloc.h index cc47dd65a499..f5b2bf3d7c1d 100644 --- a/include/asm-s390/pgalloc.h +++ b/include/asm-s390/pgalloc.h | |||
@@ -73,9 +73,16 @@ static inline unsigned long pgd_entry_type(struct mm_struct *mm) | |||
73 | 73 | ||
74 | static inline unsigned long pgd_entry_type(struct mm_struct *mm) | 74 | static inline unsigned long pgd_entry_type(struct mm_struct *mm) |
75 | { | 75 | { |
76 | if (mm->context.asce_limit <= (1UL << 31)) | ||
77 | return _SEGMENT_ENTRY_EMPTY; | ||
78 | if (mm->context.asce_limit <= (1UL << 42)) | ||
79 | return _REGION3_ENTRY_EMPTY; | ||
76 | return _REGION2_ENTRY_EMPTY; | 80 | return _REGION2_ENTRY_EMPTY; |
77 | } | 81 | } |
78 | 82 | ||
83 | int crst_table_upgrade(struct mm_struct *, unsigned long limit); | ||
84 | void crst_table_downgrade(struct mm_struct *, unsigned long limit); | ||
85 | |||
79 | static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) | 86 | static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) |
80 | { | 87 | { |
81 | unsigned long *table = crst_table_alloc(mm, mm->context.noexec); | 88 | unsigned long *table = crst_table_alloc(mm, mm->context.noexec); |
@@ -102,12 +109,12 @@ static inline void pgd_populate_kernel(struct mm_struct *mm, | |||
102 | 109 | ||
103 | static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) | 110 | static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) |
104 | { | 111 | { |
105 | pgd_t *shadow_pgd = get_shadow_table(pgd); | ||
106 | pud_t *shadow_pud = get_shadow_table(pud); | ||
107 | |||
108 | if (shadow_pgd && shadow_pud) | ||
109 | pgd_populate_kernel(mm, shadow_pgd, shadow_pud); | ||
110 | pgd_populate_kernel(mm, pgd, pud); | 112 | pgd_populate_kernel(mm, pgd, pud); |
113 | if (mm->context.noexec) { | ||
114 | pgd = get_shadow_table(pgd); | ||
115 | pud = get_shadow_table(pud); | ||
116 | pgd_populate_kernel(mm, pgd, pud); | ||
117 | } | ||
111 | } | 118 | } |
112 | 119 | ||
113 | static inline void pud_populate_kernel(struct mm_struct *mm, | 120 | static inline void pud_populate_kernel(struct mm_struct *mm, |
@@ -130,14 +137,9 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) | |||
130 | 137 | ||
131 | static inline pgd_t *pgd_alloc(struct mm_struct *mm) | 138 | static inline pgd_t *pgd_alloc(struct mm_struct *mm) |
132 | { | 139 | { |
133 | unsigned long *crst; | ||
134 | |||
135 | INIT_LIST_HEAD(&mm->context.crst_list); | 140 | INIT_LIST_HEAD(&mm->context.crst_list); |
136 | INIT_LIST_HEAD(&mm->context.pgtable_list); | 141 | INIT_LIST_HEAD(&mm->context.pgtable_list); |
137 | crst = crst_table_alloc(mm, s390_noexec); | 142 | return (pgd_t *) crst_table_alloc(mm, s390_noexec); |
138 | if (crst) | ||
139 | crst_table_init(crst, pgd_entry_type(mm)); | ||
140 | return (pgd_t *) crst; | ||
141 | } | 143 | } |
142 | #define pgd_free(mm, pgd) crst_table_free(mm, (unsigned long *) pgd) | 144 | #define pgd_free(mm, pgd) crst_table_free(mm, (unsigned long *) pgd) |
143 | 145 | ||
diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h index 8f473a718111..65154dc9a9e5 100644 --- a/include/asm-s390/pgtable.h +++ b/include/asm-s390/pgtable.h | |||
@@ -421,36 +421,54 @@ static inline int pud_bad(pud_t pud) { return 0; } | |||
421 | 421 | ||
422 | static inline int pgd_present(pgd_t pgd) | 422 | static inline int pgd_present(pgd_t pgd) |
423 | { | 423 | { |
424 | if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2) | ||
425 | return 1; | ||
424 | return (pgd_val(pgd) & _REGION_ENTRY_ORIGIN) != 0UL; | 426 | return (pgd_val(pgd) & _REGION_ENTRY_ORIGIN) != 0UL; |
425 | } | 427 | } |
426 | 428 | ||
427 | static inline int pgd_none(pgd_t pgd) | 429 | static inline int pgd_none(pgd_t pgd) |
428 | { | 430 | { |
431 | if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2) | ||
432 | return 0; | ||
429 | return (pgd_val(pgd) & _REGION_ENTRY_INV) != 0UL; | 433 | return (pgd_val(pgd) & _REGION_ENTRY_INV) != 0UL; |
430 | } | 434 | } |
431 | 435 | ||
432 | static inline int pgd_bad(pgd_t pgd) | 436 | static inline int pgd_bad(pgd_t pgd) |
433 | { | 437 | { |
438 | /* | ||
439 | * With dynamic page table levels the pgd can be a region table | ||
440 | * entry or a segment table entry. Check for the bit that are | ||
441 | * invalid for either table entry. | ||
442 | */ | ||
434 | unsigned long mask = | 443 | unsigned long mask = |
435 | ~_REGION_ENTRY_ORIGIN & ~_REGION_ENTRY_INV & | 444 | ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INV & |
436 | ~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH; | 445 | ~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH; |
437 | return (pgd_val(pgd) & mask) != 0; | 446 | return (pgd_val(pgd) & mask) != 0; |
438 | } | 447 | } |
439 | 448 | ||
440 | static inline int pud_present(pud_t pud) | 449 | static inline int pud_present(pud_t pud) |
441 | { | 450 | { |
451 | if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3) | ||
452 | return 1; | ||
442 | return (pud_val(pud) & _REGION_ENTRY_ORIGIN) != 0UL; | 453 | return (pud_val(pud) & _REGION_ENTRY_ORIGIN) != 0UL; |
443 | } | 454 | } |
444 | 455 | ||
445 | static inline int pud_none(pud_t pud) | 456 | static inline int pud_none(pud_t pud) |
446 | { | 457 | { |
458 | if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3) | ||
459 | return 0; | ||
447 | return (pud_val(pud) & _REGION_ENTRY_INV) != 0UL; | 460 | return (pud_val(pud) & _REGION_ENTRY_INV) != 0UL; |
448 | } | 461 | } |
449 | 462 | ||
450 | static inline int pud_bad(pud_t pud) | 463 | static inline int pud_bad(pud_t pud) |
451 | { | 464 | { |
465 | /* | ||
466 | * With dynamic page table levels the pud can be a region table | ||
467 | * entry or a segment table entry. Check for the bit that are | ||
468 | * invalid for either table entry. | ||
469 | */ | ||
452 | unsigned long mask = | 470 | unsigned long mask = |
453 | ~_REGION_ENTRY_ORIGIN & ~_REGION_ENTRY_INV & | 471 | ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INV & |
454 | ~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH; | 472 | ~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH; |
455 | return (pud_val(pud) & mask) != 0; | 473 | return (pud_val(pud) & mask) != 0; |
456 | } | 474 | } |
@@ -535,7 +553,8 @@ static inline int pte_young(pte_t pte) | |||
535 | 553 | ||
536 | static inline void pgd_clear_kernel(pgd_t * pgd) | 554 | static inline void pgd_clear_kernel(pgd_t * pgd) |
537 | { | 555 | { |
538 | pgd_val(*pgd) = _REGION2_ENTRY_EMPTY; | 556 | if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) |
557 | pgd_val(*pgd) = _REGION2_ENTRY_EMPTY; | ||
539 | } | 558 | } |
540 | 559 | ||
541 | static inline void pgd_clear(pgd_t * pgd) | 560 | static inline void pgd_clear(pgd_t * pgd) |
@@ -549,10 +568,11 @@ static inline void pgd_clear(pgd_t * pgd) | |||
549 | 568 | ||
550 | static inline void pud_clear_kernel(pud_t *pud) | 569 | static inline void pud_clear_kernel(pud_t *pud) |
551 | { | 570 | { |
552 | pud_val(*pud) = _REGION3_ENTRY_EMPTY; | 571 | if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) |
572 | pud_val(*pud) = _REGION3_ENTRY_EMPTY; | ||
553 | } | 573 | } |
554 | 574 | ||
555 | static inline void pud_clear(pud_t * pud) | 575 | static inline void pud_clear(pud_t *pud) |
556 | { | 576 | { |
557 | pud_t *shadow = get_shadow_table(pud); | 577 | pud_t *shadow = get_shadow_table(pud); |
558 | 578 | ||
@@ -841,13 +861,17 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) | |||
841 | 861 | ||
842 | static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address) | 862 | static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address) |
843 | { | 863 | { |
844 | pud_t *pud = (pud_t *) pgd_deref(*pgd); | 864 | pud_t *pud = (pud_t *) pgd; |
865 | if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) | ||
866 | pud = (pud_t *) pgd_deref(*pgd); | ||
845 | return pud + pud_index(address); | 867 | return pud + pud_index(address); |
846 | } | 868 | } |
847 | 869 | ||
848 | static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) | 870 | static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) |
849 | { | 871 | { |
850 | pmd_t *pmd = (pmd_t *) pud_deref(*pud); | 872 | pmd_t *pmd = (pmd_t *) pud; |
873 | if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) | ||
874 | pmd = (pmd_t *) pud_deref(*pud); | ||
851 | return pmd + pmd_index(address); | 875 | return pmd + pmd_index(address); |
852 | } | 876 | } |
853 | 877 | ||
diff --git a/include/asm-s390/processor.h b/include/asm-s390/processor.h index 5a21f457d583..51d88912aa20 100644 --- a/include/asm-s390/processor.h +++ b/include/asm-s390/processor.h | |||
@@ -81,11 +81,12 @@ extern int get_cpu_capability(unsigned int *); | |||
81 | 81 | ||
82 | #ifndef __s390x__ | 82 | #ifndef __s390x__ |
83 | #define STACK_TOP (1UL << 31) | 83 | #define STACK_TOP (1UL << 31) |
84 | #define STACK_TOP_MAX (1UL << 31) | ||
84 | #else /* __s390x__ */ | 85 | #else /* __s390x__ */ |
85 | #define STACK_TOP (1UL << (test_thread_flag(TIF_31BIT) ? 31:53)) | 86 | #define STACK_TOP (1UL << (test_thread_flag(TIF_31BIT) ? 31:42)) |
87 | #define STACK_TOP_MAX (1UL << 42) | ||
86 | #endif /* __s390x__ */ | 88 | #endif /* __s390x__ */ |
87 | 89 | ||
88 | #define STACK_TOP_MAX STACK_TOP | ||
89 | 90 | ||
90 | #endif | 91 | #endif |
91 | 92 | ||
@@ -142,8 +143,6 @@ struct stack_frame { | |||
142 | /* | 143 | /* |
143 | * Do necessary setup to start up a new thread. | 144 | * Do necessary setup to start up a new thread. |
144 | */ | 145 | */ |
145 | #ifndef __s390x__ | ||
146 | |||
147 | #define start_thread(regs, new_psw, new_stackp) do { \ | 146 | #define start_thread(regs, new_psw, new_stackp) do { \ |
148 | set_fs(USER_DS); \ | 147 | set_fs(USER_DS); \ |
149 | regs->psw.mask = psw_user_bits; \ | 148 | regs->psw.mask = psw_user_bits; \ |
@@ -151,24 +150,6 @@ struct stack_frame { | |||
151 | regs->gprs[15] = new_stackp ; \ | 150 | regs->gprs[15] = new_stackp ; \ |
152 | } while (0) | 151 | } while (0) |
153 | 152 | ||
154 | #else /* __s390x__ */ | ||
155 | |||
156 | #define start_thread(regs, new_psw, new_stackp) do { \ | ||
157 | set_fs(USER_DS); \ | ||
158 | regs->psw.mask = psw_user_bits; \ | ||
159 | regs->psw.addr = new_psw; \ | ||
160 | regs->gprs[15] = new_stackp; \ | ||
161 | } while (0) | ||
162 | |||
163 | #define start_thread31(regs, new_psw, new_stackp) do { \ | ||
164 | set_fs(USER_DS); \ | ||
165 | regs->psw.mask = psw_user32_bits; \ | ||
166 | regs->psw.addr = new_psw; \ | ||
167 | regs->gprs[15] = new_stackp; \ | ||
168 | } while (0) | ||
169 | |||
170 | #endif /* __s390x__ */ | ||
171 | |||
172 | /* Forward declaration, a strange C thing */ | 153 | /* Forward declaration, a strange C thing */ |
173 | struct task_struct; | 154 | struct task_struct; |
174 | struct mm_struct; | 155 | struct mm_struct; |
diff --git a/include/asm-s390/tlb.h b/include/asm-s390/tlb.h index 9b2ddb7aac49..3d8a96d39d9d 100644 --- a/include/asm-s390/tlb.h +++ b/include/asm-s390/tlb.h | |||
@@ -109,10 +109,15 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte) | |||
109 | /* | 109 | /* |
110 | * pmd_free_tlb frees a pmd table and clears the CRSTE for the | 110 | * pmd_free_tlb frees a pmd table and clears the CRSTE for the |
111 | * segment table entry from the tlb. | 111 | * segment table entry from the tlb. |
112 | * If the mm uses a two level page table the single pmd is freed | ||
113 | * as the pgd. pmd_free_tlb checks the asce_limit against 2GB | ||
114 | * to avoid the double free of the pmd in this case. | ||
112 | */ | 115 | */ |
113 | static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) | 116 | static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) |
114 | { | 117 | { |
115 | #ifdef __s390x__ | 118 | #ifdef __s390x__ |
119 | if (tlb->mm->context.asce_limit <= (1UL << 31)) | ||
120 | return; | ||
116 | if (!tlb->fullmm) { | 121 | if (!tlb->fullmm) { |
117 | tlb->array[--tlb->nr_pxds] = pmd; | 122 | tlb->array[--tlb->nr_pxds] = pmd; |
118 | if (tlb->nr_ptes >= tlb->nr_pxds) | 123 | if (tlb->nr_ptes >= tlb->nr_pxds) |
@@ -125,10 +130,15 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) | |||
125 | /* | 130 | /* |
126 | * pud_free_tlb frees a pud table and clears the CRSTE for the | 131 | * pud_free_tlb frees a pud table and clears the CRSTE for the |
127 | * region third table entry from the tlb. | 132 | * region third table entry from the tlb. |
133 | * If the mm uses a three level page table the single pud is freed | ||
134 | * as the pgd. pud_free_tlb checks the asce_limit against 4TB | ||
135 | * to avoid the double free of the pud in this case. | ||
128 | */ | 136 | */ |
129 | static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud) | 137 | static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud) |
130 | { | 138 | { |
131 | #ifdef __s390x__ | 139 | #ifdef __s390x__ |
140 | if (tlb->mm->context.asce_limit <= (1UL << 42)) | ||
141 | return; | ||
132 | if (!tlb->fullmm) { | 142 | if (!tlb->fullmm) { |
133 | tlb->array[--tlb->nr_pxds] = pud; | 143 | tlb->array[--tlb->nr_pxds] = pud; |
134 | if (tlb->nr_ptes >= tlb->nr_pxds) | 144 | if (tlb->nr_ptes >= tlb->nr_pxds) |