diff options
author | Martin Schwidefsky <schwidefsky@de.ibm.com> | 2008-02-09 12:24:37 -0500 |
---|---|---|
committer | Martin Schwidefsky <schwidefsky@de.ibm.com> | 2008-02-09 12:24:41 -0500 |
commit | 6252d702c5311ce916caf75ed82e5c8245171c92 (patch) | |
tree | 3490f27b5f888ff2c1ec915d4e7201000f37a771 | |
parent | 5a216a20837c5f5fa1ca4b8ae8991ffd96b08e6f (diff) |
[S390] dynamic page tables.
Add support for a different number of page table levels depending
on the highest address used for a process. This will cause a 31 bit
process to use a two level page table instead of the four level page
table that is the default after the pud has been introduced. Likewise
a normal 64 bit process will use three levels instead of four. Only
if a process runs out of the 4 terabytes which can be addressed with
a three level page table is the fourth level dynamically added. Then
the process can use up to 8 petabytes.
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
-rw-r--r-- | arch/s390/kernel/binfmt_elf32.c | 11 | ||||
-rw-r--r-- | arch/s390/kernel/traps.c | 3 | ||||
-rw-r--r-- | arch/s390/mm/fault.c | 40 | ||||
-rw-r--r-- | arch/s390/mm/init.c | 5 | ||||
-rw-r--r-- | arch/s390/mm/mmap.c | 65 | ||||
-rw-r--r-- | arch/s390/mm/pgtable.c | 74 | ||||
-rw-r--r-- | include/asm-s390/elf.h | 2 | ||||
-rw-r--r-- | include/asm-s390/mmu.h | 1 | ||||
-rw-r--r-- | include/asm-s390/mmu_context.h | 8 | ||||
-rw-r--r-- | include/asm-s390/pgalloc.h | 24 | ||||
-rw-r--r-- | include/asm-s390/pgtable.h | 38 | ||||
-rw-r--r-- | include/asm-s390/processor.h | 25 | ||||
-rw-r--r-- | include/asm-s390/tlb.h | 10 |
13 files changed, 258 insertions, 48 deletions
diff --git a/arch/s390/kernel/binfmt_elf32.c b/arch/s390/kernel/binfmt_elf32.c index f1e40ca00d8d..3e1c315b736d 100644 --- a/arch/s390/kernel/binfmt_elf32.c +++ b/arch/s390/kernel/binfmt_elf32.c | |||
@@ -134,6 +134,7 @@ static inline int dump_task_fpu(struct task_struct *tsk, elf_fpregset_t *fpregs) | |||
134 | } | 134 | } |
135 | 135 | ||
136 | #include <asm/processor.h> | 136 | #include <asm/processor.h> |
137 | #include <asm/pgalloc.h> | ||
137 | #include <linux/module.h> | 138 | #include <linux/module.h> |
138 | #include <linux/elfcore.h> | 139 | #include <linux/elfcore.h> |
139 | #include <linux/binfmts.h> | 140 | #include <linux/binfmts.h> |
@@ -183,6 +184,16 @@ struct elf_prpsinfo32 | |||
183 | #undef start_thread | 184 | #undef start_thread |
184 | #define start_thread start_thread31 | 185 | #define start_thread start_thread31 |
185 | 186 | ||
187 | static inline void start_thread31(struct pt_regs *regs, unsigned long new_psw, | ||
188 | unsigned long new_stackp) | ||
189 | { | ||
190 | set_fs(USER_DS); | ||
191 | regs->psw.mask = psw_user32_bits; | ||
192 | regs->psw.addr = new_psw; | ||
193 | regs->gprs[15] = new_stackp; | ||
194 | crst_table_downgrade(current->mm, 1UL << 31); | ||
195 | } | ||
196 | |||
186 | MODULE_DESCRIPTION("Binary format loader for compatibility with 32bit Linux for S390 binaries," | 197 | MODULE_DESCRIPTION("Binary format loader for compatibility with 32bit Linux for S390 binaries," |
187 | " Copyright 2000 IBM Corporation"); | 198 | " Copyright 2000 IBM Corporation"); |
188 | MODULE_AUTHOR("Gerhard Tonn <ton@de.ibm.com>"); | 199 | MODULE_AUTHOR("Gerhard Tonn <ton@de.ibm.com>"); |
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index a4d29025ddbd..60f728aeaf12 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c | |||
@@ -60,6 +60,7 @@ int sysctl_userprocess_debug = 0; | |||
60 | extern pgm_check_handler_t do_protection_exception; | 60 | extern pgm_check_handler_t do_protection_exception; |
61 | extern pgm_check_handler_t do_dat_exception; | 61 | extern pgm_check_handler_t do_dat_exception; |
62 | extern pgm_check_handler_t do_monitor_call; | 62 | extern pgm_check_handler_t do_monitor_call; |
63 | extern pgm_check_handler_t do_asce_exception; | ||
63 | 64 | ||
64 | #define stack_pointer ({ void **sp; asm("la %0,0(15)" : "=&d" (sp)); sp; }) | 65 | #define stack_pointer ({ void **sp; asm("la %0,0(15)" : "=&d" (sp)); sp; }) |
65 | 66 | ||
@@ -730,7 +731,7 @@ void __init trap_init(void) | |||
730 | pgm_check_table[0x12] = &translation_exception; | 731 | pgm_check_table[0x12] = &translation_exception; |
731 | pgm_check_table[0x13] = &special_op_exception; | 732 | pgm_check_table[0x13] = &special_op_exception; |
732 | #ifdef CONFIG_64BIT | 733 | #ifdef CONFIG_64BIT |
733 | pgm_check_table[0x38] = &do_dat_exception; | 734 | pgm_check_table[0x38] = &do_asce_exception; |
734 | pgm_check_table[0x39] = &do_dat_exception; | 735 | pgm_check_table[0x39] = &do_dat_exception; |
735 | pgm_check_table[0x3A] = &do_dat_exception; | 736 | pgm_check_table[0x3A] = &do_dat_exception; |
736 | pgm_check_table[0x3B] = &do_dat_exception; | 737 | pgm_check_table[0x3B] = &do_dat_exception; |
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 2456b52ed068..ed13d429a487 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c | |||
@@ -32,6 +32,7 @@ | |||
32 | #include <asm/system.h> | 32 | #include <asm/system.h> |
33 | #include <asm/pgtable.h> | 33 | #include <asm/pgtable.h> |
34 | #include <asm/s390_ext.h> | 34 | #include <asm/s390_ext.h> |
35 | #include <asm/mmu_context.h> | ||
35 | 36 | ||
36 | #ifndef CONFIG_64BIT | 37 | #ifndef CONFIG_64BIT |
37 | #define __FAIL_ADDR_MASK 0x7ffff000 | 38 | #define __FAIL_ADDR_MASK 0x7ffff000 |
@@ -444,6 +445,45 @@ void __kprobes do_dat_exception(struct pt_regs *regs, unsigned long error_code) | |||
444 | do_exception(regs, error_code & 0xff, 0); | 445 | do_exception(regs, error_code & 0xff, 0); |
445 | } | 446 | } |
446 | 447 | ||
448 | #ifdef CONFIG_64BIT | ||
449 | void __kprobes do_asce_exception(struct pt_regs *regs, unsigned long error_code) | ||
450 | { | ||
451 | struct mm_struct *mm; | ||
452 | struct vm_area_struct *vma; | ||
453 | unsigned long address; | ||
454 | int space; | ||
455 | |||
456 | mm = current->mm; | ||
457 | address = S390_lowcore.trans_exc_code & __FAIL_ADDR_MASK; | ||
458 | space = check_space(current); | ||
459 | |||
460 | if (unlikely(space == 0 || in_atomic() || !mm)) | ||
461 | goto no_context; | ||
462 | |||
463 | local_irq_enable(); | ||
464 | |||
465 | down_read(&mm->mmap_sem); | ||
466 | vma = find_vma(mm, address); | ||
467 | up_read(&mm->mmap_sem); | ||
468 | |||
469 | if (vma) { | ||
470 | update_mm(mm, current); | ||
471 | return; | ||
472 | } | ||
473 | |||
474 | /* User mode accesses just cause a SIGSEGV */ | ||
475 | if (regs->psw.mask & PSW_MASK_PSTATE) { | ||
476 | current->thread.prot_addr = address; | ||
477 | current->thread.trap_no = error_code; | ||
478 | do_sigsegv(regs, error_code, SEGV_MAPERR, address); | ||
479 | return; | ||
480 | } | ||
481 | |||
482 | no_context: | ||
483 | do_no_context(regs, error_code, address); | ||
484 | } | ||
485 | #endif | ||
486 | |||
447 | #ifdef CONFIG_PFAULT | 487 | #ifdef CONFIG_PFAULT |
448 | /* | 488 | /* |
449 | * 'pfault' pseudo page faults routines. | 489 | * 'pfault' pseudo page faults routines. |
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 248a71010700..8053245fe259 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c | |||
@@ -112,8 +112,9 @@ void __init paging_init(void) | |||
112 | init_mm.pgd = swapper_pg_dir; | 112 | init_mm.pgd = swapper_pg_dir; |
113 | S390_lowcore.kernel_asce = __pa(init_mm.pgd) & PAGE_MASK; | 113 | S390_lowcore.kernel_asce = __pa(init_mm.pgd) & PAGE_MASK; |
114 | #ifdef CONFIG_64BIT | 114 | #ifdef CONFIG_64BIT |
115 | S390_lowcore.kernel_asce |= _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH; | 115 | /* A three level page table (4TB) is enough for the kernel space. */ |
116 | pgd_type = _REGION2_ENTRY_EMPTY; | 116 | S390_lowcore.kernel_asce |= _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH; |
117 | pgd_type = _REGION3_ENTRY_EMPTY; | ||
117 | #else | 118 | #else |
118 | S390_lowcore.kernel_asce |= _ASCE_TABLE_LENGTH; | 119 | S390_lowcore.kernel_asce |= _ASCE_TABLE_LENGTH; |
119 | pgd_type = _SEGMENT_ENTRY_EMPTY; | 120 | pgd_type = _SEGMENT_ENTRY_EMPTY; |
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 356257c171de..5932a824547a 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <linux/personality.h> | 27 | #include <linux/personality.h> |
28 | #include <linux/mm.h> | 28 | #include <linux/mm.h> |
29 | #include <linux/module.h> | 29 | #include <linux/module.h> |
30 | #include <asm/pgalloc.h> | ||
30 | 31 | ||
31 | /* | 32 | /* |
32 | * Top of mmap area (just below the process stack). | 33 | * Top of mmap area (just below the process stack). |
@@ -62,6 +63,8 @@ static inline int mmap_is_legacy(void) | |||
62 | current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY; | 63 | current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY; |
63 | } | 64 | } |
64 | 65 | ||
66 | #ifndef CONFIG_64BIT | ||
67 | |||
65 | /* | 68 | /* |
66 | * This function, called very early during the creation of a new | 69 | * This function, called very early during the creation of a new |
67 | * process VM image, sets up which VM layout function to use: | 70 | * process VM image, sets up which VM layout function to use: |
@@ -84,3 +87,65 @@ void arch_pick_mmap_layout(struct mm_struct *mm) | |||
84 | } | 87 | } |
85 | EXPORT_SYMBOL_GPL(arch_pick_mmap_layout); | 88 | EXPORT_SYMBOL_GPL(arch_pick_mmap_layout); |
86 | 89 | ||
90 | #else | ||
91 | |||
92 | static unsigned long | ||
93 | s390_get_unmapped_area(struct file *filp, unsigned long addr, | ||
94 | unsigned long len, unsigned long pgoff, unsigned long flags) | ||
95 | { | ||
96 | struct mm_struct *mm = current->mm; | ||
97 | int rc; | ||
98 | |||
99 | addr = arch_get_unmapped_area(filp, addr, len, pgoff, flags); | ||
100 | if (addr & ~PAGE_MASK) | ||
101 | return addr; | ||
102 | if (unlikely(mm->context.asce_limit < addr + len)) { | ||
103 | rc = crst_table_upgrade(mm, addr + len); | ||
104 | if (rc) | ||
105 | return (unsigned long) rc; | ||
106 | } | ||
107 | return addr; | ||
108 | } | ||
109 | |||
110 | static unsigned long | ||
111 | s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, | ||
112 | const unsigned long len, const unsigned long pgoff, | ||
113 | const unsigned long flags) | ||
114 | { | ||
115 | struct mm_struct *mm = current->mm; | ||
116 | unsigned long addr = addr0; | ||
117 | int rc; | ||
118 | |||
119 | addr = arch_get_unmapped_area_topdown(filp, addr, len, pgoff, flags); | ||
120 | if (addr & ~PAGE_MASK) | ||
121 | return addr; | ||
122 | if (unlikely(mm->context.asce_limit < addr + len)) { | ||
123 | rc = crst_table_upgrade(mm, addr + len); | ||
124 | if (rc) | ||
125 | return (unsigned long) rc; | ||
126 | } | ||
127 | return addr; | ||
128 | } | ||
129 | /* | ||
130 | * This function, called very early during the creation of a new | ||
131 | * process VM image, sets up which VM layout function to use: | ||
132 | */ | ||
133 | void arch_pick_mmap_layout(struct mm_struct *mm) | ||
134 | { | ||
135 | /* | ||
136 | * Fall back to the standard layout if the personality | ||
137 | * bit is set, or if the expected stack growth is unlimited: | ||
138 | */ | ||
139 | if (mmap_is_legacy()) { | ||
140 | mm->mmap_base = TASK_UNMAPPED_BASE; | ||
141 | mm->get_unmapped_area = s390_get_unmapped_area; | ||
142 | mm->unmap_area = arch_unmap_area; | ||
143 | } else { | ||
144 | mm->mmap_base = mmap_base(); | ||
145 | mm->get_unmapped_area = s390_get_unmapped_area_topdown; | ||
146 | mm->unmap_area = arch_unmap_area_topdown; | ||
147 | } | ||
148 | } | ||
149 | EXPORT_SYMBOL_GPL(arch_pick_mmap_layout); | ||
150 | |||
151 | #endif | ||
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 809e77893039..fd072013f88c 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <asm/pgalloc.h> | 23 | #include <asm/pgalloc.h> |
24 | #include <asm/tlb.h> | 24 | #include <asm/tlb.h> |
25 | #include <asm/tlbflush.h> | 25 | #include <asm/tlbflush.h> |
26 | #include <asm/mmu_context.h> | ||
26 | 27 | ||
27 | #ifndef CONFIG_64BIT | 28 | #ifndef CONFIG_64BIT |
28 | #define ALLOC_ORDER 1 | 29 | #define ALLOC_ORDER 1 |
@@ -70,6 +71,79 @@ void crst_table_free(struct mm_struct *mm, unsigned long *table) | |||
70 | free_pages((unsigned long) table, ALLOC_ORDER); | 71 | free_pages((unsigned long) table, ALLOC_ORDER); |
71 | } | 72 | } |
72 | 73 | ||
74 | #ifdef CONFIG_64BIT | ||
75 | int crst_table_upgrade(struct mm_struct *mm, unsigned long limit) | ||
76 | { | ||
77 | unsigned long *table, *pgd; | ||
78 | unsigned long entry; | ||
79 | |||
80 | BUG_ON(limit > (1UL << 53)); | ||
81 | repeat: | ||
82 | table = crst_table_alloc(mm, mm->context.noexec); | ||
83 | if (!table) | ||
84 | return -ENOMEM; | ||
85 | spin_lock(&mm->page_table_lock); | ||
86 | if (mm->context.asce_limit < limit) { | ||
87 | pgd = (unsigned long *) mm->pgd; | ||
88 | if (mm->context.asce_limit <= (1UL << 31)) { | ||
89 | entry = _REGION3_ENTRY_EMPTY; | ||
90 | mm->context.asce_limit = 1UL << 42; | ||
91 | mm->context.asce_bits = _ASCE_TABLE_LENGTH | | ||
92 | _ASCE_USER_BITS | | ||
93 | _ASCE_TYPE_REGION3; | ||
94 | } else { | ||
95 | entry = _REGION2_ENTRY_EMPTY; | ||
96 | mm->context.asce_limit = 1UL << 53; | ||
97 | mm->context.asce_bits = _ASCE_TABLE_LENGTH | | ||
98 | _ASCE_USER_BITS | | ||
99 | _ASCE_TYPE_REGION2; | ||
100 | } | ||
101 | crst_table_init(table, entry); | ||
102 | pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd); | ||
103 | mm->pgd = (pgd_t *) table; | ||
104 | table = NULL; | ||
105 | } | ||
106 | spin_unlock(&mm->page_table_lock); | ||
107 | if (table) | ||
108 | crst_table_free(mm, table); | ||
109 | if (mm->context.asce_limit < limit) | ||
110 | goto repeat; | ||
111 | update_mm(mm, current); | ||
112 | return 0; | ||
113 | } | ||
114 | |||
115 | void crst_table_downgrade(struct mm_struct *mm, unsigned long limit) | ||
116 | { | ||
117 | pgd_t *pgd; | ||
118 | |||
119 | if (mm->context.asce_limit <= limit) | ||
120 | return; | ||
121 | __tlb_flush_mm(mm); | ||
122 | while (mm->context.asce_limit > limit) { | ||
123 | pgd = mm->pgd; | ||
124 | switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) { | ||
125 | case _REGION_ENTRY_TYPE_R2: | ||
126 | mm->context.asce_limit = 1UL << 42; | ||
127 | mm->context.asce_bits = _ASCE_TABLE_LENGTH | | ||
128 | _ASCE_USER_BITS | | ||
129 | _ASCE_TYPE_REGION3; | ||
130 | break; | ||
131 | case _REGION_ENTRY_TYPE_R3: | ||
132 | mm->context.asce_limit = 1UL << 31; | ||
133 | mm->context.asce_bits = _ASCE_TABLE_LENGTH | | ||
134 | _ASCE_USER_BITS | | ||
135 | _ASCE_TYPE_SEGMENT; | ||
136 | break; | ||
137 | default: | ||
138 | BUG(); | ||
139 | } | ||
140 | mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN); | ||
141 | crst_table_free(mm, (unsigned long *) pgd); | ||
142 | } | ||
143 | update_mm(mm, current); | ||
144 | } | ||
145 | #endif | ||
146 | |||
73 | /* | 147 | /* |
74 | * page table entry allocation/free routines. | 148 | * page table entry allocation/free routines. |
75 | */ | 149 | */ |
diff --git a/include/asm-s390/elf.h b/include/asm-s390/elf.h index b760cd4de385..b3ac262c4582 100644 --- a/include/asm-s390/elf.h +++ b/include/asm-s390/elf.h | |||
@@ -138,7 +138,7 @@ typedef s390_regs elf_gregset_t; | |||
138 | use of this is to invoke "./ld.so someprog" to test out a new version of | 138 | use of this is to invoke "./ld.so someprog" to test out a new version of |
139 | the loader. We need to make sure that it is out of the way of the program | 139 | the loader. We need to make sure that it is out of the way of the program |
140 | that it will "exec", and that there is sufficient room for the brk. */ | 140 | that it will "exec", and that there is sufficient room for the brk. */ |
141 | #define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) | 141 | #define ELF_ET_DYN_BASE (STACK_TOP / 3 * 2) |
142 | 142 | ||
143 | /* Wow, the "main" arch needs arch dependent functions too.. :) */ | 143 | /* Wow, the "main" arch needs arch dependent functions too.. :) */ |
144 | 144 | ||
diff --git a/include/asm-s390/mmu.h b/include/asm-s390/mmu.h index 13ec4215f437..1698e29c5b20 100644 --- a/include/asm-s390/mmu.h +++ b/include/asm-s390/mmu.h | |||
@@ -5,6 +5,7 @@ typedef struct { | |||
5 | struct list_head crst_list; | 5 | struct list_head crst_list; |
6 | struct list_head pgtable_list; | 6 | struct list_head pgtable_list; |
7 | unsigned long asce_bits; | 7 | unsigned long asce_bits; |
8 | unsigned long asce_limit; | ||
8 | int noexec; | 9 | int noexec; |
9 | } mm_context_t; | 10 | } mm_context_t; |
10 | 11 | ||
diff --git a/include/asm-s390/mmu_context.h b/include/asm-s390/mmu_context.h index b3ea3e199921..b5a34c6f91a9 100644 --- a/include/asm-s390/mmu_context.h +++ b/include/asm-s390/mmu_context.h | |||
@@ -18,9 +18,11 @@ static inline int init_new_context(struct task_struct *tsk, | |||
18 | { | 18 | { |
19 | mm->context.asce_bits = _ASCE_TABLE_LENGTH | _ASCE_USER_BITS; | 19 | mm->context.asce_bits = _ASCE_TABLE_LENGTH | _ASCE_USER_BITS; |
20 | #ifdef CONFIG_64BIT | 20 | #ifdef CONFIG_64BIT |
21 | mm->context.asce_bits |= _ASCE_TYPE_REGION2; | 21 | mm->context.asce_bits |= _ASCE_TYPE_REGION3; |
22 | #endif | 22 | #endif |
23 | mm->context.noexec = s390_noexec; | 23 | mm->context.noexec = s390_noexec; |
24 | mm->context.asce_limit = STACK_TOP_MAX; | ||
25 | crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); | ||
24 | return 0; | 26 | return 0; |
25 | } | 27 | } |
26 | 28 | ||
@@ -47,13 +49,12 @@ static inline void update_mm(struct mm_struct *mm, struct task_struct *tsk) | |||
47 | /* Load home space page table origin. */ | 49 | /* Load home space page table origin. */ |
48 | asm volatile(LCTL_OPCODE" 13,13,%0" | 50 | asm volatile(LCTL_OPCODE" 13,13,%0" |
49 | : : "m" (S390_lowcore.user_asce) ); | 51 | : : "m" (S390_lowcore.user_asce) ); |
52 | set_fs(current->thread.mm_segment); | ||
50 | } | 53 | } |
51 | 54 | ||
52 | static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, | 55 | static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, |
53 | struct task_struct *tsk) | 56 | struct task_struct *tsk) |
54 | { | 57 | { |
55 | if (unlikely(prev == next)) | ||
56 | return; | ||
57 | cpu_set(smp_processor_id(), next->cpu_vm_mask); | 58 | cpu_set(smp_processor_id(), next->cpu_vm_mask); |
58 | update_mm(next, tsk); | 59 | update_mm(next, tsk); |
59 | } | 60 | } |
@@ -65,7 +66,6 @@ static inline void activate_mm(struct mm_struct *prev, | |||
65 | struct mm_struct *next) | 66 | struct mm_struct *next) |
66 | { | 67 | { |
67 | switch_mm(prev, next, current); | 68 | switch_mm(prev, next, current); |
68 | set_fs(current->thread.mm_segment); | ||
69 | } | 69 | } |
70 | 70 | ||
71 | #endif /* __S390_MMU_CONTEXT_H */ | 71 | #endif /* __S390_MMU_CONTEXT_H */ |
diff --git a/include/asm-s390/pgalloc.h b/include/asm-s390/pgalloc.h index cc47dd65a499..f5b2bf3d7c1d 100644 --- a/include/asm-s390/pgalloc.h +++ b/include/asm-s390/pgalloc.h | |||
@@ -73,9 +73,16 @@ static inline unsigned long pgd_entry_type(struct mm_struct *mm) | |||
73 | 73 | ||
74 | static inline unsigned long pgd_entry_type(struct mm_struct *mm) | 74 | static inline unsigned long pgd_entry_type(struct mm_struct *mm) |
75 | { | 75 | { |
76 | if (mm->context.asce_limit <= (1UL << 31)) | ||
77 | return _SEGMENT_ENTRY_EMPTY; | ||
78 | if (mm->context.asce_limit <= (1UL << 42)) | ||
79 | return _REGION3_ENTRY_EMPTY; | ||
76 | return _REGION2_ENTRY_EMPTY; | 80 | return _REGION2_ENTRY_EMPTY; |
77 | } | 81 | } |
78 | 82 | ||
83 | int crst_table_upgrade(struct mm_struct *, unsigned long limit); | ||
84 | void crst_table_downgrade(struct mm_struct *, unsigned long limit); | ||
85 | |||
79 | static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) | 86 | static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) |
80 | { | 87 | { |
81 | unsigned long *table = crst_table_alloc(mm, mm->context.noexec); | 88 | unsigned long *table = crst_table_alloc(mm, mm->context.noexec); |
@@ -102,12 +109,12 @@ static inline void pgd_populate_kernel(struct mm_struct *mm, | |||
102 | 109 | ||
103 | static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) | 110 | static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) |
104 | { | 111 | { |
105 | pgd_t *shadow_pgd = get_shadow_table(pgd); | ||
106 | pud_t *shadow_pud = get_shadow_table(pud); | ||
107 | |||
108 | if (shadow_pgd && shadow_pud) | ||
109 | pgd_populate_kernel(mm, shadow_pgd, shadow_pud); | ||
110 | pgd_populate_kernel(mm, pgd, pud); | 112 | pgd_populate_kernel(mm, pgd, pud); |
113 | if (mm->context.noexec) { | ||
114 | pgd = get_shadow_table(pgd); | ||
115 | pud = get_shadow_table(pud); | ||
116 | pgd_populate_kernel(mm, pgd, pud); | ||
117 | } | ||
111 | } | 118 | } |
112 | 119 | ||
113 | static inline void pud_populate_kernel(struct mm_struct *mm, | 120 | static inline void pud_populate_kernel(struct mm_struct *mm, |
@@ -130,14 +137,9 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) | |||
130 | 137 | ||
131 | static inline pgd_t *pgd_alloc(struct mm_struct *mm) | 138 | static inline pgd_t *pgd_alloc(struct mm_struct *mm) |
132 | { | 139 | { |
133 | unsigned long *crst; | ||
134 | |||
135 | INIT_LIST_HEAD(&mm->context.crst_list); | 140 | INIT_LIST_HEAD(&mm->context.crst_list); |
136 | INIT_LIST_HEAD(&mm->context.pgtable_list); | 141 | INIT_LIST_HEAD(&mm->context.pgtable_list); |
137 | crst = crst_table_alloc(mm, s390_noexec); | 142 | return (pgd_t *) crst_table_alloc(mm, s390_noexec); |
138 | if (crst) | ||
139 | crst_table_init(crst, pgd_entry_type(mm)); | ||
140 | return (pgd_t *) crst; | ||
141 | } | 143 | } |
142 | #define pgd_free(mm, pgd) crst_table_free(mm, (unsigned long *) pgd) | 144 | #define pgd_free(mm, pgd) crst_table_free(mm, (unsigned long *) pgd) |
143 | 145 | ||
diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h index 8f473a718111..65154dc9a9e5 100644 --- a/include/asm-s390/pgtable.h +++ b/include/asm-s390/pgtable.h | |||
@@ -421,36 +421,54 @@ static inline int pud_bad(pud_t pud) { return 0; } | |||
421 | 421 | ||
422 | static inline int pgd_present(pgd_t pgd) | 422 | static inline int pgd_present(pgd_t pgd) |
423 | { | 423 | { |
424 | if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2) | ||
425 | return 1; | ||
424 | return (pgd_val(pgd) & _REGION_ENTRY_ORIGIN) != 0UL; | 426 | return (pgd_val(pgd) & _REGION_ENTRY_ORIGIN) != 0UL; |
425 | } | 427 | } |
426 | 428 | ||
427 | static inline int pgd_none(pgd_t pgd) | 429 | static inline int pgd_none(pgd_t pgd) |
428 | { | 430 | { |
431 | if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2) | ||
432 | return 0; | ||
429 | return (pgd_val(pgd) & _REGION_ENTRY_INV) != 0UL; | 433 | return (pgd_val(pgd) & _REGION_ENTRY_INV) != 0UL; |
430 | } | 434 | } |
431 | 435 | ||
432 | static inline int pgd_bad(pgd_t pgd) | 436 | static inline int pgd_bad(pgd_t pgd) |
433 | { | 437 | { |
438 | /* | ||
439 | * With dynamic page table levels the pgd can be a region table | ||
440 | * entry or a segment table entry. Check for the bits that are | ||
441 | * invalid for either table entry. | ||
442 | */ | ||
434 | unsigned long mask = | 443 | unsigned long mask = |
435 | ~_REGION_ENTRY_ORIGIN & ~_REGION_ENTRY_INV & | 444 | ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INV & |
436 | ~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH; | 445 | ~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH; |
437 | return (pgd_val(pgd) & mask) != 0; | 446 | return (pgd_val(pgd) & mask) != 0; |
438 | } | 447 | } |
439 | 448 | ||
440 | static inline int pud_present(pud_t pud) | 449 | static inline int pud_present(pud_t pud) |
441 | { | 450 | { |
451 | if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3) | ||
452 | return 1; | ||
442 | return (pud_val(pud) & _REGION_ENTRY_ORIGIN) != 0UL; | 453 | return (pud_val(pud) & _REGION_ENTRY_ORIGIN) != 0UL; |
443 | } | 454 | } |
444 | 455 | ||
445 | static inline int pud_none(pud_t pud) | 456 | static inline int pud_none(pud_t pud) |
446 | { | 457 | { |
458 | if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3) | ||
459 | return 0; | ||
447 | return (pud_val(pud) & _REGION_ENTRY_INV) != 0UL; | 460 | return (pud_val(pud) & _REGION_ENTRY_INV) != 0UL; |
448 | } | 461 | } |
449 | 462 | ||
450 | static inline int pud_bad(pud_t pud) | 463 | static inline int pud_bad(pud_t pud) |
451 | { | 464 | { |
465 | /* | ||
466 | * With dynamic page table levels the pud can be a region table | ||
467 | * entry or a segment table entry. Check for the bits that are | ||
468 | * invalid for either table entry. | ||
469 | */ | ||
452 | unsigned long mask = | 470 | unsigned long mask = |
453 | ~_REGION_ENTRY_ORIGIN & ~_REGION_ENTRY_INV & | 471 | ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INV & |
454 | ~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH; | 472 | ~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH; |
455 | return (pud_val(pud) & mask) != 0; | 473 | return (pud_val(pud) & mask) != 0; |
456 | } | 474 | } |
@@ -535,7 +553,8 @@ static inline int pte_young(pte_t pte) | |||
535 | 553 | ||
536 | static inline void pgd_clear_kernel(pgd_t * pgd) | 554 | static inline void pgd_clear_kernel(pgd_t * pgd) |
537 | { | 555 | { |
538 | pgd_val(*pgd) = _REGION2_ENTRY_EMPTY; | 556 | if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) |
557 | pgd_val(*pgd) = _REGION2_ENTRY_EMPTY; | ||
539 | } | 558 | } |
540 | 559 | ||
541 | static inline void pgd_clear(pgd_t * pgd) | 560 | static inline void pgd_clear(pgd_t * pgd) |
@@ -549,10 +568,11 @@ static inline void pgd_clear(pgd_t * pgd) | |||
549 | 568 | ||
550 | static inline void pud_clear_kernel(pud_t *pud) | 569 | static inline void pud_clear_kernel(pud_t *pud) |
551 | { | 570 | { |
552 | pud_val(*pud) = _REGION3_ENTRY_EMPTY; | 571 | if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) |
572 | pud_val(*pud) = _REGION3_ENTRY_EMPTY; | ||
553 | } | 573 | } |
554 | 574 | ||
555 | static inline void pud_clear(pud_t * pud) | 575 | static inline void pud_clear(pud_t *pud) |
556 | { | 576 | { |
557 | pud_t *shadow = get_shadow_table(pud); | 577 | pud_t *shadow = get_shadow_table(pud); |
558 | 578 | ||
@@ -841,13 +861,17 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) | |||
841 | 861 | ||
842 | static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address) | 862 | static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address) |
843 | { | 863 | { |
844 | pud_t *pud = (pud_t *) pgd_deref(*pgd); | 864 | pud_t *pud = (pud_t *) pgd; |
865 | if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) | ||
866 | pud = (pud_t *) pgd_deref(*pgd); | ||
845 | return pud + pud_index(address); | 867 | return pud + pud_index(address); |
846 | } | 868 | } |
847 | 869 | ||
848 | static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) | 870 | static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) |
849 | { | 871 | { |
850 | pmd_t *pmd = (pmd_t *) pud_deref(*pud); | 872 | pmd_t *pmd = (pmd_t *) pud; |
873 | if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) | ||
874 | pmd = (pmd_t *) pud_deref(*pud); | ||
851 | return pmd + pmd_index(address); | 875 | return pmd + pmd_index(address); |
852 | } | 876 | } |
853 | 877 | ||
diff --git a/include/asm-s390/processor.h b/include/asm-s390/processor.h index 5a21f457d583..51d88912aa20 100644 --- a/include/asm-s390/processor.h +++ b/include/asm-s390/processor.h | |||
@@ -81,11 +81,12 @@ extern int get_cpu_capability(unsigned int *); | |||
81 | 81 | ||
82 | #ifndef __s390x__ | 82 | #ifndef __s390x__ |
83 | #define STACK_TOP (1UL << 31) | 83 | #define STACK_TOP (1UL << 31) |
84 | #define STACK_TOP_MAX (1UL << 31) | ||
84 | #else /* __s390x__ */ | 85 | #else /* __s390x__ */ |
85 | #define STACK_TOP (1UL << (test_thread_flag(TIF_31BIT) ? 31:53)) | 86 | #define STACK_TOP (1UL << (test_thread_flag(TIF_31BIT) ? 31:42)) |
87 | #define STACK_TOP_MAX (1UL << 42) | ||
86 | #endif /* __s390x__ */ | 88 | #endif /* __s390x__ */ |
87 | 89 | ||
88 | #define STACK_TOP_MAX STACK_TOP | ||
89 | 90 | ||
90 | #endif | 91 | #endif |
91 | 92 | ||
@@ -142,8 +143,6 @@ struct stack_frame { | |||
142 | /* | 143 | /* |
143 | * Do necessary setup to start up a new thread. | 144 | * Do necessary setup to start up a new thread. |
144 | */ | 145 | */ |
145 | #ifndef __s390x__ | ||
146 | |||
147 | #define start_thread(regs, new_psw, new_stackp) do { \ | 146 | #define start_thread(regs, new_psw, new_stackp) do { \ |
148 | set_fs(USER_DS); \ | 147 | set_fs(USER_DS); \ |
149 | regs->psw.mask = psw_user_bits; \ | 148 | regs->psw.mask = psw_user_bits; \ |
@@ -151,24 +150,6 @@ struct stack_frame { | |||
151 | regs->gprs[15] = new_stackp ; \ | 150 | regs->gprs[15] = new_stackp ; \ |
152 | } while (0) | 151 | } while (0) |
153 | 152 | ||
154 | #else /* __s390x__ */ | ||
155 | |||
156 | #define start_thread(regs, new_psw, new_stackp) do { \ | ||
157 | set_fs(USER_DS); \ | ||
158 | regs->psw.mask = psw_user_bits; \ | ||
159 | regs->psw.addr = new_psw; \ | ||
160 | regs->gprs[15] = new_stackp; \ | ||
161 | } while (0) | ||
162 | |||
163 | #define start_thread31(regs, new_psw, new_stackp) do { \ | ||
164 | set_fs(USER_DS); \ | ||
165 | regs->psw.mask = psw_user32_bits; \ | ||
166 | regs->psw.addr = new_psw; \ | ||
167 | regs->gprs[15] = new_stackp; \ | ||
168 | } while (0) | ||
169 | |||
170 | #endif /* __s390x__ */ | ||
171 | |||
172 | /* Forward declaration, a strange C thing */ | 153 | /* Forward declaration, a strange C thing */ |
173 | struct task_struct; | 154 | struct task_struct; |
174 | struct mm_struct; | 155 | struct mm_struct; |
diff --git a/include/asm-s390/tlb.h b/include/asm-s390/tlb.h index 9b2ddb7aac49..3d8a96d39d9d 100644 --- a/include/asm-s390/tlb.h +++ b/include/asm-s390/tlb.h | |||
@@ -109,10 +109,15 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte) | |||
109 | /* | 109 | /* |
110 | * pmd_free_tlb frees a pmd table and clears the CRSTE for the | 110 | * pmd_free_tlb frees a pmd table and clears the CRSTE for the |
111 | * segment table entry from the tlb. | 111 | * segment table entry from the tlb. |
112 | * If the mm uses a two level page table the single pmd is freed | ||
113 | * as the pgd. pmd_free_tlb checks the asce_limit against 2GB | ||
114 | * to avoid the double free of the pmd in this case. | ||
112 | */ | 115 | */ |
113 | static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) | 116 | static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) |
114 | { | 117 | { |
115 | #ifdef __s390x__ | 118 | #ifdef __s390x__ |
119 | if (tlb->mm->context.asce_limit <= (1UL << 31)) | ||
120 | return; | ||
116 | if (!tlb->fullmm) { | 121 | if (!tlb->fullmm) { |
117 | tlb->array[--tlb->nr_pxds] = pmd; | 122 | tlb->array[--tlb->nr_pxds] = pmd; |
118 | if (tlb->nr_ptes >= tlb->nr_pxds) | 123 | if (tlb->nr_ptes >= tlb->nr_pxds) |
@@ -125,10 +130,15 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) | |||
125 | /* | 130 | /* |
126 | * pud_free_tlb frees a pud table and clears the CRSTE for the | 131 | * pud_free_tlb frees a pud table and clears the CRSTE for the |
127 | * region third table entry from the tlb. | 132 | * region third table entry from the tlb. |
133 | * If the mm uses a three level page table the single pud is freed | ||
134 | * as the pgd. pud_free_tlb checks the asce_limit against 4TB | ||
135 | * to avoid the double free of the pud in this case. | ||
128 | */ | 136 | */ |
129 | static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud) | 137 | static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud) |
130 | { | 138 | { |
131 | #ifdef __s390x__ | 139 | #ifdef __s390x__ |
140 | if (tlb->mm->context.asce_limit <= (1UL << 42)) | ||
141 | return; | ||
132 | if (!tlb->fullmm) { | 142 | if (!tlb->fullmm) { |
133 | tlb->array[--tlb->nr_pxds] = pud; | 143 | tlb->array[--tlb->nr_pxds] = pud; |
134 | if (tlb->nr_ptes >= tlb->nr_pxds) | 144 | if (tlb->nr_ptes >= tlb->nr_pxds) |