author		Martin Schwidefsky <schwidefsky@de.ibm.com>	2008-02-09 12:24:37 -0500
committer	Martin Schwidefsky <schwidefsky@de.ibm.com>	2008-02-09 12:24:41 -0500
commit		6252d702c5311ce916caf75ed82e5c8245171c92 (patch)
tree		3490f27b5f888ff2c1ec915d4e7201000f37a771
parent		5a216a20837c5f5fa1ca4b8ae8991ffd96b08e6f (diff)
[S390] dynamic page tables.
Add support for a different number of page table levels depending on the
highest address used by a process. A 31-bit process now gets a two-level
page table instead of the four-level table that has been the default since
the pud was introduced. Likewise, a normal 64-bit process uses three levels
instead of four. Only when a process outgrows the 4 terabytes addressable
with a three-level page table is a fourth level added dynamically; the
process can then use up to 8 petabytes.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
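
For orientation, a minimal sketch of the level selection this patch wires in,
assuming only the constants visible in the diff below; pick_table_levels() is
a hypothetical helper name used purely for illustration, not part of the patch:

/* Hypothetical helper: map the highest address a process uses to the
 * number of page table levels (and implicitly the ASCE type) chosen
 * by this patch. */
static int pick_table_levels(unsigned long highest_addr)
{
        if (highest_addr <= (1UL << 31))        /* 2GB: 31-bit process */
                return 2;                       /* segment table as top level */
        if (highest_addr <= (1UL << 42))        /* 4TB: 64-bit default */
                return 3;                       /* region-third table */
        return 4;                               /* region-second table, 8PB max */
}
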
-rw-r--r--	arch/s390/kernel/binfmt_elf32.c		11
-rw-r--r--	arch/s390/kernel/traps.c		 3
-rw-r--r--	arch/s390/mm/fault.c			40
-rw-r--r--	arch/s390/mm/init.c			 5
-rw-r--r--	arch/s390/mm/mmap.c			65
-rw-r--r--	arch/s390/mm/pgtable.c			74
-rw-r--r--	include/asm-s390/elf.h			 2
-rw-r--r--	include/asm-s390/mmu.h			 1
-rw-r--r--	include/asm-s390/mmu_context.h		 8
-rw-r--r--	include/asm-s390/pgalloc.h		24
-rw-r--r--	include/asm-s390/pgtable.h		38
-rw-r--r--	include/asm-s390/processor.h		25
-rw-r--r--	include/asm-s390/tlb.h			10
13 files changed, 258 insertions, 48 deletions
diff --git a/arch/s390/kernel/binfmt_elf32.c b/arch/s390/kernel/binfmt_elf32.c
index f1e40ca00d8d..3e1c315b736d 100644
--- a/arch/s390/kernel/binfmt_elf32.c
+++ b/arch/s390/kernel/binfmt_elf32.c
@@ -134,6 +134,7 @@ static inline int dump_task_fpu(struct task_struct *tsk, elf_fpregset_t *fpregs)
 }
 
 #include <asm/processor.h>
+#include <asm/pgalloc.h>
 #include <linux/module.h>
 #include <linux/elfcore.h>
 #include <linux/binfmts.h>
@@ -183,6 +184,16 @@ struct elf_prpsinfo32
 #undef start_thread
 #define start_thread            start_thread31
 
+static inline void start_thread31(struct pt_regs *regs, unsigned long new_psw,
+                                  unsigned long new_stackp)
+{
+        set_fs(USER_DS);
+        regs->psw.mask  = psw_user32_bits;
+        regs->psw.addr  = new_psw;
+        regs->gprs[15]  = new_stackp;
+        crst_table_downgrade(current->mm, 1UL << 31);
+}
+
 MODULE_DESCRIPTION("Binary format loader for compatibility with 32bit Linux for S390 binaries,"
                    " Copyright 2000 IBM Corporation");
 MODULE_AUTHOR("Gerhard Tonn <ton@de.ibm.com>");
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index a4d29025ddbd..60f728aeaf12 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -60,6 +60,7 @@ int sysctl_userprocess_debug = 0;
 extern pgm_check_handler_t do_protection_exception;
 extern pgm_check_handler_t do_dat_exception;
 extern pgm_check_handler_t do_monitor_call;
+extern pgm_check_handler_t do_asce_exception;
 
 #define stack_pointer ({ void **sp; asm("la %0,0(15)" : "=&d" (sp)); sp; })
 
@@ -730,7 +731,7 @@ void __init trap_init(void)
         pgm_check_table[0x12] = &translation_exception;
         pgm_check_table[0x13] = &special_op_exception;
 #ifdef CONFIG_64BIT
-        pgm_check_table[0x38] = &do_dat_exception;
+        pgm_check_table[0x38] = &do_asce_exception;
         pgm_check_table[0x39] = &do_dat_exception;
         pgm_check_table[0x3A] = &do_dat_exception;
         pgm_check_table[0x3B] = &do_dat_exception;
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 2456b52ed068..ed13d429a487 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -32,6 +32,7 @@
 #include <asm/system.h>
 #include <asm/pgtable.h>
 #include <asm/s390_ext.h>
+#include <asm/mmu_context.h>
 
 #ifndef CONFIG_64BIT
 #define __FAIL_ADDR_MASK 0x7ffff000
@@ -444,6 +445,45 @@ void __kprobes do_dat_exception(struct pt_regs *regs, unsigned long error_code)
         do_exception(regs, error_code & 0xff, 0);
 }
 
+#ifdef CONFIG_64BIT
+void __kprobes do_asce_exception(struct pt_regs *regs, unsigned long error_code)
+{
+        struct mm_struct *mm;
+        struct vm_area_struct *vma;
+        unsigned long address;
+        int space;
+
+        mm = current->mm;
+        address = S390_lowcore.trans_exc_code & __FAIL_ADDR_MASK;
+        space = check_space(current);
+
+        if (unlikely(space == 0 || in_atomic() || !mm))
+                goto no_context;
+
+        local_irq_enable();
+
+        down_read(&mm->mmap_sem);
+        vma = find_vma(mm, address);
+        up_read(&mm->mmap_sem);
+
+        if (vma) {
+                update_mm(mm, current);
+                return;
+        }
+
+        /* User mode accesses just cause a SIGSEGV */
+        if (regs->psw.mask & PSW_MASK_PSTATE) {
+                current->thread.prot_addr = address;
+                current->thread.trap_no = error_code;
+                do_sigsegv(regs, error_code, SEGV_MAPERR, address);
+                return;
+        }
+
+no_context:
+        do_no_context(regs, error_code, address);
+}
+#endif
+
 #ifdef CONFIG_PFAULT
 /*
  * 'pfault' pseudo page faults routines.
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 248a71010700..8053245fe259 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -112,8 +112,9 @@ void __init paging_init(void)
         init_mm.pgd = swapper_pg_dir;
         S390_lowcore.kernel_asce = __pa(init_mm.pgd) & PAGE_MASK;
 #ifdef CONFIG_64BIT
-        S390_lowcore.kernel_asce |= _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
-        pgd_type = _REGION2_ENTRY_EMPTY;
+        /* A three level page table (4TB) is enough for the kernel space. */
+        S390_lowcore.kernel_asce |= _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
+        pgd_type = _REGION3_ENTRY_EMPTY;
 #else
         S390_lowcore.kernel_asce |= _ASCE_TABLE_LENGTH;
         pgd_type = _SEGMENT_ENTRY_EMPTY;
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index 356257c171de..5932a824547a 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -27,6 +27,7 @@
 #include <linux/personality.h>
 #include <linux/mm.h>
 #include <linux/module.h>
+#include <asm/pgalloc.h>
 
 /*
  * Top of mmap area (just below the process stack).
@@ -62,6 +63,8 @@ static inline int mmap_is_legacy(void)
                 current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY;
 }
 
+#ifndef CONFIG_64BIT
+
 /*
  * This function, called very early during the creation of a new
  * process VM image, sets up which VM layout function to use:
@@ -84,3 +87,65 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 }
 EXPORT_SYMBOL_GPL(arch_pick_mmap_layout);
 
+#else
+
+static unsigned long
+s390_get_unmapped_area(struct file *filp, unsigned long addr,
+                unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+        struct mm_struct *mm = current->mm;
+        int rc;
+
+        addr = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
+        if (addr & ~PAGE_MASK)
+                return addr;
+        if (unlikely(mm->context.asce_limit < addr + len)) {
+                rc = crst_table_upgrade(mm, addr + len);
+                if (rc)
+                        return (unsigned long) rc;
+        }
+        return addr;
+}
+
+static unsigned long
+s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+                          const unsigned long len, const unsigned long pgoff,
+                          const unsigned long flags)
+{
+        struct mm_struct *mm = current->mm;
+        unsigned long addr = addr0;
+        int rc;
+
+        addr = arch_get_unmapped_area_topdown(filp, addr, len, pgoff, flags);
+        if (addr & ~PAGE_MASK)
+                return addr;
+        if (unlikely(mm->context.asce_limit < addr + len)) {
+                rc = crst_table_upgrade(mm, addr + len);
+                if (rc)
+                        return (unsigned long) rc;
+        }
+        return addr;
+}
+/*
+ * This function, called very early during the creation of a new
+ * process VM image, sets up which VM layout function to use:
+ */
+void arch_pick_mmap_layout(struct mm_struct *mm)
+{
+        /*
+         * Fall back to the standard layout if the personality
+         * bit is set, or if the expected stack growth is unlimited:
+         */
+        if (mmap_is_legacy()) {
+                mm->mmap_base = TASK_UNMAPPED_BASE;
+                mm->get_unmapped_area = s390_get_unmapped_area;
+                mm->unmap_area = arch_unmap_area;
+        } else {
+                mm->mmap_base = mmap_base();
+                mm->get_unmapped_area = s390_get_unmapped_area_topdown;
+                mm->unmap_area = arch_unmap_area_topdown;
+        }
+}
+EXPORT_SYMBOL_GPL(arch_pick_mmap_layout);
+
+#endif
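
A hypothetical userspace illustration (not part of the patch) of when the
upgrade path above may fire: with the 4TB default limit, a 64-bit process
asking for a mapping beyond 1UL << 42 should make s390_get_unmapped_area()
call crst_table_upgrade() on the fly. The address hint and sizes here are
assumptions chosen for the example:

#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
        /* Hint just above the 4TB (three level) default address space. */
        void *hint = (void *) (1UL << 42);
        void *p = mmap(hint, 1UL << 20, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        if (p == MAP_FAILED)
                perror("mmap");
        else
                printf("mapped at %p\n", p);    /* 4 levels now in use */
        return 0;
}
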
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 809e77893039..fd072013f88c 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -23,6 +23,7 @@
 #include <asm/pgalloc.h>
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
+#include <asm/mmu_context.h>
 
 #ifndef CONFIG_64BIT
 #define ALLOC_ORDER     1
@@ -70,6 +71,79 @@ void crst_table_free(struct mm_struct *mm, unsigned long *table)
         free_pages((unsigned long) table, ALLOC_ORDER);
 }
 
+#ifdef CONFIG_64BIT
+int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
+{
+        unsigned long *table, *pgd;
+        unsigned long entry;
+
+        BUG_ON(limit > (1UL << 53));
+repeat:
+        table = crst_table_alloc(mm, mm->context.noexec);
+        if (!table)
+                return -ENOMEM;
+        spin_lock(&mm->page_table_lock);
+        if (mm->context.asce_limit < limit) {
+                pgd = (unsigned long *) mm->pgd;
+                if (mm->context.asce_limit <= (1UL << 31)) {
+                        entry = _REGION3_ENTRY_EMPTY;
+                        mm->context.asce_limit = 1UL << 42;
+                        mm->context.asce_bits = _ASCE_TABLE_LENGTH |
+                                                _ASCE_USER_BITS |
+                                                _ASCE_TYPE_REGION3;
+                } else {
+                        entry = _REGION2_ENTRY_EMPTY;
+                        mm->context.asce_limit = 1UL << 53;
+                        mm->context.asce_bits = _ASCE_TABLE_LENGTH |
+                                                _ASCE_USER_BITS |
+                                                _ASCE_TYPE_REGION2;
+                }
+                crst_table_init(table, entry);
+                pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
+                mm->pgd = (pgd_t *) table;
+                table = NULL;
+        }
+        spin_unlock(&mm->page_table_lock);
+        if (table)
+                crst_table_free(mm, table);
+        if (mm->context.asce_limit < limit)
+                goto repeat;
+        update_mm(mm, current);
+        return 0;
+}
+
+void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
+{
+        pgd_t *pgd;
+
+        if (mm->context.asce_limit <= limit)
+                return;
+        __tlb_flush_mm(mm);
+        while (mm->context.asce_limit > limit) {
+                pgd = mm->pgd;
+                switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
+                case _REGION_ENTRY_TYPE_R2:
+                        mm->context.asce_limit = 1UL << 42;
+                        mm->context.asce_bits = _ASCE_TABLE_LENGTH |
+                                                _ASCE_USER_BITS |
+                                                _ASCE_TYPE_REGION3;
+                        break;
+                case _REGION_ENTRY_TYPE_R3:
+                        mm->context.asce_limit = 1UL << 31;
+                        mm->context.asce_bits = _ASCE_TABLE_LENGTH |
+                                                _ASCE_USER_BITS |
+                                                _ASCE_TYPE_SEGMENT;
+                        break;
+                default:
+                        BUG();
+                }
+                mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
+                crst_table_free(mm, (unsigned long *) pgd);
+        }
+        update_mm(mm, current);
+}
+#endif
+
 /*
  * page table entry allocation/free routines.
  */
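
A minimal, runnable model of the upgrade loop above (plain C with
hypothetical variables; the real code additionally handles locking,
allocation failure and the noexec shadow tables). A 31-bit process that
suddenly needs more than 4TB goes through two rounds, 2GB -> 4TB -> 8PB:

#include <stdio.h>

int main(void)
{
        unsigned long asce_limit = 1UL << 31;   /* 31-bit process */
        unsigned long limit = 1UL << 45;        /* needs more than 4TB */
        int levels = 2;

        while (asce_limit < limit) {            /* mirrors "goto repeat" */
                asce_limit = (asce_limit <= (1UL << 31)) ?
                        (1UL << 42) : (1UL << 53);
                levels++;
                printf("now %d levels, asce_limit = 0x%lx\n",
                       levels, asce_limit);
        }
        return 0;
}
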
diff --git a/include/asm-s390/elf.h b/include/asm-s390/elf.h
index b760cd4de385..b3ac262c4582 100644
--- a/include/asm-s390/elf.h
+++ b/include/asm-s390/elf.h
@@ -138,7 +138,7 @@ typedef s390_regs elf_gregset_t;
    use of this is to invoke "./ld.so someprog" to test out a new version of
    the loader.  We need to make sure that it is out of the way of the program
    that it will "exec", and that there is sufficient room for the brk. */
-#define ELF_ET_DYN_BASE         (TASK_SIZE / 3 * 2)
+#define ELF_ET_DYN_BASE         (STACK_TOP / 3 * 2)
 
 /* Wow, the "main" arch needs arch dependent functions too.. :) */
 
diff --git a/include/asm-s390/mmu.h b/include/asm-s390/mmu.h
index 13ec4215f437..1698e29c5b20 100644
--- a/include/asm-s390/mmu.h
+++ b/include/asm-s390/mmu.h
@@ -5,6 +5,7 @@ typedef struct {
         struct list_head crst_list;
         struct list_head pgtable_list;
         unsigned long asce_bits;
+        unsigned long asce_limit;
         int noexec;
 } mm_context_t;
 
diff --git a/include/asm-s390/mmu_context.h b/include/asm-s390/mmu_context.h
index b3ea3e199921..b5a34c6f91a9 100644
--- a/include/asm-s390/mmu_context.h
+++ b/include/asm-s390/mmu_context.h
@@ -18,9 +18,11 @@ static inline int init_new_context(struct task_struct *tsk,
 {
         mm->context.asce_bits = _ASCE_TABLE_LENGTH | _ASCE_USER_BITS;
 #ifdef CONFIG_64BIT
-        mm->context.asce_bits |= _ASCE_TYPE_REGION2;
+        mm->context.asce_bits |= _ASCE_TYPE_REGION3;
 #endif
         mm->context.noexec = s390_noexec;
+        mm->context.asce_limit = STACK_TOP_MAX;
+        crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
         return 0;
 }
 
@@ -47,13 +49,12 @@ static inline void update_mm(struct mm_struct *mm, struct task_struct *tsk)
         /* Load home space page table origin. */
         asm volatile(LCTL_OPCODE" 13,13,%0"
                      : : "m" (S390_lowcore.user_asce) );
+        set_fs(current->thread.mm_segment);
 }
 
 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
                              struct task_struct *tsk)
 {
-        if (unlikely(prev == next))
-                return;
         cpu_set(smp_processor_id(), next->cpu_vm_mask);
         update_mm(next, tsk);
 }
@@ -65,7 +66,6 @@ static inline void activate_mm(struct mm_struct *prev,
                                struct mm_struct *next)
 {
         switch_mm(prev, next, current);
-        set_fs(current->thread.mm_segment);
 }
 
 #endif /* __S390_MMU_CONTEXT_H */
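
The reason update_mm() must be re-run after every upgrade or downgrade is
that the user ASCE combines the (possibly changed) asce_bits with the
(possibly changed) top-level table origin. A sketch of that composition,
assuming the usual s390 ASCE layout; the surrounding control-register
loads visible in the hunk above are elided:

        /* Recompute the user ASCE from the current mm state (sketch). */
        S390_lowcore.user_asce = mm->context.asce_bits | __pa(mm->pgd);
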
diff --git a/include/asm-s390/pgalloc.h b/include/asm-s390/pgalloc.h
index cc47dd65a499..f5b2bf3d7c1d 100644
--- a/include/asm-s390/pgalloc.h
+++ b/include/asm-s390/pgalloc.h
@@ -73,9 +73,16 @@ static inline unsigned long pgd_entry_type(struct mm_struct *mm)
 
 static inline unsigned long pgd_entry_type(struct mm_struct *mm)
 {
+        if (mm->context.asce_limit <= (1UL << 31))
+                return _SEGMENT_ENTRY_EMPTY;
+        if (mm->context.asce_limit <= (1UL << 42))
+                return _REGION3_ENTRY_EMPTY;
         return _REGION2_ENTRY_EMPTY;
 }
 
+int crst_table_upgrade(struct mm_struct *, unsigned long limit);
+void crst_table_downgrade(struct mm_struct *, unsigned long limit);
+
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
 {
         unsigned long *table = crst_table_alloc(mm, mm->context.noexec);
@@ -102,12 +109,12 @@ static inline void pgd_populate_kernel(struct mm_struct *mm,
 
 static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
 {
-        pgd_t *shadow_pgd = get_shadow_table(pgd);
-        pud_t *shadow_pud = get_shadow_table(pud);
-
-        if (shadow_pgd && shadow_pud)
-                pgd_populate_kernel(mm, shadow_pgd, shadow_pud);
         pgd_populate_kernel(mm, pgd, pud);
+        if (mm->context.noexec) {
+                pgd = get_shadow_table(pgd);
+                pud = get_shadow_table(pud);
+                pgd_populate_kernel(mm, pgd, pud);
+        }
 }
 
 static inline void pud_populate_kernel(struct mm_struct *mm,
@@ -130,14 +137,9 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-        unsigned long *crst;
-
         INIT_LIST_HEAD(&mm->context.crst_list);
         INIT_LIST_HEAD(&mm->context.pgtable_list);
-        crst = crst_table_alloc(mm, s390_noexec);
-        if (crst)
-                crst_table_init(crst, pgd_entry_type(mm));
-        return (pgd_t *) crst;
+        return (pgd_t *) crst_table_alloc(mm, s390_noexec);
 }
 #define pgd_free(mm, pgd)       crst_table_free(mm, (unsigned long *) pgd)
 
diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h
index 8f473a718111..65154dc9a9e5 100644
--- a/include/asm-s390/pgtable.h
+++ b/include/asm-s390/pgtable.h
@@ -421,36 +421,54 @@ static inline int pud_bad(pud_t pud) { return 0; }
 
 static inline int pgd_present(pgd_t pgd)
 {
+        if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2)
+                return 1;
         return (pgd_val(pgd) & _REGION_ENTRY_ORIGIN) != 0UL;
 }
 
 static inline int pgd_none(pgd_t pgd)
 {
+        if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2)
+                return 0;
         return (pgd_val(pgd) & _REGION_ENTRY_INV) != 0UL;
 }
 
 static inline int pgd_bad(pgd_t pgd)
 {
+        /*
+         * With dynamic page table levels the pgd can be a region table
+         * entry or a segment table entry. Check for the bits that are
+         * invalid for either table entry.
+         */
         unsigned long mask =
-                ~_REGION_ENTRY_ORIGIN & ~_REGION_ENTRY_INV &
+                ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INV &
                 ~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH;
         return (pgd_val(pgd) & mask) != 0;
 }
 
 static inline int pud_present(pud_t pud)
 {
+        if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3)
+                return 1;
         return (pud_val(pud) & _REGION_ENTRY_ORIGIN) != 0UL;
 }
 
 static inline int pud_none(pud_t pud)
 {
+        if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3)
+                return 0;
         return (pud_val(pud) & _REGION_ENTRY_INV) != 0UL;
 }
 
 static inline int pud_bad(pud_t pud)
 {
+        /*
+         * With dynamic page table levels the pud can be a region table
+         * entry or a segment table entry. Check for the bits that are
+         * invalid for either table entry.
+         */
         unsigned long mask =
-                ~_REGION_ENTRY_ORIGIN & ~_REGION_ENTRY_INV &
+                ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INV &
                 ~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH;
         return (pud_val(pud) & mask) != 0;
 }
@@ -535,7 +553,8 @@ static inline int pte_young(pte_t pte)
 
 static inline void pgd_clear_kernel(pgd_t * pgd)
 {
-        pgd_val(*pgd) = _REGION2_ENTRY_EMPTY;
+        if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
+                pgd_val(*pgd) = _REGION2_ENTRY_EMPTY;
 }
 
 static inline void pgd_clear(pgd_t * pgd)
@@ -549,10 +568,11 @@ static inline void pgd_clear(pgd_t * pgd)
 
 static inline void pud_clear_kernel(pud_t *pud)
 {
-        pud_val(*pud) = _REGION3_ENTRY_EMPTY;
+        if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
+                pud_val(*pud) = _REGION3_ENTRY_EMPTY;
 }
 
-static inline void pud_clear(pud_t * pud)
+static inline void pud_clear(pud_t *pud)
 {
         pud_t *shadow = get_shadow_table(pud);
 
@@ -841,13 +861,17 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
 
 static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address)
 {
-        pud_t *pud = (pud_t *) pgd_deref(*pgd);
+        pud_t *pud = (pud_t *) pgd;
+        if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
+                pud = (pud_t *) pgd_deref(*pgd);
         return pud + pud_index(address);
 }
 
 static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
 {
-        pmd_t *pmd = (pmd_t *) pud_deref(*pud);
+        pmd_t *pmd = (pmd_t *) pud;
+        if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
+                pmd = (pmd_t *) pud_deref(*pud);
         return pmd + pmd_index(address);
 }
 
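
The pass-through in pud_offset()/pmd_offset() is what lets generic
page-walk code run unmodified on 2-, 3- and 4-level tables: below the
threshold level the "offset" function simply returns its argument. A
sketch under that assumption; walk_to_pmd() is a hypothetical helper,
not part of the patch:

#include <asm/pgtable.h>

/* Walk mm's table to the pmd covering address. On a three level table
 * pud_offset() returns its pgd argument unchanged, and on a two level
 * table pmd_offset() does the same, so the walk degrades gracefully. */
static pmd_t *walk_to_pmd(struct mm_struct *mm, unsigned long address)
{
        pgd_t *pgd = pgd_offset(mm, address);
        pud_t *pud = pud_offset(pgd, address);
        return pmd_offset(pud, address);
}
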
diff --git a/include/asm-s390/processor.h b/include/asm-s390/processor.h
index 5a21f457d583..51d88912aa20 100644
--- a/include/asm-s390/processor.h
+++ b/include/asm-s390/processor.h
@@ -81,11 +81,12 @@ extern int get_cpu_capability(unsigned int *);
 
 #ifndef __s390x__
 #define STACK_TOP               (1UL << 31)
+#define STACK_TOP_MAX           (1UL << 31)
 #else /* __s390x__ */
-#define STACK_TOP               (1UL << (test_thread_flag(TIF_31BIT) ? 31:53))
+#define STACK_TOP               (1UL << (test_thread_flag(TIF_31BIT) ? 31:42))
+#define STACK_TOP_MAX           (1UL << 42)
 #endif /* __s390x__ */
 
-#define STACK_TOP_MAX           STACK_TOP
 
 #endif
 
@@ -142,8 +143,6 @@ struct stack_frame {
 /*
  * Do necessary setup to start up a new thread.
  */
-#ifndef __s390x__
-
 #define start_thread(regs, new_psw, new_stackp) do {            \
         set_fs(USER_DS);                                         \
         regs->psw.mask  = psw_user_bits;                         \
@@ -151,24 +150,6 @@ struct stack_frame {
         regs->gprs[15]  = new_stackp ;                           \
 } while (0)
 
-#else /* __s390x__ */
-
-#define start_thread(regs, new_psw, new_stackp) do {            \
-        set_fs(USER_DS);                                         \
-        regs->psw.mask  = psw_user_bits;                         \
-        regs->psw.addr  = new_psw;                               \
-        regs->gprs[15]  = new_stackp;                            \
-} while (0)
-
-#define start_thread31(regs, new_psw, new_stackp) do {           \
-        set_fs(USER_DS);                                         \
-        regs->psw.mask  = psw_user32_bits;                       \
-        regs->psw.addr  = new_psw;                               \
-        regs->gprs[15]  = new_stackp;                            \
-} while (0)
-
-#endif /* __s390x__ */
-
 /* Forward declaration, a strange C thing */
 struct task_struct;
 struct mm_struct;
diff --git a/include/asm-s390/tlb.h b/include/asm-s390/tlb.h
index 9b2ddb7aac49..3d8a96d39d9d 100644
--- a/include/asm-s390/tlb.h
+++ b/include/asm-s390/tlb.h
@@ -109,10 +109,15 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte)
 /*
  * pmd_free_tlb frees a pmd table and clears the CRSTE for the
  * segment table entry from the tlb.
+ * If the mm uses a two level page table the single pmd is freed
+ * as the pgd. pmd_free_tlb checks the asce_limit against 2GB
+ * to avoid the double free of the pmd in this case.
  */
 static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
 {
 #ifdef __s390x__
+        if (tlb->mm->context.asce_limit <= (1UL << 31))
+                return;
         if (!tlb->fullmm) {
                 tlb->array[--tlb->nr_pxds] = pmd;
                 if (tlb->nr_ptes >= tlb->nr_pxds)
@@ -125,10 +130,15 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
 /*
  * pud_free_tlb frees a pud table and clears the CRSTE for the
  * region third table entry from the tlb.
+ * If the mm uses a three level page table the single pud is freed
+ * as the pgd. pud_free_tlb checks the asce_limit against 4TB
+ * to avoid the double free of the pud in this case.
  */
 static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
 {
 #ifdef __s390x__
+        if (tlb->mm->context.asce_limit <= (1UL << 42))
+                return;
         if (!tlb->fullmm) {
                 tlb->array[--tlb->nr_pxds] = pud;
                 if (tlb->nr_ptes >= tlb->nr_pxds)