aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorMartin Schwidefsky <schwidefsky@de.ibm.com>2008-02-09 12:24:37 -0500
committerMartin Schwidefsky <schwidefsky@de.ibm.com>2008-02-09 12:24:41 -0500
commit6252d702c5311ce916caf75ed82e5c8245171c92 (patch)
tree3490f27b5f888ff2c1ec915d4e7201000f37a771 /arch
parent5a216a20837c5f5fa1ca4b8ae8991ffd96b08e6f (diff)
[S390] dynamic page tables.
Add support for different number of page table levels dependent on the highest address used for a process. This will cause a 31 bit process to use a two level page table instead of the four level page table that is the default after the pud has been introduced. Likewise a normal 64 bit process will use three levels instead of four. Only if a process runs out of the 4 tera bytes which can be addressed with a three level page table the fourth level is dynamically added. Then the process can use up to 8 peta byte. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Diffstat (limited to 'arch')
-rw-r--r--arch/s390/kernel/binfmt_elf32.c11
-rw-r--r--arch/s390/kernel/traps.c3
-rw-r--r--arch/s390/mm/fault.c40
-rw-r--r--arch/s390/mm/init.c5
-rw-r--r--arch/s390/mm/mmap.c65
-rw-r--r--arch/s390/mm/pgtable.c74
6 files changed, 195 insertions, 3 deletions
diff --git a/arch/s390/kernel/binfmt_elf32.c b/arch/s390/kernel/binfmt_elf32.c
index f1e40ca00d8d..3e1c315b736d 100644
--- a/arch/s390/kernel/binfmt_elf32.c
+++ b/arch/s390/kernel/binfmt_elf32.c
@@ -134,6 +134,7 @@ static inline int dump_task_fpu(struct task_struct *tsk, elf_fpregset_t *fpregs)
134} 134}
135 135
136#include <asm/processor.h> 136#include <asm/processor.h>
137#include <asm/pgalloc.h>
137#include <linux/module.h> 138#include <linux/module.h>
138#include <linux/elfcore.h> 139#include <linux/elfcore.h>
139#include <linux/binfmts.h> 140#include <linux/binfmts.h>
@@ -183,6 +184,16 @@ struct elf_prpsinfo32
183#undef start_thread 184#undef start_thread
184#define start_thread start_thread31 185#define start_thread start_thread31
185 186
187static inline void start_thread31(struct pt_regs *regs, unsigned long new_psw,
188 unsigned long new_stackp)
189{
190 set_fs(USER_DS);
191 regs->psw.mask = psw_user32_bits;
192 regs->psw.addr = new_psw;
193 regs->gprs[15] = new_stackp;
194 crst_table_downgrade(current->mm, 1UL << 31);
195}
196
186MODULE_DESCRIPTION("Binary format loader for compatibility with 32bit Linux for S390 binaries," 197MODULE_DESCRIPTION("Binary format loader for compatibility with 32bit Linux for S390 binaries,"
187 " Copyright 2000 IBM Corporation"); 198 " Copyright 2000 IBM Corporation");
188MODULE_AUTHOR("Gerhard Tonn <ton@de.ibm.com>"); 199MODULE_AUTHOR("Gerhard Tonn <ton@de.ibm.com>");
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index a4d29025ddbd..60f728aeaf12 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -60,6 +60,7 @@ int sysctl_userprocess_debug = 0;
60extern pgm_check_handler_t do_protection_exception; 60extern pgm_check_handler_t do_protection_exception;
61extern pgm_check_handler_t do_dat_exception; 61extern pgm_check_handler_t do_dat_exception;
62extern pgm_check_handler_t do_monitor_call; 62extern pgm_check_handler_t do_monitor_call;
63extern pgm_check_handler_t do_asce_exception;
63 64
64#define stack_pointer ({ void **sp; asm("la %0,0(15)" : "=&d" (sp)); sp; }) 65#define stack_pointer ({ void **sp; asm("la %0,0(15)" : "=&d" (sp)); sp; })
65 66
@@ -730,7 +731,7 @@ void __init trap_init(void)
730 pgm_check_table[0x12] = &translation_exception; 731 pgm_check_table[0x12] = &translation_exception;
731 pgm_check_table[0x13] = &special_op_exception; 732 pgm_check_table[0x13] = &special_op_exception;
732#ifdef CONFIG_64BIT 733#ifdef CONFIG_64BIT
733 pgm_check_table[0x38] = &do_dat_exception; 734 pgm_check_table[0x38] = &do_asce_exception;
734 pgm_check_table[0x39] = &do_dat_exception; 735 pgm_check_table[0x39] = &do_dat_exception;
735 pgm_check_table[0x3A] = &do_dat_exception; 736 pgm_check_table[0x3A] = &do_dat_exception;
736 pgm_check_table[0x3B] = &do_dat_exception; 737 pgm_check_table[0x3B] = &do_dat_exception;
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 2456b52ed068..ed13d429a487 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -32,6 +32,7 @@
32#include <asm/system.h> 32#include <asm/system.h>
33#include <asm/pgtable.h> 33#include <asm/pgtable.h>
34#include <asm/s390_ext.h> 34#include <asm/s390_ext.h>
35#include <asm/mmu_context.h>
35 36
36#ifndef CONFIG_64BIT 37#ifndef CONFIG_64BIT
37#define __FAIL_ADDR_MASK 0x7ffff000 38#define __FAIL_ADDR_MASK 0x7ffff000
@@ -444,6 +445,45 @@ void __kprobes do_dat_exception(struct pt_regs *regs, unsigned long error_code)
444 do_exception(regs, error_code & 0xff, 0); 445 do_exception(regs, error_code & 0xff, 0);
445} 446}
446 447
448#ifdef CONFIG_64BIT
449void __kprobes do_asce_exception(struct pt_regs *regs, unsigned long error_code)
450{
451 struct mm_struct *mm;
452 struct vm_area_struct *vma;
453 unsigned long address;
454 int space;
455
456 mm = current->mm;
457 address = S390_lowcore.trans_exc_code & __FAIL_ADDR_MASK;
458 space = check_space(current);
459
460 if (unlikely(space == 0 || in_atomic() || !mm))
461 goto no_context;
462
463 local_irq_enable();
464
465 down_read(&mm->mmap_sem);
466 vma = find_vma(mm, address);
467 up_read(&mm->mmap_sem);
468
469 if (vma) {
470 update_mm(mm, current);
471 return;
472 }
473
474 /* User mode accesses just cause a SIGSEGV */
475 if (regs->psw.mask & PSW_MASK_PSTATE) {
476 current->thread.prot_addr = address;
477 current->thread.trap_no = error_code;
478 do_sigsegv(regs, error_code, SEGV_MAPERR, address);
479 return;
480 }
481
482no_context:
483 do_no_context(regs, error_code, address);
484}
485#endif
486
447#ifdef CONFIG_PFAULT 487#ifdef CONFIG_PFAULT
448/* 488/*
449 * 'pfault' pseudo page faults routines. 489 * 'pfault' pseudo page faults routines.
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 248a71010700..8053245fe259 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -112,8 +112,9 @@ void __init paging_init(void)
112 init_mm.pgd = swapper_pg_dir; 112 init_mm.pgd = swapper_pg_dir;
113 S390_lowcore.kernel_asce = __pa(init_mm.pgd) & PAGE_MASK; 113 S390_lowcore.kernel_asce = __pa(init_mm.pgd) & PAGE_MASK;
114#ifdef CONFIG_64BIT 114#ifdef CONFIG_64BIT
115 S390_lowcore.kernel_asce |= _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH; 115 /* A three level page table (4TB) is enough for the kernel space. */
116 pgd_type = _REGION2_ENTRY_EMPTY; 116 S390_lowcore.kernel_asce |= _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
117 pgd_type = _REGION3_ENTRY_EMPTY;
117#else 118#else
118 S390_lowcore.kernel_asce |= _ASCE_TABLE_LENGTH; 119 S390_lowcore.kernel_asce |= _ASCE_TABLE_LENGTH;
119 pgd_type = _SEGMENT_ENTRY_EMPTY; 120 pgd_type = _SEGMENT_ENTRY_EMPTY;
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index 356257c171de..5932a824547a 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -27,6 +27,7 @@
27#include <linux/personality.h> 27#include <linux/personality.h>
28#include <linux/mm.h> 28#include <linux/mm.h>
29#include <linux/module.h> 29#include <linux/module.h>
30#include <asm/pgalloc.h>
30 31
31/* 32/*
32 * Top of mmap area (just below the process stack). 33 * Top of mmap area (just below the process stack).
@@ -62,6 +63,8 @@ static inline int mmap_is_legacy(void)
62 current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY; 63 current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY;
63} 64}
64 65
66#ifndef CONFIG_64BIT
67
65/* 68/*
66 * This function, called very early during the creation of a new 69 * This function, called very early during the creation of a new
67 * process VM image, sets up which VM layout function to use: 70 * process VM image, sets up which VM layout function to use:
@@ -84,3 +87,65 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
84} 87}
85EXPORT_SYMBOL_GPL(arch_pick_mmap_layout); 88EXPORT_SYMBOL_GPL(arch_pick_mmap_layout);
86 89
90#else
91
92static unsigned long
93s390_get_unmapped_area(struct file *filp, unsigned long addr,
94 unsigned long len, unsigned long pgoff, unsigned long flags)
95{
96 struct mm_struct *mm = current->mm;
97 int rc;
98
99 addr = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
100 if (addr & ~PAGE_MASK)
101 return addr;
102 if (unlikely(mm->context.asce_limit < addr + len)) {
103 rc = crst_table_upgrade(mm, addr + len);
104 if (rc)
105 return (unsigned long) rc;
106 }
107 return addr;
108}
109
110static unsigned long
111s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
112 const unsigned long len, const unsigned long pgoff,
113 const unsigned long flags)
114{
115 struct mm_struct *mm = current->mm;
116 unsigned long addr = addr0;
117 int rc;
118
119 addr = arch_get_unmapped_area_topdown(filp, addr, len, pgoff, flags);
120 if (addr & ~PAGE_MASK)
121 return addr;
122 if (unlikely(mm->context.asce_limit < addr + len)) {
123 rc = crst_table_upgrade(mm, addr + len);
124 if (rc)
125 return (unsigned long) rc;
126 }
127 return addr;
128}
129/*
130 * This function, called very early during the creation of a new
131 * process VM image, sets up which VM layout function to use:
132 */
133void arch_pick_mmap_layout(struct mm_struct *mm)
134{
135 /*
136 * Fall back to the standard layout if the personality
137 * bit is set, or if the expected stack growth is unlimited:
138 */
139 if (mmap_is_legacy()) {
140 mm->mmap_base = TASK_UNMAPPED_BASE;
141 mm->get_unmapped_area = s390_get_unmapped_area;
142 mm->unmap_area = arch_unmap_area;
143 } else {
144 mm->mmap_base = mmap_base();
145 mm->get_unmapped_area = s390_get_unmapped_area_topdown;
146 mm->unmap_area = arch_unmap_area_topdown;
147 }
148}
149EXPORT_SYMBOL_GPL(arch_pick_mmap_layout);
150
151#endif
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 809e77893039..fd072013f88c 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -23,6 +23,7 @@
23#include <asm/pgalloc.h> 23#include <asm/pgalloc.h>
24#include <asm/tlb.h> 24#include <asm/tlb.h>
25#include <asm/tlbflush.h> 25#include <asm/tlbflush.h>
26#include <asm/mmu_context.h>
26 27
27#ifndef CONFIG_64BIT 28#ifndef CONFIG_64BIT
28#define ALLOC_ORDER 1 29#define ALLOC_ORDER 1
@@ -70,6 +71,79 @@ void crst_table_free(struct mm_struct *mm, unsigned long *table)
70 free_pages((unsigned long) table, ALLOC_ORDER); 71 free_pages((unsigned long) table, ALLOC_ORDER);
71} 72}
72 73
74#ifdef CONFIG_64BIT
75int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
76{
77 unsigned long *table, *pgd;
78 unsigned long entry;
79
80 BUG_ON(limit > (1UL << 53));
81repeat:
82 table = crst_table_alloc(mm, mm->context.noexec);
83 if (!table)
84 return -ENOMEM;
85 spin_lock(&mm->page_table_lock);
86 if (mm->context.asce_limit < limit) {
87 pgd = (unsigned long *) mm->pgd;
88 if (mm->context.asce_limit <= (1UL << 31)) {
89 entry = _REGION3_ENTRY_EMPTY;
90 mm->context.asce_limit = 1UL << 42;
91 mm->context.asce_bits = _ASCE_TABLE_LENGTH |
92 _ASCE_USER_BITS |
93 _ASCE_TYPE_REGION3;
94 } else {
95 entry = _REGION2_ENTRY_EMPTY;
96 mm->context.asce_limit = 1UL << 53;
97 mm->context.asce_bits = _ASCE_TABLE_LENGTH |
98 _ASCE_USER_BITS |
99 _ASCE_TYPE_REGION2;
100 }
101 crst_table_init(table, entry);
102 pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
103 mm->pgd = (pgd_t *) table;
104 table = NULL;
105 }
106 spin_unlock(&mm->page_table_lock);
107 if (table)
108 crst_table_free(mm, table);
109 if (mm->context.asce_limit < limit)
110 goto repeat;
111 update_mm(mm, current);
112 return 0;
113}
114
115void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
116{
117 pgd_t *pgd;
118
119 if (mm->context.asce_limit <= limit)
120 return;
121 __tlb_flush_mm(mm);
122 while (mm->context.asce_limit > limit) {
123 pgd = mm->pgd;
124 switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
125 case _REGION_ENTRY_TYPE_R2:
126 mm->context.asce_limit = 1UL << 42;
127 mm->context.asce_bits = _ASCE_TABLE_LENGTH |
128 _ASCE_USER_BITS |
129 _ASCE_TYPE_REGION3;
130 break;
131 case _REGION_ENTRY_TYPE_R3:
132 mm->context.asce_limit = 1UL << 31;
133 mm->context.asce_bits = _ASCE_TABLE_LENGTH |
134 _ASCE_USER_BITS |
135 _ASCE_TYPE_SEGMENT;
136 break;
137 default:
138 BUG();
139 }
140 mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
141 crst_table_free(mm, (unsigned long *) pgd);
142 }
143 update_mm(mm, current);
144}
145#endif
146
73/* 147/*
74 * page table entry allocation/free routines. 148 * page table entry allocation/free routines.
75 */ 149 */