diff options
author | Martin Schwidefsky <schwidefsky@de.ibm.com> | 2008-02-09 12:24:37 -0500 |
---|---|---|
committer | Martin Schwidefsky <schwidefsky@de.ibm.com> | 2008-02-09 12:24:41 -0500 |
commit | 6252d702c5311ce916caf75ed82e5c8245171c92 (patch) | |
tree | 3490f27b5f888ff2c1ec915d4e7201000f37a771 /arch/s390/mm | |
parent | 5a216a20837c5f5fa1ca4b8ae8991ffd96b08e6f (diff) |
[S390] dynamic page tables.
Add support for a different number of page table levels depending
on the highest address used for a process. This will cause a 31 bit
process to use a two level page table instead of the four level page
table that is the default after the pud has been introduced. Likewise
a normal 64 bit process will use three levels instead of four. Only
if a process runs out of the 4 terabytes which can be addressed with
a three level page table is the fourth level dynamically added. Then
the process can use up to 8 petabytes.
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Diffstat (limited to 'arch/s390/mm')
-rw-r--r-- | arch/s390/mm/fault.c | 40 | ||||
-rw-r--r-- | arch/s390/mm/init.c | 5 | ||||
-rw-r--r-- | arch/s390/mm/mmap.c | 65 | ||||
-rw-r--r-- | arch/s390/mm/pgtable.c | 74 |
4 files changed, 182 insertions, 2 deletions
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 2456b52ed068..ed13d429a487 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c | |||
@@ -32,6 +32,7 @@ | |||
32 | #include <asm/system.h> | 32 | #include <asm/system.h> |
33 | #include <asm/pgtable.h> | 33 | #include <asm/pgtable.h> |
34 | #include <asm/s390_ext.h> | 34 | #include <asm/s390_ext.h> |
35 | #include <asm/mmu_context.h> | ||
35 | 36 | ||
36 | #ifndef CONFIG_64BIT | 37 | #ifndef CONFIG_64BIT |
37 | #define __FAIL_ADDR_MASK 0x7ffff000 | 38 | #define __FAIL_ADDR_MASK 0x7ffff000 |
@@ -444,6 +445,45 @@ void __kprobes do_dat_exception(struct pt_regs *regs, unsigned long error_code) | |||
444 | do_exception(regs, error_code & 0xff, 0); | 445 | do_exception(regs, error_code & 0xff, 0); |
445 | } | 446 | } |
446 | 447 | ||
448 | #ifdef CONFIG_64BIT | ||
449 | void __kprobes do_asce_exception(struct pt_regs *regs, unsigned long error_code) | ||
450 | { | ||
451 | struct mm_struct *mm; | ||
452 | struct vm_area_struct *vma; | ||
453 | unsigned long address; | ||
454 | int space; | ||
455 | |||
456 | mm = current->mm; | ||
457 | address = S390_lowcore.trans_exc_code & __FAIL_ADDR_MASK; | ||
458 | space = check_space(current); | ||
459 | |||
460 | if (unlikely(space == 0 || in_atomic() || !mm)) | ||
461 | goto no_context; | ||
462 | |||
463 | local_irq_enable(); | ||
464 | |||
465 | down_read(&mm->mmap_sem); | ||
466 | vma = find_vma(mm, address); | ||
467 | up_read(&mm->mmap_sem); | ||
468 | |||
469 | if (vma) { | ||
470 | update_mm(mm, current); | ||
471 | return; | ||
472 | } | ||
473 | |||
474 | /* User mode accesses just cause a SIGSEGV */ | ||
475 | if (regs->psw.mask & PSW_MASK_PSTATE) { | ||
476 | current->thread.prot_addr = address; | ||
477 | current->thread.trap_no = error_code; | ||
478 | do_sigsegv(regs, error_code, SEGV_MAPERR, address); | ||
479 | return; | ||
480 | } | ||
481 | |||
482 | no_context: | ||
483 | do_no_context(regs, error_code, address); | ||
484 | } | ||
485 | #endif | ||
486 | |||
447 | #ifdef CONFIG_PFAULT | 487 | #ifdef CONFIG_PFAULT |
448 | /* | 488 | /* |
449 | * 'pfault' pseudo page faults routines. | 489 | * 'pfault' pseudo page faults routines. |
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 248a71010700..8053245fe259 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c | |||
@@ -112,8 +112,9 @@ void __init paging_init(void) | |||
112 | init_mm.pgd = swapper_pg_dir; | 112 | init_mm.pgd = swapper_pg_dir; |
113 | S390_lowcore.kernel_asce = __pa(init_mm.pgd) & PAGE_MASK; | 113 | S390_lowcore.kernel_asce = __pa(init_mm.pgd) & PAGE_MASK; |
114 | #ifdef CONFIG_64BIT | 114 | #ifdef CONFIG_64BIT |
115 | S390_lowcore.kernel_asce |= _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH; | 115 | /* A three level page table (4TB) is enough for the kernel space. */ |
116 | pgd_type = _REGION2_ENTRY_EMPTY; | 116 | S390_lowcore.kernel_asce |= _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH; |
117 | pgd_type = _REGION3_ENTRY_EMPTY; | ||
117 | #else | 118 | #else |
118 | S390_lowcore.kernel_asce |= _ASCE_TABLE_LENGTH; | 119 | S390_lowcore.kernel_asce |= _ASCE_TABLE_LENGTH; |
119 | pgd_type = _SEGMENT_ENTRY_EMPTY; | 120 | pgd_type = _SEGMENT_ENTRY_EMPTY; |
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 356257c171de..5932a824547a 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <linux/personality.h> | 27 | #include <linux/personality.h> |
28 | #include <linux/mm.h> | 28 | #include <linux/mm.h> |
29 | #include <linux/module.h> | 29 | #include <linux/module.h> |
30 | #include <asm/pgalloc.h> | ||
30 | 31 | ||
31 | /* | 32 | /* |
32 | * Top of mmap area (just below the process stack). | 33 | * Top of mmap area (just below the process stack). |
@@ -62,6 +63,8 @@ static inline int mmap_is_legacy(void) | |||
62 | current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY; | 63 | current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY; |
63 | } | 64 | } |
64 | 65 | ||
66 | #ifndef CONFIG_64BIT | ||
67 | |||
65 | /* | 68 | /* |
66 | * This function, called very early during the creation of a new | 69 | * This function, called very early during the creation of a new |
67 | * process VM image, sets up which VM layout function to use: | 70 | * process VM image, sets up which VM layout function to use: |
@@ -84,3 +87,65 @@ void arch_pick_mmap_layout(struct mm_struct *mm) | |||
84 | } | 87 | } |
85 | EXPORT_SYMBOL_GPL(arch_pick_mmap_layout); | 88 | EXPORT_SYMBOL_GPL(arch_pick_mmap_layout); |
86 | 89 | ||
90 | #else | ||
91 | |||
92 | static unsigned long | ||
93 | s390_get_unmapped_area(struct file *filp, unsigned long addr, | ||
94 | unsigned long len, unsigned long pgoff, unsigned long flags) | ||
95 | { | ||
96 | struct mm_struct *mm = current->mm; | ||
97 | int rc; | ||
98 | |||
99 | addr = arch_get_unmapped_area(filp, addr, len, pgoff, flags); | ||
100 | if (addr & ~PAGE_MASK) | ||
101 | return addr; | ||
102 | if (unlikely(mm->context.asce_limit < addr + len)) { | ||
103 | rc = crst_table_upgrade(mm, addr + len); | ||
104 | if (rc) | ||
105 | return (unsigned long) rc; | ||
106 | } | ||
107 | return addr; | ||
108 | } | ||
109 | |||
110 | static unsigned long | ||
111 | s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, | ||
112 | const unsigned long len, const unsigned long pgoff, | ||
113 | const unsigned long flags) | ||
114 | { | ||
115 | struct mm_struct *mm = current->mm; | ||
116 | unsigned long addr = addr0; | ||
117 | int rc; | ||
118 | |||
119 | addr = arch_get_unmapped_area_topdown(filp, addr, len, pgoff, flags); | ||
120 | if (addr & ~PAGE_MASK) | ||
121 | return addr; | ||
122 | if (unlikely(mm->context.asce_limit < addr + len)) { | ||
123 | rc = crst_table_upgrade(mm, addr + len); | ||
124 | if (rc) | ||
125 | return (unsigned long) rc; | ||
126 | } | ||
127 | return addr; | ||
128 | } | ||
129 | /* | ||
130 | * This function, called very early during the creation of a new | ||
131 | * process VM image, sets up which VM layout function to use: | ||
132 | */ | ||
133 | void arch_pick_mmap_layout(struct mm_struct *mm) | ||
134 | { | ||
135 | /* | ||
136 | * Fall back to the standard layout if the personality | ||
137 | * bit is set, or if the expected stack growth is unlimited: | ||
138 | */ | ||
139 | if (mmap_is_legacy()) { | ||
140 | mm->mmap_base = TASK_UNMAPPED_BASE; | ||
141 | mm->get_unmapped_area = s390_get_unmapped_area; | ||
142 | mm->unmap_area = arch_unmap_area; | ||
143 | } else { | ||
144 | mm->mmap_base = mmap_base(); | ||
145 | mm->get_unmapped_area = s390_get_unmapped_area_topdown; | ||
146 | mm->unmap_area = arch_unmap_area_topdown; | ||
147 | } | ||
148 | } | ||
149 | EXPORT_SYMBOL_GPL(arch_pick_mmap_layout); | ||
150 | |||
151 | #endif | ||
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 809e77893039..fd072013f88c 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <asm/pgalloc.h> | 23 | #include <asm/pgalloc.h> |
24 | #include <asm/tlb.h> | 24 | #include <asm/tlb.h> |
25 | #include <asm/tlbflush.h> | 25 | #include <asm/tlbflush.h> |
26 | #include <asm/mmu_context.h> | ||
26 | 27 | ||
27 | #ifndef CONFIG_64BIT | 28 | #ifndef CONFIG_64BIT |
28 | #define ALLOC_ORDER 1 | 29 | #define ALLOC_ORDER 1 |
@@ -70,6 +71,79 @@ void crst_table_free(struct mm_struct *mm, unsigned long *table) | |||
70 | free_pages((unsigned long) table, ALLOC_ORDER); | 71 | free_pages((unsigned long) table, ALLOC_ORDER); |
71 | } | 72 | } |
72 | 73 | ||
74 | #ifdef CONFIG_64BIT | ||
75 | int crst_table_upgrade(struct mm_struct *mm, unsigned long limit) | ||
76 | { | ||
77 | unsigned long *table, *pgd; | ||
78 | unsigned long entry; | ||
79 | |||
80 | BUG_ON(limit > (1UL << 53)); | ||
81 | repeat: | ||
82 | table = crst_table_alloc(mm, mm->context.noexec); | ||
83 | if (!table) | ||
84 | return -ENOMEM; | ||
85 | spin_lock(&mm->page_table_lock); | ||
86 | if (mm->context.asce_limit < limit) { | ||
87 | pgd = (unsigned long *) mm->pgd; | ||
88 | if (mm->context.asce_limit <= (1UL << 31)) { | ||
89 | entry = _REGION3_ENTRY_EMPTY; | ||
90 | mm->context.asce_limit = 1UL << 42; | ||
91 | mm->context.asce_bits = _ASCE_TABLE_LENGTH | | ||
92 | _ASCE_USER_BITS | | ||
93 | _ASCE_TYPE_REGION3; | ||
94 | } else { | ||
95 | entry = _REGION2_ENTRY_EMPTY; | ||
96 | mm->context.asce_limit = 1UL << 53; | ||
97 | mm->context.asce_bits = _ASCE_TABLE_LENGTH | | ||
98 | _ASCE_USER_BITS | | ||
99 | _ASCE_TYPE_REGION2; | ||
100 | } | ||
101 | crst_table_init(table, entry); | ||
102 | pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd); | ||
103 | mm->pgd = (pgd_t *) table; | ||
104 | table = NULL; | ||
105 | } | ||
106 | spin_unlock(&mm->page_table_lock); | ||
107 | if (table) | ||
108 | crst_table_free(mm, table); | ||
109 | if (mm->context.asce_limit < limit) | ||
110 | goto repeat; | ||
111 | update_mm(mm, current); | ||
112 | return 0; | ||
113 | } | ||
114 | |||
115 | void crst_table_downgrade(struct mm_struct *mm, unsigned long limit) | ||
116 | { | ||
117 | pgd_t *pgd; | ||
118 | |||
119 | if (mm->context.asce_limit <= limit) | ||
120 | return; | ||
121 | __tlb_flush_mm(mm); | ||
122 | while (mm->context.asce_limit > limit) { | ||
123 | pgd = mm->pgd; | ||
124 | switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) { | ||
125 | case _REGION_ENTRY_TYPE_R2: | ||
126 | mm->context.asce_limit = 1UL << 42; | ||
127 | mm->context.asce_bits = _ASCE_TABLE_LENGTH | | ||
128 | _ASCE_USER_BITS | | ||
129 | _ASCE_TYPE_REGION3; | ||
130 | break; | ||
131 | case _REGION_ENTRY_TYPE_R3: | ||
132 | mm->context.asce_limit = 1UL << 31; | ||
133 | mm->context.asce_bits = _ASCE_TABLE_LENGTH | | ||
134 | _ASCE_USER_BITS | | ||
135 | _ASCE_TYPE_SEGMENT; | ||
136 | break; | ||
137 | default: | ||
138 | BUG(); | ||
139 | } | ||
140 | mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN); | ||
141 | crst_table_free(mm, (unsigned long *) pgd); | ||
142 | } | ||
143 | update_mm(mm, current); | ||
144 | } | ||
145 | #endif | ||
146 | |||
73 | /* | 147 | /* |
74 | * page table entry allocation/free routines. | 148 | * page table entry allocation/free routines. |
75 | */ | 149 | */ |