path: root/arch/s390
author    Gerald Schaefer <geraldsc@de.ibm.com>  2008-04-30 07:38:46 -0400
committer Martin Schwidefsky <schwidefsky@de.ibm.com>  2008-04-30 07:38:47 -0400
commit    53492b1de46a7576170e865062ffcfc93bb5650b (patch)
tree      bee94e5b2e8c19c1a094a25023cb82572707feb4 /arch/s390
parent    2e5061e40af88070984e3769eafb5a06022375fd (diff)
[S390] System z large page support.
This adds hugetlbfs support on System z, using both hardware large page
support if available and software large page emulation on older hardware.
Shared (large) page tables are implemented in software emulation mode, by
using page->index of the first tail page from a compound large page to
store page table information.

Signed-off-by: Gerald Schaefer <geraldsc@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
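For orientation, the heart of the software emulation mode boils down to the
following condensed sketch. It paraphrases the new arch/s390/mm/hugetlbpage.c
added below; the MACHINE_HAS_HPAGE checks, error paths and the noexec
shadow-table handling are left out, and the comments are explanatory rather
than part of the patch.

/*
 * Illustrative sketch only (see arch/s390/mm/hugetlbpage.c in the diff
 * below for the real code): without hardware large pages, a pte table
 * covering the whole huge page is built once per huge page and shared.
 */

/* When a huge page is allocated: pre-fill a pte table that maps the
 * compound page, and remember it in page->index of the first tail page
 * (page[1]) so that every mapping can share it. */
int arch_prepare_hugepage(struct page *page)
{
	unsigned long addr = page_to_phys(page);
	pte_t pte = mk_pte(page, PAGE_RW);
	pte_t *ptep;
	int i;

	ptep = (pte_t *) pte_alloc_one(&init_mm, addr);
	if (!ptep)
		return -ENOMEM;
	for (i = 0; i < PTRS_PER_PTE; i++) {
		set_pte_at(&init_mm, addr + i * PAGE_SIZE, ptep + i, pte);
		pte_val(pte) += PAGE_SIZE;
	}
	page[1].index = (unsigned long) ptep;	/* shared (large) page table */
	return 0;
}

/* At fault time: instead of a hardware large-page segment entry, install
 * a segment entry that points at the shared pte table stored in
 * page[1].index, carrying over the invalid/read-only bits. */
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *pteptr, pte_t pteval)
{
	pmd_t *pmdp = (pmd_t *) pteptr;
	pte_t *ptep = (pte_t *) pte_page(pteval)[1].index;
	unsigned long mask = pte_val(pteval) &
			     (_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO);

	pmd_val(*pmdp) = (_SEGMENT_ENTRY + __pa(ptep)) | mask;
}

With hardware EDAT available (MACHINE_HAS_HPAGE), none of this is needed and
set_huge_pte_at() simply writes the large-page segment entry it was given.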
Diffstat (limited to 'arch/s390')
-rw-r--r--  arch/s390/kernel/early.c      16
-rw-r--r--  arch/s390/kernel/head64.S      2
-rw-r--r--  arch/s390/kernel/setup.c      10
-rw-r--r--  arch/s390/mm/Makefile          2
-rw-r--r--  arch/s390/mm/fault.c           3
-rw-r--r--  arch/s390/mm/hugetlbpage.c   134
-rw-r--r--  arch/s390/mm/init.c           23
-rw-r--r--  arch/s390/mm/vmem.c           55
8 files changed, 210 insertions, 35 deletions
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index bd188f6bd0e2..d0e09684b9ce 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -268,6 +268,19 @@ static noinline __init void setup_lowcore_early(void)
 	s390_base_pgm_handler_fn = early_pgm_check_handler;
 }
 
+static noinline __init void setup_hpage(void)
+{
+#ifndef CONFIG_DEBUG_PAGEALLOC
+	unsigned int facilities;
+
+	facilities = stfl();
+	if (!(facilities & (1UL << 23)) || !(facilities & (1UL << 29)))
+		return;
+	machine_flags |= MACHINE_FLAG_HPAGE;
+	__ctl_set_bit(0, 23);
+#endif
+}
+
 static __init void detect_mvpg(void)
 {
 #ifndef CONFIG_64BIT
@@ -360,6 +373,8 @@ static __init void detect_machine_facilities(void)
 	facilities = stfl();
 	if (facilities & (1 << 28))
 		machine_flags |= MACHINE_FLAG_IDTE;
+	if (facilities & (1 << 23))
+		machine_flags |= MACHINE_FLAG_PFMF;
 	if (facilities & (1 << 4))
 		machine_flags |= MACHINE_FLAG_MVCOS;
 #endif
@@ -388,6 +403,7 @@ void __init startup_init(void)
 	detect_diag9c();
 	detect_diag44();
 	detect_machine_facilities();
+	setup_hpage();
 	sclp_read_info_early();
 	sclp_facilities_detect();
 	memsize = sclp_memory_detect();
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 9c2c6f7d37e7..1d06961e87b3 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -129,7 +129,7 @@ startup_continue:
 # virtual and never return ...
 	.align	16
 .Lentry:.quad	0x0000000180000000,_stext
-.Lctl:	.quad	0x04b50002		# cr0: various things
+.Lctl:	.quad	0x04350002		# cr0: various things
 	.quad	0			# cr1: primary space segment table
 	.quad	.Lduct			# cr2: dispatchable unit control table
 	.quad	0			# cr3: instruction authorization
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 694c6546ce64..2bc70b6e876a 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -749,6 +749,9 @@ static void __init setup_hwcaps(void)
 		elf_hwcap |= 1UL << 6;
 	}
 
+	if (MACHINE_HAS_HPAGE)
+		elf_hwcap |= 1UL << 7;
+
 	switch (cpuinfo->cpu_id.machine) {
 	case 0x9672:
 #if !defined(CONFIG_64BIT)
@@ -872,8 +875,9 @@ void __cpuinit print_cpu_info(struct cpuinfo_S390 *cpuinfo)
 
 static int show_cpuinfo(struct seq_file *m, void *v)
 {
-	static const char *hwcap_str[7] = {
-		"esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp"
+	static const char *hwcap_str[8] = {
+		"esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp",
+		"edat"
 	};
 	struct cpuinfo_S390 *cpuinfo;
 	unsigned long n = (unsigned long) v - 1;
@@ -888,7 +892,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 		   num_online_cpus(), loops_per_jiffy/(500000/HZ),
 		   (loops_per_jiffy/(5000/HZ))%100);
 	seq_puts(m, "features\t: ");
-	for (i = 0; i < 7; i++)
+	for (i = 0; i < 8; i++)
 		if (hwcap_str[i] && (elf_hwcap & (1UL << i)))
 			seq_printf(m, "%s ", hwcap_str[i]);
 	seq_puts(m, "\n");
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index 66401930f83e..fb988a48a754 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -4,4 +4,4 @@
 
 obj-y	 := init.o fault.o extmem.o mmap.o vmem.o pgtable.o
 obj-$(CONFIG_CMM) += cmm.o
-
+obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 2650f46001d0..4d537205e83c 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -28,6 +28,7 @@
 #include <linux/hardirq.h>
 #include <linux/kprobes.h>
 #include <linux/uaccess.h>
+#include <linux/hugetlb.h>
 #include <asm/system.h>
 #include <asm/pgtable.h>
 #include <asm/s390_ext.h>
@@ -367,6 +368,8 @@ good_area:
 	}
 
 survive:
+	if (is_vm_hugetlb_page(vma))
+		address &= HPAGE_MASK;
 	/*
 	 * If for any reason at all we couldn't handle the fault,
 	 * make sure we exit gracefully rather than endlessly redo
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
new file mode 100644
index 000000000000..f4b6124fdb75
--- /dev/null
+++ b/arch/s390/mm/hugetlbpage.c
@@ -0,0 +1,134 @@
+/*
+ *  IBM System z Huge TLB Page Support for Kernel.
+ *
+ *    Copyright 2007 IBM Corp.
+ *    Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
+ */
+
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+
+
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+		     pte_t *pteptr, pte_t pteval)
+{
+	pmd_t *pmdp = (pmd_t *) pteptr;
+	pte_t shadow_pteval = pteval;
+	unsigned long mask;
+
+	if (!MACHINE_HAS_HPAGE) {
+		pteptr = (pte_t *) pte_page(pteval)[1].index;
+		mask = pte_val(pteval) &
+				(_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO);
+		pte_val(pteval) = (_SEGMENT_ENTRY + __pa(pteptr)) | mask;
+		if (mm->context.noexec) {
+			pteptr += PTRS_PER_PTE;
+			pte_val(shadow_pteval) =
+					(_SEGMENT_ENTRY + __pa(pteptr)) | mask;
+		}
+	}
+
+	pmd_val(*pmdp) = pte_val(pteval);
+	if (mm->context.noexec) {
+		pmdp = get_shadow_table(pmdp);
+		pmd_val(*pmdp) = pte_val(shadow_pteval);
+	}
+}
+
+int arch_prepare_hugepage(struct page *page)
+{
+	unsigned long addr = page_to_phys(page);
+	pte_t pte;
+	pte_t *ptep;
+	int i;
+
+	if (MACHINE_HAS_HPAGE)
+		return 0;
+
+	ptep = (pte_t *) pte_alloc_one(&init_mm, address);
+	if (!ptep)
+		return -ENOMEM;
+
+	pte = mk_pte(page, PAGE_RW);
+	for (i = 0; i < PTRS_PER_PTE; i++) {
+		set_pte_at(&init_mm, addr + i * PAGE_SIZE, ptep + i, pte);
+		pte_val(pte) += PAGE_SIZE;
+	}
+	page[1].index = (unsigned long) ptep;
+	return 0;
+}
+
+void arch_release_hugepage(struct page *page)
+{
+	pte_t *ptep;
+
+	if (MACHINE_HAS_HPAGE)
+		return;
+
+	ptep = (pte_t *) page[1].index;
+	if (!ptep)
+		return;
+	pte_free(&init_mm, ptep);
+	page[1].index = 0;
+}
+
+pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
+{
+	pgd_t *pgdp;
+	pud_t *pudp;
+	pmd_t *pmdp = NULL;
+
+	pgdp = pgd_offset(mm, addr);
+	pudp = pud_alloc(mm, pgdp, addr);
+	if (pudp)
+		pmdp = pmd_alloc(mm, pudp, addr);
+	return (pte_t *) pmdp;
+}
+
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+{
+	pgd_t *pgdp;
+	pud_t *pudp;
+	pmd_t *pmdp = NULL;
+
+	pgdp = pgd_offset(mm, addr);
+	if (pgd_present(*pgdp)) {
+		pudp = pud_offset(pgdp, addr);
+		if (pud_present(*pudp))
+			pmdp = pmd_offset(pudp, addr);
+	}
+	return (pte_t *) pmdp;
+}
+
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+	return 0;
+}
+
+struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
+			      int write)
+{
+	return ERR_PTR(-EINVAL);
+}
+
+int pmd_huge(pmd_t pmd)
+{
+	if (!MACHINE_HAS_HPAGE)
+		return 0;
+
+	return !!(pmd_val(pmd) & _SEGMENT_ENTRY_LARGE);
+}
+
+struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+			     pmd_t *pmdp, int write)
+{
+	struct page *page;
+
+	if (!MACHINE_HAS_HPAGE)
+		return NULL;
+
+	page = pmd_page(*pmdp);
+	if (page)
+		page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT);
+	return page;
+}
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 202c952a29b4..acc92f46a096 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -77,28 +77,6 @@ void show_mem(void)
 	printk("%lu pages pagetables\n", global_page_state(NR_PAGETABLE));
 }
 
-static void __init setup_ro_region(void)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-	pte_t new_pte;
-	unsigned long address, end;
-
-	address = ((unsigned long)&_stext) & PAGE_MASK;
-	end = PFN_ALIGN((unsigned long)&_eshared);
-
-	for (; address < end; address += PAGE_SIZE) {
-		pgd = pgd_offset_k(address);
-		pud = pud_offset(pgd, address);
-		pmd = pmd_offset(pud, address);
-		pte = pte_offset_kernel(pmd, address);
-		new_pte = mk_pte_phys(address, __pgprot(_PAGE_RO));
-		*pte = new_pte;
-	}
-}
-
 /*
  * paging_init() sets up the page tables
  */
@@ -121,7 +99,6 @@ void __init paging_init(void)
 	clear_table((unsigned long *) init_mm.pgd, pgd_type,
 		    sizeof(unsigned long)*2048);
 	vmem_map_init();
-	setup_ro_region();
 
 	/* enable virtual mapping in kernel mode */
 	__ctl_load(S390_lowcore.kernel_asce, 1, 1);
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 3ffc0211dc85..97bce6c97574 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -10,10 +10,12 @@
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/list.h>
+#include <linux/hugetlb.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
 #include <asm/setup.h>
 #include <asm/tlbflush.h>
+#include <asm/sections.h>
 
 static DEFINE_MUTEX(vmem_mutex);
 
@@ -113,7 +115,7 @@ static pte_t __init_refok *vmem_pte_alloc(void)
 /*
  * Add a physical memory range to the 1:1 mapping.
  */
-static int vmem_add_range(unsigned long start, unsigned long size)
+static int vmem_add_range(unsigned long start, unsigned long size, int ro)
 {
 	unsigned long address;
 	pgd_t *pg_dir;
@@ -140,7 +142,19 @@ static int vmem_add_range(unsigned long start, unsigned long size)
 			pud_populate_kernel(&init_mm, pu_dir, pm_dir);
 		}
 
+		pte = mk_pte_phys(address, __pgprot(ro ? _PAGE_RO : 0));
 		pm_dir = pmd_offset(pu_dir, address);
+
+#ifdef __s390x__
+		if (MACHINE_HAS_HPAGE && !(address & ~HPAGE_MASK) &&
+		    (address + HPAGE_SIZE <= start + size) &&
+		    (address >= HPAGE_SIZE)) {
+			pte_val(pte) |= _SEGMENT_ENTRY_LARGE;
+			pmd_val(*pm_dir) = pte_val(pte);
+			address += HPAGE_SIZE - PAGE_SIZE;
+			continue;
+		}
+#endif
 		if (pmd_none(*pm_dir)) {
 			pt_dir = vmem_pte_alloc();
 			if (!pt_dir)
@@ -149,7 +163,6 @@ static int vmem_add_range(unsigned long start, unsigned long size)
 		}
 
 		pt_dir = pte_offset_kernel(pm_dir, address);
-		pte = pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL);
 		*pt_dir = pte;
 	}
 	ret = 0;
@@ -180,6 +193,13 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
 		pm_dir = pmd_offset(pu_dir, address);
 		if (pmd_none(*pm_dir))
 			continue;
+
+		if (pmd_huge(*pm_dir)) {
+			pmd_clear_kernel(pm_dir);
+			address += HPAGE_SIZE - PAGE_SIZE;
+			continue;
+		}
+
 		pt_dir = pte_offset_kernel(pm_dir, address);
 		*pt_dir = pte;
 	}
@@ -248,14 +268,14 @@ out:
 	return ret;
 }
 
-static int vmem_add_mem(unsigned long start, unsigned long size)
+static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
 {
 	int ret;
 
 	ret = vmem_add_mem_map(start, size);
 	if (ret)
 		return ret;
-	return vmem_add_range(start, size);
+	return vmem_add_range(start, size, ro);
 }
 
 /*
@@ -338,7 +358,7 @@ int add_shared_memory(unsigned long start, unsigned long size)
 	if (ret)
 		goto out_free;
 
-	ret = vmem_add_mem(start, size);
+	ret = vmem_add_mem(start, size, 0);
 	if (ret)
 		goto out_remove;
 
@@ -374,14 +394,35 @@ out:
  */
 void __init vmem_map_init(void)
 {
+	unsigned long ro_start, ro_end;
+	unsigned long start, end;
 	int i;
 
 	INIT_LIST_HEAD(&init_mm.context.crst_list);
 	INIT_LIST_HEAD(&init_mm.context.pgtable_list);
 	init_mm.context.noexec = 0;
 	NODE_DATA(0)->node_mem_map = VMEM_MAP;
-	for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++)
-		vmem_add_mem(memory_chunk[i].addr, memory_chunk[i].size);
+	ro_start = ((unsigned long)&_stext) & PAGE_MASK;
+	ro_end = PFN_ALIGN((unsigned long)&_eshared);
+	for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
+		start = memory_chunk[i].addr;
+		end = memory_chunk[i].addr + memory_chunk[i].size;
+		if (start >= ro_end || end <= ro_start)
+			vmem_add_mem(start, end - start, 0);
+		else if (start >= ro_start && end <= ro_end)
+			vmem_add_mem(start, end - start, 1);
+		else if (start >= ro_start) {
+			vmem_add_mem(start, ro_end - start, 1);
+			vmem_add_mem(ro_end, end - ro_end, 0);
+		} else if (end < ro_end) {
+			vmem_add_mem(start, ro_start - start, 0);
+			vmem_add_mem(ro_start, end - ro_start, 1);
+		} else {
+			vmem_add_mem(start, ro_start - start, 0);
+			vmem_add_mem(ro_start, ro_end - ro_start, 1);
+			vmem_add_mem(ro_end, end - ro_end, 0);
+		}
+	}
 }
 
 /*