author    Gerald Schaefer <geraldsc@de.ibm.com>        2008-04-30 07:38:46 -0400
committer Martin Schwidefsky <schwidefsky@de.ibm.com>  2008-04-30 07:38:47 -0400
commit    53492b1de46a7576170e865062ffcfc93bb5650b (patch)
tree      bee94e5b2e8c19c1a094a25023cb82572707feb4
parent    2e5061e40af88070984e3769eafb5a06022375fd (diff)
[S390] System z large page support.
This adds hugetlbfs support on System z, using both hardware large page
support if available and software large page emulation on older hardware.
Shared (large) page tables are implemented in software emulation mode, by
using page->index of the first tail page from a compound large page to
store page table information.

Signed-off-by: Gerald Schaefer <geraldsc@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
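For illustration only (not part of this patch): a minimal user-space sketch of
the hugetlbfs backing that this enables. It assumes hugetlbfs is already
mounted at /mnt/huge and huge pages have been reserved via
/proc/sys/vm/nr_hugepages; the mount point and file name are placeholders.
On System z the huge page size is 1 MB (HPAGE_SHIFT 20), so a mapping of that
length is backed by a single large page.

/* Minimal hugetlbfs sketch: map one 1 MB huge page and touch it. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

#define HPAGE_SIZE (1UL << 20)  /* s390 huge page size */

int main(void)
{
        int fd = open("/mnt/huge/testfile", O_CREAT | O_RDWR, 0600);
        void *p;

        if (fd < 0) {
                perror("open");
                return 1;
        }
        p = mmap(NULL, HPAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (p == MAP_FAILED) {
                perror("mmap");
                close(fd);
                return 1;
        }
        memset(p, 0xaa, HPAGE_SIZE);    /* fault in and dirty the huge page */
        munmap(p, HPAGE_SIZE);
        close(fd);
        unlink("/mnt/huge/testfile");
        return 0;
}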
-rw-r--r--  arch/s390/kernel/early.c      |  16
-rw-r--r--  arch/s390/kernel/head64.S     |   2
-rw-r--r--  arch/s390/kernel/setup.c      |  10
-rw-r--r--  arch/s390/mm/Makefile         |   2
-rw-r--r--  arch/s390/mm/fault.c          |   3
-rw-r--r--  arch/s390/mm/hugetlbpage.c    | 134
-rw-r--r--  arch/s390/mm/init.c           |  23
-rw-r--r--  arch/s390/mm/vmem.c           |  55
-rw-r--r--  fs/Kconfig                    |   3
-rw-r--r--  include/asm-s390/hugetlb.h    | 183
-rw-r--r--  include/asm-s390/page.h       |  29
-rw-r--r--  include/asm-s390/pgtable.h    |  12
-rw-r--r--  include/asm-s390/setup.h      |   6
-rw-r--r--  include/asm-s390/tlbflush.h   |   1
14 files changed, 437 insertions(+), 42 deletions(-)
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index bd188f6bd0e2..d0e09684b9ce 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -268,6 +268,19 @@ static noinline __init void setup_lowcore_early(void)
         s390_base_pgm_handler_fn = early_pgm_check_handler;
 }
 
+static noinline __init void setup_hpage(void)
+{
+#ifndef CONFIG_DEBUG_PAGEALLOC
+        unsigned int facilities;
+
+        facilities = stfl();
+        if (!(facilities & (1UL << 23)) || !(facilities & (1UL << 29)))
+                return;
+        machine_flags |= MACHINE_FLAG_HPAGE;
+        __ctl_set_bit(0, 23);
+#endif
+}
+
 static __init void detect_mvpg(void)
 {
 #ifndef CONFIG_64BIT
@@ -360,6 +373,8 @@ static __init void detect_machine_facilities(void)
         facilities = stfl();
         if (facilities & (1 << 28))
                 machine_flags |= MACHINE_FLAG_IDTE;
+        if (facilities & (1 << 23))
+                machine_flags |= MACHINE_FLAG_PFMF;
         if (facilities & (1 << 4))
                 machine_flags |= MACHINE_FLAG_MVCOS;
 #endif
@@ -388,6 +403,7 @@ void __init startup_init(void)
         detect_diag9c();
         detect_diag44();
         detect_machine_facilities();
+        setup_hpage();
         sclp_read_info_early();
         sclp_facilities_detect();
         memsize = sclp_memory_detect();
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 9c2c6f7d37e7..1d06961e87b3 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -129,7 +129,7 @@ startup_continue:
 # virtual and never return ...
         .align  16
 .Lentry:.quad   0x0000000180000000,_stext
-.Lctl:  .quad   0x04b50002              # cr0: various things
+.Lctl:  .quad   0x04350002              # cr0: various things
         .quad   0                       # cr1: primary space segment table
         .quad   .Lduct                  # cr2: dispatchable unit control table
         .quad   0                       # cr3: instruction authorization
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 694c6546ce64..2bc70b6e876a 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -749,6 +749,9 @@ static void __init setup_hwcaps(void)
                 elf_hwcap |= 1UL << 6;
         }
 
+        if (MACHINE_HAS_HPAGE)
+                elf_hwcap |= 1UL << 7;
+
         switch (cpuinfo->cpu_id.machine) {
         case 0x9672:
 #if !defined(CONFIG_64BIT)
@@ -872,8 +875,9 @@ void __cpuinit print_cpu_info(struct cpuinfo_S390 *cpuinfo)
 
 static int show_cpuinfo(struct seq_file *m, void *v)
 {
-        static const char *hwcap_str[7] = {
-                "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp"
+        static const char *hwcap_str[8] = {
+                "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp",
+                "edat"
         };
         struct cpuinfo_S390 *cpuinfo;
         unsigned long n = (unsigned long) v - 1;
@@ -888,7 +892,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
                    num_online_cpus(), loops_per_jiffy/(500000/HZ),
                    (loops_per_jiffy/(5000/HZ))%100);
         seq_puts(m, "features\t: ");
-        for (i = 0; i < 7; i++)
+        for (i = 0; i < 8; i++)
                 if (hwcap_str[i] && (elf_hwcap & (1UL << i)))
                         seq_printf(m, "%s ", hwcap_str[i]);
         seq_puts(m, "\n");
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index 66401930f83e..fb988a48a754 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -4,4 +4,4 @@
 
 obj-y    := init.o fault.o extmem.o mmap.o vmem.o pgtable.o
 obj-$(CONFIG_CMM) += cmm.o
-
+obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 2650f46001d0..4d537205e83c 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -28,6 +28,7 @@
 #include <linux/hardirq.h>
 #include <linux/kprobes.h>
 #include <linux/uaccess.h>
+#include <linux/hugetlb.h>
 #include <asm/system.h>
 #include <asm/pgtable.h>
 #include <asm/s390_ext.h>
@@ -367,6 +368,8 @@ good_area:
         }
 
 survive:
+        if (is_vm_hugetlb_page(vma))
+                address &= HPAGE_MASK;
         /*
          * If for any reason at all we couldn't handle the fault,
          * make sure we exit gracefully rather than endlessly redo
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
new file mode 100644
index 000000000000..f4b6124fdb75
--- /dev/null
+++ b/arch/s390/mm/hugetlbpage.c
@@ -0,0 +1,134 @@
+/*
+ *  IBM System z Huge TLB Page Support for Kernel.
+ *
+ *    Copyright 2007 IBM Corp.
+ *    Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
+ */
+
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+
+
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+                     pte_t *pteptr, pte_t pteval)
+{
+        pmd_t *pmdp = (pmd_t *) pteptr;
+        pte_t shadow_pteval = pteval;
+        unsigned long mask;
+
+        if (!MACHINE_HAS_HPAGE) {
+                pteptr = (pte_t *) pte_page(pteval)[1].index;
+                mask = pte_val(pteval) &
+                                (_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO);
+                pte_val(pteval) = (_SEGMENT_ENTRY + __pa(pteptr)) | mask;
+                if (mm->context.noexec) {
+                        pteptr += PTRS_PER_PTE;
+                        pte_val(shadow_pteval) =
+                                (_SEGMENT_ENTRY + __pa(pteptr)) | mask;
+                }
+        }
+
+        pmd_val(*pmdp) = pte_val(pteval);
+        if (mm->context.noexec) {
+                pmdp = get_shadow_table(pmdp);
+                pmd_val(*pmdp) = pte_val(shadow_pteval);
+        }
+}
+
+int arch_prepare_hugepage(struct page *page)
+{
+        unsigned long addr = page_to_phys(page);
+        pte_t pte;
+        pte_t *ptep;
+        int i;
+
+        if (MACHINE_HAS_HPAGE)
+                return 0;
+
+        ptep = (pte_t *) pte_alloc_one(&init_mm, address);
+        if (!ptep)
+                return -ENOMEM;
+
+        pte = mk_pte(page, PAGE_RW);
+        for (i = 0; i < PTRS_PER_PTE; i++) {
+                set_pte_at(&init_mm, addr + i * PAGE_SIZE, ptep + i, pte);
+                pte_val(pte) += PAGE_SIZE;
+        }
+        page[1].index = (unsigned long) ptep;
+        return 0;
+}
+
+void arch_release_hugepage(struct page *page)
+{
+        pte_t *ptep;
+
+        if (MACHINE_HAS_HPAGE)
+                return;
+
+        ptep = (pte_t *) page[1].index;
+        if (!ptep)
+                return;
+        pte_free(&init_mm, ptep);
+        page[1].index = 0;
+}
+
+pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
+{
+        pgd_t *pgdp;
+        pud_t *pudp;
+        pmd_t *pmdp = NULL;
+
+        pgdp = pgd_offset(mm, addr);
+        pudp = pud_alloc(mm, pgdp, addr);
+        if (pudp)
+                pmdp = pmd_alloc(mm, pudp, addr);
+        return (pte_t *) pmdp;
+}
+
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+{
+        pgd_t *pgdp;
+        pud_t *pudp;
+        pmd_t *pmdp = NULL;
+
+        pgdp = pgd_offset(mm, addr);
+        if (pgd_present(*pgdp)) {
+                pudp = pud_offset(pgdp, addr);
+                if (pud_present(*pudp))
+                        pmdp = pmd_offset(pudp, addr);
+        }
+        return (pte_t *) pmdp;
+}
+
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+        return 0;
+}
+
+struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
+                              int write)
+{
+        return ERR_PTR(-EINVAL);
+}
+
+int pmd_huge(pmd_t pmd)
+{
+        if (!MACHINE_HAS_HPAGE)
+                return 0;
+
+        return !!(pmd_val(pmd) & _SEGMENT_ENTRY_LARGE);
+}
+
+struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+                             pmd_t *pmdp, int write)
+{
+        struct page *page;
+
+        if (!MACHINE_HAS_HPAGE)
+                return NULL;
+
+        page = pmd_page(*pmdp);
+        if (page)
+                page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT);
+        return page;
+}
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 202c952a29b4..acc92f46a096 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -77,28 +77,6 @@ void show_mem(void)
         printk("%lu pages pagetables\n", global_page_state(NR_PAGETABLE));
 }
 
-static void __init setup_ro_region(void)
-{
-        pgd_t *pgd;
-        pud_t *pud;
-        pmd_t *pmd;
-        pte_t *pte;
-        pte_t new_pte;
-        unsigned long address, end;
-
-        address = ((unsigned long)&_stext) & PAGE_MASK;
-        end = PFN_ALIGN((unsigned long)&_eshared);
-
-        for (; address < end; address += PAGE_SIZE) {
-                pgd = pgd_offset_k(address);
-                pud = pud_offset(pgd, address);
-                pmd = pmd_offset(pud, address);
-                pte = pte_offset_kernel(pmd, address);
-                new_pte = mk_pte_phys(address, __pgprot(_PAGE_RO));
-                *pte = new_pte;
-        }
-}
-
 /*
  * paging_init() sets up the page tables
  */
@@ -121,7 +99,6 @@ void __init paging_init(void)
         clear_table((unsigned long *) init_mm.pgd, pgd_type,
                     sizeof(unsigned long)*2048);
         vmem_map_init();
-        setup_ro_region();
 
         /* enable virtual mapping in kernel mode */
         __ctl_load(S390_lowcore.kernel_asce, 1, 1);
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 3ffc0211dc85..97bce6c97574 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -10,10 +10,12 @@
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/list.h>
+#include <linux/hugetlb.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
 #include <asm/setup.h>
 #include <asm/tlbflush.h>
+#include <asm/sections.h>
 
 static DEFINE_MUTEX(vmem_mutex);
 
@@ -113,7 +115,7 @@ static pte_t __init_refok *vmem_pte_alloc(void)
 /*
  * Add a physical memory range to the 1:1 mapping.
  */
-static int vmem_add_range(unsigned long start, unsigned long size)
+static int vmem_add_range(unsigned long start, unsigned long size, int ro)
 {
         unsigned long address;
         pgd_t *pg_dir;
@@ -140,7 +142,19 @@ static int vmem_add_range(unsigned long start, unsigned long size)
                         pud_populate_kernel(&init_mm, pu_dir, pm_dir);
                 }
 
+                pte = mk_pte_phys(address, __pgprot(ro ? _PAGE_RO : 0));
                 pm_dir = pmd_offset(pu_dir, address);
+
+#ifdef __s390x__
+                if (MACHINE_HAS_HPAGE && !(address & ~HPAGE_MASK) &&
+                    (address + HPAGE_SIZE <= start + size) &&
+                    (address >= HPAGE_SIZE)) {
+                        pte_val(pte) |= _SEGMENT_ENTRY_LARGE;
+                        pmd_val(*pm_dir) = pte_val(pte);
+                        address += HPAGE_SIZE - PAGE_SIZE;
+                        continue;
+                }
+#endif
                 if (pmd_none(*pm_dir)) {
                         pt_dir = vmem_pte_alloc();
                         if (!pt_dir)
@@ -149,7 +163,6 @@ static int vmem_add_range(unsigned long start, unsigned long size)
                 }
 
                 pt_dir = pte_offset_kernel(pm_dir, address);
-                pte = pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL);
                 *pt_dir = pte;
         }
         ret = 0;
@@ -180,6 +193,13 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
                 pm_dir = pmd_offset(pu_dir, address);
                 if (pmd_none(*pm_dir))
                         continue;
+
+                if (pmd_huge(*pm_dir)) {
+                        pmd_clear_kernel(pm_dir);
+                        address += HPAGE_SIZE - PAGE_SIZE;
+                        continue;
+                }
+
                 pt_dir = pte_offset_kernel(pm_dir, address);
                 *pt_dir = pte;
         }
@@ -248,14 +268,14 @@ out:
         return ret;
 }
 
-static int vmem_add_mem(unsigned long start, unsigned long size)
+static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
 {
         int ret;
 
         ret = vmem_add_mem_map(start, size);
         if (ret)
                 return ret;
-        return vmem_add_range(start, size);
+        return vmem_add_range(start, size, ro);
 }
 
 /*
@@ -338,7 +358,7 @@ int add_shared_memory(unsigned long start, unsigned long size)
         if (ret)
                 goto out_free;
 
-        ret = vmem_add_mem(start, size);
+        ret = vmem_add_mem(start, size, 0);
         if (ret)
                 goto out_remove;
 
@@ -374,14 +394,35 @@ out:
  */
 void __init vmem_map_init(void)
 {
+        unsigned long ro_start, ro_end;
+        unsigned long start, end;
         int i;
 
         INIT_LIST_HEAD(&init_mm.context.crst_list);
         INIT_LIST_HEAD(&init_mm.context.pgtable_list);
         init_mm.context.noexec = 0;
         NODE_DATA(0)->node_mem_map = VMEM_MAP;
-        for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++)
-                vmem_add_mem(memory_chunk[i].addr, memory_chunk[i].size);
+        ro_start = ((unsigned long)&_stext) & PAGE_MASK;
+        ro_end = PFN_ALIGN((unsigned long)&_eshared);
+        for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
+                start = memory_chunk[i].addr;
+                end = memory_chunk[i].addr + memory_chunk[i].size;
+                if (start >= ro_end || end <= ro_start)
+                        vmem_add_mem(start, end - start, 0);
+                else if (start >= ro_start && end <= ro_end)
+                        vmem_add_mem(start, end - start, 1);
+                else if (start >= ro_start) {
+                        vmem_add_mem(start, ro_end - start, 1);
+                        vmem_add_mem(ro_end, end - ro_end, 0);
+                } else if (end < ro_end) {
+                        vmem_add_mem(start, ro_start - start, 0);
+                        vmem_add_mem(ro_start, end - ro_start, 1);
+                } else {
+                        vmem_add_mem(start, ro_start - start, 0);
+                        vmem_add_mem(ro_start, ro_end - ro_start, 1);
+                        vmem_add_mem(ro_end, end - ro_end, 0);
+                }
+        }
 }
 
 /*
diff --git a/fs/Kconfig b/fs/Kconfig
index 2e43d46f65d6..cf12c403b8c7 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1005,7 +1005,8 @@ config TMPFS_POSIX_ACL
 
 config HUGETLBFS
         bool "HugeTLB file system support"
-        depends on X86 || IA64 || PPC64 || SPARC64 || (SUPERH && MMU) || BROKEN
+        depends on X86 || IA64 || PPC64 || SPARC64 || (SUPERH && MMU) || \
+                   (S390 && 64BIT) || BROKEN
         help
           hugetlbfs is a filesystem backing for HugeTLB pages, based on
           ramfs. For architectures that support it, say Y here and read
diff --git a/include/asm-s390/hugetlb.h b/include/asm-s390/hugetlb.h
new file mode 100644
index 000000000000..600a776f8f75
--- /dev/null
+++ b/include/asm-s390/hugetlb.h
@@ -0,0 +1,183 @@
+/*
+ *  IBM System z Huge TLB Page Support for Kernel.
+ *
+ *    Copyright IBM Corp. 2008
+ *    Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
+ */
+
+#ifndef _ASM_S390_HUGETLB_H
+#define _ASM_S390_HUGETLB_H
+
+#include <asm/page.h>
+#include <asm/pgtable.h>
+
+
+#define is_hugepage_only_range(mm, addr, len)   0
+#define hugetlb_free_pgd_range                  free_pgd_range
+
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+                     pte_t *ptep, pte_t pte);
+
+/*
+ * If the arch doesn't supply something else, assume that hugepage
+ * size aligned regions are ok without further preparation.
+ */
+static inline int prepare_hugepage_range(unsigned long addr, unsigned long len)
+{
+        if (len & ~HPAGE_MASK)
+                return -EINVAL;
+        if (addr & ~HPAGE_MASK)
+                return -EINVAL;
+        return 0;
+}
+
+#define hugetlb_prefault_arch_hook(mm)          do { } while (0)
+
+int arch_prepare_hugepage(struct page *page);
+void arch_release_hugepage(struct page *page);
+
+static inline pte_t pte_mkhuge(pte_t pte)
+{
+        /*
+         * PROT_NONE needs to be remapped from the pte type to the ste type.
+         * The HW invalid bit is also different for pte and ste. The pte
+         * invalid bit happens to be the same as the ste _SEGMENT_ENTRY_LARGE
+         * bit, so we don't have to clear it.
+         */
+        if (pte_val(pte) & _PAGE_INVALID) {
+                if (pte_val(pte) & _PAGE_SWT)
+                        pte_val(pte) |= _HPAGE_TYPE_NONE;
+                pte_val(pte) |= _SEGMENT_ENTRY_INV;
+        }
+        /*
+         * Clear SW pte bits SWT and SWX, there are no SW bits in a segment
+         * table entry.
+         */
+        pte_val(pte) &= ~(_PAGE_SWT | _PAGE_SWX);
+        /*
+         * Also set the change-override bit because we don't need dirty bit
+         * tracking for hugetlbfs pages.
+         */
+        pte_val(pte) |= (_SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_CO);
+        return pte;
+}
+
+static inline pte_t huge_pte_wrprotect(pte_t pte)
+{
+        pte_val(pte) |= _PAGE_RO;
+        return pte;
+}
+
+static inline int huge_pte_none(pte_t pte)
+{
+        return (pte_val(pte) & _SEGMENT_ENTRY_INV) &&
+               !(pte_val(pte) & _SEGMENT_ENTRY_RO);
+}
+
+static inline pte_t huge_ptep_get(pte_t *ptep)
+{
+        pte_t pte = *ptep;
+        unsigned long mask;
+
+        if (!MACHINE_HAS_HPAGE) {
+                ptep = (pte_t *) (pte_val(pte) & _SEGMENT_ENTRY_ORIGIN);
+                if (ptep) {
+                        mask = pte_val(pte) &
+                                (_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO);
+                        pte = pte_mkhuge(*ptep);
+                        pte_val(pte) |= mask;
+                }
+        }
+        return pte;
+}
+
+static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+                                            unsigned long addr, pte_t *ptep)
+{
+        pte_t pte = huge_ptep_get(ptep);
+
+        pmd_clear((pmd_t *) ptep);
+        return pte;
+}
+
+static inline void __pmd_csp(pmd_t *pmdp)
+{
+        register unsigned long reg2 asm("2") = pmd_val(*pmdp);
+        register unsigned long reg3 asm("3") = pmd_val(*pmdp) |
+                                               _SEGMENT_ENTRY_INV;
+        register unsigned long reg4 asm("4") = ((unsigned long) pmdp) + 5;
+
+        asm volatile(
+                "       csp %1,%3"
+                : "=m" (*pmdp)
+                : "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc");
+        pmd_val(*pmdp) = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY;
+}
+
+static inline void __pmd_idte(unsigned long address, pmd_t *pmdp)
+{
+        unsigned long sto = (unsigned long) pmdp -
+                                pmd_index(address) * sizeof(pmd_t);
+
+        if (!(pmd_val(*pmdp) & _SEGMENT_ENTRY_INV)) {
+                asm volatile(
+                        "       .insn   rrf,0xb98e0000,%2,%3,0,0"
+                        : "=m" (*pmdp)
+                        : "m" (*pmdp), "a" (sto),
+                          "a" ((address & HPAGE_MASK))
+                );
+        }
+        pmd_val(*pmdp) = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY;
+}
+
+static inline void huge_ptep_invalidate(struct mm_struct *mm,
+                                        unsigned long address, pte_t *ptep)
+{
+        pmd_t *pmdp = (pmd_t *) ptep;
+
+        if (!MACHINE_HAS_IDTE) {
+                __pmd_csp(pmdp);
+                if (mm->context.noexec) {
+                        pmdp = get_shadow_table(pmdp);
+                        __pmd_csp(pmdp);
+                }
+                return;
+        }
+
+        __pmd_idte(address, pmdp);
+        if (mm->context.noexec) {
+                pmdp = get_shadow_table(pmdp);
+                __pmd_idte(address, pmdp);
+        }
+        return;
+}
+
+#define huge_ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty) \
+({                                                                          \
+        int __changed = !pte_same(huge_ptep_get(__ptep), __entry);          \
+        if (__changed) {                                                    \
+                huge_ptep_invalidate((__vma)->vm_mm, __addr, __ptep);       \
+                set_huge_pte_at((__vma)->vm_mm, __addr, __ptep, __entry);   \
+        }                                                                   \
+        __changed;                                                          \
+})
+
+#define huge_ptep_set_wrprotect(__mm, __addr, __ptep)                   \
+({                                                                      \
+        pte_t __pte = huge_ptep_get(__ptep);                            \
+        if (pte_write(__pte)) {                                         \
+                if (atomic_read(&(__mm)->mm_users) > 1 ||               \
+                    (__mm) != current->active_mm)                       \
+                        huge_ptep_invalidate(__mm, __addr, __ptep);     \
+                set_huge_pte_at(__mm, __addr, __ptep,                   \
+                                huge_pte_wrprotect(__pte));             \
+        }                                                               \
+})
+
+static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
+                                         unsigned long address, pte_t *ptep)
+{
+        huge_ptep_invalidate(vma->vm_mm, address, ptep);
+}
+
+#endif /* _ASM_S390_HUGETLB_H */
diff --git a/include/asm-s390/page.h b/include/asm-s390/page.h
index fe7f92b6ae6d..b01e6fc9a295 100644
--- a/include/asm-s390/page.h
+++ b/include/asm-s390/page.h
@@ -19,17 +19,34 @@
 #define PAGE_DEFAULT_ACC        0
 #define PAGE_DEFAULT_KEY        (PAGE_DEFAULT_ACC << 4)
 
+#define HPAGE_SHIFT     20
+#define HPAGE_SIZE      (1UL << HPAGE_SHIFT)
+#define HPAGE_MASK      (~(HPAGE_SIZE - 1))
+#define HUGETLB_PAGE_ORDER      (HPAGE_SHIFT - PAGE_SHIFT)
+
+#define ARCH_HAS_SETCLEAR_HUGE_PTE
+#define ARCH_HAS_HUGE_PTE_TYPE
+#define ARCH_HAS_PREPARE_HUGEPAGE
+#define ARCH_HAS_HUGEPAGE_CLEAR_FLUSH
+
 #include <asm/setup.h>
 #ifndef __ASSEMBLY__
 
 static inline void clear_page(void *page)
 {
-        register unsigned long reg1 asm ("1") = 0;
-        register void *reg2 asm ("2") = page;
-        register unsigned long reg3 asm ("3") = 4096;
-        asm volatile(
-                "       mvcl 2,0"
-                : "+d" (reg2), "+d" (reg3) : "d" (reg1) : "memory", "cc");
+        if (MACHINE_HAS_PFMF) {
+                asm volatile(
+                        "       .insn   rre,0xb9af0000,%0,%1"
+                        : : "d" (0x10000), "a" (page) : "memory", "cc");
+        } else {
+                register unsigned long reg1 asm ("1") = 0;
+                register void *reg2 asm ("2") = page;
+                register unsigned long reg3 asm ("3") = 4096;
+                asm volatile(
+                        "       mvcl    2,0"
+                        : "+d" (reg2), "+d" (reg3) : "d" (reg1)
+                        : "memory", "cc");
+        }
 }
 
 static inline void copy_page(void *to, void *from)
diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h
index f8347ce9c5a1..fd336f2e2a7a 100644
--- a/include/asm-s390/pgtable.h
+++ b/include/asm-s390/pgtable.h
@@ -234,6 +234,15 @@ extern char empty_zero_page[PAGE_SIZE];
 #define _PAGE_TYPE_EX_RW        0x002
 
 /*
+ * Only four types for huge pages, using the invalid bit and protection bit
+ * of a segment table entry.
+ */
+#define _HPAGE_TYPE_EMPTY       0x020   /* _SEGMENT_ENTRY_INV */
+#define _HPAGE_TYPE_NONE        0x220
+#define _HPAGE_TYPE_RO          0x200   /* _SEGMENT_ENTRY_RO  */
+#define _HPAGE_TYPE_RW          0x000
+
+/*
  * PTE type bits are rather complicated. handle_pte_fault uses pte_present,
  * pte_none and pte_file to find out the pte type WITHOUT holding the page
  * table lock. ptep_clear_flush on the other hand uses ptep_clear_flush to
@@ -325,6 +334,9 @@ extern char empty_zero_page[PAGE_SIZE];
 #define _SEGMENT_ENTRY          (0)
 #define _SEGMENT_ENTRY_EMPTY    (_SEGMENT_ENTRY_INV)
 
+#define _SEGMENT_ENTRY_LARGE    0x400   /* STE-format control, large page */
+#define _SEGMENT_ENTRY_CO       0x100   /* change-recording override      */
+
 #endif /* __s390x__ */
 
 /*
diff --git a/include/asm-s390/setup.h b/include/asm-s390/setup.h
index 3a9e458fd8c3..ba69674012a7 100644
--- a/include/asm-s390/setup.h
+++ b/include/asm-s390/setup.h
@@ -69,6 +69,8 @@ extern unsigned long machine_flags;
 #define MACHINE_FLAG_DIAG9C     (1UL << 7)
 #define MACHINE_FLAG_MVCOS      (1UL << 8)
 #define MACHINE_FLAG_KVM        (1UL << 9)
+#define MACHINE_FLAG_HPAGE      (1UL << 10)
+#define MACHINE_FLAG_PFMF       (1UL << 11)
 
 #define MACHINE_IS_VM           (machine_flags & MACHINE_FLAG_VM)
 #define MACHINE_IS_KVM          (machine_flags & MACHINE_FLAG_KVM)
@@ -82,6 +84,8 @@ extern unsigned long machine_flags;
 #define MACHINE_HAS_DIAG44      (1)
 #define MACHINE_HAS_MVPG        (machine_flags & MACHINE_FLAG_MVPG)
 #define MACHINE_HAS_MVCOS       (0)
+#define MACHINE_HAS_HPAGE       (0)
+#define MACHINE_HAS_PFMF        (0)
 #else /* __s390x__ */
 #define MACHINE_HAS_IEEE        (1)
 #define MACHINE_HAS_CSP         (1)
@@ -89,6 +93,8 @@ extern unsigned long machine_flags;
 #define MACHINE_HAS_DIAG44      (machine_flags & MACHINE_FLAG_DIAG44)
 #define MACHINE_HAS_MVPG        (1)
 #define MACHINE_HAS_MVCOS       (machine_flags & MACHINE_FLAG_MVCOS)
+#define MACHINE_HAS_HPAGE       (machine_flags & MACHINE_FLAG_HPAGE)
+#define MACHINE_HAS_PFMF        (machine_flags & MACHINE_FLAG_PFMF)
 #endif /* __s390x__ */
 
 #define MACHINE_HAS_SCLP        (!MACHINE_IS_P390)
diff --git a/include/asm-s390/tlbflush.h b/include/asm-s390/tlbflush.h
index 9e57a93d7de1..d60394b9745e 100644
--- a/include/asm-s390/tlbflush.h
+++ b/include/asm-s390/tlbflush.h
@@ -2,6 +2,7 @@
 #define _S390_TLBFLUSH_H
 
 #include <linux/mm.h>
+#include <linux/sched.h>
 #include <asm/processor.h>
 #include <asm/pgalloc.h>
 
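As a usage-level complement (again an illustration, not part of the patch):
the "edat" entry that the setup.c hunk above adds to the features line of
/proc/cpuinfo can be used to tell whether hardware large pages are available
or whether the software large page emulation will be used, for example:

/* Check /proc/cpuinfo for the "edat" hwcap exported by this patch. */
#include <stdio.h>
#include <string.h>

int main(void)
{
        char line[512];
        FILE *f = fopen("/proc/cpuinfo", "r");

        if (!f) {
                perror("fopen");
                return 1;
        }
        while (fgets(line, sizeof(line), f)) {
                if (strncmp(line, "features", 8) == 0) {
                        printf("%s", strstr(line, "edat") ?
                               "hardware large pages (EDAT)\n" :
                               "software large page emulation\n");
                        break;
                }
        }
        fclose(f);
        return 0;
}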