diff options
author | Gerald Schaefer <geraldsc@de.ibm.com> | 2008-04-30 07:38:46 -0400 |
---|---|---|
committer | Martin Schwidefsky <schwidefsky@de.ibm.com> | 2008-04-30 07:38:47 -0400 |
commit | 53492b1de46a7576170e865062ffcfc93bb5650b (patch) | |
tree | bee94e5b2e8c19c1a094a25023cb82572707feb4 | |
parent | 2e5061e40af88070984e3769eafb5a06022375fd (diff) |
[S390] System z large page support.
This adds hugetlbfs support on System z, using hardware large page
support where available and software large page emulation on older hardware.
Shared (large) page tables are implemented in software emulation mode,
by using page->index of the first tail page from a compound large page
to store page table information.
Signed-off-by: Gerald Schaefer <geraldsc@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
-rw-r--r-- | arch/s390/kernel/early.c | 16 | ||||
-rw-r--r-- | arch/s390/kernel/head64.S | 2 | ||||
-rw-r--r-- | arch/s390/kernel/setup.c | 10 | ||||
-rw-r--r-- | arch/s390/mm/Makefile | 2 | ||||
-rw-r--r-- | arch/s390/mm/fault.c | 3 | ||||
-rw-r--r-- | arch/s390/mm/hugetlbpage.c | 134 | ||||
-rw-r--r-- | arch/s390/mm/init.c | 23 | ||||
-rw-r--r-- | arch/s390/mm/vmem.c | 55 | ||||
-rw-r--r-- | fs/Kconfig | 3 | ||||
-rw-r--r-- | include/asm-s390/hugetlb.h | 183 | ||||
-rw-r--r-- | include/asm-s390/page.h | 29 | ||||
-rw-r--r-- | include/asm-s390/pgtable.h | 12 | ||||
-rw-r--r-- | include/asm-s390/setup.h | 6 | ||||
-rw-r--r-- | include/asm-s390/tlbflush.h | 1 |
14 files changed, 437 insertions, 42 deletions
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index bd188f6bd0e2..d0e09684b9ce 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c | |||
@@ -268,6 +268,19 @@ static noinline __init void setup_lowcore_early(void) | |||
268 | s390_base_pgm_handler_fn = early_pgm_check_handler; | 268 | s390_base_pgm_handler_fn = early_pgm_check_handler; |
269 | } | 269 | } |
270 | 270 | ||
271 | static noinline __init void setup_hpage(void) | ||
272 | { | ||
273 | #ifndef CONFIG_DEBUG_PAGEALLOC | ||
274 | unsigned int facilities; | ||
275 | |||
276 | facilities = stfl(); | ||
277 | if (!(facilities & (1UL << 23)) || !(facilities & (1UL << 29))) | ||
278 | return; | ||
279 | machine_flags |= MACHINE_FLAG_HPAGE; | ||
280 | __ctl_set_bit(0, 23); | ||
281 | #endif | ||
282 | } | ||
283 | |||
271 | static __init void detect_mvpg(void) | 284 | static __init void detect_mvpg(void) |
272 | { | 285 | { |
273 | #ifndef CONFIG_64BIT | 286 | #ifndef CONFIG_64BIT |
@@ -360,6 +373,8 @@ static __init void detect_machine_facilities(void) | |||
360 | facilities = stfl(); | 373 | facilities = stfl(); |
361 | if (facilities & (1 << 28)) | 374 | if (facilities & (1 << 28)) |
362 | machine_flags |= MACHINE_FLAG_IDTE; | 375 | machine_flags |= MACHINE_FLAG_IDTE; |
376 | if (facilities & (1 << 23)) | ||
377 | machine_flags |= MACHINE_FLAG_PFMF; | ||
363 | if (facilities & (1 << 4)) | 378 | if (facilities & (1 << 4)) |
364 | machine_flags |= MACHINE_FLAG_MVCOS; | 379 | machine_flags |= MACHINE_FLAG_MVCOS; |
365 | #endif | 380 | #endif |
@@ -388,6 +403,7 @@ void __init startup_init(void) | |||
388 | detect_diag9c(); | 403 | detect_diag9c(); |
389 | detect_diag44(); | 404 | detect_diag44(); |
390 | detect_machine_facilities(); | 405 | detect_machine_facilities(); |
406 | setup_hpage(); | ||
391 | sclp_read_info_early(); | 407 | sclp_read_info_early(); |
392 | sclp_facilities_detect(); | 408 | sclp_facilities_detect(); |
393 | memsize = sclp_memory_detect(); | 409 | memsize = sclp_memory_detect(); |
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 9c2c6f7d37e7..1d06961e87b3 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S | |||
@@ -129,7 +129,7 @@ startup_continue: | |||
129 | # virtual and never return ... | 129 | # virtual and never return ... |
130 | .align 16 | 130 | .align 16 |
131 | .Lentry:.quad 0x0000000180000000,_stext | 131 | .Lentry:.quad 0x0000000180000000,_stext |
132 | .Lctl: .quad 0x04b50002 # cr0: various things | 132 | .Lctl: .quad 0x04350002 # cr0: various things |
133 | .quad 0 # cr1: primary space segment table | 133 | .quad 0 # cr1: primary space segment table |
134 | .quad .Lduct # cr2: dispatchable unit control table | 134 | .quad .Lduct # cr2: dispatchable unit control table |
135 | .quad 0 # cr3: instruction authorization | 135 | .quad 0 # cr3: instruction authorization |
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 694c6546ce64..2bc70b6e876a 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c | |||
@@ -749,6 +749,9 @@ static void __init setup_hwcaps(void) | |||
749 | elf_hwcap |= 1UL << 6; | 749 | elf_hwcap |= 1UL << 6; |
750 | } | 750 | } |
751 | 751 | ||
752 | if (MACHINE_HAS_HPAGE) | ||
753 | elf_hwcap |= 1UL << 7; | ||
754 | |||
752 | switch (cpuinfo->cpu_id.machine) { | 755 | switch (cpuinfo->cpu_id.machine) { |
753 | case 0x9672: | 756 | case 0x9672: |
754 | #if !defined(CONFIG_64BIT) | 757 | #if !defined(CONFIG_64BIT) |
@@ -872,8 +875,9 @@ void __cpuinit print_cpu_info(struct cpuinfo_S390 *cpuinfo) | |||
872 | 875 | ||
873 | static int show_cpuinfo(struct seq_file *m, void *v) | 876 | static int show_cpuinfo(struct seq_file *m, void *v) |
874 | { | 877 | { |
875 | static const char *hwcap_str[7] = { | 878 | static const char *hwcap_str[8] = { |
876 | "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp" | 879 | "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", |
880 | "edat" | ||
877 | }; | 881 | }; |
878 | struct cpuinfo_S390 *cpuinfo; | 882 | struct cpuinfo_S390 *cpuinfo; |
879 | unsigned long n = (unsigned long) v - 1; | 883 | unsigned long n = (unsigned long) v - 1; |
@@ -888,7 +892,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
888 | num_online_cpus(), loops_per_jiffy/(500000/HZ), | 892 | num_online_cpus(), loops_per_jiffy/(500000/HZ), |
889 | (loops_per_jiffy/(5000/HZ))%100); | 893 | (loops_per_jiffy/(5000/HZ))%100); |
890 | seq_puts(m, "features\t: "); | 894 | seq_puts(m, "features\t: "); |
891 | for (i = 0; i < 7; i++) | 895 | for (i = 0; i < 8; i++) |
892 | if (hwcap_str[i] && (elf_hwcap & (1UL << i))) | 896 | if (hwcap_str[i] && (elf_hwcap & (1UL << i))) |
893 | seq_printf(m, "%s ", hwcap_str[i]); | 897 | seq_printf(m, "%s ", hwcap_str[i]); |
894 | seq_puts(m, "\n"); | 898 | seq_puts(m, "\n"); |
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index 66401930f83e..fb988a48a754 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile | |||
@@ -4,4 +4,4 @@ | |||
4 | 4 | ||
5 | obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o | 5 | obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o |
6 | obj-$(CONFIG_CMM) += cmm.o | 6 | obj-$(CONFIG_CMM) += cmm.o |
7 | 7 | obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o | |
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 2650f46001d0..4d537205e83c 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/hardirq.h> | 28 | #include <linux/hardirq.h> |
29 | #include <linux/kprobes.h> | 29 | #include <linux/kprobes.h> |
30 | #include <linux/uaccess.h> | 30 | #include <linux/uaccess.h> |
31 | #include <linux/hugetlb.h> | ||
31 | #include <asm/system.h> | 32 | #include <asm/system.h> |
32 | #include <asm/pgtable.h> | 33 | #include <asm/pgtable.h> |
33 | #include <asm/s390_ext.h> | 34 | #include <asm/s390_ext.h> |
@@ -367,6 +368,8 @@ good_area: | |||
367 | } | 368 | } |
368 | 369 | ||
369 | survive: | 370 | survive: |
371 | if (is_vm_hugetlb_page(vma)) | ||
372 | address &= HPAGE_MASK; | ||
370 | /* | 373 | /* |
371 | * If for any reason at all we couldn't handle the fault, | 374 | * If for any reason at all we couldn't handle the fault, |
372 | * make sure we exit gracefully rather than endlessly redo | 375 | * make sure we exit gracefully rather than endlessly redo |
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c new file mode 100644 index 000000000000..f4b6124fdb75 --- /dev/null +++ b/arch/s390/mm/hugetlbpage.c | |||
@@ -0,0 +1,134 @@ | |||
1 | /* | ||
2 | * IBM System z Huge TLB Page Support for Kernel. | ||
3 | * | ||
4 | * Copyright 2007 IBM Corp. | ||
5 | * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com> | ||
6 | */ | ||
7 | |||
8 | #include <linux/mm.h> | ||
9 | #include <linux/hugetlb.h> | ||
10 | |||
11 | |||
12 | void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, | ||
13 | pte_t *pteptr, pte_t pteval) | ||
14 | { | ||
15 | pmd_t *pmdp = (pmd_t *) pteptr; | ||
16 | pte_t shadow_pteval = pteval; | ||
17 | unsigned long mask; | ||
18 | |||
19 | if (!MACHINE_HAS_HPAGE) { | ||
20 | pteptr = (pte_t *) pte_page(pteval)[1].index; | ||
21 | mask = pte_val(pteval) & | ||
22 | (_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO); | ||
23 | pte_val(pteval) = (_SEGMENT_ENTRY + __pa(pteptr)) | mask; | ||
24 | if (mm->context.noexec) { | ||
25 | pteptr += PTRS_PER_PTE; | ||
26 | pte_val(shadow_pteval) = | ||
27 | (_SEGMENT_ENTRY + __pa(pteptr)) | mask; | ||
28 | } | ||
29 | } | ||
30 | |||
31 | pmd_val(*pmdp) = pte_val(pteval); | ||
32 | if (mm->context.noexec) { | ||
33 | pmdp = get_shadow_table(pmdp); | ||
34 | pmd_val(*pmdp) = pte_val(shadow_pteval); | ||
35 | } | ||
36 | } | ||
37 | |||
38 | int arch_prepare_hugepage(struct page *page) | ||
39 | { | ||
40 | unsigned long addr = page_to_phys(page); | ||
41 | pte_t pte; | ||
42 | pte_t *ptep; | ||
43 | int i; | ||
44 | |||
45 | if (MACHINE_HAS_HPAGE) | ||
46 | return 0; | ||
47 | |||
48 | ptep = (pte_t *) pte_alloc_one(&init_mm, address); | ||
49 | if (!ptep) | ||
50 | return -ENOMEM; | ||
51 | |||
52 | pte = mk_pte(page, PAGE_RW); | ||
53 | for (i = 0; i < PTRS_PER_PTE; i++) { | ||
54 | set_pte_at(&init_mm, addr + i * PAGE_SIZE, ptep + i, pte); | ||
55 | pte_val(pte) += PAGE_SIZE; | ||
56 | } | ||
57 | page[1].index = (unsigned long) ptep; | ||
58 | return 0; | ||
59 | } | ||
60 | |||
61 | void arch_release_hugepage(struct page *page) | ||
62 | { | ||
63 | pte_t *ptep; | ||
64 | |||
65 | if (MACHINE_HAS_HPAGE) | ||
66 | return; | ||
67 | |||
68 | ptep = (pte_t *) page[1].index; | ||
69 | if (!ptep) | ||
70 | return; | ||
71 | pte_free(&init_mm, ptep); | ||
72 | page[1].index = 0; | ||
73 | } | ||
74 | |||
75 | pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) | ||
76 | { | ||
77 | pgd_t *pgdp; | ||
78 | pud_t *pudp; | ||
79 | pmd_t *pmdp = NULL; | ||
80 | |||
81 | pgdp = pgd_offset(mm, addr); | ||
82 | pudp = pud_alloc(mm, pgdp, addr); | ||
83 | if (pudp) | ||
84 | pmdp = pmd_alloc(mm, pudp, addr); | ||
85 | return (pte_t *) pmdp; | ||
86 | } | ||
87 | |||
88 | pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) | ||
89 | { | ||
90 | pgd_t *pgdp; | ||
91 | pud_t *pudp; | ||
92 | pmd_t *pmdp = NULL; | ||
93 | |||
94 | pgdp = pgd_offset(mm, addr); | ||
95 | if (pgd_present(*pgdp)) { | ||
96 | pudp = pud_offset(pgdp, addr); | ||
97 | if (pud_present(*pudp)) | ||
98 | pmdp = pmd_offset(pudp, addr); | ||
99 | } | ||
100 | return (pte_t *) pmdp; | ||
101 | } | ||
102 | |||
103 | int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) | ||
104 | { | ||
105 | return 0; | ||
106 | } | ||
107 | |||
108 | struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, | ||
109 | int write) | ||
110 | { | ||
111 | return ERR_PTR(-EINVAL); | ||
112 | } | ||
113 | |||
114 | int pmd_huge(pmd_t pmd) | ||
115 | { | ||
116 | if (!MACHINE_HAS_HPAGE) | ||
117 | return 0; | ||
118 | |||
119 | return !!(pmd_val(pmd) & _SEGMENT_ENTRY_LARGE); | ||
120 | } | ||
121 | |||
122 | struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, | ||
123 | pmd_t *pmdp, int write) | ||
124 | { | ||
125 | struct page *page; | ||
126 | |||
127 | if (!MACHINE_HAS_HPAGE) | ||
128 | return NULL; | ||
129 | |||
130 | page = pmd_page(*pmdp); | ||
131 | if (page) | ||
132 | page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT); | ||
133 | return page; | ||
134 | } | ||
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 202c952a29b4..acc92f46a096 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c | |||
@@ -77,28 +77,6 @@ void show_mem(void) | |||
77 | printk("%lu pages pagetables\n", global_page_state(NR_PAGETABLE)); | 77 | printk("%lu pages pagetables\n", global_page_state(NR_PAGETABLE)); |
78 | } | 78 | } |
79 | 79 | ||
80 | static void __init setup_ro_region(void) | ||
81 | { | ||
82 | pgd_t *pgd; | ||
83 | pud_t *pud; | ||
84 | pmd_t *pmd; | ||
85 | pte_t *pte; | ||
86 | pte_t new_pte; | ||
87 | unsigned long address, end; | ||
88 | |||
89 | address = ((unsigned long)&_stext) & PAGE_MASK; | ||
90 | end = PFN_ALIGN((unsigned long)&_eshared); | ||
91 | |||
92 | for (; address < end; address += PAGE_SIZE) { | ||
93 | pgd = pgd_offset_k(address); | ||
94 | pud = pud_offset(pgd, address); | ||
95 | pmd = pmd_offset(pud, address); | ||
96 | pte = pte_offset_kernel(pmd, address); | ||
97 | new_pte = mk_pte_phys(address, __pgprot(_PAGE_RO)); | ||
98 | *pte = new_pte; | ||
99 | } | ||
100 | } | ||
101 | |||
102 | /* | 80 | /* |
103 | * paging_init() sets up the page tables | 81 | * paging_init() sets up the page tables |
104 | */ | 82 | */ |
@@ -121,7 +99,6 @@ void __init paging_init(void) | |||
121 | clear_table((unsigned long *) init_mm.pgd, pgd_type, | 99 | clear_table((unsigned long *) init_mm.pgd, pgd_type, |
122 | sizeof(unsigned long)*2048); | 100 | sizeof(unsigned long)*2048); |
123 | vmem_map_init(); | 101 | vmem_map_init(); |
124 | setup_ro_region(); | ||
125 | 102 | ||
126 | /* enable virtual mapping in kernel mode */ | 103 | /* enable virtual mapping in kernel mode */ |
127 | __ctl_load(S390_lowcore.kernel_asce, 1, 1); | 104 | __ctl_load(S390_lowcore.kernel_asce, 1, 1); |
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 3ffc0211dc85..97bce6c97574 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c | |||
@@ -10,10 +10,12 @@ | |||
10 | #include <linux/mm.h> | 10 | #include <linux/mm.h> |
11 | #include <linux/module.h> | 11 | #include <linux/module.h> |
12 | #include <linux/list.h> | 12 | #include <linux/list.h> |
13 | #include <linux/hugetlb.h> | ||
13 | #include <asm/pgalloc.h> | 14 | #include <asm/pgalloc.h> |
14 | #include <asm/pgtable.h> | 15 | #include <asm/pgtable.h> |
15 | #include <asm/setup.h> | 16 | #include <asm/setup.h> |
16 | #include <asm/tlbflush.h> | 17 | #include <asm/tlbflush.h> |
18 | #include <asm/sections.h> | ||
17 | 19 | ||
18 | static DEFINE_MUTEX(vmem_mutex); | 20 | static DEFINE_MUTEX(vmem_mutex); |
19 | 21 | ||
@@ -113,7 +115,7 @@ static pte_t __init_refok *vmem_pte_alloc(void) | |||
113 | /* | 115 | /* |
114 | * Add a physical memory range to the 1:1 mapping. | 116 | * Add a physical memory range to the 1:1 mapping. |
115 | */ | 117 | */ |
116 | static int vmem_add_range(unsigned long start, unsigned long size) | 118 | static int vmem_add_range(unsigned long start, unsigned long size, int ro) |
117 | { | 119 | { |
118 | unsigned long address; | 120 | unsigned long address; |
119 | pgd_t *pg_dir; | 121 | pgd_t *pg_dir; |
@@ -140,7 +142,19 @@ static int vmem_add_range(unsigned long start, unsigned long size) | |||
140 | pud_populate_kernel(&init_mm, pu_dir, pm_dir); | 142 | pud_populate_kernel(&init_mm, pu_dir, pm_dir); |
141 | } | 143 | } |
142 | 144 | ||
145 | pte = mk_pte_phys(address, __pgprot(ro ? _PAGE_RO : 0)); | ||
143 | pm_dir = pmd_offset(pu_dir, address); | 146 | pm_dir = pmd_offset(pu_dir, address); |
147 | |||
148 | #ifdef __s390x__ | ||
149 | if (MACHINE_HAS_HPAGE && !(address & ~HPAGE_MASK) && | ||
150 | (address + HPAGE_SIZE <= start + size) && | ||
151 | (address >= HPAGE_SIZE)) { | ||
152 | pte_val(pte) |= _SEGMENT_ENTRY_LARGE; | ||
153 | pmd_val(*pm_dir) = pte_val(pte); | ||
154 | address += HPAGE_SIZE - PAGE_SIZE; | ||
155 | continue; | ||
156 | } | ||
157 | #endif | ||
144 | if (pmd_none(*pm_dir)) { | 158 | if (pmd_none(*pm_dir)) { |
145 | pt_dir = vmem_pte_alloc(); | 159 | pt_dir = vmem_pte_alloc(); |
146 | if (!pt_dir) | 160 | if (!pt_dir) |
@@ -149,7 +163,6 @@ static int vmem_add_range(unsigned long start, unsigned long size) | |||
149 | } | 163 | } |
150 | 164 | ||
151 | pt_dir = pte_offset_kernel(pm_dir, address); | 165 | pt_dir = pte_offset_kernel(pm_dir, address); |
152 | pte = pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL); | ||
153 | *pt_dir = pte; | 166 | *pt_dir = pte; |
154 | } | 167 | } |
155 | ret = 0; | 168 | ret = 0; |
@@ -180,6 +193,13 @@ static void vmem_remove_range(unsigned long start, unsigned long size) | |||
180 | pm_dir = pmd_offset(pu_dir, address); | 193 | pm_dir = pmd_offset(pu_dir, address); |
181 | if (pmd_none(*pm_dir)) | 194 | if (pmd_none(*pm_dir)) |
182 | continue; | 195 | continue; |
196 | |||
197 | if (pmd_huge(*pm_dir)) { | ||
198 | pmd_clear_kernel(pm_dir); | ||
199 | address += HPAGE_SIZE - PAGE_SIZE; | ||
200 | continue; | ||
201 | } | ||
202 | |||
183 | pt_dir = pte_offset_kernel(pm_dir, address); | 203 | pt_dir = pte_offset_kernel(pm_dir, address); |
184 | *pt_dir = pte; | 204 | *pt_dir = pte; |
185 | } | 205 | } |
@@ -248,14 +268,14 @@ out: | |||
248 | return ret; | 268 | return ret; |
249 | } | 269 | } |
250 | 270 | ||
251 | static int vmem_add_mem(unsigned long start, unsigned long size) | 271 | static int vmem_add_mem(unsigned long start, unsigned long size, int ro) |
252 | { | 272 | { |
253 | int ret; | 273 | int ret; |
254 | 274 | ||
255 | ret = vmem_add_mem_map(start, size); | 275 | ret = vmem_add_mem_map(start, size); |
256 | if (ret) | 276 | if (ret) |
257 | return ret; | 277 | return ret; |
258 | return vmem_add_range(start, size); | 278 | return vmem_add_range(start, size, ro); |
259 | } | 279 | } |
260 | 280 | ||
261 | /* | 281 | /* |
@@ -338,7 +358,7 @@ int add_shared_memory(unsigned long start, unsigned long size) | |||
338 | if (ret) | 358 | if (ret) |
339 | goto out_free; | 359 | goto out_free; |
340 | 360 | ||
341 | ret = vmem_add_mem(start, size); | 361 | ret = vmem_add_mem(start, size, 0); |
342 | if (ret) | 362 | if (ret) |
343 | goto out_remove; | 363 | goto out_remove; |
344 | 364 | ||
@@ -374,14 +394,35 @@ out: | |||
374 | */ | 394 | */ |
375 | void __init vmem_map_init(void) | 395 | void __init vmem_map_init(void) |
376 | { | 396 | { |
397 | unsigned long ro_start, ro_end; | ||
398 | unsigned long start, end; | ||
377 | int i; | 399 | int i; |
378 | 400 | ||
379 | INIT_LIST_HEAD(&init_mm.context.crst_list); | 401 | INIT_LIST_HEAD(&init_mm.context.crst_list); |
380 | INIT_LIST_HEAD(&init_mm.context.pgtable_list); | 402 | INIT_LIST_HEAD(&init_mm.context.pgtable_list); |
381 | init_mm.context.noexec = 0; | 403 | init_mm.context.noexec = 0; |
382 | NODE_DATA(0)->node_mem_map = VMEM_MAP; | 404 | NODE_DATA(0)->node_mem_map = VMEM_MAP; |
383 | for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) | 405 | ro_start = ((unsigned long)&_stext) & PAGE_MASK; |
384 | vmem_add_mem(memory_chunk[i].addr, memory_chunk[i].size); | 406 | ro_end = PFN_ALIGN((unsigned long)&_eshared); |
407 | for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { | ||
408 | start = memory_chunk[i].addr; | ||
409 | end = memory_chunk[i].addr + memory_chunk[i].size; | ||
410 | if (start >= ro_end || end <= ro_start) | ||
411 | vmem_add_mem(start, end - start, 0); | ||
412 | else if (start >= ro_start && end <= ro_end) | ||
413 | vmem_add_mem(start, end - start, 1); | ||
414 | else if (start >= ro_start) { | ||
415 | vmem_add_mem(start, ro_end - start, 1); | ||
416 | vmem_add_mem(ro_end, end - ro_end, 0); | ||
417 | } else if (end < ro_end) { | ||
418 | vmem_add_mem(start, ro_start - start, 0); | ||
419 | vmem_add_mem(ro_start, end - ro_start, 1); | ||
420 | } else { | ||
421 | vmem_add_mem(start, ro_start - start, 0); | ||
422 | vmem_add_mem(ro_start, ro_end - ro_start, 1); | ||
423 | vmem_add_mem(ro_end, end - ro_end, 0); | ||
424 | } | ||
425 | } | ||
385 | } | 426 | } |
386 | 427 | ||
387 | /* | 428 | /* |
diff --git a/fs/Kconfig b/fs/Kconfig index 2e43d46f65d6..cf12c403b8c7 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
@@ -1005,7 +1005,8 @@ config TMPFS_POSIX_ACL | |||
1005 | 1005 | ||
1006 | config HUGETLBFS | 1006 | config HUGETLBFS |
1007 | bool "HugeTLB file system support" | 1007 | bool "HugeTLB file system support" |
1008 | depends on X86 || IA64 || PPC64 || SPARC64 || (SUPERH && MMU) || BROKEN | 1008 | depends on X86 || IA64 || PPC64 || SPARC64 || (SUPERH && MMU) || \ |
1009 | (S390 && 64BIT) || BROKEN | ||
1009 | help | 1010 | help |
1010 | hugetlbfs is a filesystem backing for HugeTLB pages, based on | 1011 | hugetlbfs is a filesystem backing for HugeTLB pages, based on |
1011 | ramfs. For architectures that support it, say Y here and read | 1012 | ramfs. For architectures that support it, say Y here and read |
diff --git a/include/asm-s390/hugetlb.h b/include/asm-s390/hugetlb.h new file mode 100644 index 000000000000..600a776f8f75 --- /dev/null +++ b/include/asm-s390/hugetlb.h | |||
@@ -0,0 +1,183 @@ | |||
1 | /* | ||
2 | * IBM System z Huge TLB Page Support for Kernel. | ||
3 | * | ||
4 | * Copyright IBM Corp. 2008 | ||
5 | * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com> | ||
6 | */ | ||
7 | |||
8 | #ifndef _ASM_S390_HUGETLB_H | ||
9 | #define _ASM_S390_HUGETLB_H | ||
10 | |||
11 | #include <asm/page.h> | ||
12 | #include <asm/pgtable.h> | ||
13 | |||
14 | |||
15 | #define is_hugepage_only_range(mm, addr, len) 0 | ||
16 | #define hugetlb_free_pgd_range free_pgd_range | ||
17 | |||
18 | void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, | ||
19 | pte_t *ptep, pte_t pte); | ||
20 | |||
21 | /* | ||
22 | * If the arch doesn't supply something else, assume that hugepage | ||
23 | * size aligned regions are ok without further preparation. | ||
24 | */ | ||
25 | static inline int prepare_hugepage_range(unsigned long addr, unsigned long len) | ||
26 | { | ||
27 | if (len & ~HPAGE_MASK) | ||
28 | return -EINVAL; | ||
29 | if (addr & ~HPAGE_MASK) | ||
30 | return -EINVAL; | ||
31 | return 0; | ||
32 | } | ||
33 | |||
34 | #define hugetlb_prefault_arch_hook(mm) do { } while (0) | ||
35 | |||
36 | int arch_prepare_hugepage(struct page *page); | ||
37 | void arch_release_hugepage(struct page *page); | ||
38 | |||
39 | static inline pte_t pte_mkhuge(pte_t pte) | ||
40 | { | ||
41 | /* | ||
42 | * PROT_NONE needs to be remapped from the pte type to the ste type. | ||
43 | * The HW invalid bit is also different for pte and ste. The pte | ||
44 | * invalid bit happens to be the same as the ste _SEGMENT_ENTRY_LARGE | ||
45 | * bit, so we don't have to clear it. | ||
46 | */ | ||
47 | if (pte_val(pte) & _PAGE_INVALID) { | ||
48 | if (pte_val(pte) & _PAGE_SWT) | ||
49 | pte_val(pte) |= _HPAGE_TYPE_NONE; | ||
50 | pte_val(pte) |= _SEGMENT_ENTRY_INV; | ||
51 | } | ||
52 | /* | ||
53 | * Clear SW pte bits SWT and SWX, there are no SW bits in a segment | ||
54 | * table entry. | ||
55 | */ | ||
56 | pte_val(pte) &= ~(_PAGE_SWT | _PAGE_SWX); | ||
57 | /* | ||
58 | * Also set the change-override bit because we don't need dirty bit | ||
59 | * tracking for hugetlbfs pages. | ||
60 | */ | ||
61 | pte_val(pte) |= (_SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_CO); | ||
62 | return pte; | ||
63 | } | ||
64 | |||
65 | static inline pte_t huge_pte_wrprotect(pte_t pte) | ||
66 | { | ||
67 | pte_val(pte) |= _PAGE_RO; | ||
68 | return pte; | ||
69 | } | ||
70 | |||
71 | static inline int huge_pte_none(pte_t pte) | ||
72 | { | ||
73 | return (pte_val(pte) & _SEGMENT_ENTRY_INV) && | ||
74 | !(pte_val(pte) & _SEGMENT_ENTRY_RO); | ||
75 | } | ||
76 | |||
77 | static inline pte_t huge_ptep_get(pte_t *ptep) | ||
78 | { | ||
79 | pte_t pte = *ptep; | ||
80 | unsigned long mask; | ||
81 | |||
82 | if (!MACHINE_HAS_HPAGE) { | ||
83 | ptep = (pte_t *) (pte_val(pte) & _SEGMENT_ENTRY_ORIGIN); | ||
84 | if (ptep) { | ||
85 | mask = pte_val(pte) & | ||
86 | (_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO); | ||
87 | pte = pte_mkhuge(*ptep); | ||
88 | pte_val(pte) |= mask; | ||
89 | } | ||
90 | } | ||
91 | return pte; | ||
92 | } | ||
93 | |||
94 | static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, | ||
95 | unsigned long addr, pte_t *ptep) | ||
96 | { | ||
97 | pte_t pte = huge_ptep_get(ptep); | ||
98 | |||
99 | pmd_clear((pmd_t *) ptep); | ||
100 | return pte; | ||
101 | } | ||
102 | |||
103 | static inline void __pmd_csp(pmd_t *pmdp) | ||
104 | { | ||
105 | register unsigned long reg2 asm("2") = pmd_val(*pmdp); | ||
106 | register unsigned long reg3 asm("3") = pmd_val(*pmdp) | | ||
107 | _SEGMENT_ENTRY_INV; | ||
108 | register unsigned long reg4 asm("4") = ((unsigned long) pmdp) + 5; | ||
109 | |||
110 | asm volatile( | ||
111 | " csp %1,%3" | ||
112 | : "=m" (*pmdp) | ||
113 | : "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc"); | ||
114 | pmd_val(*pmdp) = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY; | ||
115 | } | ||
116 | |||
117 | static inline void __pmd_idte(unsigned long address, pmd_t *pmdp) | ||
118 | { | ||
119 | unsigned long sto = (unsigned long) pmdp - | ||
120 | pmd_index(address) * sizeof(pmd_t); | ||
121 | |||
122 | if (!(pmd_val(*pmdp) & _SEGMENT_ENTRY_INV)) { | ||
123 | asm volatile( | ||
124 | " .insn rrf,0xb98e0000,%2,%3,0,0" | ||
125 | : "=m" (*pmdp) | ||
126 | : "m" (*pmdp), "a" (sto), | ||
127 | "a" ((address & HPAGE_MASK)) | ||
128 | ); | ||
129 | } | ||
130 | pmd_val(*pmdp) = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY; | ||
131 | } | ||
132 | |||
133 | static inline void huge_ptep_invalidate(struct mm_struct *mm, | ||
134 | unsigned long address, pte_t *ptep) | ||
135 | { | ||
136 | pmd_t *pmdp = (pmd_t *) ptep; | ||
137 | |||
138 | if (!MACHINE_HAS_IDTE) { | ||
139 | __pmd_csp(pmdp); | ||
140 | if (mm->context.noexec) { | ||
141 | pmdp = get_shadow_table(pmdp); | ||
142 | __pmd_csp(pmdp); | ||
143 | } | ||
144 | return; | ||
145 | } | ||
146 | |||
147 | __pmd_idte(address, pmdp); | ||
148 | if (mm->context.noexec) { | ||
149 | pmdp = get_shadow_table(pmdp); | ||
150 | __pmd_idte(address, pmdp); | ||
151 | } | ||
152 | return; | ||
153 | } | ||
154 | |||
155 | #define huge_ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty) \ | ||
156 | ({ \ | ||
157 | int __changed = !pte_same(huge_ptep_get(__ptep), __entry); \ | ||
158 | if (__changed) { \ | ||
159 | huge_ptep_invalidate((__vma)->vm_mm, __addr, __ptep); \ | ||
160 | set_huge_pte_at((__vma)->vm_mm, __addr, __ptep, __entry); \ | ||
161 | } \ | ||
162 | __changed; \ | ||
163 | }) | ||
164 | |||
165 | #define huge_ptep_set_wrprotect(__mm, __addr, __ptep) \ | ||
166 | ({ \ | ||
167 | pte_t __pte = huge_ptep_get(__ptep); \ | ||
168 | if (pte_write(__pte)) { \ | ||
169 | if (atomic_read(&(__mm)->mm_users) > 1 || \ | ||
170 | (__mm) != current->active_mm) \ | ||
171 | huge_ptep_invalidate(__mm, __addr, __ptep); \ | ||
172 | set_huge_pte_at(__mm, __addr, __ptep, \ | ||
173 | huge_pte_wrprotect(__pte)); \ | ||
174 | } \ | ||
175 | }) | ||
176 | |||
177 | static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, | ||
178 | unsigned long address, pte_t *ptep) | ||
179 | { | ||
180 | huge_ptep_invalidate(vma->vm_mm, address, ptep); | ||
181 | } | ||
182 | |||
183 | #endif /* _ASM_S390_HUGETLB_H */ | ||
diff --git a/include/asm-s390/page.h b/include/asm-s390/page.h index fe7f92b6ae6d..b01e6fc9a295 100644 --- a/include/asm-s390/page.h +++ b/include/asm-s390/page.h | |||
@@ -19,17 +19,34 @@ | |||
19 | #define PAGE_DEFAULT_ACC 0 | 19 | #define PAGE_DEFAULT_ACC 0 |
20 | #define PAGE_DEFAULT_KEY (PAGE_DEFAULT_ACC << 4) | 20 | #define PAGE_DEFAULT_KEY (PAGE_DEFAULT_ACC << 4) |
21 | 21 | ||
22 | #define HPAGE_SHIFT 20 | ||
23 | #define HPAGE_SIZE (1UL << HPAGE_SHIFT) | ||
24 | #define HPAGE_MASK (~(HPAGE_SIZE - 1)) | ||
25 | #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) | ||
26 | |||
27 | #define ARCH_HAS_SETCLEAR_HUGE_PTE | ||
28 | #define ARCH_HAS_HUGE_PTE_TYPE | ||
29 | #define ARCH_HAS_PREPARE_HUGEPAGE | ||
30 | #define ARCH_HAS_HUGEPAGE_CLEAR_FLUSH | ||
31 | |||
22 | #include <asm/setup.h> | 32 | #include <asm/setup.h> |
23 | #ifndef __ASSEMBLY__ | 33 | #ifndef __ASSEMBLY__ |
24 | 34 | ||
25 | static inline void clear_page(void *page) | 35 | static inline void clear_page(void *page) |
26 | { | 36 | { |
27 | register unsigned long reg1 asm ("1") = 0; | 37 | if (MACHINE_HAS_PFMF) { |
28 | register void *reg2 asm ("2") = page; | 38 | asm volatile( |
29 | register unsigned long reg3 asm ("3") = 4096; | 39 | " .insn rre,0xb9af0000,%0,%1" |
30 | asm volatile( | 40 | : : "d" (0x10000), "a" (page) : "memory", "cc"); |
31 | " mvcl 2,0" | 41 | } else { |
32 | : "+d" (reg2), "+d" (reg3) : "d" (reg1) : "memory", "cc"); | 42 | register unsigned long reg1 asm ("1") = 0; |
43 | register void *reg2 asm ("2") = page; | ||
44 | register unsigned long reg3 asm ("3") = 4096; | ||
45 | asm volatile( | ||
46 | " mvcl 2,0" | ||
47 | : "+d" (reg2), "+d" (reg3) : "d" (reg1) | ||
48 | : "memory", "cc"); | ||
49 | } | ||
33 | } | 50 | } |
34 | 51 | ||
35 | static inline void copy_page(void *to, void *from) | 52 | static inline void copy_page(void *to, void *from) |
diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h index f8347ce9c5a1..fd336f2e2a7a 100644 --- a/include/asm-s390/pgtable.h +++ b/include/asm-s390/pgtable.h | |||
@@ -234,6 +234,15 @@ extern char empty_zero_page[PAGE_SIZE]; | |||
234 | #define _PAGE_TYPE_EX_RW 0x002 | 234 | #define _PAGE_TYPE_EX_RW 0x002 |
235 | 235 | ||
236 | /* | 236 | /* |
237 | * Only four types for huge pages, using the invalid bit and protection bit | ||
238 | * of a segment table entry. | ||
239 | */ | ||
240 | #define _HPAGE_TYPE_EMPTY 0x020 /* _SEGMENT_ENTRY_INV */ | ||
241 | #define _HPAGE_TYPE_NONE 0x220 | ||
242 | #define _HPAGE_TYPE_RO 0x200 /* _SEGMENT_ENTRY_RO */ | ||
243 | #define _HPAGE_TYPE_RW 0x000 | ||
244 | |||
245 | /* | ||
237 | * PTE type bits are rather complicated. handle_pte_fault uses pte_present, | 246 | * PTE type bits are rather complicated. handle_pte_fault uses pte_present, |
238 | * pte_none and pte_file to find out the pte type WITHOUT holding the page | 247 | * pte_none and pte_file to find out the pte type WITHOUT holding the page |
239 | * table lock. ptep_clear_flush on the other hand uses ptep_clear_flush to | 248 | * table lock. ptep_clear_flush on the other hand uses ptep_clear_flush to |
@@ -325,6 +334,9 @@ extern char empty_zero_page[PAGE_SIZE]; | |||
325 | #define _SEGMENT_ENTRY (0) | 334 | #define _SEGMENT_ENTRY (0) |
326 | #define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INV) | 335 | #define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INV) |
327 | 336 | ||
337 | #define _SEGMENT_ENTRY_LARGE 0x400 /* STE-format control, large page */ | ||
338 | #define _SEGMENT_ENTRY_CO 0x100 /* change-recording override */ | ||
339 | |||
328 | #endif /* __s390x__ */ | 340 | #endif /* __s390x__ */ |
329 | 341 | ||
330 | /* | 342 | /* |
diff --git a/include/asm-s390/setup.h b/include/asm-s390/setup.h index 3a9e458fd8c3..ba69674012a7 100644 --- a/include/asm-s390/setup.h +++ b/include/asm-s390/setup.h | |||
@@ -69,6 +69,8 @@ extern unsigned long machine_flags; | |||
69 | #define MACHINE_FLAG_DIAG9C (1UL << 7) | 69 | #define MACHINE_FLAG_DIAG9C (1UL << 7) |
70 | #define MACHINE_FLAG_MVCOS (1UL << 8) | 70 | #define MACHINE_FLAG_MVCOS (1UL << 8) |
71 | #define MACHINE_FLAG_KVM (1UL << 9) | 71 | #define MACHINE_FLAG_KVM (1UL << 9) |
72 | #define MACHINE_FLAG_HPAGE (1UL << 10) | ||
73 | #define MACHINE_FLAG_PFMF (1UL << 11) | ||
72 | 74 | ||
73 | #define MACHINE_IS_VM (machine_flags & MACHINE_FLAG_VM) | 75 | #define MACHINE_IS_VM (machine_flags & MACHINE_FLAG_VM) |
74 | #define MACHINE_IS_KVM (machine_flags & MACHINE_FLAG_KVM) | 76 | #define MACHINE_IS_KVM (machine_flags & MACHINE_FLAG_KVM) |
@@ -82,6 +84,8 @@ extern unsigned long machine_flags; | |||
82 | #define MACHINE_HAS_DIAG44 (1) | 84 | #define MACHINE_HAS_DIAG44 (1) |
83 | #define MACHINE_HAS_MVPG (machine_flags & MACHINE_FLAG_MVPG) | 85 | #define MACHINE_HAS_MVPG (machine_flags & MACHINE_FLAG_MVPG) |
84 | #define MACHINE_HAS_MVCOS (0) | 86 | #define MACHINE_HAS_MVCOS (0) |
87 | #define MACHINE_HAS_HPAGE (0) | ||
88 | #define MACHINE_HAS_PFMF (0) | ||
85 | #else /* __s390x__ */ | 89 | #else /* __s390x__ */ |
86 | #define MACHINE_HAS_IEEE (1) | 90 | #define MACHINE_HAS_IEEE (1) |
87 | #define MACHINE_HAS_CSP (1) | 91 | #define MACHINE_HAS_CSP (1) |
@@ -89,6 +93,8 @@ extern unsigned long machine_flags; | |||
89 | #define MACHINE_HAS_DIAG44 (machine_flags & MACHINE_FLAG_DIAG44) | 93 | #define MACHINE_HAS_DIAG44 (machine_flags & MACHINE_FLAG_DIAG44) |
90 | #define MACHINE_HAS_MVPG (1) | 94 | #define MACHINE_HAS_MVPG (1) |
91 | #define MACHINE_HAS_MVCOS (machine_flags & MACHINE_FLAG_MVCOS) | 95 | #define MACHINE_HAS_MVCOS (machine_flags & MACHINE_FLAG_MVCOS) |
96 | #define MACHINE_HAS_HPAGE (machine_flags & MACHINE_FLAG_HPAGE) | ||
97 | #define MACHINE_HAS_PFMF (machine_flags & MACHINE_FLAG_PFMF) | ||
92 | #endif /* __s390x__ */ | 98 | #endif /* __s390x__ */ |
93 | 99 | ||
94 | #define MACHINE_HAS_SCLP (!MACHINE_IS_P390) | 100 | #define MACHINE_HAS_SCLP (!MACHINE_IS_P390) |
diff --git a/include/asm-s390/tlbflush.h b/include/asm-s390/tlbflush.h index 9e57a93d7de1..d60394b9745e 100644 --- a/include/asm-s390/tlbflush.h +++ b/include/asm-s390/tlbflush.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #define _S390_TLBFLUSH_H | 2 | #define _S390_TLBFLUSH_H |
3 | 3 | ||
4 | #include <linux/mm.h> | 4 | #include <linux/mm.h> |
5 | #include <linux/sched.h> | ||
5 | #include <asm/processor.h> | 6 | #include <asm/processor.h> |
6 | #include <asm/pgalloc.h> | 7 | #include <asm/pgalloc.h> |
7 | 8 | ||