author     Gerald Schaefer <geraldsc@de.ibm.com>        2008-04-30 07:38:46 -0400
committer  Martin Schwidefsky <schwidefsky@de.ibm.com>  2008-04-30 07:38:47 -0400
commit     53492b1de46a7576170e865062ffcfc93bb5650b (patch)
tree       bee94e5b2e8c19c1a094a25023cb82572707feb4 /arch/s390/mm
parent     2e5061e40af88070984e3769eafb5a06022375fd (diff)
[S390] System z large page support.
This adds hugetlbfs support on System z, using hardware large page support
where available and software large page emulation on older hardware.
In software emulation mode, shared (large) page tables are implemented by
using page->index of the first tail page of a compound large page to store
the page table information.
Signed-off-by: Gerald Schaefer <geraldsc@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
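
[Editor's note] The tail-page bookkeeping described above can be modeled in a
few lines. The following is a minimal standalone sketch, not part of the
patch: struct fake_page, prepare_hugepage() and release_hugepage() are
illustrative stand-ins for struct page, arch_prepare_hugepage() and
arch_release_hugepage(), and PTRS_PER_PTE is an assumed value. The point is
only the mechanism: an emulated huge page is backed by an ordinary page table
mapping its 4K pieces, and a pointer to that table is parked in the first
tail page of the compound page.

#include <assert.h>
#include <stdlib.h>

#define PTRS_PER_PTE 256        /* 4K pieces per 1 MB segment -- assumed */

struct fake_page {              /* stand-in for struct page */
        unsigned long index;
};

/* Allocate the backing pte table and stash it in the first tail page. */
static int prepare_hugepage(struct fake_page *compound)
{
        unsigned long *ptep = calloc(PTRS_PER_PTE, sizeof(*ptep));

        if (!ptep)
                return -1;
        compound[1].index = (unsigned long) ptep;
        return 0;
}

/* Fetch the table back from the tail page and free it. */
static void release_hugepage(struct fake_page *compound)
{
        free((unsigned long *) compound[1].index);
        compound[1].index = 0;
}

int main(void)
{
        struct fake_page compound[2] = { { 0 }, { 0 } };

        assert(prepare_hugepage(compound) == 0);
        release_hugepage(compound);
        return 0;
}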
Diffstat (limited to 'arch/s390/mm')
-rw-r--r--  arch/s390/mm/Makefile      |   2
-rw-r--r--  arch/s390/mm/fault.c       |   3
-rw-r--r--  arch/s390/mm/hugetlbpage.c | 134
-rw-r--r--  arch/s390/mm/init.c        |  23
-rw-r--r--  arch/s390/mm/vmem.c        |  55
5 files changed, 186 insertions(+), 31 deletions(-)
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index 66401930f83e..fb988a48a754 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -4,4 +4,4 @@
 
 obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o
 obj-$(CONFIG_CMM) += cmm.o
-
+obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 2650f46001d0..4d537205e83c 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -28,6 +28,7 @@
 #include <linux/hardirq.h>
 #include <linux/kprobes.h>
 #include <linux/uaccess.h>
+#include <linux/hugetlb.h>
 #include <asm/system.h>
 #include <asm/pgtable.h>
 #include <asm/s390_ext.h>
@@ -367,6 +368,8 @@ good_area:
         }
 
 survive:
+        if (is_vm_hugetlb_page(vma))
+                address &= HPAGE_MASK;
         /*
          * If for any reason at all we couldn't handle the fault,
          * make sure we exit gracefully rather than endlessly redo
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
new file mode 100644
index 000000000000..f4b6124fdb75
--- /dev/null
+++ b/arch/s390/mm/hugetlbpage.c
@@ -0,0 +1,134 @@
+/*
+ * IBM System z Huge TLB Page Support for Kernel.
+ *
+ * Copyright 2007 IBM Corp.
+ * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
+ */
+
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+
+
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+                     pte_t *pteptr, pte_t pteval)
+{
+        pmd_t *pmdp = (pmd_t *) pteptr;
+        pte_t shadow_pteval = pteval;
+        unsigned long mask;
+
+        if (!MACHINE_HAS_HPAGE) {
+                pteptr = (pte_t *) pte_page(pteval)[1].index;
+                mask = pte_val(pteval) &
+                                (_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO);
+                pte_val(pteval) = (_SEGMENT_ENTRY + __pa(pteptr)) | mask;
+                if (mm->context.noexec) {
+                        pteptr += PTRS_PER_PTE;
+                        pte_val(shadow_pteval) =
+                                (_SEGMENT_ENTRY + __pa(pteptr)) | mask;
+                }
+        }
+
+        pmd_val(*pmdp) = pte_val(pteval);
+        if (mm->context.noexec) {
+                pmdp = get_shadow_table(pmdp);
+                pmd_val(*pmdp) = pte_val(shadow_pteval);
+        }
+}
+
+int arch_prepare_hugepage(struct page *page)
+{
+        unsigned long addr = page_to_phys(page);
+        pte_t pte;
+        pte_t *ptep;
+        int i;
+
+        if (MACHINE_HAS_HPAGE)
+                return 0;
+
+        ptep = (pte_t *) pte_alloc_one(&init_mm, addr);
+        if (!ptep)
+                return -ENOMEM;
+
+        pte = mk_pte(page, PAGE_RW);
+        for (i = 0; i < PTRS_PER_PTE; i++) {
+                set_pte_at(&init_mm, addr + i * PAGE_SIZE, ptep + i, pte);
+                pte_val(pte) += PAGE_SIZE;
+        }
+        page[1].index = (unsigned long) ptep;
+        return 0;
+}
+
+void arch_release_hugepage(struct page *page)
+{
+        pte_t *ptep;
+
+        if (MACHINE_HAS_HPAGE)
+                return;
+
+        ptep = (pte_t *) page[1].index;
+        if (!ptep)
+                return;
+        pte_free(&init_mm, ptep);
+        page[1].index = 0;
+}
+
+pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
+{
+        pgd_t *pgdp;
+        pud_t *pudp;
+        pmd_t *pmdp = NULL;
+
+        pgdp = pgd_offset(mm, addr);
+        pudp = pud_alloc(mm, pgdp, addr);
+        if (pudp)
+                pmdp = pmd_alloc(mm, pudp, addr);
+        return (pte_t *) pmdp;
+}
+
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+{
+        pgd_t *pgdp;
+        pud_t *pudp;
+        pmd_t *pmdp = NULL;
+
+        pgdp = pgd_offset(mm, addr);
+        if (pgd_present(*pgdp)) {
+                pudp = pud_offset(pgdp, addr);
+                if (pud_present(*pudp))
+                        pmdp = pmd_offset(pudp, addr);
+        }
+        return (pte_t *) pmdp;
+}
+
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+        return 0;
+}
+
+struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
+                              int write)
+{
+        return ERR_PTR(-EINVAL);
+}
+
+int pmd_huge(pmd_t pmd)
+{
+        if (!MACHINE_HAS_HPAGE)
+                return 0;
+
+        return !!(pmd_val(pmd) & _SEGMENT_ENTRY_LARGE);
+}
+
+struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+                             pmd_t *pmdp, int write)
+{
+        struct page *page;
+
+        if (!MACHINE_HAS_HPAGE)
+                return NULL;
+
+        page = pmd_page(*pmdp);
+        if (page)
+                page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT);
+        return page;
+}
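
[Editor's note] A quick standalone check of the subpage arithmetic in
follow_huge_pmd() above, not part of the patch. An HPAGE_SHIFT of 20
(1 MB segments) and a PAGE_SHIFT of 12 are assumptions here, mirroring
what the patch defines elsewhere in the tree.

#include <assert.h>

#define PAGE_SHIFT  12
#define HPAGE_SHIFT 20                  /* assumed: 1 MB segments */
#define HPAGE_SIZE  (1UL << HPAGE_SHIFT)
#define HPAGE_MASK  (~(HPAGE_SIZE - 1))

int main(void)
{
        unsigned long address = 0x12345678UL;

        /* index of the 4K subpage inside its 1 MB huge page */
        assert(((address & ~HPAGE_MASK) >> PAGE_SHIFT) == 0x45);
        return 0;
}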
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 202c952a29b4..acc92f46a096 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -77,28 +77,6 @@ void show_mem(void)
         printk("%lu pages pagetables\n", global_page_state(NR_PAGETABLE));
 }
 
-static void __init setup_ro_region(void)
-{
-        pgd_t *pgd;
-        pud_t *pud;
-        pmd_t *pmd;
-        pte_t *pte;
-        pte_t new_pte;
-        unsigned long address, end;
-
-        address = ((unsigned long)&_stext) & PAGE_MASK;
-        end = PFN_ALIGN((unsigned long)&_eshared);
-
-        for (; address < end; address += PAGE_SIZE) {
-                pgd = pgd_offset_k(address);
-                pud = pud_offset(pgd, address);
-                pmd = pmd_offset(pud, address);
-                pte = pte_offset_kernel(pmd, address);
-                new_pte = mk_pte_phys(address, __pgprot(_PAGE_RO));
-                *pte = new_pte;
-        }
-}
-
 /*
  * paging_init() sets up the page tables
  */
@@ -121,7 +99,6 @@ void __init paging_init(void)
         clear_table((unsigned long *) init_mm.pgd, pgd_type,
                     sizeof(unsigned long)*2048);
         vmem_map_init();
-        setup_ro_region();
 
         /* enable virtual mapping in kernel mode */
         __ctl_load(S390_lowcore.kernel_asce, 1, 1);
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 3ffc0211dc85..97bce6c97574 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -10,10 +10,12 @@
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/list.h>
+#include <linux/hugetlb.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
 #include <asm/setup.h>
 #include <asm/tlbflush.h>
+#include <asm/sections.h>
 
 static DEFINE_MUTEX(vmem_mutex);
 
@@ -113,7 +115,7 @@ static pte_t __init_refok *vmem_pte_alloc(void)
 /*
  * Add a physical memory range to the 1:1 mapping.
  */
-static int vmem_add_range(unsigned long start, unsigned long size)
+static int vmem_add_range(unsigned long start, unsigned long size, int ro)
 {
         unsigned long address;
         pgd_t *pg_dir;
@@ -140,7 +142,19 @@ static int vmem_add_range(unsigned long start, unsigned long size)
                         pud_populate_kernel(&init_mm, pu_dir, pm_dir);
                 }
 
+                pte = mk_pte_phys(address, __pgprot(ro ? _PAGE_RO : 0));
                 pm_dir = pmd_offset(pu_dir, address);
+
+#ifdef __s390x__
+                if (MACHINE_HAS_HPAGE && !(address & ~HPAGE_MASK) &&
+                    (address + HPAGE_SIZE <= start + size) &&
+                    (address >= HPAGE_SIZE)) {
+                        pte_val(pte) |= _SEGMENT_ENTRY_LARGE;
+                        pmd_val(*pm_dir) = pte_val(pte);
+                        address += HPAGE_SIZE - PAGE_SIZE;
+                        continue;
+                }
+#endif
                 if (pmd_none(*pm_dir)) {
                         pt_dir = vmem_pte_alloc();
                         if (!pt_dir)
@@ -149,7 +163,6 @@ static int vmem_add_range(unsigned long start, unsigned long size)
                 }
 
                 pt_dir = pte_offset_kernel(pm_dir, address);
-                pte = pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL);
                 *pt_dir = pte;
         }
         ret = 0;
@@ -180,6 +193,13 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
                 pm_dir = pmd_offset(pu_dir, address);
                 if (pmd_none(*pm_dir))
                         continue;
+
+                if (pmd_huge(*pm_dir)) {
+                        pmd_clear_kernel(pm_dir);
+                        address += HPAGE_SIZE - PAGE_SIZE;
+                        continue;
+                }
+
                 pt_dir = pte_offset_kernel(pm_dir, address);
                 *pt_dir = pte;
         }
@@ -248,14 +268,14 @@ out:
         return ret;
 }
 
-static int vmem_add_mem(unsigned long start, unsigned long size)
+static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
 {
         int ret;
 
         ret = vmem_add_mem_map(start, size);
         if (ret)
                 return ret;
-        return vmem_add_range(start, size);
+        return vmem_add_range(start, size, ro);
 }
 
 /*
@@ -338,7 +358,7 @@ int add_shared_memory(unsigned long start, unsigned long size)
         if (ret)
                 goto out_free;
 
-        ret = vmem_add_mem(start, size);
+        ret = vmem_add_mem(start, size, 0);
         if (ret)
                 goto out_remove;
 
@@ -374,14 +394,35 @@ out:
  */
 void __init vmem_map_init(void)
 {
+        unsigned long ro_start, ro_end;
+        unsigned long start, end;
         int i;
 
         INIT_LIST_HEAD(&init_mm.context.crst_list);
         INIT_LIST_HEAD(&init_mm.context.pgtable_list);
         init_mm.context.noexec = 0;
         NODE_DATA(0)->node_mem_map = VMEM_MAP;
-        for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++)
-                vmem_add_mem(memory_chunk[i].addr, memory_chunk[i].size);
+        ro_start = ((unsigned long)&_stext) & PAGE_MASK;
+        ro_end = PFN_ALIGN((unsigned long)&_eshared);
+        for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
+                start = memory_chunk[i].addr;
+                end = memory_chunk[i].addr + memory_chunk[i].size;
+                if (start >= ro_end || end <= ro_start)
+                        vmem_add_mem(start, end - start, 0);
+                else if (start >= ro_start && end <= ro_end)
+                        vmem_add_mem(start, end - start, 1);
+                else if (start >= ro_start) {
+                        vmem_add_mem(start, ro_end - start, 1);
+                        vmem_add_mem(ro_end, end - ro_end, 0);
+                } else if (end < ro_end) {
+                        vmem_add_mem(start, ro_start - start, 0);
+                        vmem_add_mem(ro_start, end - ro_start, 1);
+                } else {
+                        vmem_add_mem(start, ro_start - start, 0);
+                        vmem_add_mem(ro_start, ro_end - ro_start, 1);
+                        vmem_add_mem(ro_end, end - ro_end, 0);
+                }
+        }
 }
 
 /*
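
[Editor's note] The case analysis added to vmem_map_init() above can be hard
to read in diff form. The following standalone model, not part of the patch,
reproduces the same read-only/read-write splitting: map() is a stand-in for
vmem_add_mem() (taking an end address instead of a size), and the addresses
in main() are made up for illustration.

#include <stdio.h>

static void map(unsigned long start, unsigned long end, int ro)
{
        printf("map [%#lx, %#lx) %s\n", start, end, ro ? "ro" : "rw");
}

/* Map chunk [start, end) read-only where it overlaps [ro_start, ro_end). */
static void add_chunk(unsigned long start, unsigned long end,
                      unsigned long ro_start, unsigned long ro_end)
{
        if (start >= ro_end || end <= ro_start)
                map(start, end, 0);             /* no overlap */
        else if (start >= ro_start && end <= ro_end)
                map(start, end, 1);             /* fully inside ro region */
        else if (start >= ro_start) {           /* head of chunk is ro */
                map(start, ro_end, 1);
                map(ro_end, end, 0);
        } else if (end < ro_end) {              /* tail of chunk is ro */
                map(start, ro_start, 0);
                map(ro_start, end, 1);
        } else {                                /* chunk spans ro region */
                map(start, ro_start, 0);
                map(ro_start, ro_end, 1);
                map(ro_end, end, 0);
        }
}

int main(void)
{
        /* a chunk that fully contains the read-only region */
        add_chunk(0x0, 0x10000000, 0x100000, 0x400000);
        return 0;
}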