author		Catalin Marinas <catalin.marinas@arm.com>	2013-07-01 06:20:58 -0400
committer	Catalin Marinas <catalin.marinas@arm.com>	2013-07-01 06:20:58 -0400
commit		aa729dccb5e8dfbc78e2e235b8754d6acccee731 (patch)
tree		f6123726a25957481e2528b9b6b0d0cfd992a5fb
parent		ee877b5321c4dfee9dc9f2a12b19ddcd33149f6a (diff)
parent		af07484863e0c20796081e57093886c22dc16705 (diff)
Merge branch 'for-next/hugepages' of git://git.linaro.org/people/stevecapper/linux into upstream-hugepages
* 'for-next/hugepages' of git://git.linaro.org/people/stevecapper/linux:
ARM64: mm: THP support.
ARM64: mm: Raise MAX_ORDER for 64KB pages and THP.
ARM64: mm: HugeTLB support.
ARM64: mm: Move PTE_PROT_NONE bit.
ARM64: mm: Make PAGE_NONE pages read only and no-execute.
ARM64: mm: Restore memblock limit when map_mem finished.
mm: thp: Correct the HPAGE_PMD_ORDER check.
x86: mm: Remove general hugetlb code from x86.
mm: hugetlb: Copy general hugetlb code from x86 to mm.
x86: mm: Remove x86 version of huge_pmd_share.
mm: hugetlb: Copy huge_pmd_share from x86 to mm.
Conflicts:
arch/arm64/Kconfig
arch/arm64/include/asm/pgtable-hwdef.h
arch/arm64/include/asm/pgtable.h
-rw-r--r--  arch/arm64/Kconfig                       17
-rw-r--r--  arch/arm64/include/asm/hugetlb.h        117
-rw-r--r--  arch/arm64/include/asm/pgtable-hwdef.h   13
-rw-r--r--  arch/arm64/include/asm/pgtable.h         96
-rw-r--r--  arch/arm64/include/asm/tlb.h              6
-rw-r--r--  arch/arm64/include/asm/tlbflush.h         2
-rw-r--r--  arch/arm64/mm/Makefile                    1
-rw-r--r--  arch/arm64/mm/fault.c                    19
-rw-r--r--  arch/arm64/mm/hugetlbpage.c              70
-rw-r--r--  arch/arm64/mm/mmu.c                      19
-rw-r--r--  arch/x86/Kconfig                          6
-rw-r--r--  arch/x86/mm/hugetlbpage.c               187
-rw-r--r--  include/linux/huge_mm.h                   2
-rw-r--r--  include/linux/hugetlb.h                   4
-rw-r--r--  mm/hugetlb.c                            219
15 files changed, 543 insertions(+), 235 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 1fac4e5a1c43..4143d9b0d87a 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -188,6 +188,18 @@ config HW_PERF_EVENTS
 	  Enable hardware performance counter support for perf events. If
 	  disabled, perf events will use software events only.
 
+config SYS_SUPPORTS_HUGETLBFS
+	def_bool y
+
+config ARCH_WANT_GENERAL_HUGETLB
+	def_bool y
+
+config ARCH_WANT_HUGE_PMD_SHARE
+	def_bool y if !ARM64_64K_PAGES
+
+config HAVE_ARCH_TRANSPARENT_HUGEPAGE
+	def_bool y
+
 source "mm/Kconfig"
 
 config XEN_DOM0
@@ -200,6 +212,11 @@ config XEN
 	help
 	  Say Y if you want to run Linux in a Virtual Machine on Xen on ARM64.
 
+config FORCE_MAX_ZONEORDER
+	int
+	default "14" if (ARM64_64K_PAGES && TRANSPARENT_HUGEPAGE)
+	default "11"
+
 endmenu
 
 menu "Boot options"
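
The FORCE_MAX_ZONEORDER values follow from a buddy-allocator constraint: the allocator only serves orders 0 through MAX_ORDER - 1, and a transparent huge page is a single PMD-sized allocation. A stand-alone sketch of the arithmetic (the PAGE_SHIFT/PMD_SHIFT values are the usual arm64 ones for 4KB and 64KB granules, hard-coded here as an assumption):

```c
#include <stdio.h>

/* Print the allocation order of one PMD-sized huge page and the
 * minimum MAX_ORDER needed so the buddy allocator can provide it. */
static void show(const char *granule, int page_shift, int pmd_shift)
{
	int hpage_order = pmd_shift - page_shift;

	printf("%s granule: huge page order %d, needs MAX_ORDER >= %d\n",
	       granule, hpage_order, hpage_order + 1);
}

int main(void)
{
	show("4KB", 12, 21);	/* order 9: the default MAX_ORDER of 11 suffices */
	show("64KB", 16, 29);	/* order 13: hence FORCE_MAX_ZONEORDER = 14 */
	return 0;
}
```

With a 64KB granule a PMD-sized block spans 512MB, so only the 64K-pages-plus-THP combination needs the larger value.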
diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
new file mode 100644
index 000000000000..5b7ca8ace95f
--- /dev/null
+++ b/arch/arm64/include/asm/hugetlb.h
@@ -0,0 +1,117 @@
+/*
+ * arch/arm64/include/asm/hugetlb.h
+ *
+ * Copyright (C) 2013 Linaro Ltd.
+ *
+ * Based on arch/x86/include/asm/hugetlb.h
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef __ASM_HUGETLB_H
+#define __ASM_HUGETLB_H
+
+#include <asm-generic/hugetlb.h>
+#include <asm/page.h>
+
+static inline pte_t huge_ptep_get(pte_t *ptep)
+{
+	return *ptep;
+}
+
+static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+				   pte_t *ptep, pte_t pte)
+{
+	set_pte_at(mm, addr, ptep, pte);
+}
+
+static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
+					 unsigned long addr, pte_t *ptep)
+{
+	ptep_clear_flush(vma, addr, ptep);
+}
+
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+					   unsigned long addr, pte_t *ptep)
+{
+	ptep_set_wrprotect(mm, addr, ptep);
+}
+
+static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+					    unsigned long addr, pte_t *ptep)
+{
+	return ptep_get_and_clear(mm, addr, ptep);
+}
+
+static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+					     unsigned long addr, pte_t *ptep,
+					     pte_t pte, int dirty)
+{
+	return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
+}
+
+static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
+					  unsigned long addr, unsigned long end,
+					  unsigned long floor,
+					  unsigned long ceiling)
+{
+	free_pgd_range(tlb, addr, end, floor, ceiling);
+}
+
+static inline int is_hugepage_only_range(struct mm_struct *mm,
+					 unsigned long addr, unsigned long len)
+{
+	return 0;
+}
+
+static inline int prepare_hugepage_range(struct file *file,
+					 unsigned long addr, unsigned long len)
+{
+	struct hstate *h = hstate_file(file);
+	if (len & ~huge_page_mask(h))
+		return -EINVAL;
+	if (addr & ~huge_page_mask(h))
+		return -EINVAL;
+	return 0;
+}
+
+static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm)
+{
+}
+
+static inline int huge_pte_none(pte_t pte)
+{
+	return pte_none(pte);
+}
+
+static inline pte_t huge_pte_wrprotect(pte_t pte)
+{
+	return pte_wrprotect(pte);
+}
+
+static inline int arch_prepare_hugepage(struct page *page)
+{
+	return 0;
+}
+
+static inline void arch_release_hugepage(struct page *page)
+{
+}
+
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+	clear_bit(PG_dcache_clean, &page->flags);
+}
+
+#endif /* __ASM_HUGETLB_H */
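
Every hook above forwards to the generic pte accessors, which is what lets arm64 reuse the generic hugetlb code moved into mm/hugetlb.c below. A minimal user-space check, not part of this patch, that exercises the path; it assumes a kernel built with these options and huge pages reserved beforehand, e.g. through /proc/sys/vm/nr_hugepages:

```c
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 2 * 1024 * 1024;	/* one PMD-sized huge page (4KB granule) */

	/* MAP_HUGETLB requests hugetlb backing; the fault then goes through
	 * huge_pte_alloc()/set_huge_pte_at() rather than the normal pte path. */
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap(MAP_HUGETLB)");	/* e.g. no huge pages reserved */
		return 1;
	}
	memset(p, 0, len);			/* triggers the huge-page fault */
	puts("mapped and touched one huge page");
	munmap(p, len);
	return 0;
}
```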
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 66367c6c6527..e182a356c979 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -25,17 +25,27 @@
 /*
  * Hardware page table definitions.
  *
+ * Level 1 descriptor (PUD).
+ */
+
+#define PUD_TABLE_BIT		(_AT(pgdval_t, 1) << 1)
+
+/*
  * Level 2 descriptor (PMD).
  */
 #define PMD_TYPE_MASK		(_AT(pmdval_t, 3) << 0)
 #define PMD_TYPE_FAULT		(_AT(pmdval_t, 0) << 0)
 #define PMD_TYPE_TABLE		(_AT(pmdval_t, 3) << 0)
 #define PMD_TYPE_SECT		(_AT(pmdval_t, 1) << 0)
+#define PMD_TABLE_BIT		(_AT(pmdval_t, 1) << 1)
 
 /*
  * Section
  */
-#define PMD_SECT_USER		(_AT(pteval_t, 1) << 6)		/* AP[1] */
+#define PMD_SECT_VALID		(_AT(pmdval_t, 1) << 0)
+#define PMD_SECT_PROT_NONE	(_AT(pmdval_t, 1) << 2)
+#define PMD_SECT_USER		(_AT(pmdval_t, 1) << 6)		/* AP[1] */
+#define PMD_SECT_RDONLY		(_AT(pmdval_t, 1) << 7)		/* AP[2] */
 #define PMD_SECT_S		(_AT(pmdval_t, 3) << 8)
 #define PMD_SECT_AF		(_AT(pmdval_t, 1) << 10)
 #define PMD_SECT_NG		(_AT(pmdval_t, 1) << 11)
@@ -54,6 +64,7 @@
 #define PTE_TYPE_MASK		(_AT(pteval_t, 3) << 0)
 #define PTE_TYPE_FAULT		(_AT(pteval_t, 0) << 0)
 #define PTE_TYPE_PAGE		(_AT(pteval_t, 3) << 0)
+#define PTE_TABLE_BIT		(_AT(pteval_t, 1) << 1)
 #define PTE_USER		(_AT(pteval_t, 1) << 6)		/* AP[1] */
 #define PTE_RDONLY		(_AT(pteval_t, 1) << 7)		/* AP[2] */
 #define PTE_SHARED		(_AT(pteval_t, 3) << 8)		/* SH[1:0], inner shareable */
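
The new *_TABLE_BIT definitions spell out the part of the LPAE descriptor format this series leans on: bit 0 is the valid bit, and bit 1 selects between a table descriptor (a pointer to the next level) and a block descriptor (a huge mapping). A rough illustrative decoder along those lines; the descriptor values are made up for the example:

```c
#include <stdint.h>
#include <stdio.h>

#define DESC_VALID	(UINT64_C(1) << 0)	/* bit 0: valid */
#define DESC_TABLE_BIT	(UINT64_C(1) << 1)	/* bit 1: table (1) vs block (0) */

/* Classify a level 1/2 descriptor the way PMD_TABLE_BIT is used above. */
static const char *classify(uint64_t desc)
{
	if (!(desc & DESC_VALID))
		return "fault/invalid";
	return (desc & DESC_TABLE_BIT) ? "table (points to next level)"
				       : "block (huge mapping)";
}

int main(void)
{
	uint64_t table_desc = 0x40003003;	/* made-up value, bits 1:0 = 11 */
	uint64_t block_desc = 0x40000711;	/* made-up value, bits 1:0 = 01 */

	printf("%#llx: %s\n", (unsigned long long)table_desc, classify(table_desc));
	printf("%#llx: %s\n", (unsigned long long)block_desc, classify(block_desc));
	return 0;
}
```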
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 5588e8ad9762..065e58f01b1e 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -25,8 +25,8 @@
  * Software defined PTE bits definition.
  */
 #define PTE_VALID		(_AT(pteval_t, 1) << 0)
-#define PTE_PROT_NONE		(_AT(pteval_t, 1) << 1)	/* only when !PTE_VALID */
-#define PTE_FILE		(_AT(pteval_t, 1) << 2)	/* only when !pte_present() */
+#define PTE_PROT_NONE		(_AT(pteval_t, 1) << 2)	/* only when !PTE_VALID */
+#define PTE_FILE		(_AT(pteval_t, 1) << 3)	/* only when !pte_present() */
 #define PTE_DIRTY		(_AT(pteval_t, 1) << 55)
 #define PTE_SPECIAL		(_AT(pteval_t, 1) << 56)
 
@@ -66,7 +66,7 @@ extern pgprot_t pgprot_default;
 
 #define _MOD_PROT(p, b)		__pgprot_modify(p, 0, b)
 
-#define PAGE_NONE		__pgprot_modify(pgprot_default, PTE_TYPE_MASK, PTE_PROT_NONE)
+#define PAGE_NONE		__pgprot_modify(pgprot_default, PTE_TYPE_MASK, PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN)
 #define PAGE_SHARED		_MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
 #define PAGE_SHARED_EXEC	_MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN)
 #define PAGE_COPY		_MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY)
@@ -82,7 +82,7 @@ extern pgprot_t pgprot_default;
 #define PAGE_S2			__pgprot_modify(pgprot_default, PTE_S2_MEMATTR_MASK, PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY)
 #define PAGE_S2_DEVICE		__pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDWR | PTE_UXN)
 
-#define __PAGE_NONE		__pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE)
+#define __PAGE_NONE		__pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN)
 #define __PAGE_SHARED		__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
 #define __PAGE_SHARED_EXEC	__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
 #define __PAGE_COPY		__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY)
@@ -179,12 +179,76 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 /*
  * Huge pte definitions.
  */
-#define pte_huge(pte)		((pte_val(pte) & PTE_TYPE_MASK) == PTE_TYPE_HUGEPAGE)
-#define pte_mkhuge(pte)		(__pte((pte_val(pte) & ~PTE_TYPE_MASK) | PTE_TYPE_HUGEPAGE))
+#define pte_huge(pte)		(!(pte_val(pte) & PTE_TABLE_BIT))
+#define pte_mkhuge(pte)		(__pte(pte_val(pte) & ~PTE_TABLE_BIT))
+
+/*
+ * Hugetlb definitions.
+ */
+#define HUGE_MAX_HSTATE		2
+#define HPAGE_SHIFT		PMD_SHIFT
+#define HPAGE_SIZE		(_AC(1, UL) << HPAGE_SHIFT)
+#define HPAGE_MASK		(~(HPAGE_SIZE - 1))
+#define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
 
 #define __HAVE_ARCH_PTE_SPECIAL
 
 /*
+ * Software PMD bits for THP
+ */
+
+#define PMD_SECT_DIRTY		(_AT(pmdval_t, 1) << 55)
+#define PMD_SECT_SPLITTING	(_AT(pmdval_t, 1) << 57)
+
+/*
+ * THP definitions.
+ */
+#define pmd_young(pmd)		(pmd_val(pmd) & PMD_SECT_AF)
+
+#define __HAVE_ARCH_PMD_WRITE
+#define pmd_write(pmd)		(!(pmd_val(pmd) & PMD_SECT_RDONLY))
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define pmd_trans_huge(pmd)	(pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT))
+#define pmd_trans_splitting(pmd) (pmd_val(pmd) & PMD_SECT_SPLITTING)
+#endif
+
+#define PMD_BIT_FUNC(fn,op) \
+static inline pmd_t pmd_##fn(pmd_t pmd) { pmd_val(pmd) op; return pmd; }
+
+PMD_BIT_FUNC(wrprotect,	|= PMD_SECT_RDONLY);
+PMD_BIT_FUNC(mkold,	&= ~PMD_SECT_AF);
+PMD_BIT_FUNC(mksplitting, |= PMD_SECT_SPLITTING);
+PMD_BIT_FUNC(mkwrite,   &= ~PMD_SECT_RDONLY);
+PMD_BIT_FUNC(mkdirty,   |= PMD_SECT_DIRTY);
+PMD_BIT_FUNC(mkyoung,   |= PMD_SECT_AF);
+PMD_BIT_FUNC(mknotpresent, &= ~PMD_TYPE_MASK);
+
+#define pmd_mkhuge(pmd)		(__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT))
+
+#define pmd_pfn(pmd)		(((pmd_val(pmd) & PMD_MASK) & PHYS_MASK) >> PAGE_SHIFT)
+#define pfn_pmd(pfn,prot)	(__pmd(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)))
+#define mk_pmd(page,prot)	pfn_pmd(page_to_pfn(page),prot)
+
+#define pmd_page(pmd)		pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK))
+
+static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+	const pmdval_t mask = PMD_SECT_USER | PMD_SECT_PXN | PMD_SECT_UXN |
+			      PMD_SECT_RDONLY | PMD_SECT_PROT_NONE |
+			      PMD_SECT_VALID;
+	pmd_val(pmd) = (pmd_val(pmd) & ~mask) | (pgprot_val(newprot) & mask);
+	return pmd;
+}
+
+#define set_pmd_at(mm, addr, pmdp, pmd)	set_pmd(pmdp, pmd)
+
+static inline int has_transparent_hugepage(void)
+{
+	return 1;
+}
+
+/*
  * Mark the prot value as uncacheable and unbufferable.
  */
 #define pgprot_noncached(prot) \
@@ -293,12 +357,12 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
 
 /*
  * Encode and decode a swap entry:
- *	bits 0-1:	present (must be zero)
- *	bit 2:		PTE_FILE
- *	bits 3-8:	swap type
+ *	bits 0, 2:	present (must both be zero)
+ *	bit 3:		PTE_FILE
+ *	bits 4-8:	swap type
  *	bits 9-63:	swap offset
  */
-#define __SWP_TYPE_SHIFT	3
+#define __SWP_TYPE_SHIFT	4
 #define __SWP_TYPE_BITS		6
 #define __SWP_TYPE_MASK		((1 << __SWP_TYPE_BITS) - 1)
 #define __SWP_OFFSET_SHIFT	(__SWP_TYPE_BITS + __SWP_TYPE_SHIFT)
@@ -318,15 +382,15 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
 
 /*
  * Encode and decode a file entry:
- *	bits 0-1:	present (must be zero)
- *	bit 2:		PTE_FILE
- *	bits 3-63:	file offset / PAGE_SIZE
+ *	bits 0, 2:	present (must both be zero)
+ *	bit 3:		PTE_FILE
+ *	bits 4-63:	file offset / PAGE_SIZE
  */
 #define pte_file(pte)		(pte_val(pte) & PTE_FILE)
-#define pte_to_pgoff(x)		(pte_val(x) >> 3)
-#define pgoff_to_pte(x)		__pte(((x) << 3) | PTE_FILE)
+#define pte_to_pgoff(x)		(pte_val(x) >> 4)
+#define pgoff_to_pte(x)		__pte(((x) << 4) | PTE_FILE)
 
-#define PTE_FILE_MAX_BITS	61
+#define PTE_FILE_MAX_BITS	60
 
 extern int kern_addr_valid(unsigned long addr);
 
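
Moving PTE_PROT_NONE to bit 2 frees bit 1 for the PTE_TABLE_BIT/PMD_TABLE_BIT distinction, but it also pushes PTE_FILE and the swap fields up a bit each, which is why __SWP_TYPE_SHIFT becomes 4 and PTE_FILE_MAX_BITS shrinks to 60. A small plain-C sketch of the resulting swap-entry packing, mirroring the __SWP_* macros above (outside the kernel, so the names here are local):

```c
#include <stdint.h>
#include <stdio.h>

/* Local mirror of the reworked layout: the type field now starts at bit 4
 * because bit 2 (PTE_PROT_NONE) and bit 3 (PTE_FILE) are reserved. */
#define SWP_TYPE_SHIFT		4
#define SWP_TYPE_BITS		6
#define SWP_TYPE_MASK		((1u << SWP_TYPE_BITS) - 1)
#define SWP_OFFSET_SHIFT	(SWP_TYPE_BITS + SWP_TYPE_SHIFT)

static uint64_t swp_entry(unsigned type, uint64_t offset)
{
	return ((uint64_t)(type & SWP_TYPE_MASK) << SWP_TYPE_SHIFT) |
	       (offset << SWP_OFFSET_SHIFT);
}

int main(void)
{
	uint64_t e = swp_entry(3, 0x1234);

	/* Bits 0 and 2 stay clear (neither valid nor PROT_NONE) and bit 3
	 * stays clear (not a file pte), so the entry never looks present. */
	printf("entry  = %#llx\n", (unsigned long long)e);
	printf("type   = %llu\n",
	       (unsigned long long)((e >> SWP_TYPE_SHIFT) & SWP_TYPE_MASK));
	printf("offset = %#llx\n", (unsigned long long)(e >> SWP_OFFSET_SHIFT));
	return 0;
}
```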
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index 654f0968030b..46b3beb4b773 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -187,4 +187,10 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
 
 #define tlb_migrate_finish(mm) do { } while (0)
 
+static inline void
+tlb_remove_pmd_tlb_entry(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr)
+{
+	tlb_add_flush(tlb, addr);
+}
+
 #endif
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 122d6320f745..8b482035cfc2 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -117,6 +117,8 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
 	dsb();
 }
 
+#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
+
 #endif
 
 #endif
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
index 3140a2abcdc2..b51d36401d83 100644
--- a/arch/arm64/mm/Makefile
+++ b/arch/arm64/mm/Makefile
@@ -2,3 +2,4 @@ obj-y				:= dma-mapping.o extable.o fault.o init.o \
 				   cache.o copypage.o flush.o \
 				   ioremap.o mmap.o pgd.o mmu.o \
 				   context.o tlb.o proc.o
+obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 1426468b77f3..0ecac8980aae 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -365,17 +365,6 @@ static int __kprobes do_translation_fault(unsigned long addr,
 }
 
 /*
- * Some section permission faults need to be handled gracefully. They can
- * happen due to a __{get,put}_user during an oops.
- */
-static int do_sect_fault(unsigned long addr, unsigned int esr,
-			 struct pt_regs *regs)
-{
-	do_bad_area(addr, esr, regs);
-	return 0;
-}
-
-/*
  * This abort handler always returns "fault".
  */
 static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs)
@@ -398,12 +387,12 @@ static struct fault_info {
 	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 2 translation fault"	},
 	{ do_page_fault,	SIGSEGV, SEGV_MAPERR,	"level 3 translation fault"	},
 	{ do_bad,		SIGBUS,  0,		"reserved access flag fault"	},
-	{ do_bad,		SIGSEGV, SEGV_ACCERR,	"level 1 access flag fault"	},
-	{ do_bad,		SIGSEGV, SEGV_ACCERR,	"level 2 access flag fault"	},
+	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 1 access flag fault"	},
+	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 2 access flag fault"	},
 	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 3 access flag fault"	},
 	{ do_bad,		SIGBUS,  0,		"reserved permission fault"	},
-	{ do_bad,		SIGSEGV, SEGV_ACCERR,	"level 1 permission fault"	},
-	{ do_sect_fault,	SIGSEGV, SEGV_ACCERR,	"level 2 permission fault"	},
+	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 1 permission fault"	},
+	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 2 permission fault"	},
 	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 3 permission fault"	},
 	{ do_bad,		SIGBUS,  0,		"synchronous external abort"	},
 	{ do_bad,		SIGBUS,  0,		"asynchronous external abort"	},
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
new file mode 100644
index 000000000000..2fc8258bab2d
--- /dev/null
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -0,0 +1,70 @@
+/*
+ * arch/arm64/mm/hugetlbpage.c
+ *
+ * Copyright (C) 2013 Linaro Ltd.
+ *
+ * Based on arch/x86/mm/hugetlbpage.c.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/pagemap.h>
+#include <linux/err.h>
+#include <linux/sysctl.h>
+#include <asm/mman.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
+
+#ifndef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+	return 0;
+}
+#endif
+
+struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
+			      int write)
+{
+	return ERR_PTR(-EINVAL);
+}
+
+int pmd_huge(pmd_t pmd)
+{
+	return !(pmd_val(pmd) & PMD_TABLE_BIT);
+}
+
+int pud_huge(pud_t pud)
+{
+	return !(pud_val(pud) & PUD_TABLE_BIT);
+}
+
+static __init int setup_hugepagesz(char *opt)
+{
+	unsigned long ps = memparse(opt, &opt);
+	if (ps == PMD_SIZE) {
+		hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
+	} else if (ps == PUD_SIZE) {
+		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
+	} else {
+		pr_err("hugepagesz: Unsupported page size %lu M\n", ps >> 20);
+		return 0;
+	}
+	return 1;
+}
+__setup("hugepagesz=", setup_hugepagesz);
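
setup_hugepagesz() deliberately accepts only the two sizes the page-table format yields for free: PMD_SIZE and PUD_SIZE. A user-space sketch of the same validation; the 2MB/1GB values are the 4KB-granule sizes and are hard-coded here as an assumption (with 64KB pages, PMD_SIZE would be 512MB and there is no PUD level):

```c
#include <stdio.h>
#include <stdlib.h>

#define PMD_SIZE	(2UL * 1024 * 1024)	/* assumed 4KB-granule value */
#define PUD_SIZE	(1024UL * 1024 * 1024)	/* assumed 4KB-granule value */

/* Same shape as the kernel's setup_hugepagesz() check above. */
static int check_hugepagesz(unsigned long ps)
{
	if (ps == PMD_SIZE || ps == PUD_SIZE)
		return 1;
	fprintf(stderr, "hugepagesz: unsupported page size %lu M\n", ps >> 20);
	return 0;
}

int main(int argc, char **argv)
{
	/* e.g. ./a.out 2097152 is accepted, ./a.out 65536 is rejected */
	unsigned long ps = argc > 1 ? strtoul(argv[1], NULL, 0) : PMD_SIZE;
	return check_hugepagesz(ps) ? 0 : 1;
}
```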
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 80a369eab637..a8d1059b91b2 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -297,6 +297,16 @@ static void __init map_mem(void)
 {
 	struct memblock_region *reg;
 
+	/*
+	 * Temporarily limit the memblock range. We need to do this as
+	 * create_mapping requires puds, pmds and ptes to be allocated from
+	 * memory addressable from the initial direct kernel mapping.
+	 *
+	 * The initial direct kernel mapping, located at swapper_pg_dir,
+	 * gives us PGDIR_SIZE memory starting from PHYS_OFFSET (aligned).
+	 */
+	memblock_set_current_limit((PHYS_OFFSET & PGDIR_MASK) + PGDIR_SIZE);
+
 	/* map all the memory banks */
 	for_each_memblock(memory, reg) {
 		phys_addr_t start = reg->base;
@@ -307,6 +317,9 @@ static void __init map_mem(void)
 
 		create_mapping(start, __phys_to_virt(start), end - start);
 	}
+
+	/* Limit no longer required. */
+	memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
 }
 
 /*
@@ -317,12 +330,6 @@ void __init paging_init(void)
 {
 	void *zero_page;
 
-	/*
-	 * Maximum PGDIR_SIZE addressable via the initial direct kernel
-	 * mapping in swapper_pg_dir.
-	 */
-	memblock_set_current_limit((PHYS_OFFSET & PGDIR_MASK) + PGDIR_SIZE);
-
 	init_mem_pgprot();
 	map_mem();
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 685692c94f05..191c4e34722d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -207,6 +207,12 @@ config ARCH_HIBERNATION_POSSIBLE
 config ARCH_SUSPEND_POSSIBLE
 	def_bool y
 
+config ARCH_WANT_HUGE_PMD_SHARE
+	def_bool y
+
+config ARCH_WANT_GENERAL_HUGETLB
+	def_bool y
+
 config ZONE_DMA32
 	bool
 	default X86_64
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index ae1aa71d0115..7e73e8c69096 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -16,169 +16,6 @@
 #include <asm/tlbflush.h>
 #include <asm/pgalloc.h>
 
-static unsigned long page_table_shareable(struct vm_area_struct *svma,
-				struct vm_area_struct *vma,
-				unsigned long addr, pgoff_t idx)
-{
-	unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) +
-				svma->vm_start;
-	unsigned long sbase = saddr & PUD_MASK;
-	unsigned long s_end = sbase + PUD_SIZE;
-
-	/* Allow segments to share if only one is marked locked */
-	unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED;
-	unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED;
-
-	/*
-	 * match the virtual addresses, permission and the alignment of the
-	 * page table page.
-	 */
-	if (pmd_index(addr) != pmd_index(saddr) ||
-	    vm_flags != svm_flags ||
-	    sbase < svma->vm_start || svma->vm_end < s_end)
-		return 0;
-
-	return saddr;
-}
-
-static int vma_shareable(struct vm_area_struct *vma, unsigned long addr)
-{
-	unsigned long base = addr & PUD_MASK;
-	unsigned long end = base + PUD_SIZE;
-
-	/*
-	 * check on proper vm_flags and page table alignment
-	 */
-	if (vma->vm_flags & VM_MAYSHARE &&
-	    vma->vm_start <= base && end <= vma->vm_end)
-		return 1;
-	return 0;
-}
-
-/*
- * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
- * and returns the corresponding pte. While this is not necessary for the
- * !shared pmd case because we can allocate the pmd later as well, it makes the
- * code much cleaner. pmd allocation is essential for the shared case because
- * pud has to be populated inside the same i_mmap_mutex section - otherwise
- * racing tasks could either miss the sharing (see huge_pte_offset) or select a
- * bad pmd for sharing.
- */
-static pte_t *
-huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
-{
-	struct vm_area_struct *vma = find_vma(mm, addr);
-	struct address_space *mapping = vma->vm_file->f_mapping;
-	pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) +
-			vma->vm_pgoff;
-	struct vm_area_struct *svma;
-	unsigned long saddr;
-	pte_t *spte = NULL;
-	pte_t *pte;
-
-	if (!vma_shareable(vma, addr))
-		return (pte_t *)pmd_alloc(mm, pud, addr);
-
-	mutex_lock(&mapping->i_mmap_mutex);
-	vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
-		if (svma == vma)
-			continue;
-
-		saddr = page_table_shareable(svma, vma, addr, idx);
-		if (saddr) {
-			spte = huge_pte_offset(svma->vm_mm, saddr);
-			if (spte) {
-				get_page(virt_to_page(spte));
-				break;
-			}
-		}
-	}
-
-	if (!spte)
-		goto out;
-
-	spin_lock(&mm->page_table_lock);
-	if (pud_none(*pud))
-		pud_populate(mm, pud, (pmd_t *)((unsigned long)spte & PAGE_MASK));
-	else
-		put_page(virt_to_page(spte));
-	spin_unlock(&mm->page_table_lock);
-out:
-	pte = (pte_t *)pmd_alloc(mm, pud, addr);
-	mutex_unlock(&mapping->i_mmap_mutex);
-	return pte;
-}
-
-/*
- * unmap huge page backed by shared pte.
- *
- * Hugetlb pte page is ref counted at the time of mapping.  If pte is shared
- * indicated by page_count > 1, unmap is achieved by clearing pud and
- * decrementing the ref count. If count == 1, the pte page is not shared.
- *
- * called with vma->vm_mm->page_table_lock held.
- *
- * returns: 1 successfully unmapped a shared pte page
- *	    0 the underlying pte page is not shared, or it is the last user
- */
-int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
-{
-	pgd_t *pgd = pgd_offset(mm, *addr);
-	pud_t *pud = pud_offset(pgd, *addr);
-
-	BUG_ON(page_count(virt_to_page(ptep)) == 0);
-	if (page_count(virt_to_page(ptep)) == 1)
-		return 0;
-
-	pud_clear(pud);
-	put_page(virt_to_page(ptep));
-	*addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE;
-	return 1;
-}
-
-pte_t *huge_pte_alloc(struct mm_struct *mm,
-			unsigned long addr, unsigned long sz)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-	pte_t *pte = NULL;
-
-	pgd = pgd_offset(mm, addr);
-	pud = pud_alloc(mm, pgd, addr);
-	if (pud) {
-		if (sz == PUD_SIZE) {
-			pte = (pte_t *)pud;
-		} else {
-			BUG_ON(sz != PMD_SIZE);
-			if (pud_none(*pud))
-				pte = huge_pmd_share(mm, addr, pud);
-			else
-				pte = (pte_t *)pmd_alloc(mm, pud, addr);
-		}
-	}
-	BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));
-
-	return pte;
-}
-
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd = NULL;
-
-	pgd = pgd_offset(mm, addr);
-	if (pgd_present(*pgd)) {
-		pud = pud_offset(pgd, addr);
-		if (pud_present(*pud)) {
-			if (pud_large(*pud))
-				return (pte_t *)pud;
-			pmd = pmd_offset(pud, addr);
-		}
-	}
-	return (pte_t *) pmd;
-}
-
 #if 0	/* This is just for testing */
 struct page *
 follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
@@ -240,30 +77,6 @@ int pud_huge(pud_t pud)
 	return !!(pud_val(pud) & _PAGE_PSE);
 }
 
-struct page *
-follow_huge_pmd(struct mm_struct *mm, unsigned long address,
-		pmd_t *pmd, int write)
-{
-	struct page *page;
-
-	page = pte_page(*(pte_t *)pmd);
-	if (page)
-		page += ((address & ~PMD_MASK) >> PAGE_SHIFT);
-	return page;
-}
-
-struct page *
-follow_huge_pud(struct mm_struct *mm, unsigned long address,
-		pud_t *pud, int write)
-{
-	struct page *page;
-
-	page = pte_page(*(pte_t *)pud);
-	if (page)
-		page += ((address & ~PUD_MASK) >> PAGE_SHIFT);
-	return page;
-}
-
 #endif
 
 /* x86_64 also uses this file */
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 528454c2caa9..26ee56c80dc7 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -123,7 +123,7 @@ extern void __split_huge_page_pmd(struct vm_area_struct *vma,
 	} while (0)
 extern void split_huge_page_pmd_mm(struct mm_struct *mm, unsigned long address,
 		pmd_t *pmd);
-#if HPAGE_PMD_ORDER > MAX_ORDER
+#if HPAGE_PMD_ORDER >= MAX_ORDER
 #error "hugepages can't be allocated by the buddy allocator"
 #endif
 extern int hugepage_madvise(struct vm_area_struct *vma,
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 6b4890fa57e7..981546ad231c 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -69,6 +69,10 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed);
 int dequeue_hwpoisoned_huge_page(struct page *page);
 void copy_huge_page(struct page *dst, struct page *src);
 
+#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
+pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud);
+#endif
+
 extern unsigned long hugepages_treat_as_movable;
 extern const unsigned long hugetlb_zero, hugetlb_infinity;
 extern int sysctl_hugetlb_shm_group;
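
Exporting huge_pmd_share() here lets the generic copy in mm/hugetlb.c (below) drive PMD sharing for any architecture that selects ARCH_WANT_HUGE_PMD_SHARE. Sharing is only attempted when a full PUD_SIZE-aligned slice of a may-share mapping lies inside the VMA; a condensed stand-alone sketch of that vma_shareable() condition, assuming the 4KB-granule PUD_SIZE of 1GB:

```c
#include <stdio.h>

#define PUD_SIZE (1UL << 30)	/* 1GB with a 4KB granule (assumption) */

/* Condensed form of the vma_shareable() test copied into mm/hugetlb.c:
 * a PMD page can only be shared when a whole PUD_SIZE-aligned slice of
 * the mapping lies inside the VMA and the mapping may be shared. */
static int shareable(unsigned long vm_start, unsigned long vm_end,
		     unsigned long addr, int may_share)
{
	unsigned long base = addr & ~(PUD_SIZE - 1);
	unsigned long end = base + PUD_SIZE;

	return may_share && vm_start <= base && end <= vm_end;
}

int main(void)
{
	/* A 2GB shared hugetlb mapping at a 1GB-aligned address: the
	 * first 1GB slice qualifies for PMD sharing. */
	unsigned long start = 0x40000000UL, stop = start + 2 * PUD_SIZE;

	printf("aligned vma:   %s\n",
	       shareable(start, stop, start + PUD_SIZE / 2, 1) ?
	       "shareable" : "not shareable");
	printf("unaligned vma: %s\n",
	       shareable(start + 4096, stop, start + PUD_SIZE / 2, 1) ?
	       "shareable" : "not shareable");
	return 0;
}
```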
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f8feeeca6686..63217261fd14 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2931,15 +2931,6 @@ out_mutex:
 	return ret;
 }
 
-/* Can be overriden by architectures */
-__attribute__((weak)) struct page *
-follow_huge_pud(struct mm_struct *mm, unsigned long address,
-		pud_t *pud, int write)
-{
-	BUG();
-	return NULL;
-}
-
 long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			 struct page **pages, struct vm_area_struct **vmas,
 			 unsigned long *position, unsigned long *nr_pages,
@@ -3169,6 +3160,216 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
 	hugetlb_acct_memory(h, -(chg - freed));
 }
 
+#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
+static unsigned long page_table_shareable(struct vm_area_struct *svma,
+				struct vm_area_struct *vma,
+				unsigned long addr, pgoff_t idx)
+{
+	unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) +
+				svma->vm_start;
+	unsigned long sbase = saddr & PUD_MASK;
+	unsigned long s_end = sbase + PUD_SIZE;
+
+	/* Allow segments to share if only one is marked locked */
+	unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED;
+	unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED;
+
+	/*
+	 * match the virtual addresses, permission and the alignment of the
+	 * page table page.
+	 */
+	if (pmd_index(addr) != pmd_index(saddr) ||
+	    vm_flags != svm_flags ||
+	    sbase < svma->vm_start || svma->vm_end < s_end)
+		return 0;
+
+	return saddr;
+}
+
+static int vma_shareable(struct vm_area_struct *vma, unsigned long addr)
+{
+	unsigned long base = addr & PUD_MASK;
+	unsigned long end = base + PUD_SIZE;
+
+	/*
+	 * check on proper vm_flags and page table alignment
+	 */
+	if (vma->vm_flags & VM_MAYSHARE &&
+	    vma->vm_start <= base && end <= vma->vm_end)
+		return 1;
+	return 0;
+}
+
+/*
+ * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
+ * and returns the corresponding pte. While this is not necessary for the
+ * !shared pmd case because we can allocate the pmd later as well, it makes the
+ * code much cleaner. pmd allocation is essential for the shared case because
+ * pud has to be populated inside the same i_mmap_mutex section - otherwise
+ * racing tasks could either miss the sharing (see huge_pte_offset) or select a
+ * bad pmd for sharing.
+ */
+pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
+{
+	struct vm_area_struct *vma = find_vma(mm, addr);
+	struct address_space *mapping = vma->vm_file->f_mapping;
+	pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) +
+			vma->vm_pgoff;
+	struct vm_area_struct *svma;
+	unsigned long saddr;
+	pte_t *spte = NULL;
+	pte_t *pte;
+
+	if (!vma_shareable(vma, addr))
+		return (pte_t *)pmd_alloc(mm, pud, addr);
+
+	mutex_lock(&mapping->i_mmap_mutex);
+	vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
+		if (svma == vma)
+			continue;
+
+		saddr = page_table_shareable(svma, vma, addr, idx);
+		if (saddr) {
+			spte = huge_pte_offset(svma->vm_mm, saddr);
+			if (spte) {
+				get_page(virt_to_page(spte));
+				break;
+			}
+		}
+	}
+
+	if (!spte)
+		goto out;
+
+	spin_lock(&mm->page_table_lock);
+	if (pud_none(*pud))
+		pud_populate(mm, pud,
+				(pmd_t *)((unsigned long)spte & PAGE_MASK));
+	else
+		put_page(virt_to_page(spte));
+	spin_unlock(&mm->page_table_lock);
+out:
+	pte = (pte_t *)pmd_alloc(mm, pud, addr);
+	mutex_unlock(&mapping->i_mmap_mutex);
+	return pte;
+}
+
+/*
+ * unmap huge page backed by shared pte.
+ *
+ * Hugetlb pte page is ref counted at the time of mapping.  If pte is shared
+ * indicated by page_count > 1, unmap is achieved by clearing pud and
+ * decrementing the ref count. If count == 1, the pte page is not shared.
+ *
+ * called with vma->vm_mm->page_table_lock held.
+ *
+ * returns: 1 successfully unmapped a shared pte page
+ *	    0 the underlying pte page is not shared, or it is the last user
+ */
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+	pgd_t *pgd = pgd_offset(mm, *addr);
+	pud_t *pud = pud_offset(pgd, *addr);
+
+	BUG_ON(page_count(virt_to_page(ptep)) == 0);
+	if (page_count(virt_to_page(ptep)) == 1)
+		return 0;
+
+	pud_clear(pud);
+	put_page(virt_to_page(ptep));
+	*addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE;
+	return 1;
+}
+#define want_pmd_share()	(1)
+#else /* !CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
+pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
+{
+	return NULL;
+}
+#define want_pmd_share()	(0)
+#endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
+
+#ifdef CONFIG_ARCH_WANT_GENERAL_HUGETLB
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+			unsigned long addr, unsigned long sz)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pte_t *pte = NULL;
+
+	pgd = pgd_offset(mm, addr);
+	pud = pud_alloc(mm, pgd, addr);
+	if (pud) {
+		if (sz == PUD_SIZE) {
+			pte = (pte_t *)pud;
+		} else {
+			BUG_ON(sz != PMD_SIZE);
+			if (want_pmd_share() && pud_none(*pud))
+				pte = huge_pmd_share(mm, addr, pud);
+			else
+				pte = (pte_t *)pmd_alloc(mm, pud, addr);
+		}
+	}
+	BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));
+
+	return pte;
+}
+
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd = NULL;
+
+	pgd = pgd_offset(mm, addr);
+	if (pgd_present(*pgd)) {
+		pud = pud_offset(pgd, addr);
+		if (pud_present(*pud)) {
+			if (pud_huge(*pud))
+				return (pte_t *)pud;
+			pmd = pmd_offset(pud, addr);
+		}
+	}
+	return (pte_t *) pmd;
+}
+
+struct page *
+follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+		pmd_t *pmd, int write)
+{
+	struct page *page;
+
+	page = pte_page(*(pte_t *)pmd);
+	if (page)
+		page += ((address & ~PMD_MASK) >> PAGE_SHIFT);
+	return page;
+}
+
+struct page *
+follow_huge_pud(struct mm_struct *mm, unsigned long address,
+		pud_t *pud, int write)
+{
+	struct page *page;
+
+	page = pte_page(*(pte_t *)pud);
+	if (page)
+		page += ((address & ~PUD_MASK) >> PAGE_SHIFT);
+	return page;
+}
+
+#else /* !CONFIG_ARCH_WANT_GENERAL_HUGETLB */
+
+/* Can be overriden by architectures */
+__attribute__((weak)) struct page *
+follow_huge_pud(struct mm_struct *mm, unsigned long address,
+		pud_t *pud, int write)
+{
+	BUG();
+	return NULL;
+}
+
+#endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */
+
 #ifdef CONFIG_MEMORY_FAILURE
 
 /* Should be called in hugetlb_lock */