author		Catalin Marinas <catalin.marinas@arm.com>	2013-07-01 06:20:58 -0400
committer	Catalin Marinas <catalin.marinas@arm.com>	2013-07-01 06:20:58 -0400
commit		aa729dccb5e8dfbc78e2e235b8754d6acccee731 (patch)
tree		f6123726a25957481e2528b9b6b0d0cfd992a5fb
parent		ee877b5321c4dfee9dc9f2a12b19ddcd33149f6a (diff)
parent		af07484863e0c20796081e57093886c22dc16705 (diff)
Merge branch 'for-next/hugepages' of git://git.linaro.org/people/stevecapper/linux into upstream-hugepages
* 'for-next/hugepages' of git://git.linaro.org/people/stevecapper/linux:
  ARM64: mm: THP support.
  ARM64: mm: Raise MAX_ORDER for 64KB pages and THP.
  ARM64: mm: HugeTLB support.
  ARM64: mm: Move PTE_PROT_NONE bit.
  ARM64: mm: Make PAGE_NONE pages read only and no-execute.
  ARM64: mm: Restore memblock limit when map_mem finished.
  mm: thp: Correct the HPAGE_PMD_ORDER check.
  x86: mm: Remove general hugetlb code from x86.
  mm: hugetlb: Copy general hugetlb code from x86 to mm.
  x86: mm: Remove x86 version of huge_pmd_share.
  mm: hugetlb: Copy huge_pmd_share from x86 to mm.

Conflicts:
	arch/arm64/Kconfig
	arch/arm64/include/asm/pgtable-hwdef.h
	arch/arm64/include/asm/pgtable.h
-rw-r--r--	arch/arm64/Kconfig			 17
-rw-r--r--	arch/arm64/include/asm/hugetlb.h	117
-rw-r--r--	arch/arm64/include/asm/pgtable-hwdef.h	 13
-rw-r--r--	arch/arm64/include/asm/pgtable.h	 96
-rw-r--r--	arch/arm64/include/asm/tlb.h		  6
-rw-r--r--	arch/arm64/include/asm/tlbflush.h	  2
-rw-r--r--	arch/arm64/mm/Makefile			  1
-rw-r--r--	arch/arm64/mm/fault.c			 19
-rw-r--r--	arch/arm64/mm/hugetlbpage.c		 70
-rw-r--r--	arch/arm64/mm/mmu.c			 19
-rw-r--r--	arch/x86/Kconfig			  6
-rw-r--r--	arch/x86/mm/hugetlbpage.c		187
-rw-r--r--	include/linux/huge_mm.h			  2
-rw-r--r--	include/linux/hugetlb.h			  4
-rw-r--r--	mm/hugetlb.c				219
15 files changed, 543 insertions, 235 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 1fac4e5a1c43..4143d9b0d87a 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -188,6 +188,18 @@ config HW_PERF_EVENTS
 	  Enable hardware performance counter support for perf events. If
 	  disabled, perf events will use software events only.
 
+config SYS_SUPPORTS_HUGETLBFS
+	def_bool y
+
+config ARCH_WANT_GENERAL_HUGETLB
+	def_bool y
+
+config ARCH_WANT_HUGE_PMD_SHARE
+	def_bool y if !ARM64_64K_PAGES
+
+config HAVE_ARCH_TRANSPARENT_HUGEPAGE
+	def_bool y
+
 source "mm/Kconfig"
 
 config XEN_DOM0
@@ -200,6 +212,11 @@ config XEN
 	help
 	  Say Y if you want to run Linux in a Virtual Machine on Xen on ARM64.
 
+config FORCE_MAX_ZONEORDER
+	int
+	default "14" if (ARM64_64K_PAGES && TRANSPARENT_HUGEPAGE)
+	default "11"
+
 endmenu
 
 menu "Boot options"
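The FORCE_MAX_ZONEORDER choice can be checked with a little arithmetic. The figures below assume the 2-level 64KB-granule layout (PMD_SHIFT = 29, PAGE_SHIFT = 16) and the usual 3-level 4KB layout (PMD_SHIFT = 21, PAGE_SHIFT = 12); the patch itself does not spell these numbers out:

	64KB pages:	HPAGE_PMD_ORDER = PMD_SHIFT - PAGE_SHIFT = 29 - 16 = 13
	4KB pages:	HPAGE_PMD_ORDER = 21 - 12 = 9

The buddy allocator only serves orders 0 .. MAX_ORDER - 1, so an order-13 THP allocation needs MAX_ORDER of at least 14 (this is the HPAGE_PMD_ORDER >= MAX_ORDER build check corrected in include/linux/huge_mm.h further down), while the default of 11 already covers the order-9 case.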
diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
new file mode 100644
index 000000000000..5b7ca8ace95f
--- /dev/null
+++ b/arch/arm64/include/asm/hugetlb.h
@@ -0,0 +1,117 @@
+/*
+ * arch/arm64/include/asm/hugetlb.h
+ *
+ * Copyright (C) 2013 Linaro Ltd.
+ *
+ * Based on arch/x86/include/asm/hugetlb.h
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __ASM_HUGETLB_H
+#define __ASM_HUGETLB_H
+
+#include <asm-generic/hugetlb.h>
+#include <asm/page.h>
+
+static inline pte_t huge_ptep_get(pte_t *ptep)
+{
+	return *ptep;
+}
+
+static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+				   pte_t *ptep, pte_t pte)
+{
+	set_pte_at(mm, addr, ptep, pte);
+}
+
+static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
+					 unsigned long addr, pte_t *ptep)
+{
+	ptep_clear_flush(vma, addr, ptep);
+}
+
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+					   unsigned long addr, pte_t *ptep)
+{
+	ptep_set_wrprotect(mm, addr, ptep);
+}
+
+static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+					    unsigned long addr, pte_t *ptep)
+{
+	return ptep_get_and_clear(mm, addr, ptep);
+}
+
+static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+					     unsigned long addr, pte_t *ptep,
+					     pte_t pte, int dirty)
+{
+	return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
+}
+
+static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
+					  unsigned long addr, unsigned long end,
+					  unsigned long floor,
+					  unsigned long ceiling)
+{
+	free_pgd_range(tlb, addr, end, floor, ceiling);
+}
+
+static inline int is_hugepage_only_range(struct mm_struct *mm,
+					 unsigned long addr, unsigned long len)
+{
+	return 0;
+}
+
+static inline int prepare_hugepage_range(struct file *file,
+					 unsigned long addr, unsigned long len)
+{
+	struct hstate *h = hstate_file(file);
+	if (len & ~huge_page_mask(h))
+		return -EINVAL;
+	if (addr & ~huge_page_mask(h))
+		return -EINVAL;
+	return 0;
+}
+
+static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm)
+{
+}
+
+static inline int huge_pte_none(pte_t pte)
+{
+	return pte_none(pte);
+}
+
+static inline pte_t huge_pte_wrprotect(pte_t pte)
+{
+	return pte_wrprotect(pte);
+}
+
+static inline int arch_prepare_hugepage(struct page *page)
+{
+	return 0;
+}
+
+static inline void arch_release_hugepage(struct page *page)
+{
+}
+
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+	clear_bit(PG_dcache_clean, &page->flags);
+}
+
+#endif /* __ASM_HUGETLB_H */
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 66367c6c6527..e182a356c979 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -25,17 +25,27 @@
 /*
  * Hardware page table definitions.
  *
+ * Level 1 descriptor (PUD).
+ */
+
+#define PUD_TABLE_BIT		(_AT(pgdval_t, 1) << 1)
+
+/*
  * Level 2 descriptor (PMD).
  */
 #define PMD_TYPE_MASK		(_AT(pmdval_t, 3) << 0)
 #define PMD_TYPE_FAULT		(_AT(pmdval_t, 0) << 0)
 #define PMD_TYPE_TABLE		(_AT(pmdval_t, 3) << 0)
 #define PMD_TYPE_SECT		(_AT(pmdval_t, 1) << 0)
+#define PMD_TABLE_BIT		(_AT(pmdval_t, 1) << 1)
 
 /*
  * Section
  */
-#define PMD_SECT_USER		(_AT(pteval_t, 1) << 6)		/* AP[1] */
+#define PMD_SECT_VALID		(_AT(pmdval_t, 1) << 0)
+#define PMD_SECT_PROT_NONE	(_AT(pmdval_t, 1) << 2)
+#define PMD_SECT_USER		(_AT(pmdval_t, 1) << 6)		/* AP[1] */
+#define PMD_SECT_RDONLY		(_AT(pmdval_t, 1) << 7)		/* AP[2] */
 #define PMD_SECT_S		(_AT(pmdval_t, 3) << 8)
 #define PMD_SECT_AF		(_AT(pmdval_t, 1) << 10)
 #define PMD_SECT_NG		(_AT(pmdval_t, 1) << 11)
@@ -54,6 +64,7 @@
 #define PTE_TYPE_MASK		(_AT(pteval_t, 3) << 0)
 #define PTE_TYPE_FAULT		(_AT(pteval_t, 0) << 0)
 #define PTE_TYPE_PAGE		(_AT(pteval_t, 3) << 0)
+#define PTE_TABLE_BIT		(_AT(pteval_t, 1) << 1)
 #define PTE_USER		(_AT(pteval_t, 1) << 6)		/* AP[1] */
 #define PTE_RDONLY		(_AT(pteval_t, 1) << 7)		/* AP[2] */
 #define PTE_SHARED		(_AT(pteval_t, 3) << 8)		/* SH[1:0], inner shareable */
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 5588e8ad9762..065e58f01b1e 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -25,8 +25,8 @@
  * Software defined PTE bits definition.
  */
 #define PTE_VALID		(_AT(pteval_t, 1) << 0)
-#define PTE_PROT_NONE		(_AT(pteval_t, 1) << 1)		/* only when !PTE_VALID */
-#define PTE_FILE		(_AT(pteval_t, 1) << 2)		/* only when !pte_present() */
+#define PTE_PROT_NONE		(_AT(pteval_t, 1) << 2)		/* only when !PTE_VALID */
+#define PTE_FILE		(_AT(pteval_t, 1) << 3)		/* only when !pte_present() */
 #define PTE_DIRTY		(_AT(pteval_t, 1) << 55)
 #define PTE_SPECIAL		(_AT(pteval_t, 1) << 56)
 
@@ -66,7 +66,7 @@ extern pgprot_t pgprot_default;
 
 #define _MOD_PROT(p, b)		__pgprot_modify(p, 0, b)
 
-#define PAGE_NONE		__pgprot_modify(pgprot_default, PTE_TYPE_MASK, PTE_PROT_NONE)
+#define PAGE_NONE		__pgprot_modify(pgprot_default, PTE_TYPE_MASK, PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN)
 #define PAGE_SHARED		_MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
 #define PAGE_SHARED_EXEC	_MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN)
 #define PAGE_COPY		_MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY)
@@ -82,7 +82,7 @@ extern pgprot_t pgprot_default;
 #define PAGE_S2			__pgprot_modify(pgprot_default, PTE_S2_MEMATTR_MASK, PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY)
 #define PAGE_S2_DEVICE		__pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDWR | PTE_UXN)
 
-#define __PAGE_NONE		__pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE)
+#define __PAGE_NONE		__pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN)
 #define __PAGE_SHARED		__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
 #define __PAGE_SHARED_EXEC	__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
 #define __PAGE_COPY		__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY)
@@ -179,12 +179,76 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 /*
  * Huge pte definitions.
  */
-#define pte_huge(pte)		((pte_val(pte) & PTE_TYPE_MASK) == PTE_TYPE_HUGEPAGE)
-#define pte_mkhuge(pte)		(__pte((pte_val(pte) & ~PTE_TYPE_MASK) | PTE_TYPE_HUGEPAGE))
+#define pte_huge(pte)		(!(pte_val(pte) & PTE_TABLE_BIT))
+#define pte_mkhuge(pte)		(__pte(pte_val(pte) & ~PTE_TABLE_BIT))
+
+/*
+ * Hugetlb definitions.
+ */
+#define HUGE_MAX_HSTATE		2
+#define HPAGE_SHIFT		PMD_SHIFT
+#define HPAGE_SIZE		(_AC(1, UL) << HPAGE_SHIFT)
+#define HPAGE_MASK		(~(HPAGE_SIZE - 1))
+#define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
 
 #define __HAVE_ARCH_PTE_SPECIAL
 
 /*
+ * Software PMD bits for THP
+ */
+
+#define PMD_SECT_DIRTY		(_AT(pmdval_t, 1) << 55)
+#define PMD_SECT_SPLITTING	(_AT(pmdval_t, 1) << 57)
+
+/*
+ * THP definitions.
+ */
+#define pmd_young(pmd)		(pmd_val(pmd) & PMD_SECT_AF)
+
+#define __HAVE_ARCH_PMD_WRITE
+#define pmd_write(pmd)		(!(pmd_val(pmd) & PMD_SECT_RDONLY))
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define pmd_trans_huge(pmd)	(pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT))
+#define pmd_trans_splitting(pmd) (pmd_val(pmd) & PMD_SECT_SPLITTING)
+#endif
+
+#define PMD_BIT_FUNC(fn,op) \
+static inline pmd_t pmd_##fn(pmd_t pmd) { pmd_val(pmd) op; return pmd; }
+
+PMD_BIT_FUNC(wrprotect,	|= PMD_SECT_RDONLY);
+PMD_BIT_FUNC(mkold,	&= ~PMD_SECT_AF);
+PMD_BIT_FUNC(mksplitting, |= PMD_SECT_SPLITTING);
+PMD_BIT_FUNC(mkwrite,	&= ~PMD_SECT_RDONLY);
+PMD_BIT_FUNC(mkdirty,	|= PMD_SECT_DIRTY);
+PMD_BIT_FUNC(mkyoung,	|= PMD_SECT_AF);
+PMD_BIT_FUNC(mknotpresent, &= ~PMD_TYPE_MASK);
+
+#define pmd_mkhuge(pmd)		(__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT))
+
+#define pmd_pfn(pmd)		(((pmd_val(pmd) & PMD_MASK) & PHYS_MASK) >> PAGE_SHIFT)
+#define pfn_pmd(pfn,prot)	(__pmd(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)))
+#define mk_pmd(page,prot)	pfn_pmd(page_to_pfn(page),prot)
+
+#define pmd_page(pmd)		pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK))
+
+static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+	const pmdval_t mask = PMD_SECT_USER | PMD_SECT_PXN | PMD_SECT_UXN |
+			      PMD_SECT_RDONLY | PMD_SECT_PROT_NONE |
+			      PMD_SECT_VALID;
+	pmd_val(pmd) = (pmd_val(pmd) & ~mask) | (pgprot_val(newprot) & mask);
+	return pmd;
+}
+
+#define set_pmd_at(mm, addr, pmdp, pmd)	set_pmd(pmdp, pmd)
+
+static inline int has_transparent_hugepage(void)
+{
+	return 1;
+}
+
+/*
  * Mark the prot value as uncacheable and unbufferable.
  */
 #define pgprot_noncached(prot) \
@@ -293,12 +357,12 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
 
 /*
  * Encode and decode a swap entry:
- * bits 0-1:	present (must be zero)
- * bit 2:	PTE_FILE
- * bits 3-8:	swap type
+ * bits 0, 2:	present (must both be zero)
+ * bit 3:	PTE_FILE
+ * bits 4-8:	swap type
  * bits 9-63:	swap offset
  */
-#define __SWP_TYPE_SHIFT	3
+#define __SWP_TYPE_SHIFT	4
 #define __SWP_TYPE_BITS		6
 #define __SWP_TYPE_MASK		((1 << __SWP_TYPE_BITS) - 1)
 #define __SWP_OFFSET_SHIFT	(__SWP_TYPE_BITS + __SWP_TYPE_SHIFT)
@@ -318,15 +382,15 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
 
 /*
  * Encode and decode a file entry:
- * bits 0-1:	present (must be zero)
- * bit 2:	PTE_FILE
- * bits 3-63:	file offset / PAGE_SIZE
+ * bits 0, 2:	present (must both be zero)
+ * bit 3:	PTE_FILE
+ * bits 4-63:	file offset / PAGE_SIZE
  */
 #define pte_file(pte)		(pte_val(pte) & PTE_FILE)
-#define pte_to_pgoff(x)		(pte_val(x) >> 3)
-#define pgoff_to_pte(x)		__pte(((x) << 3) | PTE_FILE)
+#define pte_to_pgoff(x)		(pte_val(x) >> 4)
+#define pgoff_to_pte(x)		__pte(((x) << 4) | PTE_FILE)
 
-#define PTE_FILE_MAX_BITS	61
+#define PTE_FILE_MAX_BITS	60
 
 extern int kern_addr_valid(unsigned long addr);
 
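Read together with the pgtable-hwdef.h hunk above, the low descriptor bits end up laid out as follows; this summary is inferred from the definitions in the two hunks, and the rationale wording is a gloss rather than text from the patch:

	bit  0: PTE_VALID / PMD_SECT_VALID
	bit  1: PTE_TABLE_BIT / PMD_TABLE_BIT	(hardware table-vs-block bit, now
						 tested by pte_huge() and pmd_trans_huge())
	bit  2: PTE_PROT_NONE / PMD_SECT_PROT_NONE	(only when the entry is not valid)
	bit  3: PTE_FILE			(only when !pte_present())
	bit 55: PTE_DIRTY / PMD_SECT_DIRTY
	bit 56: PTE_SPECIAL
	bit 57: PMD_SECT_SPLITTING

Because PTE_PROT_NONE and PTE_FILE each moved up by one bit, the swap and file encodings above now start their payload at bit 4 instead of bit 3.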
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index 654f0968030b..46b3beb4b773 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -187,4 +187,10 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
 
 #define tlb_migrate_finish(mm)	do { } while (0)
 
+static inline void
+tlb_remove_pmd_tlb_entry(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr)
+{
+	tlb_add_flush(tlb, addr);
+}
+
 #endif
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 122d6320f745..8b482035cfc2 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -117,6 +117,8 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
 	dsb();
 }
 
+#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
+
 #endif
 
 #endif
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
index 3140a2abcdc2..b51d36401d83 100644
--- a/arch/arm64/mm/Makefile
+++ b/arch/arm64/mm/Makefile
@@ -2,3 +2,4 @@ obj-y := dma-mapping.o extable.o fault.o init.o \
 				   cache.o copypage.o flush.o \
 				   ioremap.o mmap.o pgd.o mmu.o \
 				   context.o tlb.o proc.o
+obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 1426468b77f3..0ecac8980aae 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -365,17 +365,6 @@ static int __kprobes do_translation_fault(unsigned long addr,
 }
 
 /*
- * Some section permission faults need to be handled gracefully. They can
- * happen due to a __{get,put}_user during an oops.
- */
-static int do_sect_fault(unsigned long addr, unsigned int esr,
-			 struct pt_regs *regs)
-{
-	do_bad_area(addr, esr, regs);
-	return 0;
-}
-
-/*
  * This abort handler always returns "fault".
  */
 static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs)
@@ -398,12 +387,12 @@ static struct fault_info {
 	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 2 translation fault"	},
 	{ do_page_fault,	SIGSEGV, SEGV_MAPERR,	"level 3 translation fault"	},
 	{ do_bad,		SIGBUS,  0,		"reserved access flag fault"	},
-	{ do_bad,		SIGSEGV, SEGV_ACCERR,	"level 1 access flag fault"	},
-	{ do_bad,		SIGSEGV, SEGV_ACCERR,	"level 2 access flag fault"	},
+	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 1 access flag fault"	},
+	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 2 access flag fault"	},
 	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 3 access flag fault"	},
 	{ do_bad,		SIGBUS,  0,		"reserved permission fault"	},
-	{ do_bad,		SIGSEGV, SEGV_ACCERR,	"level 1 permission fault"	},
-	{ do_sect_fault,	SIGSEGV, SEGV_ACCERR,	"level 2 permission fault"	},
+	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 1 permission fault"	},
+	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 2 permission fault"	},
 	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 3 permission fault"	},
 	{ do_bad,		SIGBUS,  0,		"synchronous external abort"	},
 	{ do_bad,		SIGBUS,  0,		"asynchronous external abort"	},
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
new file mode 100644
index 000000000000..2fc8258bab2d
--- /dev/null
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -0,0 +1,70 @@
+/*
+ * arch/arm64/mm/hugetlbpage.c
+ *
+ * Copyright (C) 2013 Linaro Ltd.
+ *
+ * Based on arch/x86/mm/hugetlbpage.c.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/pagemap.h>
+#include <linux/err.h>
+#include <linux/sysctl.h>
+#include <asm/mman.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
+
+#ifndef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+	return 0;
+}
+#endif
+
+struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
+			      int write)
+{
+	return ERR_PTR(-EINVAL);
+}
+
+int pmd_huge(pmd_t pmd)
+{
+	return !(pmd_val(pmd) & PMD_TABLE_BIT);
+}
+
+int pud_huge(pud_t pud)
+{
+	return !(pud_val(pud) & PUD_TABLE_BIT);
+}
+
+static __init int setup_hugepagesz(char *opt)
+{
+	unsigned long ps = memparse(opt, &opt);
+	if (ps == PMD_SIZE) {
+		hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
+	} else if (ps == PUD_SIZE) {
+		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
+	} else {
+		pr_err("hugepagesz: Unsupported page size %lu M\n", ps >> 20);
+		return 0;
+	}
+	return 1;
+}
+__setup("hugepagesz=", setup_hugepagesz);
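The hstates registered by setup_hugepagesz() are consumed through hugetlbfs or mmap(MAP_HUGETLB). A minimal user-space sketch using the default huge page size (standard Linux mmap API, nothing arm64-specific; the 2MB size and the example boot parameters hugepagesz=2M hugepages=16 are assumptions for a 4KB-granule kernel):

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

#define LEN	(2UL * 1024 * 1024)	/* one default-sized huge page */

int main(void)
{
	/* Anonymous mapping backed by huge pages from the reserved pool. */
	void *p = mmap(NULL, LEN, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap(MAP_HUGETLB)");
		return 1;
	}
	memset(p, 0, LEN);		/* touch it so the huge page is faulted in */
	munmap(p, LEN);
	return 0;
}

On an ARM64_64K_PAGES kernel the PMD-level huge page is 512MB rather than 2MB, so LEN and the reserved pool would need to be sized accordingly.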
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 80a369eab637..a8d1059b91b2 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -297,6 +297,16 @@ static void __init map_mem(void)
 {
 	struct memblock_region *reg;
 
+	/*
+	 * Temporarily limit the memblock range. We need to do this as
+	 * create_mapping requires puds, pmds and ptes to be allocated from
+	 * memory addressable from the initial direct kernel mapping.
+	 *
+	 * The initial direct kernel mapping, located at swapper_pg_dir,
+	 * gives us PGDIR_SIZE memory starting from PHYS_OFFSET (aligned).
+	 */
+	memblock_set_current_limit((PHYS_OFFSET & PGDIR_MASK) + PGDIR_SIZE);
+
 	/* map all the memory banks */
 	for_each_memblock(memory, reg) {
 		phys_addr_t start = reg->base;
@@ -307,6 +317,9 @@ static void __init map_mem(void)
 
 		create_mapping(start, __phys_to_virt(start), end - start);
 	}
+
+	/* Limit no longer required. */
+	memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
 }
 
 /*
@@ -317,12 +330,6 @@ void __init paging_init(void)
 {
 	void *zero_page;
 
-	/*
-	 * Maximum PGDIR_SIZE addressable via the initial direct kernel
-	 * mapping in swapper_pg_dir.
-	 */
-	memblock_set_current_limit((PHYS_OFFSET & PGDIR_MASK) + PGDIR_SIZE);
-
 	init_mem_pgprot();
 	map_mem();
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 685692c94f05..191c4e34722d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -207,6 +207,12 @@ config ARCH_HIBERNATION_POSSIBLE
 config ARCH_SUSPEND_POSSIBLE
 	def_bool y
 
+config ARCH_WANT_HUGE_PMD_SHARE
+	def_bool y
+
+config ARCH_WANT_GENERAL_HUGETLB
+	def_bool y
+
 config ZONE_DMA32
 	bool
 	default X86_64
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index ae1aa71d0115..7e73e8c69096 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -16,169 +16,6 @@
 #include <asm/tlbflush.h>
 #include <asm/pgalloc.h>
 
-static unsigned long page_table_shareable(struct vm_area_struct *svma,
-				struct vm_area_struct *vma,
-				unsigned long addr, pgoff_t idx)
-{
-	unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) +
-				svma->vm_start;
-	unsigned long sbase = saddr & PUD_MASK;
-	unsigned long s_end = sbase + PUD_SIZE;
-
-	/* Allow segments to share if only one is marked locked */
-	unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED;
-	unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED;
-
-	/*
-	 * match the virtual addresses, permission and the alignment of the
-	 * page table page.
-	 */
-	if (pmd_index(addr) != pmd_index(saddr) ||
-	    vm_flags != svm_flags ||
-	    sbase < svma->vm_start || svma->vm_end < s_end)
-		return 0;
-
-	return saddr;
-}
-
-static int vma_shareable(struct vm_area_struct *vma, unsigned long addr)
-{
-	unsigned long base = addr & PUD_MASK;
-	unsigned long end = base + PUD_SIZE;
-
-	/*
-	 * check on proper vm_flags and page table alignment
-	 */
-	if (vma->vm_flags & VM_MAYSHARE &&
-	    vma->vm_start <= base && end <= vma->vm_end)
-		return 1;
-	return 0;
-}
-
-/*
- * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
- * and returns the corresponding pte. While this is not necessary for the
- * !shared pmd case because we can allocate the pmd later as well, it makes the
- * code much cleaner. pmd allocation is essential for the shared case because
- * pud has to be populated inside the same i_mmap_mutex section - otherwise
- * racing tasks could either miss the sharing (see huge_pte_offset) or select a
- * bad pmd for sharing.
- */
-static pte_t *
-huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
-{
-	struct vm_area_struct *vma = find_vma(mm, addr);
-	struct address_space *mapping = vma->vm_file->f_mapping;
-	pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) +
-			vma->vm_pgoff;
-	struct vm_area_struct *svma;
-	unsigned long saddr;
-	pte_t *spte = NULL;
-	pte_t *pte;
-
-	if (!vma_shareable(vma, addr))
-		return (pte_t *)pmd_alloc(mm, pud, addr);
-
-	mutex_lock(&mapping->i_mmap_mutex);
-	vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
-		if (svma == vma)
-			continue;
-
-		saddr = page_table_shareable(svma, vma, addr, idx);
-		if (saddr) {
-			spte = huge_pte_offset(svma->vm_mm, saddr);
-			if (spte) {
-				get_page(virt_to_page(spte));
-				break;
-			}
-		}
-	}
-
-	if (!spte)
-		goto out;
-
-	spin_lock(&mm->page_table_lock);
-	if (pud_none(*pud))
-		pud_populate(mm, pud, (pmd_t *)((unsigned long)spte & PAGE_MASK));
-	else
-		put_page(virt_to_page(spte));
-	spin_unlock(&mm->page_table_lock);
-out:
-	pte = (pte_t *)pmd_alloc(mm, pud, addr);
-	mutex_unlock(&mapping->i_mmap_mutex);
-	return pte;
-}
-
-/*
- * unmap huge page backed by shared pte.
- *
- * Hugetlb pte page is ref counted at the time of mapping. If pte is shared
- * indicated by page_count > 1, unmap is achieved by clearing pud and
- * decrementing the ref count. If count == 1, the pte page is not shared.
- *
- * called with vma->vm_mm->page_table_lock held.
- *
- * returns: 1 successfully unmapped a shared pte page
- *	    0 the underlying pte page is not shared, or it is the last user
- */
-int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
-{
-	pgd_t *pgd = pgd_offset(mm, *addr);
-	pud_t *pud = pud_offset(pgd, *addr);
-
-	BUG_ON(page_count(virt_to_page(ptep)) == 0);
-	if (page_count(virt_to_page(ptep)) == 1)
-		return 0;
-
-	pud_clear(pud);
-	put_page(virt_to_page(ptep));
-	*addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE;
-	return 1;
-}
-
-pte_t *huge_pte_alloc(struct mm_struct *mm,
-			unsigned long addr, unsigned long sz)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-	pte_t *pte = NULL;
-
-	pgd = pgd_offset(mm, addr);
-	pud = pud_alloc(mm, pgd, addr);
-	if (pud) {
-		if (sz == PUD_SIZE) {
-			pte = (pte_t *)pud;
-		} else {
-			BUG_ON(sz != PMD_SIZE);
-			if (pud_none(*pud))
-				pte = huge_pmd_share(mm, addr, pud);
-			else
-				pte = (pte_t *)pmd_alloc(mm, pud, addr);
-		}
-	}
-	BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));
-
-	return pte;
-}
-
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd = NULL;
-
-	pgd = pgd_offset(mm, addr);
-	if (pgd_present(*pgd)) {
-		pud = pud_offset(pgd, addr);
-		if (pud_present(*pud)) {
-			if (pud_large(*pud))
-				return (pte_t *)pud;
-			pmd = pmd_offset(pud, addr);
-		}
-	}
-	return (pte_t *) pmd;
-}
-
 #if 0	/* This is just for testing */
 struct page *
 follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
@@ -240,30 +77,6 @@ int pud_huge(pud_t pud)
 	return !!(pud_val(pud) & _PAGE_PSE);
 }
 
-struct page *
-follow_huge_pmd(struct mm_struct *mm, unsigned long address,
-		pmd_t *pmd, int write)
-{
-	struct page *page;
-
-	page = pte_page(*(pte_t *)pmd);
-	if (page)
-		page += ((address & ~PMD_MASK) >> PAGE_SHIFT);
-	return page;
-}
-
-struct page *
-follow_huge_pud(struct mm_struct *mm, unsigned long address,
-		pud_t *pud, int write)
-{
-	struct page *page;
-
-	page = pte_page(*(pte_t *)pud);
-	if (page)
-		page += ((address & ~PUD_MASK) >> PAGE_SHIFT);
-	return page;
-}
-
 #endif
 
 /* x86_64 also uses this file */
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 528454c2caa9..26ee56c80dc7 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -123,7 +123,7 @@ extern void __split_huge_page_pmd(struct vm_area_struct *vma,
 	} while (0)
 extern void split_huge_page_pmd_mm(struct mm_struct *mm, unsigned long address,
 				    pmd_t *pmd);
-#if HPAGE_PMD_ORDER > MAX_ORDER
+#if HPAGE_PMD_ORDER >= MAX_ORDER
 #error "hugepages can't be allocated by the buddy allocator"
 #endif
 extern int hugepage_madvise(struct vm_area_struct *vma,
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 6b4890fa57e7..981546ad231c 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -69,6 +69,10 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed);
 int dequeue_hwpoisoned_huge_page(struct page *page);
 void copy_huge_page(struct page *dst, struct page *src);
 
+#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
+pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud);
+#endif
+
 extern unsigned long hugepages_treat_as_movable;
 extern const unsigned long hugetlb_zero, hugetlb_infinity;
 extern int sysctl_hugetlb_shm_group;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f8feeeca6686..63217261fd14 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2931,15 +2931,6 @@ out_mutex:
 	return ret;
 }
 
-/* Can be overriden by architectures */
-__attribute__((weak)) struct page *
-follow_huge_pud(struct mm_struct *mm, unsigned long address,
-	       pud_t *pud, int write)
-{
-	BUG();
-	return NULL;
-}
-
 long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			 struct page **pages, struct vm_area_struct **vmas,
 			 unsigned long *position, unsigned long *nr_pages,
@@ -3169,6 +3160,216 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
 	hugetlb_acct_memory(h, -(chg - freed));
 }
 
+#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
+static unsigned long page_table_shareable(struct vm_area_struct *svma,
+				struct vm_area_struct *vma,
+				unsigned long addr, pgoff_t idx)
+{
+	unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) +
+				svma->vm_start;
+	unsigned long sbase = saddr & PUD_MASK;
+	unsigned long s_end = sbase + PUD_SIZE;
+
+	/* Allow segments to share if only one is marked locked */
+	unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED;
+	unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED;
+
+	/*
+	 * match the virtual addresses, permission and the alignment of the
+	 * page table page.
+	 */
+	if (pmd_index(addr) != pmd_index(saddr) ||
+	    vm_flags != svm_flags ||
+	    sbase < svma->vm_start || svma->vm_end < s_end)
+		return 0;
+
+	return saddr;
+}
+
+static int vma_shareable(struct vm_area_struct *vma, unsigned long addr)
+{
+	unsigned long base = addr & PUD_MASK;
+	unsigned long end = base + PUD_SIZE;
+
+	/*
+	 * check on proper vm_flags and page table alignment
+	 */
+	if (vma->vm_flags & VM_MAYSHARE &&
+	    vma->vm_start <= base && end <= vma->vm_end)
+		return 1;
+	return 0;
+}
+
+/*
+ * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
+ * and returns the corresponding pte. While this is not necessary for the
+ * !shared pmd case because we can allocate the pmd later as well, it makes the
+ * code much cleaner. pmd allocation is essential for the shared case because
+ * pud has to be populated inside the same i_mmap_mutex section - otherwise
+ * racing tasks could either miss the sharing (see huge_pte_offset) or select a
+ * bad pmd for sharing.
+ */
+pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
+{
+	struct vm_area_struct *vma = find_vma(mm, addr);
+	struct address_space *mapping = vma->vm_file->f_mapping;
+	pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) +
+			vma->vm_pgoff;
+	struct vm_area_struct *svma;
+	unsigned long saddr;
+	pte_t *spte = NULL;
+	pte_t *pte;
+
+	if (!vma_shareable(vma, addr))
+		return (pte_t *)pmd_alloc(mm, pud, addr);
+
+	mutex_lock(&mapping->i_mmap_mutex);
+	vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
+		if (svma == vma)
+			continue;
+
+		saddr = page_table_shareable(svma, vma, addr, idx);
+		if (saddr) {
+			spte = huge_pte_offset(svma->vm_mm, saddr);
+			if (spte) {
+				get_page(virt_to_page(spte));
+				break;
+			}
+		}
+	}
+
+	if (!spte)
+		goto out;
+
+	spin_lock(&mm->page_table_lock);
+	if (pud_none(*pud))
+		pud_populate(mm, pud,
+				(pmd_t *)((unsigned long)spte & PAGE_MASK));
+	else
+		put_page(virt_to_page(spte));
+	spin_unlock(&mm->page_table_lock);
+out:
+	pte = (pte_t *)pmd_alloc(mm, pud, addr);
+	mutex_unlock(&mapping->i_mmap_mutex);
+	return pte;
+}
+
+/*
+ * unmap huge page backed by shared pte.
+ *
+ * Hugetlb pte page is ref counted at the time of mapping. If pte is shared
+ * indicated by page_count > 1, unmap is achieved by clearing pud and
+ * decrementing the ref count. If count == 1, the pte page is not shared.
+ *
+ * called with vma->vm_mm->page_table_lock held.
+ *
+ * returns: 1 successfully unmapped a shared pte page
+ *	    0 the underlying pte page is not shared, or it is the last user
+ */
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+	pgd_t *pgd = pgd_offset(mm, *addr);
+	pud_t *pud = pud_offset(pgd, *addr);
+
+	BUG_ON(page_count(virt_to_page(ptep)) == 0);
+	if (page_count(virt_to_page(ptep)) == 1)
+		return 0;
+
+	pud_clear(pud);
+	put_page(virt_to_page(ptep));
+	*addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE;
+	return 1;
+}
+#define want_pmd_share()	(1)
+#else /* !CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
+pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
+{
+	return NULL;
+}
+#define want_pmd_share()	(0)
+#endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
+
+#ifdef CONFIG_ARCH_WANT_GENERAL_HUGETLB
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+			unsigned long addr, unsigned long sz)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pte_t *pte = NULL;
+
+	pgd = pgd_offset(mm, addr);
+	pud = pud_alloc(mm, pgd, addr);
+	if (pud) {
+		if (sz == PUD_SIZE) {
+			pte = (pte_t *)pud;
+		} else {
+			BUG_ON(sz != PMD_SIZE);
+			if (want_pmd_share() && pud_none(*pud))
+				pte = huge_pmd_share(mm, addr, pud);
+			else
+				pte = (pte_t *)pmd_alloc(mm, pud, addr);
+		}
+	}
+	BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));
+
+	return pte;
+}
+
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd = NULL;
+
+	pgd = pgd_offset(mm, addr);
+	if (pgd_present(*pgd)) {
+		pud = pud_offset(pgd, addr);
+		if (pud_present(*pud)) {
+			if (pud_huge(*pud))
+				return (pte_t *)pud;
+			pmd = pmd_offset(pud, addr);
+		}
+	}
+	return (pte_t *) pmd;
+}
+
+struct page *
+follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+		pmd_t *pmd, int write)
+{
+	struct page *page;
+
+	page = pte_page(*(pte_t *)pmd);
+	if (page)
+		page += ((address & ~PMD_MASK) >> PAGE_SHIFT);
+	return page;
+}
+
+struct page *
+follow_huge_pud(struct mm_struct *mm, unsigned long address,
+		pud_t *pud, int write)
+{
+	struct page *page;
+
+	page = pte_page(*(pte_t *)pud);
+	if (page)
+		page += ((address & ~PUD_MASK) >> PAGE_SHIFT);
+	return page;
+}
+
+#else /* !CONFIG_ARCH_WANT_GENERAL_HUGETLB */
+
+/* Can be overriden by architectures */
+__attribute__((weak)) struct page *
+follow_huge_pud(struct mm_struct *mm, unsigned long address,
+	       pud_t *pud, int write)
+{
+	BUG();
+	return NULL;
+}
+
+#endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */
+
 #ifdef CONFIG_MEMORY_FAILURE
 
 /* Should be called in hugetlb_lock */
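To make the huge_pmd_share() test concrete, a worked example with made-up numbers (x86-64-style sizes, PAGE_SHIFT = 12 and PUD_SIZE = 1GB; none of these addresses come from the patch): two tasks map the same hugetlbfs file MAP_SHARED over a full 1GB, PUD-aligned range, say vma at 0x40000000 and svma at 0x80000000, both with vm_pgoff = 0. For a fault at addr = 0x40200000:

	idx   = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff   = 0x200
	saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) + svma->vm_start  = 0x80200000
	sbase = saddr & PUD_MASK = 0x80000000,	s_end = sbase + PUD_SIZE = 0xc0000000

pmd_index(addr) and pmd_index(saddr) are both 1, the vm_flags match, and svma spans all of [sbase, s_end), so page_table_shareable() returns saddr and both mms end up referencing the same PMD page of huge PTEs. mlock()ing one of the mappings, or mapping at a non-PUD-aligned address, makes the test fail and each mm allocates its own PMD page.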