diff options
| author | Catalin Marinas <catalin.marinas@arm.com> | 2013-07-01 06:20:58 -0400 |
|---|---|---|
| committer | Catalin Marinas <catalin.marinas@arm.com> | 2013-07-01 06:20:58 -0400 |
| commit | aa729dccb5e8dfbc78e2e235b8754d6acccee731 (patch) | |
| tree | f6123726a25957481e2528b9b6b0d0cfd992a5fb | |
| parent | ee877b5321c4dfee9dc9f2a12b19ddcd33149f6a (diff) | |
| parent | af07484863e0c20796081e57093886c22dc16705 (diff) | |
Merge branch 'for-next/hugepages' of git://git.linaro.org/people/stevecapper/linux into upstream-hugepages
* 'for-next/hugepages' of git://git.linaro.org/people/stevecapper/linux:
ARM64: mm: THP support.
ARM64: mm: Raise MAX_ORDER for 64KB pages and THP.
ARM64: mm: HugeTLB support.
ARM64: mm: Move PTE_PROT_NONE bit.
ARM64: mm: Make PAGE_NONE pages read only and no-execute.
ARM64: mm: Restore memblock limit when map_mem finished.
mm: thp: Correct the HPAGE_PMD_ORDER check.
x86: mm: Remove general hugetlb code from x86.
mm: hugetlb: Copy general hugetlb code from x86 to mm.
x86: mm: Remove x86 version of huge_pmd_share.
mm: hugetlb: Copy huge_pmd_share from x86 to mm.
Conflicts:
arch/arm64/Kconfig
arch/arm64/include/asm/pgtable-hwdef.h
arch/arm64/include/asm/pgtable.h
| -rw-r--r-- | arch/arm64/Kconfig | 17 | ||||
| -rw-r--r-- | arch/arm64/include/asm/hugetlb.h | 117 | ||||
| -rw-r--r-- | arch/arm64/include/asm/pgtable-hwdef.h | 13 | ||||
| -rw-r--r-- | arch/arm64/include/asm/pgtable.h | 96 | ||||
| -rw-r--r-- | arch/arm64/include/asm/tlb.h | 6 | ||||
| -rw-r--r-- | arch/arm64/include/asm/tlbflush.h | 2 | ||||
| -rw-r--r-- | arch/arm64/mm/Makefile | 1 | ||||
| -rw-r--r-- | arch/arm64/mm/fault.c | 19 | ||||
| -rw-r--r-- | arch/arm64/mm/hugetlbpage.c | 70 | ||||
| -rw-r--r-- | arch/arm64/mm/mmu.c | 19 | ||||
| -rw-r--r-- | arch/x86/Kconfig | 6 | ||||
| -rw-r--r-- | arch/x86/mm/hugetlbpage.c | 187 | ||||
| -rw-r--r-- | include/linux/huge_mm.h | 2 | ||||
| -rw-r--r-- | include/linux/hugetlb.h | 4 | ||||
| -rw-r--r-- | mm/hugetlb.c | 219 |
15 files changed, 543 insertions, 235 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1fac4e5a1c43..4143d9b0d87a 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig | |||
| @@ -188,6 +188,18 @@ config HW_PERF_EVENTS | |||
| 188 | Enable hardware performance counter support for perf events. If | 188 | Enable hardware performance counter support for perf events. If |
| 189 | disabled, perf events will use software events only. | 189 | disabled, perf events will use software events only. |
| 190 | 190 | ||
| 191 | config SYS_SUPPORTS_HUGETLBFS | ||
| 192 | def_bool y | ||
| 193 | |||
| 194 | config ARCH_WANT_GENERAL_HUGETLB | ||
| 195 | def_bool y | ||
| 196 | |||
| 197 | config ARCH_WANT_HUGE_PMD_SHARE | ||
| 198 | def_bool y if !ARM64_64K_PAGES | ||
| 199 | |||
| 200 | config HAVE_ARCH_TRANSPARENT_HUGEPAGE | ||
| 201 | def_bool y | ||
| 202 | |||
| 191 | source "mm/Kconfig" | 203 | source "mm/Kconfig" |
| 192 | 204 | ||
| 193 | config XEN_DOM0 | 205 | config XEN_DOM0 |
| @@ -200,6 +212,11 @@ config XEN | |||
| 200 | help | 212 | help |
| 201 | Say Y if you want to run Linux in a Virtual Machine on Xen on ARM64. | 213 | Say Y if you want to run Linux in a Virtual Machine on Xen on ARM64. |
| 202 | 214 | ||
| 215 | config FORCE_MAX_ZONEORDER | ||
| 216 | int | ||
| 217 | default "14" if (ARM64_64K_PAGES && TRANSPARENT_HUGEPAGE) | ||
| 218 | default "11" | ||
| 219 | |||
| 203 | endmenu | 220 | endmenu |
| 204 | 221 | ||
| 205 | menu "Boot options" | 222 | menu "Boot options" |
diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h new file mode 100644 index 000000000000..5b7ca8ace95f --- /dev/null +++ b/arch/arm64/include/asm/hugetlb.h | |||
| @@ -0,0 +1,117 @@ | |||
| 1 | /* | ||
| 2 | * arch/arm64/include/asm/hugetlb.h | ||
| 3 | * | ||
| 4 | * Copyright (C) 2013 Linaro Ltd. | ||
| 5 | * | ||
| 6 | * Based on arch/x86/include/asm/hugetlb.h | ||
| 7 | * | ||
| 8 | * This program is free software; you can redistribute it and/or modify | ||
| 9 | * it under the terms of the GNU General Public License version 2 as | ||
| 10 | * published by the Free Software Foundation. | ||
| 11 | * | ||
| 12 | * This program is distributed in the hope that it will be useful, | ||
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 15 | * GNU General Public License for more details. | ||
| 16 | * | ||
| 17 | * You should have received a copy of the GNU General Public License | ||
| 18 | * along with this program; if not, write to the Free Software | ||
| 19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
| 20 | */ | ||
| 21 | |||
| 22 | #ifndef __ASM_HUGETLB_H | ||
| 23 | #define __ASM_HUGETLB_H | ||
| 24 | |||
| 25 | #include <asm-generic/hugetlb.h> | ||
| 26 | #include <asm/page.h> | ||
| 27 | |||
| 28 | static inline pte_t huge_ptep_get(pte_t *ptep) | ||
| 29 | { | ||
| 30 | return *ptep; | ||
| 31 | } | ||
| 32 | |||
| 33 | static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, | ||
| 34 | pte_t *ptep, pte_t pte) | ||
| 35 | { | ||
| 36 | set_pte_at(mm, addr, ptep, pte); | ||
| 37 | } | ||
| 38 | |||
| 39 | static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, | ||
| 40 | unsigned long addr, pte_t *ptep) | ||
| 41 | { | ||
| 42 | ptep_clear_flush(vma, addr, ptep); | ||
| 43 | } | ||
| 44 | |||
| 45 | static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, | ||
| 46 | unsigned long addr, pte_t *ptep) | ||
| 47 | { | ||
| 48 | ptep_set_wrprotect(mm, addr, ptep); | ||
| 49 | } | ||
| 50 | |||
| 51 | static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, | ||
| 52 | unsigned long addr, pte_t *ptep) | ||
| 53 | { | ||
| 54 | return ptep_get_and_clear(mm, addr, ptep); | ||
| 55 | } | ||
| 56 | |||
| 57 | static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, | ||
| 58 | unsigned long addr, pte_t *ptep, | ||
| 59 | pte_t pte, int dirty) | ||
| 60 | { | ||
| 61 | return ptep_set_access_flags(vma, addr, ptep, pte, dirty); | ||
| 62 | } | ||
| 63 | |||
| 64 | static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, | ||
| 65 | unsigned long addr, unsigned long end, | ||
| 66 | unsigned long floor, | ||
| 67 | unsigned long ceiling) | ||
| 68 | { | ||
| 69 | free_pgd_range(tlb, addr, end, floor, ceiling); | ||
| 70 | } | ||
| 71 | |||
| 72 | static inline int is_hugepage_only_range(struct mm_struct *mm, | ||
| 73 | unsigned long addr, unsigned long len) | ||
| 74 | { | ||
| 75 | return 0; | ||
| 76 | } | ||
| 77 | |||
| 78 | static inline int prepare_hugepage_range(struct file *file, | ||
| 79 | unsigned long addr, unsigned long len) | ||
| 80 | { | ||
| 81 | struct hstate *h = hstate_file(file); | ||
| 82 | if (len & ~huge_page_mask(h)) | ||
| 83 | return -EINVAL; | ||
| 84 | if (addr & ~huge_page_mask(h)) | ||
| 85 | return -EINVAL; | ||
| 86 | return 0; | ||
| 87 | } | ||
| 88 | |||
| 89 | static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm) | ||
| 90 | { | ||
| 91 | } | ||
| 92 | |||
| 93 | static inline int huge_pte_none(pte_t pte) | ||
| 94 | { | ||
| 95 | return pte_none(pte); | ||
| 96 | } | ||
| 97 | |||
| 98 | static inline pte_t huge_pte_wrprotect(pte_t pte) | ||
| 99 | { | ||
| 100 | return pte_wrprotect(pte); | ||
| 101 | } | ||
| 102 | |||
| 103 | static inline int arch_prepare_hugepage(struct page *page) | ||
| 104 | { | ||
| 105 | return 0; | ||
| 106 | } | ||
| 107 | |||
| 108 | static inline void arch_release_hugepage(struct page *page) | ||
| 109 | { | ||
| 110 | } | ||
| 111 | |||
| 112 | static inline void arch_clear_hugepage_flags(struct page *page) | ||
| 113 | { | ||
| 114 | clear_bit(PG_dcache_clean, &page->flags); | ||
| 115 | } | ||
| 116 | |||
| 117 | #endif /* __ASM_HUGETLB_H */ | ||
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 66367c6c6527..e182a356c979 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h | |||
| @@ -25,17 +25,27 @@ | |||
| 25 | /* | 25 | /* |
| 26 | * Hardware page table definitions. | 26 | * Hardware page table definitions. |
| 27 | * | 27 | * |
| 28 | * Level 1 descriptor (PUD). | ||
| 29 | */ | ||
| 30 | |||
| 31 | #define PUD_TABLE_BIT (_AT(pgdval_t, 1) << 1) | ||
| 32 | |||
| 33 | /* | ||
| 28 | * Level 2 descriptor (PMD). | 34 | * Level 2 descriptor (PMD). |
| 29 | */ | 35 | */ |
| 30 | #define PMD_TYPE_MASK (_AT(pmdval_t, 3) << 0) | 36 | #define PMD_TYPE_MASK (_AT(pmdval_t, 3) << 0) |
| 31 | #define PMD_TYPE_FAULT (_AT(pmdval_t, 0) << 0) | 37 | #define PMD_TYPE_FAULT (_AT(pmdval_t, 0) << 0) |
| 32 | #define PMD_TYPE_TABLE (_AT(pmdval_t, 3) << 0) | 38 | #define PMD_TYPE_TABLE (_AT(pmdval_t, 3) << 0) |
| 33 | #define PMD_TYPE_SECT (_AT(pmdval_t, 1) << 0) | 39 | #define PMD_TYPE_SECT (_AT(pmdval_t, 1) << 0) |
| 40 | #define PMD_TABLE_BIT (_AT(pmdval_t, 1) << 1) | ||
| 34 | 41 | ||
| 35 | /* | 42 | /* |
| 36 | * Section | 43 | * Section |
| 37 | */ | 44 | */ |
| 38 | #define PMD_SECT_USER (_AT(pteval_t, 1) << 6) /* AP[1] */ | 45 | #define PMD_SECT_VALID (_AT(pmdval_t, 1) << 0) |
| 46 | #define PMD_SECT_PROT_NONE (_AT(pmdval_t, 1) << 2) | ||
| 47 | #define PMD_SECT_USER (_AT(pmdval_t, 1) << 6) /* AP[1] */ | ||
| 48 | #define PMD_SECT_RDONLY (_AT(pmdval_t, 1) << 7) /* AP[2] */ | ||
| 39 | #define PMD_SECT_S (_AT(pmdval_t, 3) << 8) | 49 | #define PMD_SECT_S (_AT(pmdval_t, 3) << 8) |
| 40 | #define PMD_SECT_AF (_AT(pmdval_t, 1) << 10) | 50 | #define PMD_SECT_AF (_AT(pmdval_t, 1) << 10) |
| 41 | #define PMD_SECT_NG (_AT(pmdval_t, 1) << 11) | 51 | #define PMD_SECT_NG (_AT(pmdval_t, 1) << 11) |
| @@ -54,6 +64,7 @@ | |||
| 54 | #define PTE_TYPE_MASK (_AT(pteval_t, 3) << 0) | 64 | #define PTE_TYPE_MASK (_AT(pteval_t, 3) << 0) |
| 55 | #define PTE_TYPE_FAULT (_AT(pteval_t, 0) << 0) | 65 | #define PTE_TYPE_FAULT (_AT(pteval_t, 0) << 0) |
| 56 | #define PTE_TYPE_PAGE (_AT(pteval_t, 3) << 0) | 66 | #define PTE_TYPE_PAGE (_AT(pteval_t, 3) << 0) |
| 67 | #define PTE_TABLE_BIT (_AT(pteval_t, 1) << 1) | ||
| 57 | #define PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */ | 68 | #define PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */ |
| 58 | #define PTE_RDONLY (_AT(pteval_t, 1) << 7) /* AP[2] */ | 69 | #define PTE_RDONLY (_AT(pteval_t, 1) << 7) /* AP[2] */ |
| 59 | #define PTE_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */ | 70 | #define PTE_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */ |
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 5588e8ad9762..065e58f01b1e 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h | |||
| @@ -25,8 +25,8 @@ | |||
| 25 | * Software defined PTE bits definition. | 25 | * Software defined PTE bits definition. |
| 26 | */ | 26 | */ |
| 27 | #define PTE_VALID (_AT(pteval_t, 1) << 0) | 27 | #define PTE_VALID (_AT(pteval_t, 1) << 0) |
| 28 | #define PTE_PROT_NONE (_AT(pteval_t, 1) << 1) /* only when !PTE_VALID */ | 28 | #define PTE_PROT_NONE (_AT(pteval_t, 1) << 2) /* only when !PTE_VALID */ |
| 29 | #define PTE_FILE (_AT(pteval_t, 1) << 2) /* only when !pte_present() */ | 29 | #define PTE_FILE (_AT(pteval_t, 1) << 3) /* only when !pte_present() */ |
| 30 | #define PTE_DIRTY (_AT(pteval_t, 1) << 55) | 30 | #define PTE_DIRTY (_AT(pteval_t, 1) << 55) |
| 31 | #define PTE_SPECIAL (_AT(pteval_t, 1) << 56) | 31 | #define PTE_SPECIAL (_AT(pteval_t, 1) << 56) |
| 32 | 32 | ||
| @@ -66,7 +66,7 @@ extern pgprot_t pgprot_default; | |||
| 66 | 66 | ||
| 67 | #define _MOD_PROT(p, b) __pgprot_modify(p, 0, b) | 67 | #define _MOD_PROT(p, b) __pgprot_modify(p, 0, b) |
| 68 | 68 | ||
| 69 | #define PAGE_NONE __pgprot_modify(pgprot_default, PTE_TYPE_MASK, PTE_PROT_NONE) | 69 | #define PAGE_NONE __pgprot_modify(pgprot_default, PTE_TYPE_MASK, PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN) |
| 70 | #define PAGE_SHARED _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) | 70 | #define PAGE_SHARED _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) |
| 71 | #define PAGE_SHARED_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN) | 71 | #define PAGE_SHARED_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN) |
| 72 | #define PAGE_COPY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY) | 72 | #define PAGE_COPY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY) |
| @@ -82,7 +82,7 @@ extern pgprot_t pgprot_default; | |||
| 82 | #define PAGE_S2 __pgprot_modify(pgprot_default, PTE_S2_MEMATTR_MASK, PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY) | 82 | #define PAGE_S2 __pgprot_modify(pgprot_default, PTE_S2_MEMATTR_MASK, PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY) |
| 83 | #define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDWR | PTE_UXN) | 83 | #define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDWR | PTE_UXN) |
| 84 | 84 | ||
| 85 | #define __PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE) | 85 | #define __PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN) |
| 86 | #define __PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) | 86 | #define __PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) |
| 87 | #define __PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) | 87 | #define __PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) |
| 88 | #define __PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY) | 88 | #define __PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY) |
| @@ -179,12 +179,76 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, | |||
| 179 | /* | 179 | /* |
| 180 | * Huge pte definitions. | 180 | * Huge pte definitions. |
| 181 | */ | 181 | */ |
| 182 | #define pte_huge(pte) ((pte_val(pte) & PTE_TYPE_MASK) == PTE_TYPE_HUGEPAGE) | 182 | #define pte_huge(pte) (!(pte_val(pte) & PTE_TABLE_BIT)) |
| 183 | #define pte_mkhuge(pte) (__pte((pte_val(pte) & ~PTE_TYPE_MASK) | PTE_TYPE_HUGEPAGE)) | 183 | #define pte_mkhuge(pte) (__pte(pte_val(pte) & ~PTE_TABLE_BIT)) |
| 184 | |||
| 185 | /* | ||
| 186 | * Hugetlb definitions. | ||
| 187 | */ | ||
| 188 | #define HUGE_MAX_HSTATE 2 | ||
| 189 | #define HPAGE_SHIFT PMD_SHIFT | ||
| 190 | #define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT) | ||
| 191 | #define HPAGE_MASK (~(HPAGE_SIZE - 1)) | ||
| 192 | #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) | ||
| 184 | 193 | ||
| 185 | #define __HAVE_ARCH_PTE_SPECIAL | 194 | #define __HAVE_ARCH_PTE_SPECIAL |
| 186 | 195 | ||
| 187 | /* | 196 | /* |
| 197 | * Software PMD bits for THP | ||
| 198 | */ | ||
| 199 | |||
| 200 | #define PMD_SECT_DIRTY (_AT(pmdval_t, 1) << 55) | ||
| 201 | #define PMD_SECT_SPLITTING (_AT(pmdval_t, 1) << 57) | ||
| 202 | |||
| 203 | /* | ||
| 204 | * THP definitions. | ||
| 205 | */ | ||
| 206 | #define pmd_young(pmd) (pmd_val(pmd) & PMD_SECT_AF) | ||
| 207 | |||
| 208 | #define __HAVE_ARCH_PMD_WRITE | ||
| 209 | #define pmd_write(pmd) (!(pmd_val(pmd) & PMD_SECT_RDONLY)) | ||
| 210 | |||
| 211 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
| 212 | #define pmd_trans_huge(pmd) (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT)) | ||
| 213 | #define pmd_trans_splitting(pmd) (pmd_val(pmd) & PMD_SECT_SPLITTING) | ||
| 214 | #endif | ||
| 215 | |||
| 216 | #define PMD_BIT_FUNC(fn,op) \ | ||
| 217 | static inline pmd_t pmd_##fn(pmd_t pmd) { pmd_val(pmd) op; return pmd; } | ||
| 218 | |||
| 219 | PMD_BIT_FUNC(wrprotect, |= PMD_SECT_RDONLY); | ||
| 220 | PMD_BIT_FUNC(mkold, &= ~PMD_SECT_AF); | ||
| 221 | PMD_BIT_FUNC(mksplitting, |= PMD_SECT_SPLITTING); | ||
| 222 | PMD_BIT_FUNC(mkwrite, &= ~PMD_SECT_RDONLY); | ||
| 223 | PMD_BIT_FUNC(mkdirty, |= PMD_SECT_DIRTY); | ||
| 224 | PMD_BIT_FUNC(mkyoung, |= PMD_SECT_AF); | ||
| 225 | PMD_BIT_FUNC(mknotpresent, &= ~PMD_TYPE_MASK); | ||
| 226 | |||
| 227 | #define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT)) | ||
| 228 | |||
| 229 | #define pmd_pfn(pmd) (((pmd_val(pmd) & PMD_MASK) & PHYS_MASK) >> PAGE_SHIFT) | ||
| 230 | #define pfn_pmd(pfn,prot) (__pmd(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))) | ||
| 231 | #define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot) | ||
| 232 | |||
| 233 | #define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK)) | ||
| 234 | |||
| 235 | static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) | ||
| 236 | { | ||
| 237 | const pmdval_t mask = PMD_SECT_USER | PMD_SECT_PXN | PMD_SECT_UXN | | ||
| 238 | PMD_SECT_RDONLY | PMD_SECT_PROT_NONE | | ||
| 239 | PMD_SECT_VALID; | ||
| 240 | pmd_val(pmd) = (pmd_val(pmd) & ~mask) | (pgprot_val(newprot) & mask); | ||
| 241 | return pmd; | ||
| 242 | } | ||
| 243 | |||
| 244 | #define set_pmd_at(mm, addr, pmdp, pmd) set_pmd(pmdp, pmd) | ||
| 245 | |||
| 246 | static inline int has_transparent_hugepage(void) | ||
| 247 | { | ||
| 248 | return 1; | ||
| 249 | } | ||
| 250 | |||
| 251 | /* | ||
| 188 | * Mark the prot value as uncacheable and unbufferable. | 252 | * Mark the prot value as uncacheable and unbufferable. |
| 189 | */ | 253 | */ |
| 190 | #define pgprot_noncached(prot) \ | 254 | #define pgprot_noncached(prot) \ |
| @@ -293,12 +357,12 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; | |||
| 293 | 357 | ||
| 294 | /* | 358 | /* |
| 295 | * Encode and decode a swap entry: | 359 | * Encode and decode a swap entry: |
| 296 | * bits 0-1: present (must be zero) | 360 | * bits 0, 2: present (must both be zero) |
| 297 | * bit 2: PTE_FILE | 361 | * bit 3: PTE_FILE |
| 298 | * bits 3-8: swap type | 362 | * bits 4-9: swap type |
| 299 | * bits 9-63: swap offset | 363 | * bits 10-63: swap offset |
| 300 | */ | 364 | */ |
| 301 | #define __SWP_TYPE_SHIFT 3 | 365 | #define __SWP_TYPE_SHIFT 4 |
| 302 | #define __SWP_TYPE_BITS 6 | 366 | #define __SWP_TYPE_BITS 6 |
| 303 | #define __SWP_TYPE_MASK ((1 << __SWP_TYPE_BITS) - 1) | 367 | #define __SWP_TYPE_MASK ((1 << __SWP_TYPE_BITS) - 1) |
| 304 | #define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT) | 368 | #define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT) |
| @@ -318,15 +382,15 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; | |||
| 318 | 382 | ||
| 319 | /* | 383 | /* |
| 320 | * Encode and decode a file entry: | 384 | * Encode and decode a file entry: |
| 321 | * bits 0-1: present (must be zero) | 385 | * bits 0, 2: present (must both be zero) |
| 322 | * bit 2: PTE_FILE | 386 | * bit 3: PTE_FILE |
| 323 | * bits 3-63: file offset / PAGE_SIZE | 387 | * bits 4-63: file offset / PAGE_SIZE |
| 324 | */ | 388 | */ |
| 325 | #define pte_file(pte) (pte_val(pte) & PTE_FILE) | 389 | #define pte_file(pte) (pte_val(pte) & PTE_FILE) |
| 326 | #define pte_to_pgoff(x) (pte_val(x) >> 3) | 390 | #define pte_to_pgoff(x) (pte_val(x) >> 4) |
| 327 | #define pgoff_to_pte(x) __pte(((x) << 3) | PTE_FILE) | 391 | #define pgoff_to_pte(x) __pte(((x) << 4) | PTE_FILE) |
| 328 | 392 | ||
| 329 | #define PTE_FILE_MAX_BITS 61 | 393 | #define PTE_FILE_MAX_BITS 60 |
| 330 | 394 | ||
| 331 | extern int kern_addr_valid(unsigned long addr); | 395 | extern int kern_addr_valid(unsigned long addr); |
| 332 | 396 | ||
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index 654f0968030b..46b3beb4b773 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h | |||
| @@ -187,4 +187,10 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, | |||
| 187 | 187 | ||
| 188 | #define tlb_migrate_finish(mm) do { } while (0) | 188 | #define tlb_migrate_finish(mm) do { } while (0) |
| 189 | 189 | ||
| 190 | static inline void | ||
| 191 | tlb_remove_pmd_tlb_entry(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr) | ||
| 192 | { | ||
| 193 | tlb_add_flush(tlb, addr); | ||
| 194 | } | ||
| 195 | |||
| 190 | #endif | 196 | #endif |
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 122d6320f745..8b482035cfc2 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h | |||
| @@ -117,6 +117,8 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, | |||
| 117 | dsb(); | 117 | dsb(); |
| 118 | } | 118 | } |
| 119 | 119 | ||
| 120 | #define update_mmu_cache_pmd(vma, address, pmd) do { } while (0) | ||
| 121 | |||
| 120 | #endif | 122 | #endif |
| 121 | 123 | ||
| 122 | #endif | 124 | #endif |
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index 3140a2abcdc2..b51d36401d83 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile | |||
| @@ -2,3 +2,4 @@ obj-y := dma-mapping.o extable.o fault.o init.o \ | |||
| 2 | cache.o copypage.o flush.o \ | 2 | cache.o copypage.o flush.o \ |
| 3 | ioremap.o mmap.o pgd.o mmu.o \ | 3 | ioremap.o mmap.o pgd.o mmu.o \ |
| 4 | context.o tlb.o proc.o | 4 | context.o tlb.o proc.o |
| 5 | obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o | ||
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 1426468b77f3..0ecac8980aae 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c | |||
| @@ -365,17 +365,6 @@ static int __kprobes do_translation_fault(unsigned long addr, | |||
| 365 | } | 365 | } |
| 366 | 366 | ||
| 367 | /* | 367 | /* |
| 368 | * Some section permission faults need to be handled gracefully. They can | ||
| 369 | * happen due to a __{get,put}_user during an oops. | ||
| 370 | */ | ||
| 371 | static int do_sect_fault(unsigned long addr, unsigned int esr, | ||
| 372 | struct pt_regs *regs) | ||
| 373 | { | ||
| 374 | do_bad_area(addr, esr, regs); | ||
| 375 | return 0; | ||
| 376 | } | ||
| 377 | |||
| 378 | /* | ||
| 379 | * This abort handler always returns "fault". | 368 | * This abort handler always returns "fault". |
| 380 | */ | 369 | */ |
| 381 | static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs) | 370 | static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs) |
| @@ -398,12 +387,12 @@ static struct fault_info { | |||
| 398 | { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 2 translation fault" }, | 387 | { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 2 translation fault" }, |
| 399 | { do_page_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" }, | 388 | { do_page_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" }, |
| 400 | { do_bad, SIGBUS, 0, "reserved access flag fault" }, | 389 | { do_bad, SIGBUS, 0, "reserved access flag fault" }, |
| 401 | { do_bad, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" }, | 390 | { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" }, |
| 402 | { do_bad, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" }, | 391 | { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" }, |
| 403 | { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 access flag fault" }, | 392 | { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 access flag fault" }, |
| 404 | { do_bad, SIGBUS, 0, "reserved permission fault" }, | 393 | { do_bad, SIGBUS, 0, "reserved permission fault" }, |
| 405 | { do_bad, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" }, | 394 | { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" }, |
| 406 | { do_sect_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" }, | 395 | { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" }, |
| 407 | { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" }, | 396 | { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" }, |
| 408 | { do_bad, SIGBUS, 0, "synchronous external abort" }, | 397 | { do_bad, SIGBUS, 0, "synchronous external abort" }, |
| 409 | { do_bad, SIGBUS, 0, "asynchronous external abort" }, | 398 | { do_bad, SIGBUS, 0, "asynchronous external abort" }, |
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c new file mode 100644 index 000000000000..2fc8258bab2d --- /dev/null +++ b/arch/arm64/mm/hugetlbpage.c | |||
| @@ -0,0 +1,70 @@ | |||
| 1 | /* | ||
| 2 | * arch/arm64/mm/hugetlbpage.c | ||
| 3 | * | ||
| 4 | * Copyright (C) 2013 Linaro Ltd. | ||
| 5 | * | ||
| 6 | * Based on arch/x86/mm/hugetlbpage.c. | ||
| 7 | * | ||
| 8 | * This program is free software; you can redistribute it and/or modify | ||
| 9 | * it under the terms of the GNU General Public License version 2 as | ||
| 10 | * published by the Free Software Foundation. | ||
| 11 | * | ||
| 12 | * This program is distributed in the hope that it will be useful, | ||
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 15 | * GNU General Public License for more details. | ||
| 16 | * | ||
| 17 | * You should have received a copy of the GNU General Public License | ||
| 18 | * along with this program; if not, write to the Free Software | ||
| 19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
| 20 | */ | ||
| 21 | |||
| 22 | #include <linux/init.h> | ||
| 23 | #include <linux/fs.h> | ||
| 24 | #include <linux/mm.h> | ||
| 25 | #include <linux/hugetlb.h> | ||
| 26 | #include <linux/pagemap.h> | ||
| 27 | #include <linux/err.h> | ||
| 28 | #include <linux/sysctl.h> | ||
| 29 | #include <asm/mman.h> | ||
| 30 | #include <asm/tlb.h> | ||
| 31 | #include <asm/tlbflush.h> | ||
| 32 | #include <asm/pgalloc.h> | ||
| 33 | |||
| 34 | #ifndef CONFIG_ARCH_WANT_HUGE_PMD_SHARE | ||
| 35 | int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) | ||
| 36 | { | ||
| 37 | return 0; | ||
| 38 | } | ||
| 39 | #endif | ||
| 40 | |||
| 41 | struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, | ||
| 42 | int write) | ||
| 43 | { | ||
| 44 | return ERR_PTR(-EINVAL); | ||
| 45 | } | ||
| 46 | |||
| 47 | int pmd_huge(pmd_t pmd) | ||
| 48 | { | ||
| 49 | return !(pmd_val(pmd) & PMD_TABLE_BIT); | ||
| 50 | } | ||
| 51 | |||
| 52 | int pud_huge(pud_t pud) | ||
| 53 | { | ||
| 54 | return !(pud_val(pud) & PUD_TABLE_BIT); | ||
| 55 | } | ||
| 56 | |||
| 57 | static __init int setup_hugepagesz(char *opt) | ||
| 58 | { | ||
| 59 | unsigned long ps = memparse(opt, &opt); | ||
| 60 | if (ps == PMD_SIZE) { | ||
| 61 | hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); | ||
| 62 | } else if (ps == PUD_SIZE) { | ||
| 63 | hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); | ||
| 64 | } else { | ||
| 65 | pr_err("hugepagesz: Unsupported page size %lu M\n", ps >> 20); | ||
| 66 | return 0; | ||
| 67 | } | ||
| 68 | return 1; | ||
| 69 | } | ||
| 70 | __setup("hugepagesz=", setup_hugepagesz); | ||
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 80a369eab637..a8d1059b91b2 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c | |||
| @@ -297,6 +297,16 @@ static void __init map_mem(void) | |||
| 297 | { | 297 | { |
| 298 | struct memblock_region *reg; | 298 | struct memblock_region *reg; |
| 299 | 299 | ||
| 300 | /* | ||
| 301 | * Temporarily limit the memblock range. We need to do this as | ||
| 302 | * create_mapping requires puds, pmds and ptes to be allocated from | ||
| 303 | * memory addressable from the initial direct kernel mapping. | ||
| 304 | * | ||
| 305 | * The initial direct kernel mapping, located at swapper_pg_dir, | ||
| 306 | * gives us PGDIR_SIZE memory starting from PHYS_OFFSET (aligned). | ||
| 307 | */ | ||
| 308 | memblock_set_current_limit((PHYS_OFFSET & PGDIR_MASK) + PGDIR_SIZE); | ||
| 309 | |||
| 300 | /* map all the memory banks */ | 310 | /* map all the memory banks */ |
| 301 | for_each_memblock(memory, reg) { | 311 | for_each_memblock(memory, reg) { |
| 302 | phys_addr_t start = reg->base; | 312 | phys_addr_t start = reg->base; |
| @@ -307,6 +317,9 @@ static void __init map_mem(void) | |||
| 307 | 317 | ||
| 308 | create_mapping(start, __phys_to_virt(start), end - start); | 318 | create_mapping(start, __phys_to_virt(start), end - start); |
| 309 | } | 319 | } |
| 320 | |||
| 321 | /* Limit no longer required. */ | ||
| 322 | memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); | ||
| 310 | } | 323 | } |
| 311 | 324 | ||
| 312 | /* | 325 | /* |
| @@ -317,12 +330,6 @@ void __init paging_init(void) | |||
| 317 | { | 330 | { |
| 318 | void *zero_page; | 331 | void *zero_page; |
| 319 | 332 | ||
| 320 | /* | ||
| 321 | * Maximum PGDIR_SIZE addressable via the initial direct kernel | ||
| 322 | * mapping in swapper_pg_dir. | ||
| 323 | */ | ||
| 324 | memblock_set_current_limit((PHYS_OFFSET & PGDIR_MASK) + PGDIR_SIZE); | ||
| 325 | |||
| 326 | init_mem_pgprot(); | 333 | init_mem_pgprot(); |
| 327 | map_mem(); | 334 | map_mem(); |
| 328 | 335 | ||
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 685692c94f05..191c4e34722d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
| @@ -207,6 +207,12 @@ config ARCH_HIBERNATION_POSSIBLE | |||
| 207 | config ARCH_SUSPEND_POSSIBLE | 207 | config ARCH_SUSPEND_POSSIBLE |
| 208 | def_bool y | 208 | def_bool y |
| 209 | 209 | ||
| 210 | config ARCH_WANT_HUGE_PMD_SHARE | ||
| 211 | def_bool y | ||
| 212 | |||
| 213 | config ARCH_WANT_GENERAL_HUGETLB | ||
| 214 | def_bool y | ||
| 215 | |||
| 210 | config ZONE_DMA32 | 216 | config ZONE_DMA32 |
| 211 | bool | 217 | bool |
| 212 | default X86_64 | 218 | default X86_64 |
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index ae1aa71d0115..7e73e8c69096 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c | |||
| @@ -16,169 +16,6 @@ | |||
| 16 | #include <asm/tlbflush.h> | 16 | #include <asm/tlbflush.h> |
| 17 | #include <asm/pgalloc.h> | 17 | #include <asm/pgalloc.h> |
| 18 | 18 | ||
| 19 | static unsigned long page_table_shareable(struct vm_area_struct *svma, | ||
| 20 | struct vm_area_struct *vma, | ||
| 21 | unsigned long addr, pgoff_t idx) | ||
| 22 | { | ||
| 23 | unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) + | ||
| 24 | svma->vm_start; | ||
| 25 | unsigned long sbase = saddr & PUD_MASK; | ||
| 26 | unsigned long s_end = sbase + PUD_SIZE; | ||
| 27 | |||
| 28 | /* Allow segments to share if only one is marked locked */ | ||
| 29 | unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED; | ||
| 30 | unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED; | ||
| 31 | |||
| 32 | /* | ||
| 33 | * match the virtual addresses, permission and the alignment of the | ||
| 34 | * page table page. | ||
| 35 | */ | ||
| 36 | if (pmd_index(addr) != pmd_index(saddr) || | ||
| 37 | vm_flags != svm_flags || | ||
| 38 | sbase < svma->vm_start || svma->vm_end < s_end) | ||
| 39 | return 0; | ||
| 40 | |||
| 41 | return saddr; | ||
| 42 | } | ||
| 43 | |||
| 44 | static int vma_shareable(struct vm_area_struct *vma, unsigned long addr) | ||
| 45 | { | ||
| 46 | unsigned long base = addr & PUD_MASK; | ||
| 47 | unsigned long end = base + PUD_SIZE; | ||
| 48 | |||
| 49 | /* | ||
| 50 | * check on proper vm_flags and page table alignment | ||
| 51 | */ | ||
| 52 | if (vma->vm_flags & VM_MAYSHARE && | ||
| 53 | vma->vm_start <= base && end <= vma->vm_end) | ||
| 54 | return 1; | ||
| 55 | return 0; | ||
| 56 | } | ||
| 57 | |||
| 58 | /* | ||
| 59 | * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc() | ||
| 60 | * and returns the corresponding pte. While this is not necessary for the | ||
| 61 | * !shared pmd case because we can allocate the pmd later as well, it makes the | ||
| 62 | * code much cleaner. pmd allocation is essential for the shared case because | ||
| 63 | * pud has to be populated inside the same i_mmap_mutex section - otherwise | ||
| 64 | * racing tasks could either miss the sharing (see huge_pte_offset) or select a | ||
| 65 | * bad pmd for sharing. | ||
| 66 | */ | ||
| 67 | static pte_t * | ||
| 68 | huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) | ||
| 69 | { | ||
| 70 | struct vm_area_struct *vma = find_vma(mm, addr); | ||
| 71 | struct address_space *mapping = vma->vm_file->f_mapping; | ||
| 72 | pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + | ||
| 73 | vma->vm_pgoff; | ||
| 74 | struct vm_area_struct *svma; | ||
| 75 | unsigned long saddr; | ||
| 76 | pte_t *spte = NULL; | ||
| 77 | pte_t *pte; | ||
| 78 | |||
| 79 | if (!vma_shareable(vma, addr)) | ||
| 80 | return (pte_t *)pmd_alloc(mm, pud, addr); | ||
| 81 | |||
| 82 | mutex_lock(&mapping->i_mmap_mutex); | ||
| 83 | vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) { | ||
| 84 | if (svma == vma) | ||
| 85 | continue; | ||
| 86 | |||
| 87 | saddr = page_table_shareable(svma, vma, addr, idx); | ||
| 88 | if (saddr) { | ||
| 89 | spte = huge_pte_offset(svma->vm_mm, saddr); | ||
| 90 | if (spte) { | ||
| 91 | get_page(virt_to_page(spte)); | ||
| 92 | break; | ||
| 93 | } | ||
| 94 | } | ||
| 95 | } | ||
| 96 | |||
| 97 | if (!spte) | ||
| 98 | goto out; | ||
| 99 | |||
| 100 | spin_lock(&mm->page_table_lock); | ||
| 101 | if (pud_none(*pud)) | ||
| 102 | pud_populate(mm, pud, (pmd_t *)((unsigned long)spte & PAGE_MASK)); | ||
| 103 | else | ||
| 104 | put_page(virt_to_page(spte)); | ||
| 105 | spin_unlock(&mm->page_table_lock); | ||
| 106 | out: | ||
| 107 | pte = (pte_t *)pmd_alloc(mm, pud, addr); | ||
| 108 | mutex_unlock(&mapping->i_mmap_mutex); | ||
| 109 | return pte; | ||
| 110 | } | ||
| 111 | |||
| 112 | /* | ||
| 113 | * unmap huge page backed by shared pte. | ||
| 114 | * | ||
| 115 | * Hugetlb pte page is ref counted at the time of mapping. If pte is shared | ||
| 116 | * indicated by page_count > 1, unmap is achieved by clearing pud and | ||
| 117 | * decrementing the ref count. If count == 1, the pte page is not shared. | ||
| 118 | * | ||
| 119 | * called with vma->vm_mm->page_table_lock held. | ||
| 120 | * | ||
| 121 | * returns: 1 successfully unmapped a shared pte page | ||
| 122 | * 0 the underlying pte page is not shared, or it is the last user | ||
| 123 | */ | ||
| 124 | int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) | ||
| 125 | { | ||
| 126 | pgd_t *pgd = pgd_offset(mm, *addr); | ||
| 127 | pud_t *pud = pud_offset(pgd, *addr); | ||
| 128 | |||
| 129 | BUG_ON(page_count(virt_to_page(ptep)) == 0); | ||
| 130 | if (page_count(virt_to_page(ptep)) == 1) | ||
| 131 | return 0; | ||
| 132 | |||
| 133 | pud_clear(pud); | ||
| 134 | put_page(virt_to_page(ptep)); | ||
| 135 | *addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE; | ||
| 136 | return 1; | ||
| 137 | } | ||
| 138 | |||
| 139 | pte_t *huge_pte_alloc(struct mm_struct *mm, | ||
| 140 | unsigned long addr, unsigned long sz) | ||
| 141 | { | ||
| 142 | pgd_t *pgd; | ||
| 143 | pud_t *pud; | ||
| 144 | pte_t *pte = NULL; | ||
| 145 | |||
| 146 | pgd = pgd_offset(mm, addr); | ||
| 147 | pud = pud_alloc(mm, pgd, addr); | ||
| 148 | if (pud) { | ||
| 149 | if (sz == PUD_SIZE) { | ||
| 150 | pte = (pte_t *)pud; | ||
| 151 | } else { | ||
| 152 | BUG_ON(sz != PMD_SIZE); | ||
| 153 | if (pud_none(*pud)) | ||
| 154 | pte = huge_pmd_share(mm, addr, pud); | ||
| 155 | else | ||
| 156 | pte = (pte_t *)pmd_alloc(mm, pud, addr); | ||
| 157 | } | ||
| 158 | } | ||
| 159 | BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte)); | ||
| 160 | |||
| 161 | return pte; | ||
| 162 | } | ||
| 163 | |||
| 164 | pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) | ||
| 165 | { | ||
| 166 | pgd_t *pgd; | ||
| 167 | pud_t *pud; | ||
| 168 | pmd_t *pmd = NULL; | ||
| 169 | |||
| 170 | pgd = pgd_offset(mm, addr); | ||
| 171 | if (pgd_present(*pgd)) { | ||
| 172 | pud = pud_offset(pgd, addr); | ||
| 173 | if (pud_present(*pud)) { | ||
| 174 | if (pud_large(*pud)) | ||
| 175 | return (pte_t *)pud; | ||
| 176 | pmd = pmd_offset(pud, addr); | ||
| 177 | } | ||
| 178 | } | ||
| 179 | return (pte_t *) pmd; | ||
| 180 | } | ||
| 181 | |||
| 182 | #if 0 /* This is just for testing */ | 19 | #if 0 /* This is just for testing */ |
| 183 | struct page * | 20 | struct page * |
| 184 | follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) | 21 | follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) |
| @@ -240,30 +77,6 @@ int pud_huge(pud_t pud) | |||
| 240 | return !!(pud_val(pud) & _PAGE_PSE); | 77 | return !!(pud_val(pud) & _PAGE_PSE); |
| 241 | } | 78 | } |
| 242 | 79 | ||
| 243 | struct page * | ||
| 244 | follow_huge_pmd(struct mm_struct *mm, unsigned long address, | ||
| 245 | pmd_t *pmd, int write) | ||
| 246 | { | ||
| 247 | struct page *page; | ||
| 248 | |||
| 249 | page = pte_page(*(pte_t *)pmd); | ||
| 250 | if (page) | ||
| 251 | page += ((address & ~PMD_MASK) >> PAGE_SHIFT); | ||
| 252 | return page; | ||
| 253 | } | ||
| 254 | |||
| 255 | struct page * | ||
| 256 | follow_huge_pud(struct mm_struct *mm, unsigned long address, | ||
| 257 | pud_t *pud, int write) | ||
| 258 | { | ||
| 259 | struct page *page; | ||
| 260 | |||
| 261 | page = pte_page(*(pte_t *)pud); | ||
| 262 | if (page) | ||
| 263 | page += ((address & ~PUD_MASK) >> PAGE_SHIFT); | ||
| 264 | return page; | ||
| 265 | } | ||
| 266 | |||
| 267 | #endif | 80 | #endif |
| 268 | 81 | ||
| 269 | /* x86_64 also uses this file */ | 82 | /* x86_64 also uses this file */ |
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 528454c2caa9..26ee56c80dc7 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h | |||
| @@ -123,7 +123,7 @@ extern void __split_huge_page_pmd(struct vm_area_struct *vma, | |||
| 123 | } while (0) | 123 | } while (0) |
| 124 | extern void split_huge_page_pmd_mm(struct mm_struct *mm, unsigned long address, | 124 | extern void split_huge_page_pmd_mm(struct mm_struct *mm, unsigned long address, |
| 125 | pmd_t *pmd); | 125 | pmd_t *pmd); |
| 126 | #if HPAGE_PMD_ORDER > MAX_ORDER | 126 | #if HPAGE_PMD_ORDER >= MAX_ORDER |
| 127 | #error "hugepages can't be allocated by the buddy allocator" | 127 | #error "hugepages can't be allocated by the buddy allocator" |
| 128 | #endif | 128 | #endif |
| 129 | extern int hugepage_madvise(struct vm_area_struct *vma, | 129 | extern int hugepage_madvise(struct vm_area_struct *vma, |
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 6b4890fa57e7..981546ad231c 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h | |||
| @@ -69,6 +69,10 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed); | |||
| 69 | int dequeue_hwpoisoned_huge_page(struct page *page); | 69 | int dequeue_hwpoisoned_huge_page(struct page *page); |
| 70 | void copy_huge_page(struct page *dst, struct page *src); | 70 | void copy_huge_page(struct page *dst, struct page *src); |
| 71 | 71 | ||
| 72 | #ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE | ||
| 73 | pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud); | ||
| 74 | #endif | ||
| 75 | |||
| 72 | extern unsigned long hugepages_treat_as_movable; | 76 | extern unsigned long hugepages_treat_as_movable; |
| 73 | extern const unsigned long hugetlb_zero, hugetlb_infinity; | 77 | extern const unsigned long hugetlb_zero, hugetlb_infinity; |
| 74 | extern int sysctl_hugetlb_shm_group; | 78 | extern int sysctl_hugetlb_shm_group; |
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index f8feeeca6686..63217261fd14 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
| @@ -2931,15 +2931,6 @@ out_mutex: | |||
| 2931 | return ret; | 2931 | return ret; |
| 2932 | } | 2932 | } |
| 2933 | 2933 | ||
| 2934 | /* Can be overridden by architectures */ | ||
| 2935 | __attribute__((weak)) struct page * | ||
| 2936 | follow_huge_pud(struct mm_struct *mm, unsigned long address, | ||
| 2937 | pud_t *pud, int write) | ||
| 2938 | { | ||
| 2939 | BUG(); | ||
| 2940 | return NULL; | ||
| 2941 | } | ||
| 2942 | |||
| 2943 | long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, | 2934 | long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, |
| 2944 | struct page **pages, struct vm_area_struct **vmas, | 2935 | struct page **pages, struct vm_area_struct **vmas, |
| 2945 | unsigned long *position, unsigned long *nr_pages, | 2936 | unsigned long *position, unsigned long *nr_pages, |
| @@ -3169,6 +3160,216 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed) | |||
| 3169 | hugetlb_acct_memory(h, -(chg - freed)); | 3160 | hugetlb_acct_memory(h, -(chg - freed)); |
| 3170 | } | 3161 | } |
| 3171 | 3162 | ||
| 3163 | #ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE | ||
| 3164 | static unsigned long page_table_shareable(struct vm_area_struct *svma, | ||
| 3165 | struct vm_area_struct *vma, | ||
| 3166 | unsigned long addr, pgoff_t idx) | ||
| 3167 | { | ||
| 3168 | unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) + | ||
| 3169 | svma->vm_start; | ||
| 3170 | unsigned long sbase = saddr & PUD_MASK; | ||
| 3171 | unsigned long s_end = sbase + PUD_SIZE; | ||
| 3172 | |||
| 3173 | /* Allow segments to share if only one is marked locked */ | ||
| 3174 | unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED; | ||
| 3175 | unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED; | ||
| 3176 | |||
| 3177 | /* | ||
| 3178 | * match the virtual addresses, permission and the alignment of the | ||
| 3179 | * page table page. | ||
| 3180 | */ | ||
| 3181 | if (pmd_index(addr) != pmd_index(saddr) || | ||
| 3182 | vm_flags != svm_flags || | ||
| 3183 | sbase < svma->vm_start || svma->vm_end < s_end) | ||
| 3184 | return 0; | ||
| 3185 | |||
| 3186 | return saddr; | ||
| 3187 | } | ||
| 3188 | |||
| 3189 | static int vma_shareable(struct vm_area_struct *vma, unsigned long addr) | ||
| 3190 | { | ||
| 3191 | unsigned long base = addr & PUD_MASK; | ||
| 3192 | unsigned long end = base + PUD_SIZE; | ||
| 3193 | |||
| 3194 | /* | ||
| 3195 | * check on proper vm_flags and page table alignment | ||
| 3196 | */ | ||
| 3197 | if (vma->vm_flags & VM_MAYSHARE && | ||
| 3198 | vma->vm_start <= base && end <= vma->vm_end) | ||
| 3199 | return 1; | ||
| 3200 | return 0; | ||
| 3201 | } | ||
| 3202 | |||
| 3203 | /* | ||
| 3204 | * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc() | ||
| 3205 | * and returns the corresponding pte. While this is not necessary for the | ||
| 3206 | * !shared pmd case because we can allocate the pmd later as well, it makes the | ||
| 3207 | * code much cleaner. pmd allocation is essential for the shared case because | ||
| 3208 | * pud has to be populated inside the same i_mmap_mutex section - otherwise | ||
| 3209 | * racing tasks could either miss the sharing (see huge_pte_offset) or select a | ||
| 3210 | * bad pmd for sharing. | ||
| 3211 | */ | ||
| 3212 | pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) | ||
| 3213 | { | ||
| 3214 | struct vm_area_struct *vma = find_vma(mm, addr); | ||
| 3215 | struct address_space *mapping = vma->vm_file->f_mapping; | ||
| 3216 | pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + | ||
| 3217 | vma->vm_pgoff; | ||
| 3218 | struct vm_area_struct *svma; | ||
| 3219 | unsigned long saddr; | ||
| 3220 | pte_t *spte = NULL; | ||
| 3221 | pte_t *pte; | ||
| 3222 | |||
| 3223 | if (!vma_shareable(vma, addr)) | ||
| 3224 | return (pte_t *)pmd_alloc(mm, pud, addr); | ||
| 3225 | |||
| 3226 | mutex_lock(&mapping->i_mmap_mutex); | ||
| 3227 | vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) { | ||
| 3228 | if (svma == vma) | ||
| 3229 | continue; | ||
| 3230 | |||
| 3231 | saddr = page_table_shareable(svma, vma, addr, idx); | ||
| 3232 | if (saddr) { | ||
| 3233 | spte = huge_pte_offset(svma->vm_mm, saddr); | ||
| 3234 | if (spte) { | ||
| 3235 | get_page(virt_to_page(spte)); | ||
| 3236 | break; | ||
| 3237 | } | ||
| 3238 | } | ||
| 3239 | } | ||
| 3240 | |||
| 3241 | if (!spte) | ||
| 3242 | goto out; | ||
| 3243 | |||
| 3244 | spin_lock(&mm->page_table_lock); | ||
| 3245 | if (pud_none(*pud)) | ||
| 3246 | pud_populate(mm, pud, | ||
| 3247 | (pmd_t *)((unsigned long)spte & PAGE_MASK)); | ||
| 3248 | else | ||
| 3249 | put_page(virt_to_page(spte)); | ||
| 3250 | spin_unlock(&mm->page_table_lock); | ||
| 3251 | out: | ||
| 3252 | pte = (pte_t *)pmd_alloc(mm, pud, addr); | ||
| 3253 | mutex_unlock(&mapping->i_mmap_mutex); | ||
| 3254 | return pte; | ||
| 3255 | } | ||
| 3256 | |||
| 3257 | /* | ||
| 3258 | * unmap huge page backed by shared pte. | ||
| 3259 | * | ||
| 3260 | * Hugetlb pte page is ref counted at the time of mapping. If pte is shared | ||
| 3261 | * indicated by page_count > 1, unmap is achieved by clearing pud and | ||
| 3262 | * decrementing the ref count. If count == 1, the pte page is not shared. | ||
| 3263 | * | ||
| 3264 | * called with vma->vm_mm->page_table_lock held. | ||
| 3265 | * | ||
| 3266 | * returns: 1 successfully unmapped a shared pte page | ||
| 3267 | * 0 the underlying pte page is not shared, or it is the last user | ||
| 3268 | */ | ||
| 3269 | int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) | ||
| 3270 | { | ||
| 3271 | pgd_t *pgd = pgd_offset(mm, *addr); | ||
| 3272 | pud_t *pud = pud_offset(pgd, *addr); | ||
| 3273 | |||
| 3274 | BUG_ON(page_count(virt_to_page(ptep)) == 0); | ||
| 3275 | if (page_count(virt_to_page(ptep)) == 1) | ||
| 3276 | return 0; | ||
| 3277 | |||
| 3278 | pud_clear(pud); | ||
| 3279 | put_page(virt_to_page(ptep)); | ||
| 3280 | *addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE; | ||
| 3281 | return 1; | ||
| 3282 | } | ||
| 3283 | #define want_pmd_share() (1) | ||
| 3284 | #else /* !CONFIG_ARCH_WANT_HUGE_PMD_SHARE */ | ||
| 3285 | pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) | ||
| 3286 | { | ||
| 3287 | return NULL; | ||
| 3288 | } | ||
| 3289 | #define want_pmd_share() (0) | ||
| 3290 | #endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */ | ||
| 3291 | |||
| 3292 | #ifdef CONFIG_ARCH_WANT_GENERAL_HUGETLB | ||
| 3293 | pte_t *huge_pte_alloc(struct mm_struct *mm, | ||
| 3294 | unsigned long addr, unsigned long sz) | ||
| 3295 | { | ||
| 3296 | pgd_t *pgd; | ||
| 3297 | pud_t *pud; | ||
| 3298 | pte_t *pte = NULL; | ||
| 3299 | |||
| 3300 | pgd = pgd_offset(mm, addr); | ||
| 3301 | pud = pud_alloc(mm, pgd, addr); | ||
| 3302 | if (pud) { | ||
| 3303 | if (sz == PUD_SIZE) { | ||
| 3304 | pte = (pte_t *)pud; | ||
| 3305 | } else { | ||
| 3306 | BUG_ON(sz != PMD_SIZE); | ||
| 3307 | if (want_pmd_share() && pud_none(*pud)) | ||
| 3308 | pte = huge_pmd_share(mm, addr, pud); | ||
| 3309 | else | ||
| 3310 | pte = (pte_t *)pmd_alloc(mm, pud, addr); | ||
| 3311 | } | ||
| 3312 | } | ||
| 3313 | BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte)); | ||
| 3314 | |||
| 3315 | return pte; | ||
| 3316 | } | ||
| 3317 | |||
| 3318 | pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) | ||
| 3319 | { | ||
| 3320 | pgd_t *pgd; | ||
| 3321 | pud_t *pud; | ||
| 3322 | pmd_t *pmd = NULL; | ||
| 3323 | |||
| 3324 | pgd = pgd_offset(mm, addr); | ||
| 3325 | if (pgd_present(*pgd)) { | ||
| 3326 | pud = pud_offset(pgd, addr); | ||
| 3327 | if (pud_present(*pud)) { | ||
| 3328 | if (pud_huge(*pud)) | ||
| 3329 | return (pte_t *)pud; | ||
| 3330 | pmd = pmd_offset(pud, addr); | ||
| 3331 | } | ||
| 3332 | } | ||
| 3333 | return (pte_t *) pmd; | ||
| 3334 | } | ||
| 3335 | |||
| 3336 | struct page * | ||
| 3337 | follow_huge_pmd(struct mm_struct *mm, unsigned long address, | ||
| 3338 | pmd_t *pmd, int write) | ||
| 3339 | { | ||
| 3340 | struct page *page; | ||
| 3341 | |||
| 3342 | page = pte_page(*(pte_t *)pmd); | ||
| 3343 | if (page) | ||
| 3344 | page += ((address & ~PMD_MASK) >> PAGE_SHIFT); | ||
| 3345 | return page; | ||
| 3346 | } | ||
| 3347 | |||
| 3348 | struct page * | ||
| 3349 | follow_huge_pud(struct mm_struct *mm, unsigned long address, | ||
| 3350 | pud_t *pud, int write) | ||
| 3351 | { | ||
| 3352 | struct page *page; | ||
| 3353 | |||
| 3354 | page = pte_page(*(pte_t *)pud); | ||
| 3355 | if (page) | ||
| 3356 | page += ((address & ~PUD_MASK) >> PAGE_SHIFT); | ||
| 3357 | return page; | ||
| 3358 | } | ||
| 3359 | |||
| 3360 | #else /* !CONFIG_ARCH_WANT_GENERAL_HUGETLB */ | ||
| 3361 | |||
| 3362 | /* Can be overridden by architectures */ | ||
| 3363 | __attribute__((weak)) struct page * | ||
| 3364 | follow_huge_pud(struct mm_struct *mm, unsigned long address, | ||
| 3365 | pud_t *pud, int write) | ||
| 3366 | { | ||
| 3367 | BUG(); | ||
| 3368 | return NULL; | ||
| 3369 | } | ||
| 3370 | |||
| 3371 | #endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */ | ||
| 3372 | |||
| 3172 | #ifdef CONFIG_MEMORY_FAILURE | 3373 | #ifdef CONFIG_MEMORY_FAILURE |
| 3173 | 3374 | ||
| 3174 | /* Should be called in hugetlb_lock */ | 3375 | /* Should be called in hugetlb_lock */ |
