author		Linus Torvalds <torvalds@linux-foundation.org>	2013-07-03 13:31:38 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-07-03 13:31:38 -0400
commit		1873e50028ce87dd9014049c86d71a898fa02166 (patch)
tree		046d37339278c3b88f0c248e9e6ff5fed804fe62 /mm/hugetlb.c
parent		fb2af0020a51709ad87ea8055c325d3fbde04158 (diff)
parent		aa729dccb5e8dfbc78e2e235b8754d6acccee731 (diff)
Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/cmarinas/linux-aarch64
Pull ARM64 updates from Catalin Marinas:
"Main features:
- KVM and Xen ports to AArch64
- Hugetlbfs and transparent huge pages support for arm64
- Applied Micro X-Gene Kconfig entry and dts file
- Cache flushing improvements
For arm64 huge pages support, there are x86 changes moving part of
arch/x86/mm/hugetlbpage.c into mm/hugetlb.c to be re-used by arm64"
* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/cmarinas/linux-aarch64: (66 commits)
arm64: Add initial DTS for APM X-Gene Storm SOC and APM Mustang board
arm64: Add defines for APM ARMv8 implementation
arm64: Enable APM X-Gene SOC family in the defconfig
arm64: Add Kconfig option for APM X-Gene SOC family
arm64/Makefile: provide vdso_install target
ARM64: mm: THP support.
ARM64: mm: Raise MAX_ORDER for 64KB pages and THP.
ARM64: mm: HugeTLB support.
ARM64: mm: Move PTE_PROT_NONE bit.
ARM64: mm: Make PAGE_NONE pages read only and no-execute.
ARM64: mm: Restore memblock limit when map_mem finished.
mm: thp: Correct the HPAGE_PMD_ORDER check.
x86: mm: Remove general hugetlb code from x86.
mm: hugetlb: Copy general hugetlb code from x86 to mm.
x86: mm: Remove x86 version of huge_pmd_share.
mm: hugetlb: Copy huge_pmd_share from x86 to mm.
arm64: KVM: document kernel object mappings in HYP
arm64: KVM: MAINTAINERS update
arm64: KVM: userspace API documentation
arm64: KVM: enable initialization of a 32bit vcpu
...
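Note on the x86-to-generic move mentioned in the merge message: architectures now opt into the shared implementation through the CONFIG_ARCH_WANT_GENERAL_HUGETLB and CONFIG_ARCH_WANT_HUGE_PMD_SHARE symbols that appear in the diff below. As a rough illustration of how the moved helpers fit together, here is a hedged sketch of the lookup-then-allocate pattern a hugetlb fault path would use; the wrapper function is hypothetical and not part of this diff.

#include <linux/mm.h>
#include <linux/hugetlb.h>

/*
 * Illustrative sketch only (not from this merge): how a fault path might
 * obtain the huge PTE slot through the helpers now living in mm/hugetlb.c.
 * huge_pte_alloc() is the entry point that can end up in huge_pmd_share()
 * when CONFIG_ARCH_WANT_HUGE_PMD_SHARE is enabled.
 */
static pte_t *example_get_huge_pte(struct mm_struct *mm, struct hstate *h,
				   unsigned long address)
{
	pte_t *ptep;

	/* Fast path: the huge PTE may already exist (possibly shared). */
	ptep = huge_pte_offset(mm, address & huge_page_mask(h));
	if (ptep)
		return ptep;

	/*
	 * Otherwise allocate it; for PMD-sized pages this may reuse a PMD
	 * page from another mapping of the same file range.
	 */
	return huge_pte_alloc(mm, address & huge_page_mask(h),
			      huge_page_size(h));
}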
Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r--	mm/hugetlb.c	219
1 file changed, 210 insertions, 9 deletions
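The PMD-sharing half of the diff ref-counts the shared PMD page, so teardown has to go through huge_pmd_unshare() before touching individual entries. Below is a hedged sketch of the intended calling convention, modelled on the unmap loop elsewhere in mm/hugetlb.c; the function and loop shown are hypothetical, not code from this diff.

#include <linux/mm.h>
#include <linux/hugetlb.h>

/*
 * Hypothetical sketch (not from this diff): huge_pmd_unshare() rewinds
 * 'address' when it detaches a shared PMD page, so the caller's next
 * iteration resumes at the following PUD boundary. Assumes
 * mm->page_table_lock is held, as the comment in the diff requires.
 */
static void example_unshare_range(struct mm_struct *mm, struct hstate *h,
				  unsigned long start, unsigned long end)
{
	unsigned long address;
	pte_t *ptep;

	for (address = start; address < end; address += huge_page_size(h)) {
		ptep = huge_pte_offset(mm, address);
		if (!ptep)
			continue;

		/*
		 * Shared PMD page: clear the PUD entry, drop one reference
		 * and skip the remainder of this PUD range, which other
		 * mappings still use.
		 */
		if (huge_pmd_unshare(mm, &address, ptep))
			continue;

		/* Not shared: per-PTE teardown would happen here. */
	}
}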
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 5cf99bf8cce2..aed085ad11a8 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2948,15 +2948,6 @@ out_mutex:
 	return ret;
 }
 
-/* Can be overriden by architectures */
-__attribute__((weak)) struct page *
-follow_huge_pud(struct mm_struct *mm, unsigned long address,
-		pud_t *pud, int write)
-{
-	BUG();
-	return NULL;
-}
-
 long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			 struct page **pages, struct vm_area_struct **vmas,
 			 unsigned long *position, unsigned long *nr_pages,
@@ -3186,6 +3177,216 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
 	hugetlb_acct_memory(h, -(chg - freed));
 }
 
+#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
+static unsigned long page_table_shareable(struct vm_area_struct *svma,
+				struct vm_area_struct *vma,
+				unsigned long addr, pgoff_t idx)
+{
+	unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) +
+				svma->vm_start;
+	unsigned long sbase = saddr & PUD_MASK;
+	unsigned long s_end = sbase + PUD_SIZE;
+
+	/* Allow segments to share if only one is marked locked */
+	unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED;
+	unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED;
+
+	/*
+	 * match the virtual addresses, permission and the alignment of the
+	 * page table page.
+	 */
+	if (pmd_index(addr) != pmd_index(saddr) ||
+	    vm_flags != svm_flags ||
+	    sbase < svma->vm_start || svma->vm_end < s_end)
+		return 0;
+
+	return saddr;
+}
+
+static int vma_shareable(struct vm_area_struct *vma, unsigned long addr)
+{
+	unsigned long base = addr & PUD_MASK;
+	unsigned long end = base + PUD_SIZE;
+
+	/*
+	 * check on proper vm_flags and page table alignment
+	 */
+	if (vma->vm_flags & VM_MAYSHARE &&
+	    vma->vm_start <= base && end <= vma->vm_end)
+		return 1;
+	return 0;
+}
+
+/*
+ * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
+ * and returns the corresponding pte. While this is not necessary for the
+ * !shared pmd case because we can allocate the pmd later as well, it makes the
+ * code much cleaner. pmd allocation is essential for the shared case because
+ * pud has to be populated inside the same i_mmap_mutex section - otherwise
+ * racing tasks could either miss the sharing (see huge_pte_offset) or select a
+ * bad pmd for sharing.
+ */
+pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
+{
+	struct vm_area_struct *vma = find_vma(mm, addr);
+	struct address_space *mapping = vma->vm_file->f_mapping;
+	pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) +
+			vma->vm_pgoff;
+	struct vm_area_struct *svma;
+	unsigned long saddr;
+	pte_t *spte = NULL;
+	pte_t *pte;
+
+	if (!vma_shareable(vma, addr))
+		return (pte_t *)pmd_alloc(mm, pud, addr);
+
+	mutex_lock(&mapping->i_mmap_mutex);
+	vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
+		if (svma == vma)
+			continue;
+
+		saddr = page_table_shareable(svma, vma, addr, idx);
+		if (saddr) {
+			spte = huge_pte_offset(svma->vm_mm, saddr);
+			if (spte) {
+				get_page(virt_to_page(spte));
+				break;
+			}
+		}
+	}
+
+	if (!spte)
+		goto out;
+
+	spin_lock(&mm->page_table_lock);
+	if (pud_none(*pud))
+		pud_populate(mm, pud,
+				(pmd_t *)((unsigned long)spte & PAGE_MASK));
+	else
+		put_page(virt_to_page(spte));
+	spin_unlock(&mm->page_table_lock);
+out:
+	pte = (pte_t *)pmd_alloc(mm, pud, addr);
+	mutex_unlock(&mapping->i_mmap_mutex);
+	return pte;
+}
+
+/*
+ * unmap huge page backed by shared pte.
+ *
+ * Hugetlb pte page is ref counted at the time of mapping. If pte is shared
+ * indicated by page_count > 1, unmap is achieved by clearing pud and
+ * decrementing the ref count. If count == 1, the pte page is not shared.
+ *
+ * called with vma->vm_mm->page_table_lock held.
+ *
+ * returns: 1 successfully unmapped a shared pte page
+ *	    0 the underlying pte page is not shared, or it is the last user
+ */
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+	pgd_t *pgd = pgd_offset(mm, *addr);
+	pud_t *pud = pud_offset(pgd, *addr);
+
+	BUG_ON(page_count(virt_to_page(ptep)) == 0);
+	if (page_count(virt_to_page(ptep)) == 1)
+		return 0;
+
+	pud_clear(pud);
+	put_page(virt_to_page(ptep));
+	*addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE;
+	return 1;
+}
+#define want_pmd_share()	(1)
+#else /* !CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
+pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
+{
+	return NULL;
+}
+#define want_pmd_share()	(0)
+#endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
+
+#ifdef CONFIG_ARCH_WANT_GENERAL_HUGETLB
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+			unsigned long addr, unsigned long sz)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pte_t *pte = NULL;
+
+	pgd = pgd_offset(mm, addr);
+	pud = pud_alloc(mm, pgd, addr);
+	if (pud) {
+		if (sz == PUD_SIZE) {
+			pte = (pte_t *)pud;
+		} else {
+			BUG_ON(sz != PMD_SIZE);
+			if (want_pmd_share() && pud_none(*pud))
+				pte = huge_pmd_share(mm, addr, pud);
+			else
+				pte = (pte_t *)pmd_alloc(mm, pud, addr);
+		}
+	}
+	BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));
+
+	return pte;
+}
+
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd = NULL;
+
+	pgd = pgd_offset(mm, addr);
+	if (pgd_present(*pgd)) {
+		pud = pud_offset(pgd, addr);
+		if (pud_present(*pud)) {
+			if (pud_huge(*pud))
+				return (pte_t *)pud;
+			pmd = pmd_offset(pud, addr);
+		}
+	}
+	return (pte_t *) pmd;
+}
+
+struct page *
+follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+		pmd_t *pmd, int write)
+{
+	struct page *page;
+
+	page = pte_page(*(pte_t *)pmd);
+	if (page)
+		page += ((address & ~PMD_MASK) >> PAGE_SHIFT);
+	return page;
+}
+
+struct page *
+follow_huge_pud(struct mm_struct *mm, unsigned long address,
+		pud_t *pud, int write)
+{
+	struct page *page;
+
+	page = pte_page(*(pte_t *)pud);
+	if (page)
+		page += ((address & ~PUD_MASK) >> PAGE_SHIFT);
+	return page;
+}
+
+#else /* !CONFIG_ARCH_WANT_GENERAL_HUGETLB */
+
+/* Can be overriden by architectures */
+__attribute__((weak)) struct page *
+follow_huge_pud(struct mm_struct *mm, unsigned long address,
+		pud_t *pud, int write)
+{
+	BUG();
+	return NULL;
+}
+
+#endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */
+
 #ifdef CONFIG_MEMORY_FAILURE
 
 /* Should be called in hugetlb_lock */