author		Steve Capper <steve.capper@linaro.org>	2013-04-23 07:35:02 -0400
committer	Steve Capper <steve.capper@linaro.org>	2013-06-14 04:33:47 -0400
commit		3212b535f200c85b5a67cbfaea18431da71b5c72
tree		4ae2cd05ea3fc8b7358806cb34fd2ab7b562fc40
parent		d683b96b072dc4680fc74964eca77e6a23d1fa6e
mm: hugetlb: Copy huge_pmd_share from x86 to mm.
Under x86, multiple puds can be made to reference the same bank of huge pmds, provided that they represent a full PUD_SIZE of shared huge memory that is aligned to a PUD_SIZE boundary.

The code to share pmds does not require any architecture-specific knowledge beyond the fact that pmds can be indexed, and so can benefit other architectures as well.

This patch copies the huge pmd sharing (and unsharing) logic from x86/ to mm/ and introduces a new config option to activate it: CONFIG_ARCH_WANT_HUGE_PMD_SHARE.

Signed-off-by: Steve Capper <steve.capper@linaro.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
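As a rough illustration (not part of the patch), the userspace sketch below shows the situation the new code targets: two tasks each mapping one full PUD_SIZE of the same hugetlbfs file MAP_SHARED, so that vma_shareable() and page_table_shareable() can let the second mm reuse the first mm's pmd page. It assumes an x86_64-style layout with 2 MiB huge pages and a 1 GiB PUD_SIZE, plus a hugetlbfs mount at /dev/hugepages with enough pages reserved; the file name is made up, and since mmap() only guarantees huge-page alignment, a real test would pass a PUD_SIZE-aligned address hint or inspect the resulting mapping.

/*
 * Illustrative only: parent and child each create their own VMA over the
 * same PUD_SIZE worth of a shared hugetlbfs file, which is the pattern
 * that makes the pmd page shareable between the two mms.
 * Assumptions: 2 MiB huge pages, PUD_SIZE == 1 GiB, hugetlbfs mounted at
 * /dev/hugepages, enough huge pages reserved.
 */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#define ASSUMED_PUD_SIZE (1UL << 30)	/* 512 huge pmds of 2 MiB each */

int main(void)
{
	int fd = open("/dev/hugepages/pmd-share-demo", O_CREAT | O_RDWR, 0600);
	pid_t pid;
	char *p;

	if (fd < 0 || ftruncate(fd, ASSUMED_PUD_SIZE) < 0) {
		perror("hugetlbfs file");
		return EXIT_FAILURE;
	}

	pid = fork();
	if (pid < 0) {
		perror("fork");
		return EXIT_FAILURE;
	}

	/* Each process maps the full PUD_SIZE region MAP_SHARED. */
	p = mmap(NULL, ASSUMED_PUD_SIZE, PROT_READ | PROT_WRITE,
		 MAP_SHARED, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return EXIT_FAILURE;
	}

	/* Touch the region so huge pmds get populated. */
	p[0] = 1;

	if (pid > 0)
		wait(NULL);

	munmap(p, ASSUMED_PUD_SIZE);
	close(fd);
	return 0;
}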
Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r--	mm/hugetlb.c	122
1 file changed, 122 insertions(+), 0 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f8feeeca6686..b0bfb292350e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3169,6 +3169,128 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
 	hugetlb_acct_memory(h, -(chg - freed));
 }
 
+#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
+static unsigned long page_table_shareable(struct vm_area_struct *svma,
+				struct vm_area_struct *vma,
+				unsigned long addr, pgoff_t idx)
+{
+	unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) +
+				svma->vm_start;
+	unsigned long sbase = saddr & PUD_MASK;
+	unsigned long s_end = sbase + PUD_SIZE;
+
+	/* Allow segments to share if only one is marked locked */
+	unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED;
+	unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED;
+
+	/*
+	 * match the virtual addresses, permission and the alignment of the
+	 * page table page.
+	 */
+	if (pmd_index(addr) != pmd_index(saddr) ||
+	    vm_flags != svm_flags ||
+	    sbase < svma->vm_start || svma->vm_end < s_end)
+		return 0;
+
+	return saddr;
+}
+
+static int vma_shareable(struct vm_area_struct *vma, unsigned long addr)
+{
+	unsigned long base = addr & PUD_MASK;
+	unsigned long end = base + PUD_SIZE;
+
+	/*
+	 * check on proper vm_flags and page table alignment
+	 */
+	if (vma->vm_flags & VM_MAYSHARE &&
+	    vma->vm_start <= base && end <= vma->vm_end)
+		return 1;
+	return 0;
+}
+
+/*
+ * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
+ * and returns the corresponding pte. While this is not necessary for the
+ * !shared pmd case because we can allocate the pmd later as well, it makes the
+ * code much cleaner. pmd allocation is essential for the shared case because
+ * pud has to be populated inside the same i_mmap_mutex section - otherwise
+ * racing tasks could either miss the sharing (see huge_pte_offset) or select a
+ * bad pmd for sharing.
+ */
+pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
+{
+	struct vm_area_struct *vma = find_vma(mm, addr);
+	struct address_space *mapping = vma->vm_file->f_mapping;
+	pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) +
+			vma->vm_pgoff;
+	struct vm_area_struct *svma;
+	unsigned long saddr;
+	pte_t *spte = NULL;
+	pte_t *pte;
+
+	if (!vma_shareable(vma, addr))
+		return (pte_t *)pmd_alloc(mm, pud, addr);
+
+	mutex_lock(&mapping->i_mmap_mutex);
+	vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
+		if (svma == vma)
+			continue;
+
+		saddr = page_table_shareable(svma, vma, addr, idx);
+		if (saddr) {
+			spte = huge_pte_offset(svma->vm_mm, saddr);
+			if (spte) {
+				get_page(virt_to_page(spte));
+				break;
+			}
+		}
+	}
+
+	if (!spte)
+		goto out;
+
+	spin_lock(&mm->page_table_lock);
+	if (pud_none(*pud))
+		pud_populate(mm, pud,
+				(pmd_t *)((unsigned long)spte & PAGE_MASK));
+	else
+		put_page(virt_to_page(spte));
+	spin_unlock(&mm->page_table_lock);
+out:
+	pte = (pte_t *)pmd_alloc(mm, pud, addr);
+	mutex_unlock(&mapping->i_mmap_mutex);
+	return pte;
+}
+
+/*
+ * unmap huge page backed by shared pte.
+ *
+ * Hugetlb pte page is ref counted at the time of mapping. If pte is shared
+ * indicated by page_count > 1, unmap is achieved by clearing pud and
+ * decrementing the ref count. If count == 1, the pte page is not shared.
+ *
+ * called with vma->vm_mm->page_table_lock held.
+ *
+ * returns: 1 successfully unmapped a shared pte page
+ *	    0 the underlying pte page is not shared, or it is the last user
+ */
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+	pgd_t *pgd = pgd_offset(mm, *addr);
+	pud_t *pud = pud_offset(pgd, *addr);
+
+	BUG_ON(page_count(virt_to_page(ptep)) == 0);
+	if (page_count(virt_to_page(ptep)) == 1)
+		return 0;
+
+	pud_clear(pud);
+	put_page(virt_to_page(ptep));
+	*addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE;
+	return 1;
+}
+#endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
+
 #ifdef CONFIG_MEMORY_FAILURE
 
 /* Should be called in hugetlb_lock */
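For context, below is a hedged sketch of the architecture-side hook, modelled on the x86 huge_pte_alloc() this code was copied from: an architecture that selects CONFIG_ARCH_WANT_HUGE_PMD_SHARE calls huge_pmd_share() when it needs a huge pmd and the pud is still empty. The exact helpers and sanity checks differ per architecture; this sketch is not part of the patch.

/*
 * Sketch of the arch side, modelled on x86's huge_pte_alloc() of this era.
 * PUD-sized (gigantic) pages and additional sanity checks are handled
 * differently on each architecture; this only shows where huge_pmd_share()
 * slots in.
 */
pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr,
		      unsigned long sz)
{
	pgd_t *pgd = pgd_offset(mm, addr);
	pud_t *pud = pud_alloc(mm, pgd, addr);
	pte_t *pte = NULL;

	if (!pud)
		return NULL;

	if (sz == PUD_SIZE) {
		/* Gigantic page: the pud entry itself acts as the "pte". */
		pte = (pte_t *)pud;
	} else if (sz == PMD_SIZE) {
		if (pud_none(*pud))
			/* Empty pud: try to reuse another mm's pmd page. */
			pte = huge_pmd_share(mm, addr, pud);
		else
			pte = (pte_t *)pmd_alloc(mm, pud, addr);
	}

	return pte;
}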