mm: hugetlb: Copy huge_pmd_share from x86 to mm.

Under x86, multiple puds can be made to reference the same bank of huge pmds provided that they represent a full PUD_SIZE of shared huge memory that is aligned to a PUD_SIZE boundary.

The code to share pmds does not require any architecture specific knowledge other than the fact that pmds can be indexed, and thus can be beneficial to some other architectures.

This patch copies the huge pmd sharing (and unsharing) logic from x86/ to mm/ and introduces a new config option to activate it: CONFIG_ARCH_WANT_HUGE_PMD_SHARE

Signed-off-by: Steve Capper <steve.capper@linaro.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
author: Steve Capper <steve.capper@linaro.org> 2013-04-23 07:35:02 -0400
committer: Steve Capper <steve.capper@linaro.org> 2013-06-14 04:33:47 -0400
commit: 3212b535f200c85b5a67cbfaea18431da71b5c72 (patch)
tree: 4ae2cd05ea3fc8b7358806cb34fd2ab7b562fc40 /mm/hugetlb.c
parent: d683b96b072dc4680fc74964eca77e6a23d1fa6e (diff)
1 files changed, 122 insertions, 0 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f8feeeca6686..b0bfb292350e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3169,6 +3169,128 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
        hugetlb_acct_memory(h, -(chg - freed));
 }
+#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
+/*
+ * Decide whether the pmd page that @svma uses for file offset @idx could
+ * also serve @addr in @vma.
+ *
+ * Returns the address inside @svma that corresponds to @idx when sharing
+ * is possible, and 0 when it is not.
+ */
+static unsigned long page_table_shareable(struct vm_area_struct *svma,
+                                struct vm_area_struct *vma,
+                                unsigned long addr, pgoff_t idx)
+{
+        /* Address at which svma maps page offset idx. */
+        unsigned long peer_addr = svma->vm_start +
+                                ((idx - svma->vm_pgoff) << PAGE_SHIFT);
+        /* PUD_SIZE-aligned window that contains peer_addr. */
+        unsigned long win_start = peer_addr & PUD_MASK;
+        unsigned long win_end = win_start + PUD_SIZE;
+        /* Both addresses must select the same slot in the pmd page. */
+        if (pmd_index(addr) != pmd_index(peer_addr))
+                return 0;
+        /*
+         * The flags must agree, except that VM_LOCKED is ignored so that
+         * segments can share when only one of them is marked locked.
+         */
+        if ((vma->vm_flags ^ svma->vm_flags) & ~VM_LOCKED)
+                return 0;
+        /* svma has to span the whole window covered by the pmd page. */
+        if (win_start < svma->vm_start)
+                return 0;
+        if (svma->vm_end < win_end)
+                return 0;
+        return peer_addr;
+}
+/*
+ * Report whether @vma may take part in pmd sharing at @addr: the mapping
+ * must carry VM_MAYSHARE and must cover the entire PUD_SIZE-aligned region
+ * that contains @addr. Returns 1 if both hold, 0 otherwise.
+ */
+static int vma_shareable(struct vm_area_struct *vma, unsigned long addr)
+{
+        unsigned long region_start = addr & PUD_MASK;
+        unsigned long region_end = region_start + PUD_SIZE;
+        /* Wrong kind of mapping: never shareable. */
+        if (!(vma->vm_flags & VM_MAYSHARE))
+                return 0;
+        /* The vma must contain the full page-table-aligned region. */
+        if (region_start < vma->vm_start || vma->vm_end < region_end)
+                return 0;
+        return 1;
+}
+/*
+ * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
+ * and returns the corresponding pte. While this is not necessary for the
+ * !shared pmd case because we can allocate the pmd later as well, it makes the
+ * code much cleaner. pmd allocation is essential for the shared case because
+ * pud has to be populated inside the same i_mmap_mutex section - otherwise
+ * racing tasks could either miss the sharing (see huge_pte_offset) or select a
+ * bad pmd for sharing.
+ *
+ * Steps taken:
+ *  - If vma_shareable() rejects the vma found for @addr, return the result
+ *    of pmd_alloc() straight away without taking any lock.
+ *  - Otherwise, with mapping->i_mmap_mutex held, walk the other vmas that
+ *    map the same file offset. For the first one for which
+ *    page_table_shareable() yields an address and huge_pte_offset() finds an
+ *    entry, take a reference on the page holding that entry.
+ *  - With mm->page_table_lock held, install the page-aligned address of that
+ *    entry into @pud if @pud is still empty; if @pud is already populated,
+ *    drop the reference again.
+ *  - Call pmd_alloc() before releasing i_mmap_mutex and return its result
+ *    cast to pte_t *.
+ *
+ * NOTE(review): the result of find_vma() and vma->vm_file are dereferenced
+ * without a check, so callers are assumed to pass an @addr that lies inside
+ * a file-backed vma of @mm - confirm against the callers.
+ */
+pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
+{
+        struct vm_area_struct *vma = find_vma(mm, addr);
+        struct address_space *mapping = vma->vm_file->f_mapping;
+        pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) +
+                        vma->vm_pgoff;
+        struct vm_area_struct *svma;
+        unsigned long saddr;
+        pte_t *spte = NULL;
+        pte_t *pte;
+        if (!vma_shareable(vma, addr))
+                return (pte_t *)pmd_alloc(mm, pud, addr);
+        mutex_lock(&mapping->i_mmap_mutex);
+        vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
+                if (svma == vma) /* never share with ourselves */
+                        continue;
+                saddr = page_table_shareable(svma, vma, addr, idx);
+                if (saddr) {
+                        spte = huge_pte_offset(svma->vm_mm, saddr);
+                        if (spte) {
+                                get_page(virt_to_page(spte)); /* pin candidate */
+                                break;
+                        }
+                }
+        }
+        if (!spte) /* nothing to share: just allocate below */
+                goto out;
+        spin_lock(&mm->page_table_lock);
+        if (pud_none(*pud))
+                pud_populate(mm, pud,
+                                (pmd_t *)((unsigned long)spte & PAGE_MASK));
+        else
+                put_page(virt_to_page(spte)); /* pud already set: unpin */
+        spin_unlock(&mm->page_table_lock);
+out:
+        pte = (pte_t *)pmd_alloc(mm, pud, addr);
+        mutex_unlock(&mapping->i_mmap_mutex);
+        return pte;
+}
+/*
+ * unmap huge page backed by shared pte.
+ *
+ * Hugetlb pte page is ref counted at the time of mapping.  If pte is shared
+ * indicated by page_count > 1, unmap is achieved by clearing pud and
+ * decrementing the ref count. If count == 1, the pte page is not shared.
+ *
+ * @mm:   address space whose pud entry for *@addr is examined
+ * @addr: in/out; only modified when 1 is returned. It is then rounded up to
+ *        a multiple of HPAGE_SIZE * PTRS_PER_PTE and moved back by one
+ *        HPAGE_SIZE (presumably so that a caller stepping by HPAGE_SIZE
+ *        resumes at the next boundary - confirm against the callers).
+ * @ptep: entry inside the page table page whose reference count is checked
+ *
+ * A reference count of 0 on that page is treated as a bug (BUG_ON).
+ *
+ * called with vma->vm_mm->page_table_lock held.
+ *
+ * returns: 1 successfully unmapped a shared pte page
+ *          0 the underlying pte page is not shared, or it is the last user
+ */
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+        pgd_t *pgd = pgd_offset(mm, *addr);
+        pud_t *pud = pud_offset(pgd, *addr);
+        BUG_ON(page_count(virt_to_page(ptep)) == 0);
+        if (page_count(virt_to_page(ptep)) == 1) /* sole user: not shared */
+                return 0;
+        pud_clear(pud);
+        put_page(virt_to_page(ptep));
+        *addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE;
+        return 1;
+}
+#endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
 #ifdef CONFIG_MEMORY_FAILURE
 /* Should be called in hugetlb_lock */
author	Steve Capper <steve.capper@linaro.org>	2013-04-23 07:35:02 -0400
committer	Steve Capper <steve.capper@linaro.org>	2013-06-14 04:33:47 -0400
commit	3212b535f200c85b5a67cbfaea18431da71b5c72 (patch)
tree	4ae2cd05ea3fc8b7358806cb34fd2ab7b562fc40 /mm/hugetlb.c
parent	d683b96b072dc4680fc74964eca77e6a23d1fa6e (diff)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f8feeeca6686..b0bfb292350e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3169,6 +3169,128 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
3169	hugetlb_acct_memory(h, -(chg - freed));	3169	hugetlb_acct_memory(h, -(chg - freed));
3170	}	3170	}
3171		3171
		3172	#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
		3173	static unsigned long page_table_shareable(struct vm_area_struct *svma,
		3174	struct vm_area_struct *vma,
		3175	unsigned long addr, pgoff_t idx)
		3176	{
		3177	unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) +
		3178	svma->vm_start;
		3179	unsigned long sbase = saddr & PUD_MASK;
		3180	unsigned long s_end = sbase + PUD_SIZE;
		3181
		3182	/* Allow segments to share if only one is marked locked */
		3183	unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED;
		3184	unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED;
		3185
		3186	/*
		3187	* match the virtual addresses, permission and the alignment of the
		3188	* page table page.
		3189	*/
		3190	if (pmd_index(addr) != pmd_index(saddr) ||
		3191	vm_flags != svm_flags ||
		3192	sbase < svma->vm_start || svma->vm_end < s_end)
		3193	return 0;
		3194
		3195	return saddr;
		3196	}
		3197
		3198	static int vma_shareable(struct vm_area_struct *vma, unsigned long addr)
		3199	{
		3200	unsigned long base = addr & PUD_MASK;
		3201	unsigned long end = base + PUD_SIZE;
		3202
		3203	/*
		3204	* check on proper vm_flags and page table alignment
		3205	*/
		3206	if (vma->vm_flags & VM_MAYSHARE &&
		3207	vma->vm_start <= base && end <= vma->vm_end)
		3208	return 1;
		3209	return 0;
		3210	}
		3211
		3212	/*
		3213	* Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
		3214	* and returns the corresponding pte. While this is not necessary for the
		3215	* !shared pmd case because we can allocate the pmd later as well, it makes the
		3216	* code much cleaner. pmd allocation is essential for the shared case because
		3217	* pud has to be populated inside the same i_mmap_mutex section - otherwise
		3218	* racing tasks could either miss the sharing (see huge_pte_offset) or select a
		3219	* bad pmd for sharing.
		3220	*/
		3221	pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
		3222	{
		3223	struct vm_area_struct *vma = find_vma(mm, addr);
		3224	struct address_space *mapping = vma->vm_file->f_mapping;
		3225	pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) +
		3226	vma->vm_pgoff;
		3227	struct vm_area_struct *svma;
		3228	unsigned long saddr;
		3229	pte_t *spte = NULL;
		3230	pte_t *pte;
		3231
		3232	if (!vma_shareable(vma, addr))
		3233	return (pte_t *)pmd_alloc(mm, pud, addr);
		3234
		3235	mutex_lock(&mapping->i_mmap_mutex);
		3236	vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
		3237	if (svma == vma)
		3238	continue;
		3239
		3240	saddr = page_table_shareable(svma, vma, addr, idx);
		3241	if (saddr) {
		3242	spte = huge_pte_offset(svma->vm_mm, saddr);
		3243	if (spte) {
		3244	get_page(virt_to_page(spte));
		3245	break;
		3246	}
		3247	}
		3248	}
		3249
		3250	if (!spte)
		3251	goto out;
		3252
		3253	spin_lock(&mm->page_table_lock);
		3254	if (pud_none(*pud))
		3255	pud_populate(mm, pud,
		3256	(pmd_t *)((unsigned long)spte & PAGE_MASK));
		3257	else
		3258	put_page(virt_to_page(spte));
		3259	spin_unlock(&mm->page_table_lock);
		3260	out:
		3261	pte = (pte_t *)pmd_alloc(mm, pud, addr);
		3262	mutex_unlock(&mapping->i_mmap_mutex);
		3263	return pte;
		3264	}
		3265
		3266	/*
		3267	* unmap huge page backed by shared pte.
		3268	*
		3269	* Hugetlb pte page is ref counted at the time of mapping. If pte is shared
		3270	* indicated by page_count > 1, unmap is achieved by clearing pud and
		3271	* decrementing the ref count. If count == 1, the pte page is not shared.
		3272	*
		3273	* called with vma->vm_mm->page_table_lock held.
		3274	*
		3275	* returns: 1 successfully unmapped a shared pte page
		3276	* 0 the underlying pte page is not shared, or it is the last user
		3277	*/
		3278	int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
		3279	{
		3280	pgd_t *pgd = pgd_offset(mm, *addr);
		3281	pud_t *pud = pud_offset(pgd, *addr);
		3282
		3283	BUG_ON(page_count(virt_to_page(ptep)) == 0);
		3284	if (page_count(virt_to_page(ptep)) == 1)
		3285	return 0;
		3286
		3287	pud_clear(pud);
		3288	put_page(virt_to_page(ptep));
		3289	*addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE;
		3290	return 1;
		3291	}
		3292	#endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
		3293
3172	#ifdef CONFIG_MEMORY_FAILURE	3294	#ifdef CONFIG_MEMORY_FAILURE
3173		3295
3174	/* Should be called in hugetlb_lock */	3296	/* Should be called in hugetlb_lock */