author     Mike Kravetz <mike.kravetz@oracle.com>  2017-02-22 18:42:55 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2017-02-22 19:41:28 -0500
commit     60d4d2d2b40e44cd36bfb6049e8d9e2055a24f8a (patch)
tree       b622e72fc72555be1f09f5f496d9a54fcb1e2e3c /mm/userfaultfd.c
parent     8fb5debc5fcd450470cdd789c2d80ef95ebb8cf4 (diff)
userfaultfd: hugetlbfs: add __mcopy_atomic_hugetlb for huge page UFFDIO_COPY
__mcopy_atomic_hugetlb performs the UFFDIO_COPY operation for huge pages.
It is based on the existing __mcopy_atomic routine for normal pages.
Unlike normal pages, there is no huge page support for the UFFDIO_ZEROPAGE
operation.

Link: http://lkml.kernel.org/r/20161216144821.5183-19-aarcange@redhat.com
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Michael Rapoport <RAPOPORT@il.ibm.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/userfaultfd.c')
-rw-r--r--  mm/userfaultfd.c | 186
1 file changed, 186 insertions(+), 0 deletions(-)
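For context, here is a minimal userspace sketch of what this enables: resolving a
missing huge page in a userfaultfd-registered, MAP_PRIVATE hugetlb range with
UFFDIO_COPY.  This sketch is not part of the patch.  It assumes a kernel carrying
the full hugetlbfs userfaultfd series (UFFDIO_REGISTER acceptance of hugetlb VMAs
comes from other patches in the series), a 2MB default huge page size, and free
huge pages (vm.nr_hugepages); HPAGE_SIZE and the variable names are illustrative
only.

/* Hypothetical example, not part of this patch. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/userfaultfd.h>

#define HPAGE_SIZE	(2UL * 1024 * 1024)	/* assumed default huge page size */

int main(void)
{
	long uffd;
	char *dst, *src;
	struct uffdio_api api = { .api = UFFD_API };
	struct uffdio_register reg = { 0 };
	struct uffdio_copy copy = { 0 };

	uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
	if (uffd < 0 || ioctl(uffd, UFFDIO_API, &api))
		return 1;

	/* dst: private hugetlb range whose pages start out missing */
	dst = mmap(NULL, HPAGE_SIZE, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
	/* src: ordinary memory holding the data to be copied in */
	src = mmap(NULL, HPAGE_SIZE, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (dst == MAP_FAILED || src == MAP_FAILED)
		return 1;
	memset(src, 0xaa, HPAGE_SIZE);

	/* Register the hugetlb range for missing-page handling. */
	reg.range.start = (unsigned long)dst;
	reg.range.len = HPAGE_SIZE;
	reg.mode = UFFDIO_REGISTER_MODE_MISSING;
	if (ioctl(uffd, UFFDIO_REGISTER, &reg))
		return 1;

	/*
	 * Atomically populate one huge page.  dst and len must be
	 * multiples of the huge page size; __mcopy_atomic_hugetlb
	 * rejects anything else with -EINVAL.
	 */
	copy.dst = (unsigned long)dst;
	copy.src = (unsigned long)src;
	copy.len = HPAGE_SIZE;
	copy.mode = 0;
	if (ioctl(uffd, UFFDIO_COPY, &copy) || copy.copy != HPAGE_SIZE)
		return 1;

	/* The huge page is now mapped and carries the copied data. */
	return dst[0] == (char)0xaa ? 0 : 1;
}

Note that UFFDIO_ZEROPAGE on such a range would fail with -EINVAL, as explained
in the comment at the top of __mcopy_atomic_hugetlb below.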
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 9c2ed70ac78d..ef0495bfd17a 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -14,6 +14,8 @@
 #include <linux/swapops.h>
 #include <linux/userfaultfd_k.h>
 #include <linux/mmu_notifier.h>
+#include <linux/hugetlb.h>
+#include <linux/pagemap.h>
 #include <asm/tlbflush.h>
 #include "internal.h"
 
@@ -139,6 +141,183 @@ static pmd_t *mm_alloc_pmd(struct mm_struct *mm, unsigned long address)
 	return pmd;
 }
 
+#ifdef CONFIG_HUGETLB_PAGE
+/*
+ * __mcopy_atomic processing for HUGETLB vmas.  Note that this routine is
+ * called with mmap_sem held; it will release mmap_sem before returning.
+ */
+static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
+					      struct vm_area_struct *dst_vma,
+					      unsigned long dst_start,
+					      unsigned long src_start,
+					      unsigned long len,
+					      bool zeropage)
+{
+	ssize_t err;
+	pte_t *dst_pte;
+	unsigned long src_addr, dst_addr;
+	long copied;
+	struct page *page;
+	struct hstate *h;
+	unsigned long vma_hpagesize;
+	pgoff_t idx;
+	u32 hash;
+	struct address_space *mapping;
+
+	/*
+	 * There is no default zero huge page for all huge page sizes as
+	 * supported by hugetlb.  A PMD_SIZE huge page may exist as used
+	 * by THP.  Since we can not reliably insert a zero page, this
+	 * feature is not supported.
+	 */
+	if (zeropage) {
+		up_read(&dst_mm->mmap_sem);
+		return -EINVAL;
+	}
+
+	src_addr = src_start;
+	dst_addr = dst_start;
+	copied = 0;
+	page = NULL;
+	vma_hpagesize = vma_kernel_pagesize(dst_vma);
+
+	/*
+	 * Validate alignment based on huge page size
+	 */
+	err = -EINVAL;
+	if (dst_start & (vma_hpagesize - 1) || len & (vma_hpagesize - 1))
+		goto out_unlock;
+
+retry:
+	/*
+	 * On routine entry dst_vma is set.  If we had to drop mmap_sem and
+	 * retry, dst_vma will be set to NULL and we must lookup again.
+	 */
+	if (!dst_vma) {
+		err = -EINVAL;
+		dst_vma = find_vma(dst_mm, dst_start);
+		if (!dst_vma || !is_vm_hugetlb_page(dst_vma))
+			goto out_unlock;
+
+		if (vma_hpagesize != vma_kernel_pagesize(dst_vma))
+			goto out_unlock;
+
+		/*
+		 * Make sure the vma is not shared, that the remaining dst
+		 * range is both valid and fully within a single existing vma.
+		 */
+		if (dst_vma->vm_flags & VM_SHARED)
+			goto out_unlock;
+		if (dst_start < dst_vma->vm_start ||
+		    dst_start + len > dst_vma->vm_end)
+			goto out_unlock;
+	}
+
+	if (WARN_ON(dst_addr & (vma_hpagesize - 1) ||
+		    (len - copied) & (vma_hpagesize - 1)))
+		goto out_unlock;
+
+	/*
+	 * Only allow __mcopy_atomic_hugetlb on userfaultfd registered ranges.
+	 */
+	if (!dst_vma->vm_userfaultfd_ctx.ctx)
+		goto out_unlock;
+
+	/*
+	 * Ensure the dst_vma has an anon_vma.
+	 */
+	err = -ENOMEM;
+	if (unlikely(anon_vma_prepare(dst_vma)))
+		goto out_unlock;
+
+	h = hstate_vma(dst_vma);
+
+	while (src_addr < src_start + len) {
+		pte_t dst_pteval;
+
+		BUG_ON(dst_addr >= dst_start + len);
+		VM_BUG_ON(dst_addr & ~huge_page_mask(h));
+
+		/*
+		 * Serialize via hugetlb_fault_mutex
+		 */
+		idx = linear_page_index(dst_vma, dst_addr);
+		mapping = dst_vma->vm_file->f_mapping;
+		hash = hugetlb_fault_mutex_hash(h, dst_mm, dst_vma, mapping,
+						idx, dst_addr);
+		mutex_lock(&hugetlb_fault_mutex_table[hash]);
+
+		err = -ENOMEM;
+		dst_pte = huge_pte_alloc(dst_mm, dst_addr, huge_page_size(h));
+		if (!dst_pte) {
+			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
+			goto out_unlock;
+		}
+
+		err = -EEXIST;
+		dst_pteval = huge_ptep_get(dst_pte);
+		if (!huge_pte_none(dst_pteval)) {
+			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
+			goto out_unlock;
+		}
+
+		err = hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma,
+						dst_addr, src_addr, &page);
+
+		mutex_unlock(&hugetlb_fault_mutex_table[hash]);
+
+		cond_resched();
+
+		if (unlikely(err == -EFAULT)) {
+			up_read(&dst_mm->mmap_sem);
+			BUG_ON(!page);
+
+			err = copy_huge_page_from_user(page,
+						(const void __user *)src_addr,
+						pages_per_huge_page(h));
+			if (unlikely(err)) {
+				err = -EFAULT;
+				goto out;
+			}
+			down_read(&dst_mm->mmap_sem);
+
+			dst_vma = NULL;
+			goto retry;
+		} else
+			BUG_ON(page);
+
+		if (!err) {
+			dst_addr += vma_hpagesize;
+			src_addr += vma_hpagesize;
+			copied += vma_hpagesize;
+
+			if (fatal_signal_pending(current))
+				err = -EINTR;
+		}
+		if (err)
+			break;
+	}
+
+out_unlock:
+	up_read(&dst_mm->mmap_sem);
+out:
+	if (page)
+		put_page(page);
+	BUG_ON(copied < 0);
+	BUG_ON(err > 0);
+	BUG_ON(!copied && !err);
+	return copied ? copied : err;
+}
+#else /* !CONFIG_HUGETLB_PAGE */
+/* fail at build time if gcc attempts to use this */
+extern ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
+				      struct vm_area_struct *dst_vma,
+				      unsigned long dst_start,
+				      unsigned long src_start,
+				      unsigned long len,
+				      bool zeropage);
+#endif /* CONFIG_HUGETLB_PAGE */
+
 static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
 					      unsigned long dst_start,
 					      unsigned long src_start,
@@ -182,6 +361,13 @@ retry:
 		goto out_unlock;
 
 	/*
+	 * If this is a HUGETLB vma, pass off to appropriate routine
+	 */
+	if (is_vm_hugetlb_page(dst_vma))
+		return __mcopy_atomic_hugetlb(dst_mm, dst_vma, dst_start,
+						src_start, len, zeropage);
+
+	/*
 	 * Be strict and only allow __mcopy_atomic on userfaultfd
 	 * registered ranges to prevent userland errors going
 	 * unnoticed. As far as the VM consistency is concerned, it