author     Mike Kravetz <mike.kravetz@oracle.com>           2017-02-22 18:42:55 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>   2017-02-22 19:41:28 -0500
commit     60d4d2d2b40e44cd36bfb6049e8d9e2055a24f8a
tree       b622e72fc72555be1f09f5f496d9a54fcb1e2e3c  /mm/userfaultfd.c
parent     8fb5debc5fcd450470cdd789c2d80ef95ebb8cf4
userfaultfd: hugetlbfs: add __mcopy_atomic_hugetlb for huge page UFFDIO_COPY
__mcopy_atomic_hugetlb performs the UFFDIO_COPY operation for huge
pages. It is based on the existing __mcopy_atomic routine for normal
pages. Unlike normal pages, there is no huge page support for the
UFFDIO_ZEROPAGE operation.
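
For context, with this support in place, resolving a missing-page fault in a
userfaultfd-registered hugetlbfs range from userspace comes down to a single
UFFDIO_COPY ioctl whose destination address and length are aligned to the huge
page size, mirroring the dst_start/len checks against vma_hpagesize below.
A minimal sketch, assuming a 2MB huge page size and an already-registered
uffd descriptor (the helper name and size macro are illustrative, not part of
this patch):

#include <string.h>
#include <sys/ioctl.h>
#include <linux/userfaultfd.h>

#define EX_HPAGE_SIZE	(2UL << 20)	/* assumed 2MB huge page size */

/* Copy one huge page of data into a faulting hugetlbfs range. */
static int hugetlb_uffdio_copy(int uffd, unsigned long dst, unsigned long src)
{
	struct uffdio_copy copy;

	memset(&copy, 0, sizeof(copy));
	copy.dst = dst;			/* must be huge page aligned */
	copy.src = src;			/* userspace buffer holding the contents */
	copy.len = EX_HPAGE_SIZE;	/* multiple of the huge page size */
	copy.mode = 0;

	/* UFFDIO_ZEROPAGE is not supported for hugetlb ranges, only copy. */
	return ioctl(uffd, UFFDIO_COPY, &copy);
}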
Link: http://lkml.kernel.org/r/20161216144821.5183-19-aarcange@redhat.com
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Michael Rapoport <RAPOPORT@il.ibm.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/userfaultfd.c')
 -rw-r--r--  mm/userfaultfd.c | 186
 1 file changed, 186 insertions(+), 0 deletions(-)
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 9c2ed70ac78d..ef0495bfd17a 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -14,6 +14,8 @@
 #include <linux/swapops.h>
 #include <linux/userfaultfd_k.h>
 #include <linux/mmu_notifier.h>
+#include <linux/hugetlb.h>
+#include <linux/pagemap.h>
 #include <asm/tlbflush.h>
 #include "internal.h"
 
@@ -139,6 +141,183 @@ static pmd_t *mm_alloc_pmd(struct mm_struct *mm, unsigned long address)
 	return pmd;
 }
 
+#ifdef CONFIG_HUGETLB_PAGE
+/*
+ * __mcopy_atomic processing for HUGETLB vmas.  Note that this routine is
+ * called with mmap_sem held, it will release mmap_sem before returning.
+ */
+static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
+					      struct vm_area_struct *dst_vma,
+					      unsigned long dst_start,
+					      unsigned long src_start,
+					      unsigned long len,
+					      bool zeropage)
+{
+	ssize_t err;
+	pte_t *dst_pte;
+	unsigned long src_addr, dst_addr;
+	long copied;
+	struct page *page;
+	struct hstate *h;
+	unsigned long vma_hpagesize;
+	pgoff_t idx;
+	u32 hash;
+	struct address_space *mapping;
+
+	/*
+	 * There is no default zero huge page for all huge page sizes as
+	 * supported by hugetlb.  A PMD_SIZE huge pages may exist as used
+	 * by THP.  Since we can not reliably insert a zero page, this
+	 * feature is not supported.
+	 */
+	if (zeropage) {
+		up_read(&dst_mm->mmap_sem);
+		return -EINVAL;
+	}
+
+	src_addr = src_start;
+	dst_addr = dst_start;
+	copied = 0;
+	page = NULL;
+	vma_hpagesize = vma_kernel_pagesize(dst_vma);
+
+	/*
+	 * Validate alignment based on huge page size
+	 */
+	err = -EINVAL;
+	if (dst_start & (vma_hpagesize - 1) || len & (vma_hpagesize - 1))
+		goto out_unlock;
+
+retry:
+	/*
+	 * On routine entry dst_vma is set.  If we had to drop mmap_sem and
+	 * retry, dst_vma will be set to NULL and we must lookup again.
+	 */
+	if (!dst_vma) {
+		err = -EINVAL;
+		dst_vma = find_vma(dst_mm, dst_start);
+		if (!dst_vma || !is_vm_hugetlb_page(dst_vma))
+			goto out_unlock;
+
+		if (vma_hpagesize != vma_kernel_pagesize(dst_vma))
+			goto out_unlock;
+
+		/*
+		 * Make sure the vma is not shared, that the remaining dst
+		 * range is both valid and fully within a single existing vma.
+		 */
+		if (dst_vma->vm_flags & VM_SHARED)
+			goto out_unlock;
+		if (dst_start < dst_vma->vm_start ||
+		    dst_start + len > dst_vma->vm_end)
+			goto out_unlock;
+	}
+
+	if (WARN_ON(dst_addr & (vma_hpagesize - 1) ||
+		    (len - copied) & (vma_hpagesize - 1)))
+		goto out_unlock;
+
+	/*
+	 * Only allow __mcopy_atomic_hugetlb on userfaultfd registered ranges.
+	 */
+	if (!dst_vma->vm_userfaultfd_ctx.ctx)
+		goto out_unlock;
+
+	/*
+	 * Ensure the dst_vma has a anon_vma.
+	 */
+	err = -ENOMEM;
+	if (unlikely(anon_vma_prepare(dst_vma)))
+		goto out_unlock;
+
+	h = hstate_vma(dst_vma);
+
+	while (src_addr < src_start + len) {
+		pte_t dst_pteval;
+
+		BUG_ON(dst_addr >= dst_start + len);
+		VM_BUG_ON(dst_addr & ~huge_page_mask(h));
+
+		/*
+		 * Serialize via hugetlb_fault_mutex
+		 */
+		idx = linear_page_index(dst_vma, dst_addr);
+		mapping = dst_vma->vm_file->f_mapping;
+		hash = hugetlb_fault_mutex_hash(h, dst_mm, dst_vma, mapping,
+								idx, dst_addr);
+		mutex_lock(&hugetlb_fault_mutex_table[hash]);
+
+		err = -ENOMEM;
+		dst_pte = huge_pte_alloc(dst_mm, dst_addr, huge_page_size(h));
+		if (!dst_pte) {
+			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
+			goto out_unlock;
+		}
+
+		err = -EEXIST;
+		dst_pteval = huge_ptep_get(dst_pte);
+		if (!huge_pte_none(dst_pteval)) {
+			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
+			goto out_unlock;
+		}
+
+		err = hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma,
+						dst_addr, src_addr, &page);
+
+		mutex_unlock(&hugetlb_fault_mutex_table[hash]);
+
+		cond_resched();
+
+		if (unlikely(err == -EFAULT)) {
+			up_read(&dst_mm->mmap_sem);
+			BUG_ON(!page);
+
+			err = copy_huge_page_from_user(page,
+						(const void __user *)src_addr,
+						pages_per_huge_page(h));
+			if (unlikely(err)) {
+				err = -EFAULT;
+				goto out;
+			}
+			down_read(&dst_mm->mmap_sem);
+
+			dst_vma = NULL;
+			goto retry;
+		} else
+			BUG_ON(page);
+
+		if (!err) {
+			dst_addr += vma_hpagesize;
+			src_addr += vma_hpagesize;
+			copied += vma_hpagesize;
+
+			if (fatal_signal_pending(current))
+				err = -EINTR;
+		}
+		if (err)
+			break;
+	}
+
+out_unlock:
+	up_read(&dst_mm->mmap_sem);
+out:
+	if (page)
+		put_page(page);
+	BUG_ON(copied < 0);
+	BUG_ON(err > 0);
+	BUG_ON(!copied && !err);
+	return copied ? copied : err;
+}
+#else /* !CONFIG_HUGETLB_PAGE */
+/* fail at build time if gcc attempts to use this */
+extern ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
+				      struct vm_area_struct *dst_vma,
+				      unsigned long dst_start,
+				      unsigned long src_start,
+				      unsigned long len,
+				      bool zeropage);
+#endif /* CONFIG_HUGETLB_PAGE */
+
 static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
 					      unsigned long dst_start,
 					      unsigned long src_start,
@@ -182,6 +361,13 @@ retry:
 		goto out_unlock;
 
 	/*
+	 * If this is a HUGETLB vma, pass off to appropriate routine
+	 */
+	if (is_vm_hugetlb_page(dst_vma))
+		return __mcopy_atomic_hugetlb(dst_mm, dst_vma, dst_start,
+						src_start, len, zeropage);
+
+	/*
 	 * Be strict and only allow __mcopy_atomic on userfaultfd
 	 * registered ranges to prevent userland errors going
 	 * unnoticed. As far as the VM consistency is concerned, it