author     Jason Gunthorpe <jgg@mellanox.com>	2019-08-06 19:15:40 -0400
committer  Jason Gunthorpe <jgg@mellanox.com>	2019-08-16 11:02:52 -0400
commit     2c7933f53f6bff7656e3324ca1a04e478bdc57c1 (patch)
tree       c74caf61c703e404f9d0b3f8df7cc1fe4944ee3c
parent     70df291bf81ffda47ff84e6e2da4fbe21f95a861 (diff)
mm/mmu_notifiers: add a get/put scheme for the registration
Many places in the kernel have a flow where userspace will create some object and that object will need to connect to the subsystem's mmu_notifier subscription for the duration of its lifetime. In this case the subsystem is usually tracking multiple mm_structs and it is difficult to keep track of which struct mmu_notifiers have been allocated for which mm's. Since this has been open coded in a variety of exciting ways, provide core functionality to do this safely.

This approach uses the struct mmu_notifier_ops * as a key to determine if the subsystem has a notifier registered on the mm or not. If there is a registration then the existing notifier struct is returned, otherwise ops->alloc_notifier() is used to create a new per-subsystem notifier for the mm.

The destroy side incorporates an async call_srcu() based destruction which will avoid bugs in the callers such as commit 6d7c3cde93c1 ("mm/hmm: fix use after free with struct hmm in the mmu notifiers").

Since we are inside the mmu notifier core, locking is fairly simple: the allocation uses the same approach as for mmu_notifier_mm, the write side of the mmap_sem makes everything deterministic, and we only need to do hlist_add_head_rcu() under the mm_take_all_locks(). The new users count and the discoverability in the hlist are fully serialized by the mmu_notifier_mm->lock.

Link: https://lore.kernel.org/r/20190806231548.25242-4-jgg@ziepe.ca
Co-developed-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Reviewed-by: Ralph Campbell <rcampbell@nvidia.com>
Tested-by: Ralph Campbell <rcampbell@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
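To make the intended calling pattern concrete, here is a minimal consumer-side sketch (illustration only, not part of this patch; the names my_ops and my_object are hypothetical):

	/* Object creation: take or create this subsystem's notifier for the mm. */
	struct mmu_notifier *mn;

	mn = mmu_notifier_get(&my_ops, current->mm);
	if (IS_ERR(mn))
		return PTR_ERR(mn);
	my_object->notifier = mn;

	/*
	 * Object destruction: drop the reference. The final put defers the
	 * actual free to a SRCU callback that invokes ops->free_notifier().
	 */
	mmu_notifier_put(my_object->notifier);

Repeated gets against the same mm and ops return the same notifier with its users count incremented, so the subsystem no longer needs its own mm-to-notifier lookup table.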
-rw-r--r--  include/linux/mmu_notifier.h   35
-rw-r--r--  mm/mmu_notifier.c             156
2 files changed, 185 insertions(+), 6 deletions(-)
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index b6c004bd9f6a..31aa971315a1 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -211,6 +211,19 @@ struct mmu_notifier_ops {
 	 */
 	void (*invalidate_range)(struct mmu_notifier *mn, struct mm_struct *mm,
 				 unsigned long start, unsigned long end);
+
+	/*
+	 * These callbacks are used with the get/put interface to manage the
+	 * lifetime of the mmu_notifier memory. alloc_notifier() returns a new
+	 * notifier for use with the mm.
+	 *
+	 * free_notifier() is only called after the mmu_notifier has been
+	 * fully put, calls to any ops callback are prevented and no ops
+	 * callbacks are currently running. It is called from a SRCU callback
+	 * and cannot sleep.
+	 */
+	struct mmu_notifier *(*alloc_notifier)(struct mm_struct *mm);
+	void (*free_notifier)(struct mmu_notifier *mn);
 };
 
 /*
@@ -227,6 +240,9 @@ struct mmu_notifier_ops {
 struct mmu_notifier {
 	struct hlist_node hlist;
 	const struct mmu_notifier_ops *ops;
+	struct mm_struct *mm;
+	struct rcu_head rcu;
+	unsigned int users;
 };
 
 static inline int mm_has_notifiers(struct mm_struct *mm)
@@ -234,6 +250,21 @@ static inline int mm_has_notifiers(struct mm_struct *mm)
 	return unlikely(mm->mmu_notifier_mm);
 }
 
+struct mmu_notifier *mmu_notifier_get_locked(const struct mmu_notifier_ops *ops,
+					     struct mm_struct *mm);
+static inline struct mmu_notifier *
+mmu_notifier_get(const struct mmu_notifier_ops *ops, struct mm_struct *mm)
+{
+	struct mmu_notifier *ret;
+
+	down_write(&mm->mmap_sem);
+	ret = mmu_notifier_get_locked(ops, mm);
+	up_write(&mm->mmap_sem);
+	return ret;
+}
+void mmu_notifier_put(struct mmu_notifier *mn);
+void mmu_notifier_synchronize(void);
+
 extern int mmu_notifier_register(struct mmu_notifier *mn,
 				 struct mm_struct *mm);
 extern int __mmu_notifier_register(struct mmu_notifier *mn,
@@ -581,6 +612,10 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
 #define pudp_huge_clear_flush_notify pudp_huge_clear_flush
 #define set_pte_at_notify set_pte_at
 
+static inline void mmu_notifier_synchronize(void)
+{
+}
+
 #endif /* CONFIG_MMU_NOTIFIER */
 
 #endif /* _LINUX_MMU_NOTIFIER_H */
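For reference, a minimal implementation of the two new ops might look like the following sketch (hypothetical names, illustration only; the in-tree users of this interface are converted in follow-up patches of the series):

	struct my_notifier {
		struct mmu_notifier mn;
		/* per-mm subsystem state would live here */
	};

	static struct mmu_notifier *my_alloc_notifier(struct mm_struct *mm)
	{
		struct my_notifier *p = kzalloc(sizeof(*p), GFP_KERNEL);

		if (!p)
			return ERR_PTR(-ENOMEM);
		return &p->mn;
	}

	static void my_free_notifier(struct mmu_notifier *mn)
	{
		/* Runs from a SRCU callback, so it must not sleep. */
		kfree(container_of(mn, struct my_notifier, mn));
	}

	static const struct mmu_notifier_ops my_ops = {
		/* .release and the invalidate callbacks go here as needed */
		.alloc_notifier = my_alloc_notifier,
		.free_notifier = my_free_notifier,
	};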
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 696810f632ad..9e92ec8006fc 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -248,6 +248,9 @@ int __mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm)
 	lockdep_assert_held_write(&mm->mmap_sem);
 	BUG_ON(atomic_read(&mm->mm_users) <= 0);
 
+	mn->mm = mm;
+	mn->users = 1;
+
 	if (!mm->mmu_notifier_mm) {
 		/*
 		 * kmalloc cannot be called under mm_take_all_locks(), but we
@@ -295,18 +298,24 @@ out_clean:
 }
 EXPORT_SYMBOL_GPL(__mmu_notifier_register);
 
-/*
+/**
+ * mmu_notifier_register - Register a notifier on a mm
+ * @mn: The notifier to attach
+ * @mm: The mm to attach the notifier to
+ *
  * Must not hold mmap_sem nor any other VM related lock when calling
  * this registration function. Must also ensure mm_users can't go down
  * to zero while this runs to avoid races with mmu_notifier_release,
  * so mm has to be current->mm or the mm should be pinned safely such
  * as with get_task_mm(). If the mm is not current->mm, the mm_users
  * pin should be released by calling mmput after mmu_notifier_register
- * returns. mmu_notifier_unregister must be always called to
- * unregister the notifier. mm_count is automatically pinned to allow
- * mmu_notifier_unregister to safely run at any time later, before or
- * after exit_mmap. ->release will always be called before exit_mmap
- * frees the pages.
+ * returns.
+ *
+ * mmu_notifier_unregister() or mmu_notifier_put() must always be called to
+ * unregister the notifier.
+ *
+ * While the caller has a mmu_notifier get the mn->mm pointer will remain
+ * valid, and can be converted to an active mm pointer via mmget_not_zero().
  */
 int mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm)
 {
@@ -319,6 +328,72 @@ int mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm)
 }
 EXPORT_SYMBOL_GPL(mmu_notifier_register);
 
+static struct mmu_notifier *
+find_get_mmu_notifier(struct mm_struct *mm, const struct mmu_notifier_ops *ops)
+{
+	struct mmu_notifier *mn;
+
+	spin_lock(&mm->mmu_notifier_mm->lock);
+	hlist_for_each_entry_rcu (mn, &mm->mmu_notifier_mm->list, hlist) {
+		if (mn->ops != ops)
+			continue;
+
+		if (likely(mn->users != UINT_MAX))
+			mn->users++;
+		else
+			mn = ERR_PTR(-EOVERFLOW);
+		spin_unlock(&mm->mmu_notifier_mm->lock);
+		return mn;
+	}
+	spin_unlock(&mm->mmu_notifier_mm->lock);
+	return NULL;
+}
+
+/**
+ * mmu_notifier_get_locked - Return the single struct mmu_notifier for
+ *                           the mm & ops
+ * @ops: The operations struct being subscribed with
+ * @mm: The mm to attach notifiers to
+ *
+ * This function either allocates a new mmu_notifier via
+ * ops->alloc_notifier(), or returns an already existing notifier on the
+ * list. The value of the ops pointer is used to determine when two notifiers
+ * are the same.
+ *
+ * Each call to mmu_notifier_get() must be paired with a call to
+ * mmu_notifier_put(). The caller must hold the write side of mm->mmap_sem.
+ *
+ * While the caller has a mmu_notifier get the mm pointer will remain valid,
+ * and can be converted to an active mm pointer via mmget_not_zero().
+ */
+struct mmu_notifier *mmu_notifier_get_locked(const struct mmu_notifier_ops *ops,
+					     struct mm_struct *mm)
+{
+	struct mmu_notifier *mn;
+	int ret;
+
+	lockdep_assert_held_write(&mm->mmap_sem);
+
+	if (mm->mmu_notifier_mm) {
+		mn = find_get_mmu_notifier(mm, ops);
+		if (mn)
+			return mn;
+	}
+
+	mn = ops->alloc_notifier(mm);
+	if (IS_ERR(mn))
+		return mn;
+	mn->ops = ops;
+	ret = __mmu_notifier_register(mn, mm);
+	if (ret)
+		goto out_free;
+	return mn;
+out_free:
+	mn->ops->free_notifier(mn);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(mmu_notifier_get_locked);
+
 /* this is called after the last mmu_notifier_unregister() returned */
 void __mmu_notifier_mm_destroy(struct mm_struct *mm)
 {
@@ -397,6 +472,75 @@ void mmu_notifier_unregister_no_release(struct mmu_notifier *mn,
 }
 EXPORT_SYMBOL_GPL(mmu_notifier_unregister_no_release);
 
+static void mmu_notifier_free_rcu(struct rcu_head *rcu)
+{
+	struct mmu_notifier *mn = container_of(rcu, struct mmu_notifier, rcu);
+	struct mm_struct *mm = mn->mm;
+
+	mn->ops->free_notifier(mn);
+	/* Pairs with the get in __mmu_notifier_register() */
+	mmdrop(mm);
+}
+
+/**
+ * mmu_notifier_put - Release the reference on the notifier
+ * @mn: The notifier to act on
+ *
+ * This function must be paired with each mmu_notifier_get(), it releases the
+ * reference obtained by the get. If this is the last reference then the
+ * process to free the notifier will be run asynchronously.
+ *
+ * Unlike mmu_notifier_unregister() the get/put flow only calls ops->release
+ * when the mm_struct is destroyed. Instead free_notifier is always called to
+ * release any resources held by the user.
+ *
+ * As ops->release is not guaranteed to be called, the user must ensure that
+ * all sptes are dropped, and no new sptes can be established before
+ * mmu_notifier_put() is called.
+ *
+ * This function can be called from the ops->release callback, however the
+ * caller must still ensure it is called pairwise with mmu_notifier_get().
+ *
+ * Modules calling this function must call mmu_notifier_synchronize() in
+ * their __exit functions to ensure the async work is completed.
+ */
+void mmu_notifier_put(struct mmu_notifier *mn)
+{
+	struct mm_struct *mm = mn->mm;
+
+	spin_lock(&mm->mmu_notifier_mm->lock);
+	if (WARN_ON(!mn->users) || --mn->users)
+		goto out_unlock;
+	hlist_del_init_rcu(&mn->hlist);
+	spin_unlock(&mm->mmu_notifier_mm->lock);
+
+	call_srcu(&srcu, &mn->rcu, mmu_notifier_free_rcu);
+	return;
+
+out_unlock:
+	spin_unlock(&mm->mmu_notifier_mm->lock);
+}
+EXPORT_SYMBOL_GPL(mmu_notifier_put);
+
+/**
+ * mmu_notifier_synchronize - Ensure all mmu_notifiers are freed
+ *
+ * This function ensures that all outstanding async SRCU work from
+ * mmu_notifier_put() is completed. After it returns any mmu_notifier_ops
+ * associated with an unused mmu_notifier will no longer be called.
+ *
+ * Before using the caller must ensure that all of its mmu_notifiers have been
+ * fully released via mmu_notifier_put().
+ *
+ * Modules using the mmu_notifier_put() API should call this in their __exit
+ * function to avoid module unloading races.
+ */
+void mmu_notifier_synchronize(void)
+{
+	synchronize_srcu(&srcu);
+}
+EXPORT_SYMBOL_GPL(mmu_notifier_synchronize);
+
 bool
 mmu_notifier_range_update_to_read_only(const struct mmu_notifier_range *range)
 {
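As the mmu_notifier_synchronize() comment above requires, a module built on the get/put flow would flush the async SRCU work on unload, for example (hypothetical module sketch, not part of this patch):

	static void __exit my_module_exit(void)
	{
		/*
		 * All notifiers have already been released with
		 * mmu_notifier_put(); wait for the SRCU callbacks so that
		 * ops->free_notifier() cannot run after the module text
		 * is freed.
		 */
		mmu_notifier_synchronize();
	}
	module_exit(my_module_exit);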