-rw-r--r--  include/linux/mm.h      |   3
-rw-r--r--  include/linux/pagemap.h |   1
-rw-r--r--  include/linux/rmap.h    |   8
-rw-r--r--  mm/mmap.c               | 158
4 files changed, 170 insertions, 0 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6e695eaab4ce..866a3dbe5c75 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1104,6 +1104,9 @@ extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
 	unsigned long addr, unsigned long len, pgoff_t pgoff);
 extern void exit_mmap(struct mm_struct *);
 
+extern int mm_take_all_locks(struct mm_struct *mm);
+extern void mm_drop_all_locks(struct mm_struct *mm);
+
 #ifdef CONFIG_PROC_FS
 /* From fs/proc/base.c. callers must _not_ hold the mm's exe_file_lock */
 extern void added_exe_file_vma(struct mm_struct *mm);
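
A minimal usage sketch of the two new exports, hypothetical and not part of this patch: the caller is expected to hold mmap_sem for writing across the whole take/drop window and to handle the -EINTR failure, per the rules documented in the mm/mmap.c hunk below. The wrapper name example_freeze_mm is invented for illustration.

#include <linux/mm.h>

static int example_freeze_mm(struct mm_struct *mm)
{
	int err;

	down_write(&mm->mmap_sem);	/* required before mm_take_all_locks() */
	err = mm_take_all_locks(mm);	/* returns -EINTR if a signal is pending */
	if (!err) {
		/* ... operate on the mm with every anon_vma and i_mmap lock held ... */
		mm_drop_all_locks(mm);	/* must run before mmap_sem is released */
	}
	up_write(&mm->mmap_sem);
	return err;
}
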
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index a81d81890422..a39b38ccdc97 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -20,6 +20,7 @@
  */
 #define AS_EIO		(__GFP_BITS_SHIFT + 0)	/* IO error on async write */
 #define AS_ENOSPC	(__GFP_BITS_SHIFT + 1)	/* ENOSPC on async write */
+#define AS_MM_ALL_LOCKS	(__GFP_BITS_SHIFT + 2)	/* under mm_take_all_locks() */
 
 static inline void mapping_set_error(struct address_space *mapping, int error)
 {
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 1383692ac5bd..69407f85e10b 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -26,6 +26,14 @@
  */
 struct anon_vma {
 	spinlock_t lock;	/* Serialize access to vma list */
+	/*
+	 * NOTE: the LSB of the head.next is set by
+	 * mm_take_all_locks() _after_ taking the above lock. So the
+	 * head must only be read/written after taking the above lock
+	 * to be sure to see a valid next pointer. The LSB bit itself
+	 * is serialized by a system wide lock only visible to
+	 * mm_take_all_locks() (mm_all_locks_mutex).
+	 */
 	struct list_head head;	/* List of private "related" vmas */
 };
 
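
The NOTE above depends on struct list_head pointers being at least word aligned, which leaves bit 0 permanently clear in any valid next pointer; mm_take_all_locks() borrows that bit as an "already locked" marker. A small userspace sketch of the pointer-tagging idea, illustration only and not kernel code:

#include <assert.h>
#include <stdint.h>

struct list_head { struct list_head *next, *prev; };

int main(void)
{
	struct list_head head = { &head, &head };
	uintptr_t p = (uintptr_t)head.next;

	assert((p & 1) == 0);			/* aligned pointer: LSB is free */
	head.next = (struct list_head *)(p | 1);	/* tag: "locked by mm_take_all_locks()" */
	assert(((uintptr_t)head.next & 1) == 1);	/* flag is visible to a later walker */
	head.next = (struct list_head *)((uintptr_t)head.next & ~(uintptr_t)1);
	assert(head.next == &head);		/* pointer restored once the flag is cleared */
	return 0;
}
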
diff --git a/mm/mmap.c b/mm/mmap.c
index 5e0cc99e9cd5..e5f9cb83d6d4 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2268,3 +2268,161 @@ int install_special_mapping(struct mm_struct *mm,
 
 	return 0;
 }
+
+static DEFINE_MUTEX(mm_all_locks_mutex);
+
+static void vm_lock_anon_vma(struct anon_vma *anon_vma)
+{
+	if (!test_bit(0, (unsigned long *) &anon_vma->head.next)) {
+		/*
+		 * The LSB of head.next can't change from under us
+		 * because we hold the mm_all_locks_mutex.
+		 */
+		spin_lock(&anon_vma->lock);
+		/*
+		 * We can safely modify head.next after taking the
+		 * anon_vma->lock. If some other vma in this mm shares
+		 * the same anon_vma we won't take it again.
+		 *
+		 * No need of atomic instructions here, head.next
+		 * can't change from under us thanks to the
+		 * anon_vma->lock.
+		 */
+		if (__test_and_set_bit(0, (unsigned long *)
+				       &anon_vma->head.next))
+			BUG();
+	}
+}
+
+static void vm_lock_mapping(struct address_space *mapping)
+{
+	if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
+		/*
+		 * AS_MM_ALL_LOCKS can't change from under us because
+		 * we hold the mm_all_locks_mutex.
+		 *
+		 * Operations on ->flags have to be atomic because
+		 * even if AS_MM_ALL_LOCKS is stable thanks to the
+		 * mm_all_locks_mutex, there may be other cpus
+		 * changing other bitflags in parallel to us.
+		 */
+		if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
+			BUG();
+		spin_lock(&mapping->i_mmap_lock);
+	}
+}
+
+/*
+ * This operation locks against the VM for all pte/vma/mm related
+ * operations that could ever happen on a certain mm. This includes
+ * vmtruncate, try_to_unmap, and all page faults.
+ *
+ * The caller must take the mmap_sem in write mode before calling
+ * mm_take_all_locks(). The caller isn't allowed to release the
+ * mmap_sem until mm_drop_all_locks() returns.
+ *
+ * mmap_sem in write mode is required in order to block all operations
+ * that could modify pagetables and free pages without needing to
+ * alter the vma layout (for example populate_range() with
+ * nonlinear vmas). It's also needed in write mode to prevent new
+ * anon_vmas from being associated with existing vmas.
+ *
+ * A single task can't take more than one mm_take_all_locks() in a row
+ * or it would deadlock.
+ *
+ * The LSB in anon_vma->head.next and the AS_MM_ALL_LOCKS bitflag in
+ * mapping->flags avoid taking the same lock twice, if more than one
+ * vma in this mm is backed by the same anon_vma or address_space.
+ *
+ * We can take all the locks in random order because the VM code
+ * taking i_mmap_lock or anon_vma->lock outside the mmap_sem never
+ * takes more than one of them in a row. Secondly we're protected
+ * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex.
+ *
+ * mm_take_all_locks() and mm_drop_all_locks() are expensive operations
+ * that may have to take thousands of locks.
+ *
+ * mm_take_all_locks() can fail if it's interrupted by signals.
+ */
+int mm_take_all_locks(struct mm_struct *mm)
+{
+	struct vm_area_struct *vma;
+	int ret = -EINTR;
+
+	BUG_ON(down_read_trylock(&mm->mmap_sem));
+
+	mutex_lock(&mm_all_locks_mutex);
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (signal_pending(current))
+			goto out_unlock;
+		if (vma->anon_vma)
+			vm_lock_anon_vma(vma->anon_vma);
+		if (vma->vm_file && vma->vm_file->f_mapping)
+			vm_lock_mapping(vma->vm_file->f_mapping);
+	}
+	ret = 0;
+
+out_unlock:
+	if (ret)
+		mm_drop_all_locks(mm);
+
+	return ret;
+}
+
+static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
+{
+	if (test_bit(0, (unsigned long *) &anon_vma->head.next)) {
+		/*
+		 * The LSB of head.next can't change to 0 from under
+		 * us because we hold the mm_all_locks_mutex.
+		 *
+		 * We must however clear the bitflag before unlocking
+		 * the vma so the users using the anon_vma->head will
+		 * never see our bitflag.
+		 *
+		 * No need of atomic instructions here, head.next
+		 * can't change from under us until we release the
+		 * anon_vma->lock.
+		 */
+		if (!__test_and_clear_bit(0, (unsigned long *)
+					  &anon_vma->head.next))
+			BUG();
+		spin_unlock(&anon_vma->lock);
+	}
+}
+
+static void vm_unlock_mapping(struct address_space *mapping)
+{
+	if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
+		/*
+		 * AS_MM_ALL_LOCKS can't change to 0 from under us
+		 * because we hold the mm_all_locks_mutex.
+		 */
+		spin_unlock(&mapping->i_mmap_lock);
+		if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
+					&mapping->flags))
+			BUG();
+	}
+}
+
+/*
+ * The mmap_sem cannot be released by the caller until
+ * mm_drop_all_locks() returns.
+ */
+void mm_drop_all_locks(struct mm_struct *mm)
+{
+	struct vm_area_struct *vma;
+
+	BUG_ON(down_read_trylock(&mm->mmap_sem));
+	BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (vma->anon_vma)
+			vm_unlock_anon_vma(vma->anon_vma);
+		if (vma->vm_file && vma->vm_file->f_mapping)
+			vm_unlock_mapping(vma->vm_file->f_mapping);
+	}
+
+	mutex_unlock(&mm_all_locks_mutex);
+}
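
What makes the take/drop pair above safe when many vmas share one anon_vma or address_space is that each shared object is visited exactly once, guarded by its flag bit. A compact userspace sketch of that take-once pattern, illustration only with invented names (struct shared, take_once):

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>

struct shared { bool locked; int lock_count; };

/* Mimics vm_lock_anon_vma()/vm_lock_mapping(): skip objects already marked. */
static void take_once(struct shared *s)
{
	if (!s->locked) {
		s->locked = true;
		s->lock_count++;
	}
}

int main(void)
{
	struct shared anon = { false, 0 };
	/* Three "vmas" backed by the same object, like vmas sharing one anon_vma. */
	struct shared *vmas[] = { &anon, &anon, &anon };

	for (size_t i = 0; i < sizeof(vmas) / sizeof(vmas[0]); i++)
		take_once(vmas[i]);

	assert(anon.lock_count == 1);	/* the shared lock was taken exactly once */
	return 0;
}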