 include/linux/mm.h      |   3 +
 include/linux/pagemap.h |   1 +
 include/linux/rmap.h    |   8 ++
 mm/mmap.c               | 158 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 170 insertions(+), 0 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6e695eaab4ce..866a3dbe5c75 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1104,6 +1104,9 @@ extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
 	unsigned long addr, unsigned long len, pgoff_t pgoff);
 extern void exit_mmap(struct mm_struct *);
 
+extern int mm_take_all_locks(struct mm_struct *mm);
+extern void mm_drop_all_locks(struct mm_struct *mm);
+
 #ifdef CONFIG_PROC_FS
 /* From fs/proc/base.c. callers must _not_ hold the mm's exe_file_lock */
 extern void added_exe_file_vma(struct mm_struct *mm);
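
These two declarations are the entire public interface of the patch. For reference, the intended calling pattern looks roughly like this (a minimal sketch around a hypothetical caller, do_work_under_all_locks(); the bracketing rules are the ones documented in mm/mmap.c below):

	/* Hypothetical caller, not part of the patch. */
	static int do_work_under_all_locks(struct mm_struct *mm)
	{
		int ret;

		down_write(&mm->mmap_sem);
		ret = mm_take_all_locks(mm);	/* -EINTR if a signal is pending */
		if (!ret) {
			/* every anon_vma->lock and i_mmap_lock of this mm is held */
			mm_drop_all_locks(mm);
		}
		up_write(&mm->mmap_sem);	/* legal only after mm_drop_all_locks() */
		return ret;
	}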
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index a81d81890422..a39b38ccdc97 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -20,6 +20,7 @@
  */
 #define AS_EIO		(__GFP_BITS_SHIFT + 0)	/* IO error on async write */
 #define AS_ENOSPC	(__GFP_BITS_SHIFT + 1)	/* ENOSPC on async write */
+#define AS_MM_ALL_LOCKS	(__GFP_BITS_SHIFT + 2)	/* under mm_take_all_locks() */
 
 static inline void mapping_set_error(struct address_space *mapping, int error)
 {
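
The new flag slots in above the existing AS_* bits. Note that mapping->flags doubles as storage for the mapping's gfp mask, which is why all AS_* flags start at __GFP_BITS_SHIFT; a sketch of the resulting bit layout (illustrative only, not part of the patch):

	/*
	 * mapping->flags bit layout (sketch):
	 *
	 *   bits 0 .. __GFP_BITS_SHIFT-1   gfp mask for page cache allocations
	 *   __GFP_BITS_SHIFT + 0           AS_EIO
	 *   __GFP_BITS_SHIFT + 1           AS_ENOSPC
	 *   __GFP_BITS_SHIFT + 2           AS_MM_ALL_LOCKS  (added here)
	 */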
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 1383692ac5bd..69407f85e10b 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -26,6 +26,14 @@
  */
 struct anon_vma {
 	spinlock_t lock;	/* Serialize access to vma list */
+	/*
+	 * NOTE: the LSB of head.next is set by
+	 * mm_take_all_locks() _after_ taking the above lock. So the
+	 * head must only be read/written after taking the above lock
+	 * to be sure to see a valid next pointer. The LSB bit itself
+	 * is serialized by a system-wide lock only visible to
+	 * mm_take_all_locks() (mm_all_locks_mutex).
+	 */
 	struct list_head head;	/* List of private "related" vmas */
 };
 
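
The comment documents a pointer-tagging trick: struct list_head is word-aligned, so the LSB of a valid head.next is always zero and can be borrowed as an "already locked by mm_take_all_locks()" marker. A standalone userspace sketch of the round trip (toy types and helper names, not kernel code):

	#include <assert.h>
	#include <stdio.h>

	/* Toy stand-in for the kernel's struct list_head. */
	struct list_head {
		struct list_head *next, *prev;
	};

	/* Word alignment guarantees the LSB of a valid pointer is zero. */
	static int head_is_marked(const struct list_head *head)
	{
		return (unsigned long) head->next & 1UL;
	}

	static void mark_head(struct list_head *head)
	{
		head->next = (struct list_head *) ((unsigned long) head->next | 1UL);
	}

	static void unmark_head(struct list_head *head)
	{
		head->next = (struct list_head *) ((unsigned long) head->next & ~1UL);
	}

	int main(void)
	{
		struct list_head h = { &h, &h };	/* empty list points at itself */

		assert(!head_is_marked(&h));
		mark_head(&h);
		assert(head_is_marked(&h));	/* a second locker would skip this one */
		unmark_head(&h);
		assert(h.next == &h);		/* original pointer restored intact */
		printf("LSB marker round trip ok\n");
		return 0;
	}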
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2268,3 +2268,161 @@ int install_special_mapping(struct mm_struct *mm,
 
 	return 0;
 }
+
+static DEFINE_MUTEX(mm_all_locks_mutex);
+
+static void vm_lock_anon_vma(struct anon_vma *anon_vma)
+{
+	if (!test_bit(0, (unsigned long *) &anon_vma->head.next)) {
+		/*
+		 * The LSB of head.next can't change from under us
+		 * because we hold the mm_all_locks_mutex.
+		 */
+		spin_lock(&anon_vma->lock);
+		/*
+		 * We can safely modify head.next after taking the
+		 * anon_vma->lock. If some other vma in this mm shares
+		 * the same anon_vma we won't take it again.
+		 *
+		 * No need for atomic instructions here, head.next
+		 * can't change from under us thanks to the
+		 * anon_vma->lock.
+		 */
+		if (__test_and_set_bit(0, (unsigned long *)
+				       &anon_vma->head.next))
+			BUG();
+	}
+}
+
+static void vm_lock_mapping(struct address_space *mapping)
+{
+	if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
+		/*
+		 * AS_MM_ALL_LOCKS can't change from under us because
+		 * we hold the mm_all_locks_mutex.
+		 *
+		 * Operations on ->flags have to be atomic because
+		 * even if AS_MM_ALL_LOCKS is stable thanks to the
+		 * mm_all_locks_mutex, there may be other cpus
+		 * changing other bitflags in parallel to us.
+		 */
+		if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
+			BUG();
+		spin_lock(&mapping->i_mmap_lock);
+	}
+}
+
+/*
+ * This operation locks against the VM for all pte/vma/mm related
+ * operations that could ever happen on a certain mm. This includes
+ * vmtruncate, try_to_unmap, and all page faults.
+ *
+ * The caller must take the mmap_sem in write mode before calling
+ * mm_take_all_locks(). The caller isn't allowed to release the
+ * mmap_sem until mm_drop_all_locks() returns.
+ *
+ * mmap_sem in write mode is required in order to block all operations
+ * that could modify pagetables and free pages without need of
+ * altering the vma layout (for example populate_range() with
+ * nonlinear vmas). It's also needed in write mode to prevent new
+ * anon_vmas from being associated with existing vmas.
+ *
+ * A single task can't take more than one mm_take_all_locks() in a row
+ * or it would deadlock.
+ *
+ * The LSB in anon_vma->head.next and the AS_MM_ALL_LOCKS bitflag in
+ * mapping->flags avoid taking the same lock twice, if more than one
+ * vma in this mm is backed by the same anon_vma or address_space.
+ *
+ * We can take all the locks in random order because the VM code
+ * taking i_mmap_lock or anon_vma->lock outside the mmap_sem never
+ * takes more than one of them in a row. Secondly, we're protected
+ * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex.
+ *
+ * mm_take_all_locks() and mm_drop_all_locks() are expensive
+ * operations that may have to take thousands of locks.
+ *
+ * mm_take_all_locks() can fail if it's interrupted by signals.
+ */
+int mm_take_all_locks(struct mm_struct *mm)
+{
+	struct vm_area_struct *vma;
+	int ret = -EINTR;
+
+	BUG_ON(down_read_trylock(&mm->mmap_sem));
+
+	mutex_lock(&mm_all_locks_mutex);
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (signal_pending(current))
+			goto out_unlock;
+		if (vma->anon_vma)
+			vm_lock_anon_vma(vma->anon_vma);
+		if (vma->vm_file && vma->vm_file->f_mapping)
+			vm_lock_mapping(vma->vm_file->f_mapping);
+	}
+	ret = 0;
+
+out_unlock:
+	if (ret)
+		mm_drop_all_locks(mm);
+
+	return ret;
+}
+
+static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
+{
+	if (test_bit(0, (unsigned long *) &anon_vma->head.next)) {
+		/*
+		 * The LSB of head.next can't change to 0 from under
+		 * us because we hold the mm_all_locks_mutex.
+		 *
+		 * We must however clear the bitflag before unlocking
+		 * the vma so that users of anon_vma->head never see
+		 * our bitflag.
+		 *
+		 * No need for atomic instructions here, head.next
+		 * can't change from under us until we release the
+		 * anon_vma->lock.
+		 */
+		if (!__test_and_clear_bit(0, (unsigned long *)
+					  &anon_vma->head.next))
+			BUG();
+		spin_unlock(&anon_vma->lock);
+	}
+}
+
+static void vm_unlock_mapping(struct address_space *mapping)
+{
+	if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
+		/*
+		 * AS_MM_ALL_LOCKS can't change to 0 from under us
+		 * because we hold the mm_all_locks_mutex.
+		 */
+		spin_unlock(&mapping->i_mmap_lock);
+		if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
+					&mapping->flags))
+			BUG();
+	}
+}
+
+/*
+ * The mmap_sem cannot be released by the caller until
+ * mm_drop_all_locks() returns.
+ */
+void mm_drop_all_locks(struct mm_struct *mm)
+{
+	struct vm_area_struct *vma;
+
+	BUG_ON(down_read_trylock(&mm->mmap_sem));
+	BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (vma->anon_vma)
+			vm_unlock_anon_vma(vma->anon_vma);
+		if (vma->vm_file && vma->vm_file->f_mapping)
+			vm_unlock_mapping(vma->vm_file->f_mapping);
+	}
+
+	mutex_unlock(&mm_all_locks_mutex);
+}