diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-03-21 16:32:19 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-03-21 16:32:19 -0400 |
commit | 3a990a52f9f25f45469e272017a31e7a3fda60ed (patch) | |
tree | 366f639d9ce1e907b65caa72bc098df6c4b5a240 | |
parent | 3556485f1595e3964ba539e39ea682acbb835cee (diff) | |
parent | f5cc4eef9987d0b517364d01e290d6438e47ee5d (diff) |
Merge branch 'vm' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull munmap/truncate race fixes from Al Viro:
"Fixes for racy use of unmap_vmas() on truncate-related codepaths"
* 'vm' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  VM: make zap_page_range() callers that act on a single VMA use separate helper
  VM: make unmap_vmas() return void
  VM: don't bother with feeding upper limit to tlb_finish_mmu() in exit_mmap()
  VM: make zap_page_range() return void
  VM: can't go through the inner loop in unmap_vmas() more than once...
  VM: unmap_page_range() can return void
-rw-r--r-- | include/linux/mm.h | 4 | ||||
-rw-r--r-- | mm/memory.c | 133 | ||||
-rw-r--r-- | mm/mmap.c | 5 |
3 files changed, 84 insertions, 58 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h index 17b27cd269c4..b5bb54d6d667 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -893,9 +893,9 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, | |||
893 | 893 | ||
894 | int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, | 894 | int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, |
895 | unsigned long size); | 895 | unsigned long size); |
896 | unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address, | 896 | void zap_page_range(struct vm_area_struct *vma, unsigned long address, |
897 | unsigned long size, struct zap_details *); | 897 | unsigned long size, struct zap_details *); |
898 | unsigned long unmap_vmas(struct mmu_gather *tlb, | 898 | void unmap_vmas(struct mmu_gather *tlb, |
899 | struct vm_area_struct *start_vma, unsigned long start_addr, | 899 | struct vm_area_struct *start_vma, unsigned long start_addr, |
900 | unsigned long end_addr, unsigned long *nr_accounted, | 900 | unsigned long end_addr, unsigned long *nr_accounted, |
901 | struct zap_details *); | 901 | struct zap_details *); |
diff --git a/mm/memory.c b/mm/memory.c index 347e5fad1cfa..8438c157e4d9 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -1282,10 +1282,10 @@ static inline unsigned long zap_pud_range(struct mmu_gather *tlb, | |||
1282 | return addr; | 1282 | return addr; |
1283 | } | 1283 | } |
1284 | 1284 | ||
1285 | static unsigned long unmap_page_range(struct mmu_gather *tlb, | 1285 | static void unmap_page_range(struct mmu_gather *tlb, |
1286 | struct vm_area_struct *vma, | 1286 | struct vm_area_struct *vma, |
1287 | unsigned long addr, unsigned long end, | 1287 | unsigned long addr, unsigned long end, |
1288 | struct zap_details *details) | 1288 | struct zap_details *details) |
1289 | { | 1289 | { |
1290 | pgd_t *pgd; | 1290 | pgd_t *pgd; |
1291 | unsigned long next; | 1291 | unsigned long next; |
@@ -1305,8 +1305,47 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb, | |||
1305 | } while (pgd++, addr = next, addr != end); | 1305 | } while (pgd++, addr = next, addr != end); |
1306 | tlb_end_vma(tlb, vma); | 1306 | tlb_end_vma(tlb, vma); |
1307 | mem_cgroup_uncharge_end(); | 1307 | mem_cgroup_uncharge_end(); |
1308 | } | ||
1308 | 1309 | ||
1309 | return addr; | 1310 | |
1311 | static void unmap_single_vma(struct mmu_gather *tlb, | ||
1312 | struct vm_area_struct *vma, unsigned long start_addr, | ||
1313 | unsigned long end_addr, unsigned long *nr_accounted, | ||
1314 | struct zap_details *details) | ||
1315 | { | ||
1316 | unsigned long start = max(vma->vm_start, start_addr); | ||
1317 | unsigned long end; | ||
1318 | |||
1319 | if (start >= vma->vm_end) | ||
1320 | return; | ||
1321 | end = min(vma->vm_end, end_addr); | ||
1322 | if (end <= vma->vm_start) | ||
1323 | return; | ||
1324 | |||
1325 | if (vma->vm_flags & VM_ACCOUNT) | ||
1326 | *nr_accounted += (end - start) >> PAGE_SHIFT; | ||
1327 | |||
1328 | if (unlikely(is_pfn_mapping(vma))) | ||
1329 | untrack_pfn_vma(vma, 0, 0); | ||
1330 | |||
1331 | if (start != end) { | ||
1332 | if (unlikely(is_vm_hugetlb_page(vma))) { | ||
1333 | /* | ||
1334 | * It is undesirable to test vma->vm_file as it | ||
1335 | * should be non-null for valid hugetlb area. | ||
1336 | * However, vm_file will be NULL in the error | ||
1337 | * cleanup path of do_mmap_pgoff. When | ||
1338 | * hugetlbfs ->mmap method fails, | ||
1339 | * do_mmap_pgoff() nullifies vma->vm_file | ||
1340 | * before calling this function to clean up. | ||
1341 | * Since no pte has actually been setup, it is | ||
1342 | * safe to do nothing in this case. | ||
1343 | */ | ||
1344 | if (vma->vm_file) | ||
1345 | unmap_hugepage_range(vma, start, end, NULL); | ||
1346 | } else | ||
1347 | unmap_page_range(tlb, vma, start, end, details); | ||
1348 | } | ||
1310 | } | 1349 | } |
1311 | 1350 | ||
1312 | /** | 1351 | /** |
@@ -1318,8 +1357,6 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb, | |||
1318 | * @nr_accounted: Place number of unmapped pages in vm-accountable vma's here | 1357 | * @nr_accounted: Place number of unmapped pages in vm-accountable vma's here |
1319 | * @details: details of nonlinear truncation or shared cache invalidation | 1358 | * @details: details of nonlinear truncation or shared cache invalidation |
1320 | * | 1359 | * |
1321 | * Returns the end address of the unmapping (restart addr if interrupted). | ||
1322 | * | ||
1323 | * Unmap all pages in the vma list. | 1360 | * Unmap all pages in the vma list. |
1324 | * | 1361 | * |
1325 | * Only addresses between `start' and `end' will be unmapped. | 1362 | * Only addresses between `start' and `end' will be unmapped. |
@@ -1331,55 +1368,18 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb, | |||
1331 | * ensure that any thus-far unmapped pages are flushed before unmap_vmas() | 1368 | * ensure that any thus-far unmapped pages are flushed before unmap_vmas() |
1332 | * drops the lock and schedules. | 1369 | * drops the lock and schedules. |
1333 | */ | 1370 | */ |
1334 | unsigned long unmap_vmas(struct mmu_gather *tlb, | 1371 | void unmap_vmas(struct mmu_gather *tlb, |
1335 | struct vm_area_struct *vma, unsigned long start_addr, | 1372 | struct vm_area_struct *vma, unsigned long start_addr, |
1336 | unsigned long end_addr, unsigned long *nr_accounted, | 1373 | unsigned long end_addr, unsigned long *nr_accounted, |
1337 | struct zap_details *details) | 1374 | struct zap_details *details) |
1338 | { | 1375 | { |
1339 | unsigned long start = start_addr; | ||
1340 | struct mm_struct *mm = vma->vm_mm; | 1376 | struct mm_struct *mm = vma->vm_mm; |
1341 | 1377 | ||
1342 | mmu_notifier_invalidate_range_start(mm, start_addr, end_addr); | 1378 | mmu_notifier_invalidate_range_start(mm, start_addr, end_addr); |
1343 | for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) { | 1379 | for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) |
1344 | unsigned long end; | 1380 | unmap_single_vma(tlb, vma, start_addr, end_addr, nr_accounted, |
1345 | 1381 | details); | |
1346 | start = max(vma->vm_start, start_addr); | ||
1347 | if (start >= vma->vm_end) | ||
1348 | continue; | ||
1349 | end = min(vma->vm_end, end_addr); | ||
1350 | if (end <= vma->vm_start) | ||
1351 | continue; | ||
1352 | |||
1353 | if (vma->vm_flags & VM_ACCOUNT) | ||
1354 | *nr_accounted += (end - start) >> PAGE_SHIFT; | ||
1355 | |||
1356 | if (unlikely(is_pfn_mapping(vma))) | ||
1357 | untrack_pfn_vma(vma, 0, 0); | ||
1358 | |||
1359 | while (start != end) { | ||
1360 | if (unlikely(is_vm_hugetlb_page(vma))) { | ||
1361 | /* | ||
1362 | * It is undesirable to test vma->vm_file as it | ||
1363 | * should be non-null for valid hugetlb area. | ||
1364 | * However, vm_file will be NULL in the error | ||
1365 | * cleanup path of do_mmap_pgoff. When | ||
1366 | * hugetlbfs ->mmap method fails, | ||
1367 | * do_mmap_pgoff() nullifies vma->vm_file | ||
1368 | * before calling this function to clean up. | ||
1369 | * Since no pte has actually been setup, it is | ||
1370 | * safe to do nothing in this case. | ||
1371 | */ | ||
1372 | if (vma->vm_file) | ||
1373 | unmap_hugepage_range(vma, start, end, NULL); | ||
1374 | |||
1375 | start = end; | ||
1376 | } else | ||
1377 | start = unmap_page_range(tlb, vma, start, end, details); | ||
1378 | } | ||
1379 | } | ||
1380 | |||
1381 | mmu_notifier_invalidate_range_end(mm, start_addr, end_addr); | 1382 | mmu_notifier_invalidate_range_end(mm, start_addr, end_addr); |
1382 | return start; /* which is now the end (or restart) address */ | ||
1383 | } | 1383 | } |
1384 | 1384 | ||
1385 | /** | 1385 | /** |
@@ -1388,8 +1388,34 @@ unsigned long unmap_vmas(struct mmu_gather *tlb, | |||
1388 | * @address: starting address of pages to zap | 1388 | * @address: starting address of pages to zap |
1389 | * @size: number of bytes to zap | 1389 | * @size: number of bytes to zap |
1390 | * @details: details of nonlinear truncation or shared cache invalidation | 1390 | * @details: details of nonlinear truncation or shared cache invalidation |
1391 | * | ||
1392 | * Caller must protect the VMA list | ||
1393 | */ | ||
1394 | void zap_page_range(struct vm_area_struct *vma, unsigned long address, | ||
1395 | unsigned long size, struct zap_details *details) | ||
1396 | { | ||
1397 | struct mm_struct *mm = vma->vm_mm; | ||
1398 | struct mmu_gather tlb; | ||
1399 | unsigned long end = address + size; | ||
1400 | unsigned long nr_accounted = 0; | ||
1401 | |||
1402 | lru_add_drain(); | ||
1403 | tlb_gather_mmu(&tlb, mm, 0); | ||
1404 | update_hiwater_rss(mm); | ||
1405 | unmap_vmas(&tlb, vma, address, end, &nr_accounted, details); | ||
1406 | tlb_finish_mmu(&tlb, address, end); | ||
1407 | } | ||
1408 | |||
1409 | /** | ||
1410 | * zap_page_range_single - remove user pages in a given range | ||
1411 | * @vma: vm_area_struct holding the applicable pages | ||
1412 | * @address: starting address of pages to zap | ||
1413 | * @size: number of bytes to zap | ||
1414 | * @details: details of nonlinear truncation or shared cache invalidation | ||
1415 | * | ||
1416 | * The range must fit into one VMA. | ||
1391 | */ | 1417 | */ |
1392 | unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address, | 1418 | static void zap_page_range_single(struct vm_area_struct *vma, unsigned long address, |
1393 | unsigned long size, struct zap_details *details) | 1419 | unsigned long size, struct zap_details *details) |
1394 | { | 1420 | { |
1395 | struct mm_struct *mm = vma->vm_mm; | 1421 | struct mm_struct *mm = vma->vm_mm; |
@@ -1400,9 +1426,10 @@ unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address, | |||
1400 | lru_add_drain(); | 1426 | lru_add_drain(); |
1401 | tlb_gather_mmu(&tlb, mm, 0); | 1427 | tlb_gather_mmu(&tlb, mm, 0); |
1402 | update_hiwater_rss(mm); | 1428 | update_hiwater_rss(mm); |
1403 | end = unmap_vmas(&tlb, vma, address, end, &nr_accounted, details); | 1429 | mmu_notifier_invalidate_range_start(mm, address, end); |
1430 | unmap_single_vma(&tlb, vma, address, end, &nr_accounted, details); | ||
1431 | mmu_notifier_invalidate_range_end(mm, address, end); | ||
1404 | tlb_finish_mmu(&tlb, address, end); | 1432 | tlb_finish_mmu(&tlb, address, end); |
1405 | return end; | ||
1406 | } | 1433 | } |
1407 | 1434 | ||
1408 | /** | 1435 | /** |
@@ -1423,7 +1450,7 @@ int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, | |||
1423 | if (address < vma->vm_start || address + size > vma->vm_end || | 1450 | if (address < vma->vm_start || address + size > vma->vm_end || |
1424 | !(vma->vm_flags & VM_PFNMAP)) | 1451 | !(vma->vm_flags & VM_PFNMAP)) |
1425 | return -1; | 1452 | return -1; |
1426 | zap_page_range(vma, address, size, NULL); | 1453 | zap_page_range_single(vma, address, size, NULL); |
1427 | return 0; | 1454 | return 0; |
1428 | } | 1455 | } |
1429 | EXPORT_SYMBOL_GPL(zap_vma_ptes); | 1456 | EXPORT_SYMBOL_GPL(zap_vma_ptes); |
@@ -2770,7 +2797,7 @@ static void unmap_mapping_range_vma(struct vm_area_struct *vma, | |||
2770 | unsigned long start_addr, unsigned long end_addr, | 2797 | unsigned long start_addr, unsigned long end_addr, |
2771 | struct zap_details *details) | 2798 | struct zap_details *details) |
2772 | { | 2799 | { |
2773 | zap_page_range(vma, start_addr, end_addr - start_addr, details); | 2800 | zap_page_range_single(vma, start_addr, end_addr - start_addr, details); |
2774 | } | 2801 | } |
2775 | 2802 | ||
2776 | static inline void unmap_mapping_range_tree(struct prio_tree_root *root, | 2803 | static inline void unmap_mapping_range_tree(struct prio_tree_root *root, |
diff --git a/mm/mmap.c b/mm/mmap.c --- a/mm/mmap.c +++ b/mm/mmap.c | |||
@@ -2237,7 +2237,6 @@ void exit_mmap(struct mm_struct *mm) | |||
2237 | struct mmu_gather tlb; | 2237 | struct mmu_gather tlb; |
2238 | struct vm_area_struct *vma; | 2238 | struct vm_area_struct *vma; |
2239 | unsigned long nr_accounted = 0; | 2239 | unsigned long nr_accounted = 0; |
2240 | unsigned long end; | ||
2241 | 2240 | ||
2242 | /* mm's last user has gone, and its about to be pulled down */ | 2241 | /* mm's last user has gone, and its about to be pulled down */ |
2243 | mmu_notifier_release(mm); | 2242 | mmu_notifier_release(mm); |
@@ -2262,11 +2261,11 @@ void exit_mmap(struct mm_struct *mm) | |||
2262 | tlb_gather_mmu(&tlb, mm, 1); | 2261 | tlb_gather_mmu(&tlb, mm, 1); |
2263 | /* update_hiwater_rss(mm) here? but nobody should be looking */ | 2262 | /* update_hiwater_rss(mm) here? but nobody should be looking */ |
2264 | /* Use -1 here to ensure all VMAs in the mm are unmapped */ | 2263 | /* Use -1 here to ensure all VMAs in the mm are unmapped */ |
2265 | end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL); | 2264 | unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL); |
2266 | vm_unacct_memory(nr_accounted); | 2265 | vm_unacct_memory(nr_accounted); |
2267 | 2266 | ||
2268 | free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0); | 2267 | free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0); |
2269 | tlb_finish_mmu(&tlb, 0, end); | 2268 | tlb_finish_mmu(&tlb, 0, -1); |
2270 | 2269 | ||
2271 | /* | 2270 | /* |
2272 | * Walk the list again, actually closing and freeing it, | 2271 | * Walk the list again, actually closing and freeing it, |