author		Linus Torvalds <torvalds@linux-foundation.org>	2012-03-21 16:32:19 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-03-21 16:32:19 -0400
commit		3a990a52f9f25f45469e272017a31e7a3fda60ed
tree		366f639d9ce1e907b65caa72bc098df6c4b5a240 /mm/memory.c
parent		3556485f1595e3964ba539e39ea682acbb835cee
parent		f5cc4eef9987d0b517364d01e290d6438e47ee5d
Merge branch 'vm' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull munmap/truncate race fixes from Al Viro:
 "Fixes for racy use of unmap_vmas() on truncate-related codepaths"

* 'vm' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  VM: make zap_page_range() callers that act on a single VMA use separate helper
  VM: make unmap_vmas() return void
  VM: don't bother with feeding upper limit to tlb_finish_mmu() in exit_mmap()
  VM: make zap_page_range() return void
  VM: can't go through the inner loop in unmap_vmas() more than once...
  VM: unmap_page_range() can return void
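For orientation, a minimal sketch of the call pattern this series leaves behind (illustrative only, not part of the commit; the wrapper functions below are hypothetical, and since zap_page_range_single() is static it would have to live in mm/memory.c): callers that act on exactly one VMA go through the new single-VMA helper, callers that may span several VMAs keep using zap_page_range(), and none of these routines returns a restart address any more.

/* Hypothetical wrappers sketched against the post-merge helpers. */
static void zap_one_vma_example(struct vm_area_struct *vma,
				unsigned long address, unsigned long size)
{
	/* Range must fit inside this one VMA; nothing to check on return. */
	zap_page_range_single(vma, address, size, NULL);
}

static void zap_across_vmas_example(struct vm_area_struct *vma,
				    unsigned long address, unsigned long size)
{
	/* Multi-VMA path: still walks the VMA list via unmap_vmas(). */
	zap_page_range(vma, address, size, NULL);
}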
Diffstat (limited to 'mm/memory.c')
-rw-r--r--	mm/memory.c	133
1 file changed, 80 insertions(+), 53 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index 347e5fad1cfa..8438c157e4d9 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1282,10 +1282,10 @@ static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
 	return addr;
 }
 
-static unsigned long unmap_page_range(struct mmu_gather *tlb,
+static void unmap_page_range(struct mmu_gather *tlb,
 				struct vm_area_struct *vma,
 				unsigned long addr, unsigned long end,
 				struct zap_details *details)
 {
 	pgd_t *pgd;
 	unsigned long next;
@@ -1305,8 +1305,47 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb,
 	} while (pgd++, addr = next, addr != end);
 	tlb_end_vma(tlb, vma);
 	mem_cgroup_uncharge_end();
+}
 
-	return addr;
+
+static void unmap_single_vma(struct mmu_gather *tlb,
+		struct vm_area_struct *vma, unsigned long start_addr,
+		unsigned long end_addr, unsigned long *nr_accounted,
+		struct zap_details *details)
+{
+	unsigned long start = max(vma->vm_start, start_addr);
+	unsigned long end;
+
+	if (start >= vma->vm_end)
+		return;
+	end = min(vma->vm_end, end_addr);
+	if (end <= vma->vm_start)
+		return;
+
+	if (vma->vm_flags & VM_ACCOUNT)
+		*nr_accounted += (end - start) >> PAGE_SHIFT;
+
+	if (unlikely(is_pfn_mapping(vma)))
+		untrack_pfn_vma(vma, 0, 0);
+
+	if (start != end) {
+		if (unlikely(is_vm_hugetlb_page(vma))) {
+			/*
+			 * It is undesirable to test vma->vm_file as it
+			 * should be non-null for valid hugetlb area.
+			 * However, vm_file will be NULL in the error
+			 * cleanup path of do_mmap_pgoff. When
+			 * hugetlbfs ->mmap method fails,
+			 * do_mmap_pgoff() nullifies vma->vm_file
+			 * before calling this function to clean up.
+			 * Since no pte has actually been setup, it is
+			 * safe to do nothing in this case.
+			 */
+			if (vma->vm_file)
+				unmap_hugepage_range(vma, start, end, NULL);
+		} else
+			unmap_page_range(tlb, vma, start, end, details);
+	}
 }
 
 /**
@@ -1318,8 +1357,6 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb,
  * @nr_accounted: Place number of unmapped pages in vm-accountable vma's here
  * @details: details of nonlinear truncation or shared cache invalidation
  *
- * Returns the end address of the unmapping (restart addr if interrupted).
- *
  * Unmap all pages in the vma list.
  *
  * Only addresses between `start' and `end' will be unmapped.
@@ -1331,55 +1368,18 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb,
  * ensure that any thus-far unmapped pages are flushed before unmap_vmas()
  * drops the lock and schedules.
  */
-unsigned long unmap_vmas(struct mmu_gather *tlb,
+void unmap_vmas(struct mmu_gather *tlb,
 		struct vm_area_struct *vma, unsigned long start_addr,
 		unsigned long end_addr, unsigned long *nr_accounted,
 		struct zap_details *details)
 {
-	unsigned long start = start_addr;
 	struct mm_struct *mm = vma->vm_mm;
 
 	mmu_notifier_invalidate_range_start(mm, start_addr, end_addr);
-	for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) {
-		unsigned long end;
-
-		start = max(vma->vm_start, start_addr);
-		if (start >= vma->vm_end)
-			continue;
-		end = min(vma->vm_end, end_addr);
-		if (end <= vma->vm_start)
-			continue;
-
-		if (vma->vm_flags & VM_ACCOUNT)
-			*nr_accounted += (end - start) >> PAGE_SHIFT;
-
-		if (unlikely(is_pfn_mapping(vma)))
-			untrack_pfn_vma(vma, 0, 0);
-
-		while (start != end) {
-			if (unlikely(is_vm_hugetlb_page(vma))) {
-				/*
-				 * It is undesirable to test vma->vm_file as it
-				 * should be non-null for valid hugetlb area.
-				 * However, vm_file will be NULL in the error
-				 * cleanup path of do_mmap_pgoff. When
-				 * hugetlbfs ->mmap method fails,
-				 * do_mmap_pgoff() nullifies vma->vm_file
-				 * before calling this function to clean up.
-				 * Since no pte has actually been setup, it is
-				 * safe to do nothing in this case.
-				 */
-				if (vma->vm_file)
-					unmap_hugepage_range(vma, start, end, NULL);
-
-				start = end;
-			} else
-				start = unmap_page_range(tlb, vma, start, end, details);
-		}
-	}
-
+	for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next)
+		unmap_single_vma(tlb, vma, start_addr, end_addr, nr_accounted,
+				 details);
 	mmu_notifier_invalidate_range_end(mm, start_addr, end_addr);
-	return start;	/* which is now the end (or restart) address */
 }
 
 /**
@@ -1388,8 +1388,34 @@ unsigned long unmap_vmas(struct mmu_gather *tlb,
  * @address: starting address of pages to zap
  * @size: number of bytes to zap
  * @details: details of nonlinear truncation or shared cache invalidation
+ *
+ * Caller must protect the VMA list
+ */
+void zap_page_range(struct vm_area_struct *vma, unsigned long address,
+		unsigned long size, struct zap_details *details)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	struct mmu_gather tlb;
+	unsigned long end = address + size;
+	unsigned long nr_accounted = 0;
+
+	lru_add_drain();
+	tlb_gather_mmu(&tlb, mm, 0);
+	update_hiwater_rss(mm);
+	unmap_vmas(&tlb, vma, address, end, &nr_accounted, details);
+	tlb_finish_mmu(&tlb, address, end);
+}
+
+/**
+ * zap_page_range_single - remove user pages in a given range
+ * @vma: vm_area_struct holding the applicable pages
+ * @address: starting address of pages to zap
+ * @size: number of bytes to zap
+ * @details: details of nonlinear truncation or shared cache invalidation
+ *
+ * The range must fit into one VMA.
  */
-unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
+static void zap_page_range_single(struct vm_area_struct *vma, unsigned long address,
 		unsigned long size, struct zap_details *details)
 {
 	struct mm_struct *mm = vma->vm_mm;
@@ -1400,9 +1426,10 @@ unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
 	lru_add_drain();
 	tlb_gather_mmu(&tlb, mm, 0);
 	update_hiwater_rss(mm);
-	end = unmap_vmas(&tlb, vma, address, end, &nr_accounted, details);
+	mmu_notifier_invalidate_range_start(mm, address, end);
+	unmap_single_vma(&tlb, vma, address, end, &nr_accounted, details);
+	mmu_notifier_invalidate_range_end(mm, address, end);
 	tlb_finish_mmu(&tlb, address, end);
-	return end;
 }
 
 /**
@@ -1423,7 +1450,7 @@ int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
 	if (address < vma->vm_start || address + size > vma->vm_end ||
 			!(vma->vm_flags & VM_PFNMAP))
 		return -1;
-	zap_page_range(vma, address, size, NULL);
+	zap_page_range_single(vma, address, size, NULL);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(zap_vma_ptes);
@@ -2770,7 +2797,7 @@ static void unmap_mapping_range_vma(struct vm_area_struct *vma,
 		unsigned long start_addr, unsigned long end_addr,
 		struct zap_details *details)
 {
-	zap_page_range(vma, start_addr, end_addr - start_addr, details);
+	zap_page_range_single(vma, start_addr, end_addr - start_addr, details);
 }
 
 static inline void unmap_mapping_range_tree(struct prio_tree_root *root,