Diffstat (limited to 'mm/rmap.c')
-rw-r--r--	mm/rmap.c	225
1 file changed, 2 insertions(+), 223 deletions(-)
@@ -590,9 +590,8 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
 		if (!vma->anon_vma || !page__anon_vma ||
 		    vma->anon_vma->root != page__anon_vma->root)
 			return -EFAULT;
-	} else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) {
-		if (!vma->vm_file ||
-		    vma->vm_file->f_mapping != page->mapping)
+	} else if (page->mapping) {
+		if (!vma->vm_file || vma->vm_file->f_mapping != page->mapping)
 			return -EFAULT;
 	} else
 		return -EFAULT;
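The simplified check works because, once VM_NONLINEAR is gone, a file page's virtual address in a VMA always follows linearly from its offset in the file. A standalone sketch of that linear rule (illustration only, with an assumed 4 KiB page size and made-up VMA values; not the kernel's own helper):

#include <stdio.h>

#define PAGE_SHIFT	12	/* assumption: 4 KiB pages */

/*
 * address = VMA start + (page's offset in the file - file offset where
 * the VMA begins), converted from pages to bytes.
 */
static unsigned long linear_address(unsigned long vm_start,
				    unsigned long vm_pgoff,
				    unsigned long page_index)
{
	return vm_start + ((page_index - vm_pgoff) << PAGE_SHIFT);
}

int main(void)
{
	/* hypothetical VMA: maps the file from page 0x10 at 0x700000000000 */
	printf("%#lx\n", linear_address(0x700000000000UL, 0x10, 0x13));
	/* prints 0x700000003000: the page sits 3 pages into the mapping */
	return 0;
}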
@@ -1274,7 +1273,6 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 		if (pte_soft_dirty(pteval))
 			swp_pte = pte_swp_mksoft_dirty(swp_pte);
 		set_pte_at(mm, address, pte, swp_pte);
-		BUG_ON(pte_file(*pte));
 	} else if (IS_ENABLED(CONFIG_MIGRATION) &&
 		   (flags & TTU_MIGRATION)) {
 		/* Establish migration entry for a file page */
@@ -1316,211 +1314,6 @@ out_mlock:
 	return ret;
 }
 
-/*
- * objrmap doesn't work for nonlinear VMAs because the assumption that
- * offset-into-file correlates with offset-into-virtual-addresses does not hold.
- * Consequently, given a particular page and its ->index, we cannot locate the
- * ptes which are mapping that page without an exhaustive linear search.
- *
- * So what this code does is a mini "virtual scan" of each nonlinear VMA which
- * maps the file to which the target page belongs. The ->vm_private_data field
- * holds the current cursor into that scan. Successive searches will circulate
- * around the vma's virtual address space.
- *
- * So as more replacement pressure is applied to the pages in a nonlinear VMA,
- * more scanning pressure is placed against them as well. Eventually pages
- * will become fully unmapped and are eligible for eviction.
- *
- * For very sparsely populated VMAs this is a little inefficient - chances are
- * there there won't be many ptes located within the scan cluster. In this case
- * maybe we could scan further - to the end of the pte page, perhaps.
- *
- * Mlocked pages: check VM_LOCKED under mmap_sem held for read, if we can
- * acquire it without blocking. If vma locked, mlock the pages in the cluster,
- * rather than unmapping them. If we encounter the "check_page" that vmscan is
- * trying to unmap, return SWAP_MLOCK, else default SWAP_AGAIN.
- */
-#define CLUSTER_SIZE	min(32*PAGE_SIZE, PMD_SIZE)
-#define CLUSTER_MASK	(~(CLUSTER_SIZE - 1))
-
-static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
-		struct vm_area_struct *vma, struct page *check_page)
-{
-	struct mm_struct *mm = vma->vm_mm;
-	pmd_t *pmd;
-	pte_t *pte;
-	pte_t pteval;
-	spinlock_t *ptl;
-	struct page *page;
-	unsigned long address;
-	unsigned long mmun_start;	/* For mmu_notifiers */
-	unsigned long mmun_end;		/* For mmu_notifiers */
-	unsigned long end;
-	int ret = SWAP_AGAIN;
-	int locked_vma = 0;
-
-	address = (vma->vm_start + cursor) & CLUSTER_MASK;
-	end = address + CLUSTER_SIZE;
-	if (address < vma->vm_start)
-		address = vma->vm_start;
-	if (end > vma->vm_end)
-		end = vma->vm_end;
-
-	pmd = mm_find_pmd(mm, address);
-	if (!pmd)
-		return ret;
-
-	mmun_start = address;
-	mmun_end = end;
-	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
-
-	/*
-	 * If we can acquire the mmap_sem for read, and vma is VM_LOCKED,
-	 * keep the sem while scanning the cluster for mlocking pages.
-	 */
-	if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
-		locked_vma = (vma->vm_flags & VM_LOCKED);
-		if (!locked_vma)
-			up_read(&vma->vm_mm->mmap_sem); /* don't need it */
-	}
-
-	pte = pte_offset_map_lock(mm, pmd, address, &ptl);
-
-	/* Update high watermark before we lower rss */
-	update_hiwater_rss(mm);
-
-	for (; address < end; pte++, address += PAGE_SIZE) {
-		if (!pte_present(*pte))
-			continue;
-		page = vm_normal_page(vma, address, *pte);
-		BUG_ON(!page || PageAnon(page));
-
-		if (locked_vma) {
-			if (page == check_page) {
-				/* we know we have check_page locked */
-				mlock_vma_page(page);
-				ret = SWAP_MLOCK;
-			} else if (trylock_page(page)) {
-				/*
-				 * If we can lock the page, perform mlock.
-				 * Otherwise leave the page alone, it will be
-				 * eventually encountered again later.
-				 */
-				mlock_vma_page(page);
-				unlock_page(page);
-			}
-			continue;	/* don't unmap */
-		}
-
-		/*
-		 * No need for _notify because we're within an
-		 * mmu_notifier_invalidate_range_ {start|end} scope.
-		 */
-		if (ptep_clear_flush_young(vma, address, pte))
-			continue;
-
-		/* Nuke the page table entry. */
-		flush_cache_page(vma, address, pte_pfn(*pte));
-		pteval = ptep_clear_flush_notify(vma, address, pte);
-
-		/* If nonlinear, store the file page offset in the pte. */
-		if (page->index != linear_page_index(vma, address)) {
-			pte_t ptfile = pgoff_to_pte(page->index);
-			if (pte_soft_dirty(pteval))
-				ptfile = pte_file_mksoft_dirty(ptfile);
-			set_pte_at(mm, address, pte, ptfile);
-		}
-
-		/* Move the dirty bit to the physical page now the pte is gone. */
-		if (pte_dirty(pteval))
-			set_page_dirty(page);
-
-		page_remove_rmap(page);
-		page_cache_release(page);
-		dec_mm_counter(mm, MM_FILEPAGES);
-		(*mapcount)--;
-	}
-	pte_unmap_unlock(pte - 1, ptl);
-	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
-	if (locked_vma)
-		up_read(&vma->vm_mm->mmap_sem);
-	return ret;
-}
-
-static int try_to_unmap_nonlinear(struct page *page,
-		struct address_space *mapping, void *arg)
-{
-	struct vm_area_struct *vma;
-	int ret = SWAP_AGAIN;
-	unsigned long cursor;
-	unsigned long max_nl_cursor = 0;
-	unsigned long max_nl_size = 0;
-	unsigned int mapcount;
-
-	list_for_each_entry(vma,
-			&mapping->i_mmap_nonlinear, shared.nonlinear) {
-
-		cursor = (unsigned long) vma->vm_private_data;
-		if (cursor > max_nl_cursor)
-			max_nl_cursor = cursor;
-		cursor = vma->vm_end - vma->vm_start;
-		if (cursor > max_nl_size)
-			max_nl_size = cursor;
-	}
-
-	if (max_nl_size == 0) {	/* all nonlinears locked or reserved ? */
-		return SWAP_FAIL;
-	}
-
-	/*
-	 * We don't try to search for this page in the nonlinear vmas,
-	 * and page_referenced wouldn't have found it anyway. Instead
-	 * just walk the nonlinear vmas trying to age and unmap some.
-	 * The mapcount of the page we came in with is irrelevant,
-	 * but even so use it as a guide to how hard we should try?
-	 */
-	mapcount = page_mapcount(page);
-	if (!mapcount)
-		return ret;
-
-	cond_resched();
-
-	max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK;
-	if (max_nl_cursor == 0)
-		max_nl_cursor = CLUSTER_SIZE;
-
-	do {
-		list_for_each_entry(vma,
-				&mapping->i_mmap_nonlinear, shared.nonlinear) {
-
-			cursor = (unsigned long) vma->vm_private_data;
-			while (cursor < max_nl_cursor &&
-				cursor < vma->vm_end - vma->vm_start) {
-				if (try_to_unmap_cluster(cursor, &mapcount,
-						vma, page) == SWAP_MLOCK)
-					ret = SWAP_MLOCK;
-				cursor += CLUSTER_SIZE;
-				vma->vm_private_data = (void *) cursor;
-				if ((int)mapcount <= 0)
-					return ret;
-			}
-			vma->vm_private_data = (void *) max_nl_cursor;
-		}
-		cond_resched();
-		max_nl_cursor += CLUSTER_SIZE;
-	} while (max_nl_cursor <= max_nl_size);
-
-	/*
-	 * Don't loop forever (perhaps all the remaining pages are
-	 * in locked vmas). Reset cursor on all unreserved nonlinear
-	 * vmas, now forgetting on which ones it had fallen behind.
-	 */
-	list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.nonlinear)
-		vma->vm_private_data = NULL;
-
-	return ret;
-}
-
 bool is_vma_temporary_stack(struct vm_area_struct *vma)
 {
 	int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP);
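The heart of the removed scan is the cluster-window arithmetic built on CLUSTER_SIZE and CLUSTER_MASK. A small standalone example of that calculation, assuming 4 KiB pages and 2 MiB PMDs (so CLUSTER_SIZE works out to 32 pages, 128 KiB); the VMA bounds and cursor below are made-up values:

#include <stdio.h>

#define PAGE_SIZE	4096UL			/* assumption: 4 KiB pages */
#define PMD_SIZE	(2UL << 20)		/* assumption: 2 MiB PMDs */
#define CLUSTER_SIZE	(32 * PAGE_SIZE < PMD_SIZE ? 32 * PAGE_SIZE : PMD_SIZE)
#define CLUSTER_MASK	(~(CLUSTER_SIZE - 1))

int main(void)
{
	unsigned long vm_start = 0x7f0000001000UL;	/* hypothetical VMA */
	unsigned long vm_end   = 0x7f0000200000UL;
	unsigned long cursor   = 0x21000UL;		/* scan offset into the VMA */

	/* Round the scan position down to a CLUSTER_SIZE-aligned window... */
	unsigned long address = (vm_start + cursor) & CLUSTER_MASK;
	unsigned long end = address + CLUSTER_SIZE;

	/* ...then clamp the window to the VMA boundaries, as the removed code did. */
	if (address < vm_start)
		address = vm_start;
	if (end > vm_end)
		end = vm_end;

	printf("scan ptes for [%#lx, %#lx)\n", address, end);
	/* prints [0x7f0000020000, 0x7f0000040000): one 128 KiB cluster */
	return 0;
}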
@@ -1566,7 +1359,6 @@ int try_to_unmap(struct page *page, enum ttu_flags flags)
 		.rmap_one = try_to_unmap_one,
 		.arg = (void *)flags,
 		.done = page_not_mapped,
-		.file_nonlinear = try_to_unmap_nonlinear,
 		.anon_lock = page_lock_anon_vma_read,
 	};
 
@@ -1612,12 +1404,6 @@ int try_to_munlock(struct page *page)
 		.rmap_one = try_to_unmap_one,
 		.arg = (void *)TTU_MUNLOCK,
 		.done = page_not_mapped,
-		/*
-		 * We don't bother to try to find the munlocked page in
-		 * nonlinears. It's costly. Instead, later, page reclaim logic
-		 * may call try_to_unmap() and recover PG_mlocked lazily.
-		 */
-		.file_nonlinear = NULL,
 		.anon_lock = page_lock_anon_vma_read,
 
 	};
@@ -1748,13 +1534,6 @@ static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc)
 			goto done;
 	}
 
-	if (!rwc->file_nonlinear)
-		goto done;
-
-	if (list_empty(&mapping->i_mmap_nonlinear))
-		goto done;
-
-	ret = rwc->file_nonlinear(page, mapping, rwc->arg);
 done:
 	i_mmap_unlock_read(mapping);
 	return ret;
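With the nonlinear hook gone, the walk that remains is driven purely by the rmap_walk_control callbacks seen above: rmap_one() is applied to each VMA that maps the page, and done() ends the walk early once nothing is left mapped. A userspace model of that callback pattern (simplified stand-in types and names, not kernel code):

#include <stdbool.h>
#include <stdio.h>

struct fake_vma { unsigned long start; };

struct walk_control {
	/* called for each VMA that maps the page; return false to stop */
	bool (*rmap_one)(struct fake_vma *vma, void *arg);
	/* called after each VMA; return true when there is nothing left to do */
	bool (*done)(void *arg);
	void *arg;
};

static void walk_file_vmas(struct fake_vma *vmas, int n, struct walk_control *wc)
{
	for (int i = 0; i < n; i++) {
		if (!wc->rmap_one(&vmas[i], wc->arg))
			break;
		if (wc->done && wc->done(wc->arg))
			break;
	}
}

static bool unmap_one(struct fake_vma *vma, void *arg)
{
	int *mapcount = arg;

	printf("unmapping from vma at %#lx\n", vma->start);
	(*mapcount)--;
	return true;
}

static bool page_fully_unmapped(void *arg)
{
	return *(int *)arg == 0;	/* stop once no mappings remain */
}

int main(void)
{
	struct fake_vma vmas[] = { { 0x400000 }, { 0x500000 }, { 0x600000 } };
	int mapcount = 2;		/* page mapped by the first two VMAs only */
	struct walk_control wc = {
		.rmap_one = unmap_one,
		.done = page_fully_unmapped,
		.arg = &mapcount,
	};

	walk_file_vmas(vmas, 3, &wc);	/* stops after the second VMA */
	return 0;
}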