| author | Kirill A. Shutemov <kirill.shutemov@linux.intel.com> | 2017-08-10 18:24:27 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-08-10 18:54:07 -0400 |
| commit | aac2fea94f7a3df8ad1eeb477eb2643f81fd5393 (patch) | |
| tree | 95ee6a145bf9308130fc0c672c968bc73458f8e1 /mm/rmap.c | |
| parent | d041353dc98a6339182cd6f628b4c8f111278cb3 (diff) | |
rmap: do not call mmu_notifier_invalidate_page() under ptl
MMU notifiers can sleep, but in page_mkclean_one() we call
mmu_notifier_invalidate_page() under page table lock.
Let's instead use mmu_notifier_invalidate_range() outside the
page_vma_mapped_walk() loop.
[jglisse@redhat.com: try_to_unmap_one() do not call mmu_notifier under ptl]
Link: http://lkml.kernel.org/r/20170809204333.27485-1-jglisse@redhat.com
Link: http://lkml.kernel.org/r/20170804134928.l4klfcnqatni7vsc@black.fi.intel.com
Fixes: c7ab0d2fdc84 ("mm: convert try_to_unmap_one() to use page_vma_mapped_walk()")
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
Reported-by: axie <axie@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: "Writer, Tim" <Tim.Writer@amd.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
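The constraint driving the change is easy to see in isolation: the page table lock is a spinlock, and mmu_notifier_invalidate_page() may sleep, so the notification has to move past the last unlock. Below is a minimal sketch of the resulting pattern; lock/unlock/notify_range/update_one are hypothetical stand-ins for the kernel primitives, not the kernel code itself:

```c
#include <stdbool.h>

/* Hypothetical stand-ins for the kernel primitives involved. */
struct walk_ops {
	void (*lock)(void);	/* models spin_lock(ptl): no sleeping inside */
	void (*unlock)(void);	/* models spin_unlock(ptl) */
	void (*notify_range)(unsigned long start, unsigned long end); /* may sleep */
};

/*
 * Walk a range entry by entry, mutating entries under the lock, but only
 * *record* that an invalidation is due. The sleepable callback runs once,
 * after the last unlock.
 */
static void walk_and_invalidate(const struct walk_ops *ops,
				unsigned long start, unsigned long end,
				unsigned long step,
				bool (*update_one)(unsigned long addr))
{
	bool invalidation_needed = false;
	unsigned long addr;

	for (addr = start; addr < end; addr += step) {
		ops->lock();
		if (update_one(addr))		/* e.g. wrprotect + clean a pte */
			invalidation_needed = true;	/* defer, don't call */
		ops->unlock();
	}

	if (invalidation_needed)		/* no spinlock held here */
		ops->notify_range(start, end);
}
```

The trade-off is coarser notification: one call covering the whole page (or compound page) rather than one per mapped entry, which is what the invalidation_needed flag and the post-loop mmu_notifier_invalidate_range() call implement in both functions below.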
Diffstat (limited to 'mm/rmap.c')
| -rw-r--r-- | mm/rmap.c | 52 |
1 file changed, 30 insertions(+), 22 deletions(-)
diff --git a/mm/rmap.c b/mm/rmap.c
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -888,10 +888,10 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
                 .flags = PVMW_SYNC,
         };
         int *cleaned = arg;
+        bool invalidation_needed = false;
 
         while (page_vma_mapped_walk(&pvmw)) {
                 int ret = 0;
-                address = pvmw.address;
                 if (pvmw.pte) {
                         pte_t entry;
                         pte_t *pte = pvmw.pte;
@@ -899,11 +899,11 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
                         if (!pte_dirty(*pte) && !pte_write(*pte))
                                 continue;
 
-                        flush_cache_page(vma, address, pte_pfn(*pte));
-                        entry = ptep_clear_flush(vma, address, pte);
+                        flush_cache_page(vma, pvmw.address, pte_pfn(*pte));
+                        entry = ptep_clear_flush(vma, pvmw.address, pte);
                         entry = pte_wrprotect(entry);
                         entry = pte_mkclean(entry);
-                        set_pte_at(vma->vm_mm, address, pte, entry);
+                        set_pte_at(vma->vm_mm, pvmw.address, pte, entry);
                         ret = 1;
                 } else {
 #ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
@@ -913,11 +913,11 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
                         if (!pmd_dirty(*pmd) && !pmd_write(*pmd))
                                 continue;
 
-                        flush_cache_page(vma, address, page_to_pfn(page));
-                        entry = pmdp_huge_clear_flush(vma, address, pmd);
+                        flush_cache_page(vma, pvmw.address, page_to_pfn(page));
+                        entry = pmdp_huge_clear_flush(vma, pvmw.address, pmd);
                         entry = pmd_wrprotect(entry);
                         entry = pmd_mkclean(entry);
-                        set_pmd_at(vma->vm_mm, address, pmd, entry);
+                        set_pmd_at(vma->vm_mm, pvmw.address, pmd, entry);
                         ret = 1;
 #else
                         /* unexpected pmd-mapped page? */
@@ -926,11 +926,16 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
                 }
 
                 if (ret) {
-                        mmu_notifier_invalidate_page(vma->vm_mm, address);
                         (*cleaned)++;
+                        invalidation_needed = true;
                 }
         }
 
+        if (invalidation_needed) {
+                mmu_notifier_invalidate_range(vma->vm_mm, address,
+                                address + (1UL << compound_order(page)));
+        }
+
         return true;
 }
 
@@ -1323,7 +1328,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
         };
         pte_t pteval;
         struct page *subpage;
-        bool ret = true;
+        bool ret = true, invalidation_needed = false;
         enum ttu_flags flags = (enum ttu_flags)arg;
 
         /* munlock has nothing to gain from examining un-locked vmas */
@@ -1363,11 +1368,9 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                 VM_BUG_ON_PAGE(!pvmw.pte, page);
 
                 subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
-                address = pvmw.address;
-
 
                 if (!(flags & TTU_IGNORE_ACCESS)) {
-                        if (ptep_clear_flush_young_notify(vma, address,
+                        if (ptep_clear_flush_young_notify(vma, pvmw.address,
                                                 pvmw.pte)) {
                                 ret = false;
                                 page_vma_mapped_walk_done(&pvmw);
@@ -1376,7 +1379,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                 }
 
                 /* Nuke the page table entry. */
-                flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
+                flush_cache_page(vma, pvmw.address, pte_pfn(*pvmw.pte));
                 if (should_defer_flush(mm, flags)) {
                         /*
                          * We clear the PTE but do not flush so potentially
@@ -1386,11 +1389,12 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                          * transition on a cached TLB entry is written through
                          * and traps if the PTE is unmapped.
                          */
-                        pteval = ptep_get_and_clear(mm, address, pvmw.pte);
+                        pteval = ptep_get_and_clear(mm, pvmw.address,
+                                        pvmw.pte);
 
                         set_tlb_ubc_flush_pending(mm, pte_dirty(pteval));
                 } else {
-                        pteval = ptep_clear_flush(vma, address, pvmw.pte);
+                        pteval = ptep_clear_flush(vma, pvmw.address, pvmw.pte);
                 }
 
                 /* Move the dirty bit to the page. Now the pte is gone. */
@@ -1405,12 +1409,12 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                         if (PageHuge(page)) {
                                 int nr = 1 << compound_order(page);
                                 hugetlb_count_sub(nr, mm);
-                                set_huge_swap_pte_at(mm, address,
+                                set_huge_swap_pte_at(mm, pvmw.address,
                                                      pvmw.pte, pteval,
                                                      vma_mmu_pagesize(vma));
                         } else {
                                 dec_mm_counter(mm, mm_counter(page));
-                                set_pte_at(mm, address, pvmw.pte, pteval);
+                                set_pte_at(mm, pvmw.address, pvmw.pte, pteval);
                         }
 
                 } else if (pte_unused(pteval)) {
@@ -1434,7 +1438,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                         swp_pte = swp_entry_to_pte(entry);
                         if (pte_soft_dirty(pteval))
                                 swp_pte = pte_swp_mksoft_dirty(swp_pte);
-                        set_pte_at(mm, address, pvmw.pte, swp_pte);
+                        set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
                 } else if (PageAnon(page)) {
                         swp_entry_t entry = { .val = page_private(subpage) };
                         pte_t swp_pte;
@@ -1460,7 +1464,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                                  * If the page was redirtied, it cannot be
                                  * discarded. Remap the page to page table.
                                  */
-                                set_pte_at(mm, address, pvmw.pte, pteval);
+                                set_pte_at(mm, pvmw.address, pvmw.pte, pteval);
                                 SetPageSwapBacked(page);
                                 ret = false;
                                 page_vma_mapped_walk_done(&pvmw);
@@ -1468,7 +1472,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                         }
 
                         if (swap_duplicate(entry) < 0) {
-                                set_pte_at(mm, address, pvmw.pte, pteval);
+                                set_pte_at(mm, pvmw.address, pvmw.pte, pteval);
                                 ret = false;
                                 page_vma_mapped_walk_done(&pvmw);
                                 break;
@@ -1484,14 +1488,18 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                         swp_pte = swp_entry_to_pte(entry);
                         if (pte_soft_dirty(pteval))
                                 swp_pte = pte_swp_mksoft_dirty(swp_pte);
-                        set_pte_at(mm, address, pvmw.pte, swp_pte);
+                        set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
                 } else
                         dec_mm_counter(mm, mm_counter_file(page));
 discard:
                 page_remove_rmap(subpage, PageHuge(page));
                 put_page(page);
-                mmu_notifier_invalidate_page(mm, address);
+                invalidation_needed = true;
         }
+
+        if (invalidation_needed)
+                mmu_notifier_invalidate_range(mm, address,
+                                address + (1UL << compound_order(page)));
         return ret;
 }
