path: root/mm
author    Kirill A. Shutemov <kirill.shutemov@linux.intel.com>    2017-08-10 18:24:27 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2017-08-10 18:54:07 -0400
commit    aac2fea94f7a3df8ad1eeb477eb2643f81fd5393 (patch)
tree      95ee6a145bf9308130fc0c672c968bc73458f8e1 /mm
parent    d041353dc98a6339182cd6f628b4c8f111278cb3 (diff)
rmap: do not call mmu_notifier_invalidate_page() under ptl
MMU notifiers can sleep, but in page_mkclean_one() we call
mmu_notifier_invalidate_page() under page table lock.  Let's instead use
mmu_notifier_invalidate_range() outside page_vma_mapped_walk() loop.

[jglisse@redhat.com: try_to_unmap_one() do not call mmu_notifier under ptl]
  Link: http://lkml.kernel.org/r/20170809204333.27485-1-jglisse@redhat.com
Link: http://lkml.kernel.org/r/20170804134928.l4klfcnqatni7vsc@black.fi.intel.com
Fixes: c7ab0d2fdc84 ("mm: convert try_to_unmap_one() to use page_vma_mapped_walk()")
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
Reported-by: axie <axie@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: "Writer, Tim" <Tim.Writer@amd.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
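Editorial note: the pattern the patch applies is simple to state: never call a callback that may sleep while the page table lock (a spinlock) is held; instead, record inside the locked walk that invalidation is pending and issue one range invalidation after the walk releases the lock. The following minimal, self-contained C sketch illustrates only that pattern; it is not kernel code. The names walk_next(), notify_invalidate_range(), struct walk_state and clean_range() are invented stand-ins for page_vma_mapped_walk(), mmu_notifier_invalidate_range() and page_mkclean_one().

/* build: cc -Wall sketch.c -o sketch */
#include <stdbool.h>
#include <stdio.h>

/* Stand-in for the page_vma_mapped_walk() iterator state. */
struct walk_state {
	int idx;
	int nr;
};

/* Stand-in for page_vma_mapped_walk(): returns true while entries remain.
 * In the kernel, the page table lock is held across these iterations. */
static bool walk_next(struct walk_state *w)
{
	return w->idx++ < w->nr;
}

/* Stand-in for mmu_notifier_invalidate_range(): the real callback may
 * sleep, so it must only run after the page table lock is dropped. */
static void notify_invalidate_range(unsigned long start, unsigned long end)
{
	printf("invalidate [%#lx, %#lx)\n", start, end);
}

/* Sketch of the page_mkclean_one()-style structure after the patch. */
static int clean_range(unsigned long start, unsigned long size)
{
	struct walk_state w = { 0, 8 };
	bool invalidation_needed = false;
	int cleaned = 0;

	while (walk_next(&w)) {			/* "ptl" held in here */
		/* ... write-protect and clean the entry ... */
		cleaned++;
		invalidation_needed = true;	/* defer the notifier call */
	}

	/* Lock no longer held: one range invalidation covers the loop. */
	if (invalidation_needed)
		notify_invalidate_range(start, start + size);

	return cleaned;
}

int main(void)
{
	printf("cleaned %d entries\n", clean_range(0x1000UL, 0x2000UL));
	return 0;
}

The real patch keeps the same shape: an invalidation_needed flag set inside the walk and a single mmu_notifier_invalidate_range() call after it, as the hunks below show.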
Diffstat (limited to 'mm')
-rw-r--r--   mm/rmap.c   52
1 file changed, 30 insertions(+), 22 deletions(-)
diff --git a/mm/rmap.c b/mm/rmap.c
index c8993c63eb25..c1286d47aa1f 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -888,10 +888,10 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
 		.flags = PVMW_SYNC,
 	};
 	int *cleaned = arg;
+	bool invalidation_needed = false;
 
 	while (page_vma_mapped_walk(&pvmw)) {
 		int ret = 0;
-		address = pvmw.address;
 		if (pvmw.pte) {
 			pte_t entry;
 			pte_t *pte = pvmw.pte;
@@ -899,11 +899,11 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
 			if (!pte_dirty(*pte) && !pte_write(*pte))
 				continue;
 
-			flush_cache_page(vma, address, pte_pfn(*pte));
-			entry = ptep_clear_flush(vma, address, pte);
+			flush_cache_page(vma, pvmw.address, pte_pfn(*pte));
+			entry = ptep_clear_flush(vma, pvmw.address, pte);
 			entry = pte_wrprotect(entry);
 			entry = pte_mkclean(entry);
-			set_pte_at(vma->vm_mm, address, pte, entry);
+			set_pte_at(vma->vm_mm, pvmw.address, pte, entry);
 			ret = 1;
 		} else {
 #ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
@@ -913,11 +913,11 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
 			if (!pmd_dirty(*pmd) && !pmd_write(*pmd))
 				continue;
 
-			flush_cache_page(vma, address, page_to_pfn(page));
-			entry = pmdp_huge_clear_flush(vma, address, pmd);
+			flush_cache_page(vma, pvmw.address, page_to_pfn(page));
+			entry = pmdp_huge_clear_flush(vma, pvmw.address, pmd);
 			entry = pmd_wrprotect(entry);
 			entry = pmd_mkclean(entry);
-			set_pmd_at(vma->vm_mm, address, pmd, entry);
+			set_pmd_at(vma->vm_mm, pvmw.address, pmd, entry);
 			ret = 1;
 #else
 			/* unexpected pmd-mapped page? */
@@ -926,11 +926,16 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
 		}
 
 		if (ret) {
-			mmu_notifier_invalidate_page(vma->vm_mm, address);
 			(*cleaned)++;
+			invalidation_needed = true;
 		}
 	}
 
+	if (invalidation_needed) {
+		mmu_notifier_invalidate_range(vma->vm_mm, address,
+				address + (1UL << compound_order(page)));
+	}
+
 	return true;
 }
 
@@ -1323,7 +1328,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 	};
 	pte_t pteval;
 	struct page *subpage;
-	bool ret = true;
+	bool ret = true, invalidation_needed = false;
 	enum ttu_flags flags = (enum ttu_flags)arg;
 
 	/* munlock has nothing to gain from examining un-locked vmas */
@@ -1363,11 +1368,9 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 		VM_BUG_ON_PAGE(!pvmw.pte, page);
 
 		subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
-		address = pvmw.address;
-
 
 		if (!(flags & TTU_IGNORE_ACCESS)) {
-			if (ptep_clear_flush_young_notify(vma, address,
+			if (ptep_clear_flush_young_notify(vma, pvmw.address,
 						pvmw.pte)) {
 				ret = false;
 				page_vma_mapped_walk_done(&pvmw);
@@ -1376,7 +1379,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 		}
 
 		/* Nuke the page table entry. */
-		flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
+		flush_cache_page(vma, pvmw.address, pte_pfn(*pvmw.pte));
 		if (should_defer_flush(mm, flags)) {
 			/*
 			 * We clear the PTE but do not flush so potentially
@@ -1386,11 +1389,12 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			 * transition on a cached TLB entry is written through
 			 * and traps if the PTE is unmapped.
 			 */
-			pteval = ptep_get_and_clear(mm, address, pvmw.pte);
+			pteval = ptep_get_and_clear(mm, pvmw.address,
+						    pvmw.pte);
 
 			set_tlb_ubc_flush_pending(mm, pte_dirty(pteval));
 		} else {
-			pteval = ptep_clear_flush(vma, address, pvmw.pte);
+			pteval = ptep_clear_flush(vma, pvmw.address, pvmw.pte);
 		}
 
 		/* Move the dirty bit to the page. Now the pte is gone. */
@@ -1405,12 +1409,12 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			if (PageHuge(page)) {
 				int nr = 1 << compound_order(page);
 				hugetlb_count_sub(nr, mm);
-				set_huge_swap_pte_at(mm, address,
+				set_huge_swap_pte_at(mm, pvmw.address,
 						     pvmw.pte, pteval,
 						     vma_mmu_pagesize(vma));
 			} else {
 				dec_mm_counter(mm, mm_counter(page));
-				set_pte_at(mm, address, pvmw.pte, pteval);
+				set_pte_at(mm, pvmw.address, pvmw.pte, pteval);
 			}
 
 		} else if (pte_unused(pteval)) {
@@ -1434,7 +1438,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			swp_pte = swp_entry_to_pte(entry);
 			if (pte_soft_dirty(pteval))
 				swp_pte = pte_swp_mksoft_dirty(swp_pte);
-			set_pte_at(mm, address, pvmw.pte, swp_pte);
+			set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
 		} else if (PageAnon(page)) {
 			swp_entry_t entry = { .val = page_private(subpage) };
 			pte_t swp_pte;
@@ -1460,7 +1464,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 				 * If the page was redirtied, it cannot be
 				 * discarded. Remap the page to page table.
 				 */
-				set_pte_at(mm, address, pvmw.pte, pteval);
+				set_pte_at(mm, pvmw.address, pvmw.pte, pteval);
 				SetPageSwapBacked(page);
 				ret = false;
 				page_vma_mapped_walk_done(&pvmw);
@@ -1468,7 +1472,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			}
 
 			if (swap_duplicate(entry) < 0) {
-				set_pte_at(mm, address, pvmw.pte, pteval);
+				set_pte_at(mm, pvmw.address, pvmw.pte, pteval);
 				ret = false;
 				page_vma_mapped_walk_done(&pvmw);
 				break;
@@ -1484,14 +1488,18 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			swp_pte = swp_entry_to_pte(entry);
 			if (pte_soft_dirty(pteval))
 				swp_pte = pte_swp_mksoft_dirty(swp_pte);
-			set_pte_at(mm, address, pvmw.pte, swp_pte);
+			set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
 		} else
 			dec_mm_counter(mm, mm_counter_file(page));
 discard:
 		page_remove_rmap(subpage, PageHuge(page));
 		put_page(page);
-		mmu_notifier_invalidate_page(mm, address);
+		invalidation_needed = true;
 	}
+
+	if (invalidation_needed)
+		mmu_notifier_invalidate_range(mm, address,
+				address + (1UL << compound_order(page)));
 	return ret;
 }
 