 include/linux/sched.h |  7
 mm/internal.h         |  4
 mm/rmap.c             | 28
 mm/vmscan.c           |  7
 4 files changed, 38 insertions(+), 8 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3c602c20c717..a4ab9daa387c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1354,6 +1354,13 @@ struct tlbflush_unmap_batch {
 
 	/* True if any bit in cpumask is set */
 	bool flush_required;
+
+	/*
+	 * If true then the PTE was dirty when unmapped. The entry must be
+	 * flushed before IO is initiated or a stale TLB entry potentially
+	 * allows an update without redirtying the page.
+	 */
+	bool writable;
 };
 
 struct task_struct {
diff --git a/mm/internal.h b/mm/internal.h
index bd6372ac5f7f..1195dd2d6a2b 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -431,10 +431,14 @@ struct tlbflush_unmap_batch;
 
 #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 void try_to_unmap_flush(void);
+void try_to_unmap_flush_dirty(void);
 #else
 static inline void try_to_unmap_flush(void)
 {
 }
+static inline void try_to_unmap_flush_dirty(void)
+{
+}
 
 #endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */
 #endif /* __MM_INTERNAL_H */
diff --git a/mm/rmap.c b/mm/rmap.c
index 326d5d89e45c..0db38e7d0a72 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -626,16 +626,34 @@ void try_to_unmap_flush(void)
 	}
 	cpumask_clear(&tlb_ubc->cpumask);
 	tlb_ubc->flush_required = false;
+	tlb_ubc->writable = false;
 	put_cpu();
 }
 
+/* Flush iff there are potentially writable TLB entries that can race with IO */
+void try_to_unmap_flush_dirty(void)
+{
+	struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
+
+	if (tlb_ubc->writable)
+		try_to_unmap_flush();
+}
+
 static void set_tlb_ubc_flush_pending(struct mm_struct *mm,
-		struct page *page)
+		struct page *page, bool writable)
 {
 	struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
 
 	cpumask_or(&tlb_ubc->cpumask, &tlb_ubc->cpumask, mm_cpumask(mm));
 	tlb_ubc->flush_required = true;
+
+	/*
+	 * If the PTE was dirty then it's best to assume it's writable. The
+	 * caller must use try_to_unmap_flush_dirty() or try_to_unmap_flush()
+	 * before the page is queued for IO.
+	 */
+	if (writable)
+		tlb_ubc->writable = true;
 }
 
 /*
@@ -658,7 +676,7 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
 }
 #else
 static void set_tlb_ubc_flush_pending(struct mm_struct *mm,
-		struct page *page)
+		struct page *page, bool writable)
 {
 }
 
@@ -1315,11 +1333,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 		 */
 		pteval = ptep_get_and_clear(mm, address, pte);
 
-		/* Potentially writable TLBs must be flushed before IO */
-		if (pte_dirty(pteval))
-			flush_tlb_page(vma, address);
-		else
-			set_tlb_ubc_flush_pending(mm, page);
+		set_tlb_ubc_flush_pending(mm, page, pte_dirty(pteval));
 	} else {
 		pteval = ptep_clear_flush(vma, address, pte);
 	}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 99ec00d6a5dd..b1139039122a 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1098,7 +1098,12 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			if (!sc->may_writepage)
 				goto keep_locked;
 
-			/* Page is dirty, try to write it out here */
+			/*
+			 * Page is dirty. Flush the TLB if a writable entry
+			 * potentially exists to avoid CPU writes after IO
+			 * starts and then write it out here.
+			 */
+			try_to_unmap_flush_dirty();
 			switch (pageout(page, mapping, sc)) {
 			case PAGE_KEEP:
 				goto keep_locked;
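
Taken together, the hunks defer the TLB flush for clean PTEs but remember whenever a dirty (and therefore potentially writable) PTE was unmapped, so reclaim can force a flush just before writeback starts. The standalone C program below is a minimal userspace sketch of that contract, not kernel code: the struct and function names mirror the patch, but the cpumask tracking and the actual shootdown are reduced to a printf, and start_writeback() is a hypothetical stand-in for pageout() in shrink_page_list().

/*
 * Userspace illustration of the batching contract added by this patch.
 * Names mirror mm/rmap.c where possible; the real cpumask handling, IPIs
 * and page bookkeeping are stubbed out. Illustration only.
 */
#include <stdbool.h>
#include <stdio.h>

struct tlbflush_unmap_batch {
	bool flush_required;	/* some PTE was cleared without a flush */
	bool writable;		/* at least one of those PTEs was dirty */
};

static struct tlbflush_unmap_batch tlb_ubc;	/* stands in for current->tlb_ubc */

static void set_tlb_ubc_flush_pending(bool pte_was_dirty)
{
	tlb_ubc.flush_required = true;
	/* A dirty PTE is assumed writable; a flush is then mandatory before IO. */
	if (pte_was_dirty)
		tlb_ubc.writable = true;
}

static void try_to_unmap_flush(void)
{
	if (!tlb_ubc.flush_required)
		return;
	printf("flush TLBs on every CPU in the batched cpumask\n");
	tlb_ubc.flush_required = false;
	tlb_ubc.writable = false;
}

/* Flush only if a potentially writable entry could race with IO. */
static void try_to_unmap_flush_dirty(void)
{
	if (tlb_ubc.writable)
		try_to_unmap_flush();
}

/* Hypothetical stand-in for pageout(): queues a dirty page for writeback. */
static void start_writeback(int page)
{
	printf("writeback page %d\n", page);
}

int main(void)
{
	/* Mimic shrink_page_list(): unmap a batch of pages, write out the dirty ones. */
	for (int page = 0; page < 4; page++) {
		bool dirty = (page % 2 == 0);	/* pretend even pages had dirty PTEs */

		/* try_to_unmap_one(): clear the PTE, defer the flush. */
		set_tlb_ubc_flush_pending(dirty);

		if (dirty) {
			/* No CPU may keep a writable entry once IO starts. */
			try_to_unmap_flush_dirty();
			start_writeback(page);
		}
	}
	/* Pages that were only mapped clean need one batched flush at the end. */
	try_to_unmap_flush();
	return 0;
}

In the sketch, only iterations that saw a dirty PTE pay for an immediate flush; clean unmaps ride along until the single try_to_unmap_flush() at the end, which is the saving the batching is meant to provide.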