 include/linux/sched.h |  7 +++++++
 mm/internal.h         |  4 ++++
 mm/rmap.c             | 28 +++++++++++++++++++++++-------
 mm/vmscan.c           |  7 ++++++-
 4 files changed, 38 insertions(+), 8 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3c602c20c717..a4ab9daa387c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1354,6 +1354,13 @@ struct tlbflush_unmap_batch {
 
         /* True if any bit in cpumask is set */
         bool flush_required;
+
+        /*
+         * If true then the PTE was dirty when unmapped. The entry must be
+         * flushed before IO is initiated or a stale TLB entry potentially
+         * allows an update without redirtying the page.
+         */
+        bool writable;
 };
 
 struct task_struct {
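
Note on the hunk above: a diff shows only a fragment of the struct, so here is the whole thing as it reads with the patch applied. The cpumask member and its comment are not part of this diff and are reconstructed from the pre-patch tree, so treat that portion as a reading aid rather than authoritative; the struct cpumask stand-in exists only so the sketch compiles outside the kernel.

#include <stdbool.h>

/* Stand-in so this compiles outside the kernel tree; the real type lives
 * in <linux/cpumask.h>. */
struct cpumask { unsigned long bits[1]; };

struct tlbflush_unmap_batch {
        /*
         * Each bit set is a CPU that potentially has a TLB entry for one
         * of the PFNs being flushed (reconstructed context, not shown in
         * the hunk above).
         */
        struct cpumask cpumask;

        /* True if any bit in cpumask is set */
        bool flush_required;

        /*
         * If true then the PTE was dirty when unmapped. The entry must be
         * flushed before IO is initiated or a stale TLB entry potentially
         * allows an update without redirtying the page.
         */
        bool writable;
};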
diff --git a/mm/internal.h b/mm/internal.h
index bd6372ac5f7f..1195dd2d6a2b 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -431,10 +431,14 @@ struct tlbflush_unmap_batch;
 
 #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 void try_to_unmap_flush(void);
+void try_to_unmap_flush_dirty(void);
 #else
 static inline void try_to_unmap_flush(void)
 {
 }
+static inline void try_to_unmap_flush_dirty(void)
+{
+}
 
 #endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */
 #endif /* __MM_INTERNAL_H */
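
The empty stubs are the usual Kconfig idiom: call sites invoke try_to_unmap_flush_dirty() unconditionally and the compiler removes the call when CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH is off. A minimal user-space sketch of the same pattern, with a plain macro standing in for the Kconfig symbol:

#include <stdio.h>

/* Stand-in for the Kconfig symbol; uncomment to take the "real" branch. */
/* #define CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */

#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
static void try_to_unmap_flush_dirty(void)
{
        printf("flushing potentially writable TLB entries\n");
}
#else
/* Empty stub: the call below compiles away to nothing. */
static inline void try_to_unmap_flush_dirty(void)
{
}
#endif

int main(void)
{
        /* Unconditional call site, no #ifdef needed, as in shrink_page_list(). */
        try_to_unmap_flush_dirty();
        return 0;
}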
diff --git a/mm/rmap.c b/mm/rmap.c
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -626,16 +626,34 @@ void try_to_unmap_flush(void)
         }
         cpumask_clear(&tlb_ubc->cpumask);
         tlb_ubc->flush_required = false;
+        tlb_ubc->writable = false;
         put_cpu();
 }
 
+/* Flush iff there are potentially writable TLB entries that can race with IO */
+void try_to_unmap_flush_dirty(void)
+{
+        struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
+
+        if (tlb_ubc->writable)
+                try_to_unmap_flush();
+}
+
 static void set_tlb_ubc_flush_pending(struct mm_struct *mm,
-                struct page *page)
+                struct page *page, bool writable)
 {
         struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
 
         cpumask_or(&tlb_ubc->cpumask, &tlb_ubc->cpumask, mm_cpumask(mm));
         tlb_ubc->flush_required = true;
+
+        /*
+         * If the PTE was dirty then it's best to assume it's writable. The
+         * caller must use try_to_unmap_flush_dirty() or try_to_unmap_flush()
+         * before the page is queued for IO.
+         */
+        if (writable)
+                tlb_ubc->writable = true;
 }
 
 /*
@@ -658,7 +676,7 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
 }
 #else
 static void set_tlb_ubc_flush_pending(struct mm_struct *mm,
-                struct page *page)
+                struct page *page, bool writable)
 {
 }
 
@@ -1315,11 +1333,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                 */
                pteval = ptep_get_and_clear(mm, address, pte);
 
-                /* Potentially writable TLBs must be flushed before IO */
-                if (pte_dirty(pteval))
-                        flush_tlb_page(vma, address);
-                else
-                        set_tlb_ubc_flush_pending(mm, page);
+                set_tlb_ubc_flush_pending(mm, page, pte_dirty(pteval));
         } else {
                 pteval = ptep_clear_flush(vma, address, pte);
         }
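
Taken together, the three hunks above change the unmap path from "flush dirty PTEs synchronously, batch only clean ones" to "batch everything, but remember whether anything batched was dirty". The sketch below is a self-contained user-space model of that flow, not kernel code: struct cpumask is reduced to an unsigned long, current->tlb_ubc to a global, and the real IPI-based flush to a printf.

#include <stdbool.h>
#include <stdio.h>

struct tlbflush_unmap_batch {
        unsigned long cpumask;   /* stand-in for struct cpumask */
        bool flush_required;
        bool writable;
};

static struct tlbflush_unmap_batch tlb_ubc;  /* stand-in for current->tlb_ubc */

static void try_to_unmap_flush(void)
{
        if (!tlb_ubc.flush_required)
                return;
        printf("flush TLBs on cpumask 0x%lx\n", tlb_ubc.cpumask);
        tlb_ubc.cpumask = 0;
        tlb_ubc.flush_required = false;
        tlb_ubc.writable = false;
}

/* Flush only if a potentially writable entry could race with IO. */
static void try_to_unmap_flush_dirty(void)
{
        if (tlb_ubc.writable)
                try_to_unmap_flush();
}

static void set_tlb_ubc_flush_pending(unsigned long mm_cpumask, bool writable)
{
        tlb_ubc.cpumask |= mm_cpumask;
        tlb_ubc.flush_required = true;
        if (writable)
                tlb_ubc.writable = true;
}

int main(void)
{
        set_tlb_ubc_flush_pending(0x3, false); /* clean PTE unmapped: defer */
        try_to_unmap_flush_dirty();            /* no-op, nothing writable yet */

        set_tlb_ubc_flush_pending(0x4, true);  /* dirty PTE: mirrors pte_dirty(pteval) */
        try_to_unmap_flush_dirty();            /* flushes before IO could race */
        return 0;
}

The pre-patch behaviour corresponds to flushing immediately whenever the PTE was dirty; deferring instead trades one flush_tlb_page() per dirty page for a single batched flush issued just before IO starts.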
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 99ec00d6a5dd..b1139039122a 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1098,7 +1098,12 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                         if (!sc->may_writepage)
                                 goto keep_locked;
 
-                        /* Page is dirty, try to write it out here */
+                        /*
+                         * Page is dirty. Flush the TLB if a writable entry
+                         * potentially exists to avoid CPU writes after IO
+                         * starts and then write it out here.
+                         */
+                        try_to_unmap_flush_dirty();
                         switch (pageout(page, mapping, sc)) {
                         case PAGE_KEEP:
                                 goto keep_locked;
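
The call is placed immediately before pageout() because the ordering is the whole point: the flush must complete before writeback begins, or a CPU could still modify the page through a stale writable TLB entry without redirtying it. A condensed sketch of that constraint; start_writeback() is an illustrative stand-in for pageout(), not a real kernel function:

#include <stdbool.h>
#include <stdio.h>

static bool writable_tlb_pending = true; /* a dirty PTE was unmapped earlier */

static void try_to_unmap_flush_dirty(void)
{
        if (writable_tlb_pending) {
                printf("TLB flush: closes the write-after-IO window\n");
                writable_tlb_pending = false;
        }
}

static void start_writeback(int page)
{
        printf("writeback of page %d begins\n", page);
}

int main(void)
{
        /*
         * Flush first, then start IO. In the reverse order a CPU could
         * write through a stale writable TLB entry after writeback has
         * begun, and the update would never redirty the page.
         */
        try_to_unmap_flush_dirty();
        start_writeback(42);
        return 0;
}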