Diffstat (limited to 'mm/memory.c')
-rw-r--r--	mm/memory.c | 50
1 file changed, 47 insertions(+), 3 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index 9aefaae46858..d14b251a25a6 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1045,6 +1045,26 @@ no_page_table:
 	return page;
 }
 
+/* Can we do the FOLL_ANON optimization? */
+static inline int use_zero_page(struct vm_area_struct *vma)
+{
+	/*
+	 * We don't want to optimize FOLL_ANON for make_pages_present()
+	 * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
+	 * we want to get the page from the page tables to make sure
+	 * that we serialize and update with any other user of that
+	 * mapping.
+	 */
+	if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
+		return 0;
+	/*
+	 * And if we have a fault or a nopfn routine, it's not an
+	 * anonymous region.
+	 */
+	return !vma->vm_ops ||
+		(!vma->vm_ops->fault && !vma->vm_ops->nopfn);
+}
+
 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		unsigned long start, int len, int write, int force,
 		struct page **pages, struct vm_area_struct **vmas)
@@ -1119,8 +1139,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		foll_flags = FOLL_TOUCH;
 		if (pages)
 			foll_flags |= FOLL_GET;
-		if (!write && !(vma->vm_flags & VM_LOCKED) &&
-		    (!vma->vm_ops || !vma->vm_ops->fault))
+		if (!write && use_zero_page(vma))
 			foll_flags |= FOLL_ANON;
 
 		do {
@@ -1766,7 +1785,6 @@ gotten:
 	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
 	if (likely(pte_same(*page_table, orig_pte))) {
 		if (old_page) {
-			page_remove_rmap(old_page, vma);
 			if (!PageAnon(old_page)) {
 				dec_mm_counter(mm, file_rss);
 				inc_mm_counter(mm, anon_rss);
@@ -1788,6 +1806,32 @@ gotten:
 		lru_cache_add_active(new_page);
 		page_add_new_anon_rmap(new_page, vma, address);
 
+		if (old_page) {
+			/*
+			 * Only after switching the pte to the new page may
+			 * we remove the mapcount here. Otherwise another
+			 * process may come and find the rmap count decremented
+			 * before the pte is switched to the new page, and
+			 * "reuse" the old page writing into it while our pte
+			 * here still points into it and can be read by other
+			 * threads.
+			 *
+			 * The critical issue is to order this
+			 * page_remove_rmap with the ptep_clear_flush above.
+			 * Those stores are ordered by (if nothing else,)
+			 * the barrier present in the atomic_add_negative
+			 * in page_remove_rmap.
+			 *
+			 * Then the TLB flush in ptep_clear_flush ensures that
+			 * no process can access the old page before the
+			 * decremented mapcount is visible. And the old page
+			 * cannot be reused until after the decremented
+			 * mapcount is visible. So transitively, TLBs to
+			 * old page will be flushed before it can be reused.
+			 */
+			page_remove_rmap(old_page, vma);
+		}
+
 		/* Free the old page.. */
 		new_page = old_page;
 		ret |= VM_FAULT_WRITE;
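
Taken together, the first two hunks replace the open-coded test in get_user_pages() with use_zero_page(), which additionally refuses the FOLL_ANON shortcut for VM_SHARED mappings and for VMAs that provide a ->nopfn method. The stand-alone model below is a hedged sketch of just that predicate, not kernel code: struct vma, the flag values and nopfn_ops are invented, simplified stand-ins for vm_area_struct, VM_LOCKED/VM_SHARED and a driver's vm_operations_struct.

/*
 * Stand-alone model of the use_zero_page() gate (hedged sketch,
 * not kernel code; all types and flag values are simplified
 * stand-ins for the real kernel structures).
 */
#include <stdio.h>

#define VM_LOCKED	0x1
#define VM_SHARED	0x2

struct vm_ops {
	void *fault;	/* stand-in for a ->fault handler */
	void *nopfn;	/* stand-in for a ->nopfn handler */
};

struct vma {
	unsigned long vm_flags;
	struct vm_ops *vm_ops;
};

/*
 * Mirrors the predicate added in the first hunk: only an unlocked,
 * private mapping with no fault/nopfn methods (i.e. plain anonymous
 * memory) may be satisfied with the zero page on a read-only lookup.
 */
static int use_zero_page(const struct vma *vma)
{
	if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
		return 0;
	return !vma->vm_ops ||
		(!vma->vm_ops->fault && !vma->vm_ops->nopfn);
}

int main(void)
{
	struct vm_ops nopfn_ops = { .fault = NULL, .nopfn = (void *)1 };

	struct vma anon   = { 0, NULL };		/* plain anonymous mapping */
	struct vma locked = { VM_LOCKED, NULL };	/* mlock()ed region        */
	struct vma driver = { 0, &nopfn_ops };		/* mapping with a nopfn op */

	printf("anon:   %d\n", use_zero_page(&anon));	/* 1: FOLL_ANON allowed    */
	printf("locked: %d\n", use_zero_page(&locked));	/* 0: must fault pages in  */
	printf("driver: %d\n", use_zero_page(&driver));	/* 0: not anonymous memory */
	return 0;
}

Built as plain C11, it prints 1 only for the plain anonymous case, matching the conditions under which the patched get_user_pages() sets FOLL_ANON.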
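
The long comment in the last hunk is a memory-ordering argument: the mapcount may only be seen to drop after the pte has stopped pointing at the old page. The user-space analogue below is a hedged sketch of that ordering rule only, not of the kernel's code; pte, mapcount, struct buf and the replace_* functions are invented names, and C11 seq_cst atomics stand in for the barrier the comment attributes to atomic_add_negative() in page_remove_rmap().

/*
 * User-space analogue of the do_wp_page ordering fix (hedged sketch):
 * "pte" is a pointer other readers may still follow, "mapcount"
 * stands in for the old page's mapcount, and a reuse path is assumed
 * to recycle a buffer once its count drops.
 */
#include <stdatomic.h>
#include <stdlib.h>

struct buf {
	atomic_int mapcount;	/* analogue of the old page's mapcount */
	char data[64];
};

static _Atomic(struct buf *) pte;	/* analogue of the pte slot */

/*
 * Buggy order (the code removed in the third hunk): the count is
 * dropped while "pte" still points at old_page, so a concurrent
 * "reuse once unmapped" path may scribble on data that readers can
 * still reach through pte.
 */
void replace_buggy(struct buf *old_page, struct buf *new_page)
{
	atomic_fetch_sub(&old_page->mapcount, 1);	/* too early */
	atomic_store(&pte, new_page);
}

/*
 * Fixed order (the code added in the fourth hunk): switch the pointer
 * first -- the kernel also flushes TLBs at this point -- and only then
 * publish the dropped count, so reuse cannot begin while old_page is
 * still reachable.
 */
void replace_fixed(struct buf *old_page, struct buf *new_page)
{
	atomic_store(&pte, new_page);			/* ptep_clear_flush + set_pte_at */
	atomic_fetch_sub(&old_page->mapcount, 1);	/* page_remove_rmap */
}

int main(void)
{
	struct buf *old_page = calloc(1, sizeof(*old_page));
	struct buf *new_page = calloc(1, sizeof(*new_page));

	atomic_init(&old_page->mapcount, 1);
	atomic_store(&pte, old_page);

	replace_fixed(old_page, new_page);	/* mapcount now 0, pte -> new_page */

	free(old_page);
	free(new_page);
	return 0;
}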
