Diffstat (limited to 'mm/rmap.c')
-rw-r--r--        mm/rmap.c        369
1 file changed, 258 insertions(+), 111 deletions(-)
@@ -23,21 +23,22 @@
  * inode->i_mutex (while writing or truncating, not reading or faulting)
  * mm->mmap_sem
  * page->flags PG_locked (lock_page)
+ * hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share)
  * mapping->i_mmap_rwsem
  * anon_vma->rwsem
  * mm->page_table_lock or pte_lock
  * zone->lru_lock (in mark_page_accessed, isolate_lru_page)
  * swap_lock (in swap_duplicate, swap_info_get)
  * mmlist_lock (in mmput, drain_mmlist and others)
  * mapping->private_lock (in __set_page_dirty_buffers)
  * mem_cgroup_{begin,end}_page_stat (memcg->move_lock)
  * mapping->tree_lock (widely used)
  * inode->i_lock (in set_page_dirty's __mark_inode_dirty)
  * bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty)
  * sb_lock (within inode_lock in fs/fs-writeback.c)
  * mapping->tree_lock (widely used, in set_page_dirty,
  *           in arch-dependent flush_dcache_mmap_lock,
  *           within bdi.wb->list_lock in __sync_single_inode)
  *
  * anon_vma->rwsem,mapping->i_mutex (memory_failure, collect_procs_anon)
  *   ->tasklist_lock
@@ -567,27 +568,6 @@ void page_unlock_anon_vma_read(struct anon_vma *anon_vma)
         anon_vma_unlock_read(anon_vma);
 }
 
-/*
- * At what user virtual address is page expected in @vma?
- */
-static inline unsigned long
-__vma_address(struct page *page, struct vm_area_struct *vma)
-{
-        pgoff_t pgoff = page_to_pgoff(page);
-        return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
-}
-
-inline unsigned long
-vma_address(struct page *page, struct vm_area_struct *vma)
-{
-        unsigned long address = __vma_address(page, vma);
-
-        /* page should be within @vma mapping range */
-        VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
-
-        return address;
-}
-
 #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 static void percpu_flush_tlb_batch_pages(void *data)
 {
@@ -819,6 +799,96 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
         return 1;
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+/*
+ * Check that @page is mapped at @address into @mm. In contrast to
+ * page_check_address(), this function can handle transparent huge pages.
+ *
+ * On success returns true with pte mapped and locked. For PMD-mapped
+ * transparent huge pages *@ptep is set to NULL.
+ */
+bool page_check_address_transhuge(struct page *page, struct mm_struct *mm,
+                                  unsigned long address, pmd_t **pmdp,
+                                  pte_t **ptep, spinlock_t **ptlp)
+{
+        pgd_t *pgd;
+        pud_t *pud;
+        pmd_t *pmd;
+        pte_t *pte;
+        spinlock_t *ptl;
+
+        if (unlikely(PageHuge(page))) {
+                /* when pud is not present, pte will be NULL */
+                pte = huge_pte_offset(mm, address);
+                if (!pte)
+                        return false;
+
+                ptl = huge_pte_lockptr(page_hstate(page), mm, pte);
+                pmd = NULL;
+                goto check_pte;
+        }
+
+        pgd = pgd_offset(mm, address);
+        if (!pgd_present(*pgd))
+                return false;
+        pud = pud_offset(pgd, address);
+        if (!pud_present(*pud))
+                return false;
+        pmd = pmd_offset(pud, address);
+
+        if (pmd_trans_huge(*pmd)) {
+                ptl = pmd_lock(mm, pmd);
+                if (!pmd_present(*pmd))
+                        goto unlock_pmd;
+                if (unlikely(!pmd_trans_huge(*pmd))) {
+                        spin_unlock(ptl);
+                        goto map_pte;
+                }
+
+                if (pmd_page(*pmd) != page)
+                        goto unlock_pmd;
+
+                pte = NULL;
+                goto found;
+unlock_pmd:
+                spin_unlock(ptl);
+                return false;
+        } else {
+                pmd_t pmde = *pmd;
+
+                barrier();
+                if (!pmd_present(pmde) || pmd_trans_huge(pmde))
+                        return false;
+        }
+map_pte:
+        pte = pte_offset_map(pmd, address);
+        if (!pte_present(*pte)) {
+                pte_unmap(pte);
+                return false;
+        }
+
+        ptl = pte_lockptr(mm, pmd);
+check_pte:
+        spin_lock(ptl);
+
+        if (!pte_present(*pte)) {
+                pte_unmap_unlock(pte, ptl);
+                return false;
+        }
+
+        /* THP can be referenced by any subpage */
+        if (pte_pfn(*pte) - page_to_pfn(page) >= hpage_nr_pages(page)) {
+                pte_unmap_unlock(pte, ptl);
+                return false;
+        }
+found:
+        *ptep = pte;
+        *pmdp = pmd;
+        *ptlp = ptl;
+        return true;
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
 struct page_referenced_arg {
         int mapcount;
         int referenced;
@@ -832,49 +902,24 @@ static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
                         unsigned long address, void *arg)
 {
         struct mm_struct *mm = vma->vm_mm;
+        struct page_referenced_arg *pra = arg;
+        pmd_t *pmd;
+        pte_t *pte;
         spinlock_t *ptl;
         int referenced = 0;
-        struct page_referenced_arg *pra = arg;
-
-        if (unlikely(PageTransHuge(page))) {
-                pmd_t *pmd;
 
-                /*
-                 * rmap might return false positives; we must filter
-                 * these out using page_check_address_pmd().
-                 */
-                pmd = page_check_address_pmd(page, mm, address,
-                                        PAGE_CHECK_ADDRESS_PMD_FLAG, &ptl);
-                if (!pmd)
-                        return SWAP_AGAIN;
-
-                if (vma->vm_flags & VM_LOCKED) {
-                        spin_unlock(ptl);
-                        pra->vm_flags |= VM_LOCKED;
-                        return SWAP_FAIL; /* To break the loop */
-                }
+        if (!page_check_address_transhuge(page, mm, address, &pmd, &pte, &ptl))
+                return SWAP_AGAIN;
 
-                /* go ahead even if the pmd is pmd_trans_splitting() */
-                if (pmdp_clear_flush_young_notify(vma, address, pmd))
-                        referenced++;
-                spin_unlock(ptl);
-        } else {
-                pte_t *pte;
-
-                /*
-                 * rmap might return false positives; we must filter
-                 * these out using page_check_address().
-                 */
-                pte = page_check_address(page, mm, address, &ptl, 0);
-                if (!pte)
-                        return SWAP_AGAIN;
-
-                if (vma->vm_flags & VM_LOCKED) {
-                        pte_unmap_unlock(pte, ptl);
-                        pra->vm_flags |= VM_LOCKED;
-                        return SWAP_FAIL; /* To break the loop */
-                }
+        if (vma->vm_flags & VM_LOCKED) {
+                if (pte)
+                        pte_unmap(pte);
+                spin_unlock(ptl);
+                pra->vm_flags |= VM_LOCKED;
+                return SWAP_FAIL; /* To break the loop */
+        }
 
+        if (pte) {
                 if (ptep_clear_flush_young_notify(vma, address, pte)) {
                         /*
                          * Don't treat a reference through a sequentially read
@@ -886,8 +931,15 @@ static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
                         if (likely(!(vma->vm_flags & VM_SEQ_READ)))
                                 referenced++;
                 }
-                pte_unmap_unlock(pte, ptl);
+                pte_unmap(pte);
+        } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
+                if (pmdp_clear_flush_young_notify(vma, address, pmd))
+                        referenced++;
+        } else {
+                /* unexpected pmd-mapped page? */
+                WARN_ON_ONCE(1);
         }
+        spin_unlock(ptl);
 
         if (referenced)
                 clear_page_idle(page);
@@ -935,7 +987,7 @@ int page_referenced(struct page *page,
         int ret;
         int we_locked = 0;
         struct page_referenced_arg pra = {
-                .mapcount = page_mapcount(page),
+                .mapcount = total_mapcount(page),
                 .memcg = memcg,
         };
         struct rmap_walk_control rwc = {
@@ -1124,7 +1176,7 @@ static void __page_check_anon_rmap(struct page *page,
          * over the call to page_add_new_anon_rmap.
          */
         BUG_ON(page_anon_vma(page)->root != vma->anon_vma->root);
-        BUG_ON(page->index != linear_page_index(vma, address));
+        BUG_ON(page_to_pgoff(page) != linear_page_index(vma, address));
 #endif
 }
 
@@ -1133,6 +1185,7 @@ static void __page_check_anon_rmap(struct page *page,
  * @page: the page to add the mapping to
  * @vma: the vm area in which the mapping is added
  * @address: the user virtual address mapped
+ * @compound: charge the page as compound or small page
  *
  * The caller needs to hold the pte lock, and the page must be locked in
  * the anon_vma case: to serialize mapping,index checking after setting,
@@ -1140,9 +1193,9 @@ static void __page_check_anon_rmap(struct page *page,
  * (but PageKsm is never downgraded to PageAnon).
  */
 void page_add_anon_rmap(struct page *page,
-        struct vm_area_struct *vma, unsigned long address)
+        struct vm_area_struct *vma, unsigned long address, bool compound)
 {
-        do_page_add_anon_rmap(page, vma, address, 0);
+        do_page_add_anon_rmap(page, vma, address, compound ? RMAP_COMPOUND : 0);
 }
 
 /*
@@ -1151,29 +1204,44 @@ void page_add_anon_rmap(struct page *page,
  * Everybody else should continue to use page_add_anon_rmap above.
  */
 void do_page_add_anon_rmap(struct page *page,
-        struct vm_area_struct *vma, unsigned long address, int exclusive)
+        struct vm_area_struct *vma, unsigned long address, int flags)
 {
-        int first = atomic_inc_and_test(&page->_mapcount);
+        bool compound = flags & RMAP_COMPOUND;
+        bool first;
+
+        if (compound) {
+                atomic_t *mapcount;
+                VM_BUG_ON_PAGE(!PageLocked(page), page);
+                VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+                mapcount = compound_mapcount_ptr(page);
+                first = atomic_inc_and_test(mapcount);
+        } else {
+                first = atomic_inc_and_test(&page->_mapcount);
+        }
+
         if (first) {
+                int nr = compound ? hpage_nr_pages(page) : 1;
                 /*
                  * We use the irq-unsafe __{inc|mod}_zone_page_stat because
                  * these counters are not modified in interrupt context, and
                  * pte lock(a spinlock) is held, which implies preemption
                  * disabled.
                  */
-                if (PageTransHuge(page))
+                if (compound) {
                         __inc_zone_page_state(page,
                                               NR_ANON_TRANSPARENT_HUGEPAGES);
-                __mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
-                                hpage_nr_pages(page));
+                }
+                __mod_zone_page_state(page_zone(page), NR_ANON_PAGES, nr);
         }
         if (unlikely(PageKsm(page)))
                 return;
 
         VM_BUG_ON_PAGE(!PageLocked(page), page);
+
         /* address might be in next vma when migration races vma_adjust */
         if (first)
-                __page_set_anon_rmap(page, vma, address, exclusive);
+                __page_set_anon_rmap(page, vma, address,
+                                flags & RMAP_EXCLUSIVE);
         else
                 __page_check_anon_rmap(page, vma, address);
 }
@@ -1183,21 +1251,31 @@ void do_page_add_anon_rmap(struct page *page,
  * @page: the page to add the mapping to
  * @vma: the vm area in which the mapping is added
  * @address: the user virtual address mapped
+ * @compound: charge the page as compound or small page
  *
  * Same as page_add_anon_rmap but must only be called on *new* pages.
  * This means the inc-and-test can be bypassed.
  * Page does not have to be locked.
  */
 void page_add_new_anon_rmap(struct page *page,
-        struct vm_area_struct *vma, unsigned long address)
+        struct vm_area_struct *vma, unsigned long address, bool compound)
 {
+        int nr = compound ? hpage_nr_pages(page) : 1;
+
         VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
         SetPageSwapBacked(page);
-        atomic_set(&page->_mapcount, 0); /* increment count (starts at -1) */
-        if (PageTransHuge(page))
+        if (compound) {
+                VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+                /* increment count (starts at -1) */
+                atomic_set(compound_mapcount_ptr(page), 0);
                 __inc_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
-        __mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
-                        hpage_nr_pages(page));
+        } else {
+                /* Anon THP always mapped first with PMD */
+                VM_BUG_ON_PAGE(PageTransCompound(page), page);
+                /* increment count (starts at -1) */
+                atomic_set(&page->_mapcount, 0);
+        }
+        __mod_zone_page_state(page_zone(page), NR_ANON_PAGES, nr);
         __page_set_anon_rmap(page, vma, address, 1);
 }
 
@@ -1225,12 +1303,15 @@ static void page_remove_file_rmap(struct page *page)
 
         memcg = mem_cgroup_begin_page_stat(page);
 
-        /* page still mapped by someone else? */
-        if (!atomic_add_negative(-1, &page->_mapcount))
+        /* Hugepages are not counted in NR_FILE_MAPPED for now. */
+        if (unlikely(PageHuge(page))) {
+                /* hugetlb pages are always mapped with pmds */
+                atomic_dec(compound_mapcount_ptr(page));
                 goto out;
+        }
 
-        /* Hugepages are not counted in NR_FILE_MAPPED for now. */
-        if (unlikely(PageHuge(page)))
+        /* page still mapped by someone else? */
+        if (!atomic_add_negative(-1, &page->_mapcount))
                 goto out;
 
         /*
@@ -1247,41 +1328,79 @@ out:
         mem_cgroup_end_page_stat(memcg);
 }
 
+static void page_remove_anon_compound_rmap(struct page *page)
+{
+        int i, nr;
+
+        if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
+                return;
+
+        /* Hugepages are not counted in NR_ANON_PAGES for now. */
+        if (unlikely(PageHuge(page)))
+                return;
+
+        if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
+                return;
+
+        __dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
+
+        if (TestClearPageDoubleMap(page)) {
+                /*
+                 * Subpages can be mapped with PTEs too. Check how many of
+                 * them are still mapped.
+                 */
+                for (i = 0, nr = 0; i < HPAGE_PMD_NR; i++) {
+                        if (atomic_add_negative(-1, &page[i]._mapcount))
+                                nr++;
+                }
+        } else {
+                nr = HPAGE_PMD_NR;
+        }
+
+        if (unlikely(PageMlocked(page)))
+                clear_page_mlock(page);
+
+        if (nr) {
+                __mod_zone_page_state(page_zone(page), NR_ANON_PAGES, -nr);
+                deferred_split_huge_page(page);
+        }
+}
+
 /**
  * page_remove_rmap - take down pte mapping from a page
  * @page: page to remove mapping from
+ * @compound: uncharge the page as compound or small page
  *
  * The caller needs to hold the pte lock.
  */
-void page_remove_rmap(struct page *page)
+void page_remove_rmap(struct page *page, bool compound)
 {
         if (!PageAnon(page)) {
+                VM_BUG_ON_PAGE(compound && !PageHuge(page), page);
                 page_remove_file_rmap(page);
                 return;
         }
 
+        if (compound)
+                return page_remove_anon_compound_rmap(page);
+
         /* page still mapped by someone else? */
         if (!atomic_add_negative(-1, &page->_mapcount))
                 return;
 
-        /* Hugepages are not counted in NR_ANON_PAGES for now. */
-        if (unlikely(PageHuge(page)))
-                return;
-
         /*
          * We use the irq-unsafe __{inc|mod}_zone_page_stat because
          * these counters are not modified in interrupt context, and
          * pte lock(a spinlock) is held, which implies preemption disabled.
          */
-        if (PageTransHuge(page))
-                __dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
-
-        __mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
-                        -hpage_nr_pages(page));
+        __dec_zone_page_state(page, NR_ANON_PAGES);
 
         if (unlikely(PageMlocked(page)))
                 clear_page_mlock(page);
 
+        if (PageTransCompound(page))
+                deferred_split_huge_page(compound_head(page));
+
         /*
          * It would be tidy to reset the PageAnon mapping here,
          * but that might overwrite a racing page_add_anon_rmap
@@ -1293,6 +1412,11 @@ void page_remove_rmap(struct page *page)
          */
 }
 
+struct rmap_private {
+        enum ttu_flags flags;
+        int lazyfreed;
+};
+
 /*
  * @arg: enum ttu_flags will be passed to this argument
  */
@@ -1304,7 +1428,8 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
         pte_t pteval;
         spinlock_t *ptl;
         int ret = SWAP_AGAIN;
-        enum ttu_flags flags = (enum ttu_flags)arg;
+        struct rmap_private *rp = arg;
+        enum ttu_flags flags = rp->flags;
 
         /* munlock has nothing to gain from examining un-locked vmas */
         if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
@@ -1396,6 +1521,14 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                  * See handle_pte_fault() ...
                  */
                 VM_BUG_ON_PAGE(!PageSwapCache(page), page);
+
+                if (!PageDirty(page) && (flags & TTU_LZFREE)) {
+                        /* It's a freeable page by MADV_FREE */
+                        dec_mm_counter(mm, MM_ANONPAGES);
+                        rp->lazyfreed++;
+                        goto discard;
+                }
+
                 if (swap_duplicate(entry) < 0) {
                         set_pte_at(mm, address, pte, pteval);
                         ret = SWAP_FAIL;
@@ -1416,7 +1549,8 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
         } else
                 dec_mm_counter(mm, mm_counter_file(page));
 
-        page_remove_rmap(page);
+discard:
+        page_remove_rmap(page, PageHuge(page));
         page_cache_release(page);
 
 out_unmap:
@@ -1468,9 +1602,14 @@ static int page_not_mapped(struct page *page)
 int try_to_unmap(struct page *page, enum ttu_flags flags)
 {
         int ret;
+        struct rmap_private rp = {
+                .flags = flags,
+                .lazyfreed = 0,
+        };
+
         struct rmap_walk_control rwc = {
                 .rmap_one = try_to_unmap_one,
-                .arg = (void *)flags,
+                .arg = &rp,
                 .done = page_not_mapped,
                 .anon_lock = page_lock_anon_vma_read,
         };
@@ -1490,8 +1629,11 @@ int try_to_unmap(struct page *page, enum ttu_flags flags)
 
         ret = rmap_walk(page, &rwc);
 
-        if (ret != SWAP_MLOCK && !page_mapped(page))
+        if (ret != SWAP_MLOCK && !page_mapped(page)) {
                 ret = SWAP_SUCCESS;
+                if (rp.lazyfreed && !PageDirty(page))
+                        ret = SWAP_LZFREE;
+        }
         return ret;
 }
 
@@ -1513,9 +1655,14 @@ int try_to_unmap(struct page *page, enum ttu_flags flags)
 int try_to_munlock(struct page *page)
 {
         int ret;
+        struct rmap_private rp = {
+                .flags = TTU_MUNLOCK,
+                .lazyfreed = 0,
+        };
+
         struct rmap_walk_control rwc = {
                 .rmap_one = try_to_unmap_one,
-                .arg = (void *)TTU_MUNLOCK,
+                .arg = &rp,
                 .done = page_not_mapped,
                 .anon_lock = page_lock_anon_vma_read,
 
@@ -1698,7 +1845,7 @@ void hugepage_add_anon_rmap(struct page *page,
         BUG_ON(!PageLocked(page));
         BUG_ON(!anon_vma);
         /* address might be in next vma when migration races vma_adjust */
-        first = atomic_inc_and_test(&page->_mapcount);
+        first = atomic_inc_and_test(compound_mapcount_ptr(page));
         if (first)
                 __hugepage_set_anon_rmap(page, vma, address, 0);
 }
@@ -1707,7 +1854,7 @@ void hugepage_add_new_anon_rmap(struct page *page,
                         struct vm_area_struct *vma, unsigned long address)
 {
         BUG_ON(address < vma->vm_start || address >= vma->vm_end);
-        atomic_set(&page->_mapcount, 0);
+        atomic_set(compound_mapcount_ptr(page), 0);
         __hugepage_set_anon_rmap(page, vma, address, 1);
 }
 #endif /* CONFIG_HUGETLB_PAGE */
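
For orientation, a minimal illustrative sketch (not part of the patch above) of how callers are expected to use the reworked anon rmap interface, which now charges and uncharges the mapcount either for a whole compound page (PMD mapping) or for a single small page or subpage (PTE mapping). The example_* wrappers below are hypothetical; only page_add_new_anon_rmap(), page_remove_rmap() and their new bool compound argument come from this diff.

#include <linux/mm.h>
#include <linux/rmap.h>
#include <linux/huge_mm.h>

/* Hypothetical wrappers, shown only to illustrate the new calling convention. */

static void example_map_new_anon(struct page *page, struct vm_area_struct *vma,
                                 unsigned long address)
{
        /* A THP is charged once against its compound mapcount ... */
        bool compound = PageTransHuge(page);

        /* ... and a *new* page may skip the inc-and-test entirely. */
        page_add_new_anon_rmap(page, vma, address, compound);
}

static void example_unmap_anon(struct page *page, bool was_pmd_mapped)
{
        /* Caller holds the pte lock; uncharge with the same granularity. */
        page_remove_rmap(page, was_pmd_mapped);
}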