Diffstat (limited to 'mm/rmap.c')
-rw-r--r--  mm/rmap.c  369
1 files changed, 258 insertions, 111 deletions
diff --git a/mm/rmap.c b/mm/rmap.c
index 622756c16ac8..79f3bf047f38 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -23,21 +23,22 @@
  * inode->i_mutex       (while writing or truncating, not reading or faulting)
  *   mm->mmap_sem
  *     page->flags PG_locked (lock_page)
- *       mapping->i_mmap_rwsem
- *         anon_vma->rwsem
- *           mm->page_table_lock or pte_lock
- *             zone->lru_lock (in mark_page_accessed, isolate_lru_page)
- *             swap_lock (in swap_duplicate, swap_info_get)
- *               mmlist_lock (in mmput, drain_mmlist and others)
- *               mapping->private_lock (in __set_page_dirty_buffers)
- *                 mem_cgroup_{begin,end}_page_stat (memcg->move_lock)
- *                   mapping->tree_lock (widely used)
- *                     inode->i_lock (in set_page_dirty's __mark_inode_dirty)
- *                     bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty)
- *                       sb_lock (within inode_lock in fs/fs-writeback.c)
- *                       mapping->tree_lock (widely used, in set_page_dirty,
- *                         in arch-dependent flush_dcache_mmap_lock,
- *                         within bdi.wb->list_lock in __sync_single_inode)
+ *       hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share)
+ *         mapping->i_mmap_rwsem
+ *           anon_vma->rwsem
+ *             mm->page_table_lock or pte_lock
+ *               zone->lru_lock (in mark_page_accessed, isolate_lru_page)
+ *               swap_lock (in swap_duplicate, swap_info_get)
+ *                 mmlist_lock (in mmput, drain_mmlist and others)
+ *                 mapping->private_lock (in __set_page_dirty_buffers)
+ *                   mem_cgroup_{begin,end}_page_stat (memcg->move_lock)
+ *                     mapping->tree_lock (widely used)
+ *                       inode->i_lock (in set_page_dirty's __mark_inode_dirty)
+ *                       bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty)
+ *                         sb_lock (within inode_lock in fs/fs-writeback.c)
+ *                         mapping->tree_lock (widely used, in set_page_dirty,
+ *                           in arch-dependent flush_dcache_mmap_lock,
+ *                           within bdi.wb->list_lock in __sync_single_inode)
  *
  * anon_vma->rwsem,mapping->i_mutex      (memory_failure, collect_procs_anon)
  *   ->tasklist_lock
@@ -567,27 +568,6 @@ void page_unlock_anon_vma_read(struct anon_vma *anon_vma)
         anon_vma_unlock_read(anon_vma);
 }
 
-/*
- * At what user virtual address is page expected in @vma?
- */
-static inline unsigned long
-__vma_address(struct page *page, struct vm_area_struct *vma)
-{
-        pgoff_t pgoff = page_to_pgoff(page);
-        return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
-}
-
-inline unsigned long
-vma_address(struct page *page, struct vm_area_struct *vma)
-{
-        unsigned long address = __vma_address(page, vma);
-
-        /* page should be within @vma mapping range */
-        VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
-
-        return address;
-}
-
 #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 static void percpu_flush_tlb_batch_pages(void *data)
 {
@@ -819,6 +799,96 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
         return 1;
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+/*
+ * Check that @page is mapped at @address into @mm. In contrast to
+ * page_check_address(), this function can handle transparent huge pages.
+ *
+ * On success returns true with pte mapped and locked. For PMD-mapped
+ * transparent huge pages *@ptep is set to NULL.
+ */
+bool page_check_address_transhuge(struct page *page, struct mm_struct *mm,
+                                  unsigned long address, pmd_t **pmdp,
+                                  pte_t **ptep, spinlock_t **ptlp)
+{
+        pgd_t *pgd;
+        pud_t *pud;
+        pmd_t *pmd;
+        pte_t *pte;
+        spinlock_t *ptl;
+
+        if (unlikely(PageHuge(page))) {
+                /* when pud is not present, pte will be NULL */
+                pte = huge_pte_offset(mm, address);
+                if (!pte)
+                        return false;
+
+                ptl = huge_pte_lockptr(page_hstate(page), mm, pte);
+                pmd = NULL;
+                goto check_pte;
+        }
+
+        pgd = pgd_offset(mm, address);
+        if (!pgd_present(*pgd))
+                return false;
+        pud = pud_offset(pgd, address);
+        if (!pud_present(*pud))
+                return false;
+        pmd = pmd_offset(pud, address);
+
+        if (pmd_trans_huge(*pmd)) {
+                ptl = pmd_lock(mm, pmd);
+                if (!pmd_present(*pmd))
+                        goto unlock_pmd;
+                if (unlikely(!pmd_trans_huge(*pmd))) {
+                        spin_unlock(ptl);
+                        goto map_pte;
+                }
+
+                if (pmd_page(*pmd) != page)
+                        goto unlock_pmd;
+
+                pte = NULL;
+                goto found;
+unlock_pmd:
+                spin_unlock(ptl);
+                return false;
+        } else {
+                pmd_t pmde = *pmd;
+
+                barrier();
+                if (!pmd_present(pmde) || pmd_trans_huge(pmde))
+                        return false;
+        }
+map_pte:
+        pte = pte_offset_map(pmd, address);
+        if (!pte_present(*pte)) {
+                pte_unmap(pte);
+                return false;
+        }
+
+        ptl = pte_lockptr(mm, pmd);
+check_pte:
+        spin_lock(ptl);
+
+        if (!pte_present(*pte)) {
+                pte_unmap_unlock(pte, ptl);
+                return false;
+        }
+
+        /* THP can be referenced by any subpage */
+        if (pte_pfn(*pte) - page_to_pfn(page) >= hpage_nr_pages(page)) {
+                pte_unmap_unlock(pte, ptl);
+                return false;
+        }
+found:
+        *ptep = pte;
+        *pmdp = pmd;
+        *ptlp = ptl;
+        return true;
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
 struct page_referenced_arg {
         int mapcount;
         int referenced;
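A note on the new helper: it hands unlock/unmap duties back to the caller, exactly as page_referenced_one() in the next hunk does. A minimal sketch of that calling pattern (illustrative only, not part of this diff; example_rmap_one is a hypothetical rmap_walk callback):

/*
 * Sketch (not from this diff): the calling pattern expected of rmap
 * walkers that use page_check_address_transhuge().  A NULL pte on
 * success means the page is PMD-mapped and only ptl needs dropping.
 */
static int example_rmap_one(struct page *page, struct vm_area_struct *vma,
                            unsigned long address, void *arg)
{
        struct mm_struct *mm = vma->vm_mm;
        pmd_t *pmd;
        pte_t *pte;
        spinlock_t *ptl;

        if (!page_check_address_transhuge(page, mm, address, &pmd, &pte, &ptl))
                return SWAP_AGAIN;      /* page not mapped at this address */

        if (pte) {
                /* PTE-mapped small page or THP subpage: operate on *pte ... */
                pte_unmap(pte);         /* caller unmaps the page table */
        } else {
                /* PMD-mapped THP: operate on *pmd ... */
        }
        spin_unlock(ptl);               /* ptl covers either case */

        return SWAP_AGAIN;              /* keep walking other vmas */
}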
@@ -832,49 +902,24 @@ static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
                         unsigned long address, void *arg)
 {
         struct mm_struct *mm = vma->vm_mm;
+        struct page_referenced_arg *pra = arg;
+        pmd_t *pmd;
+        pte_t *pte;
         spinlock_t *ptl;
         int referenced = 0;
-        struct page_referenced_arg *pra = arg;
-
-        if (unlikely(PageTransHuge(page))) {
-                pmd_t *pmd;
 
-                /*
-                 * rmap might return false positives; we must filter
-                 * these out using page_check_address_pmd().
-                 */
-                pmd = page_check_address_pmd(page, mm, address,
-                                             PAGE_CHECK_ADDRESS_PMD_FLAG, &ptl);
-                if (!pmd)
-                        return SWAP_AGAIN;
-
-                if (vma->vm_flags & VM_LOCKED) {
-                        spin_unlock(ptl);
-                        pra->vm_flags |= VM_LOCKED;
-                        return SWAP_FAIL; /* To break the loop */
-                }
+        if (!page_check_address_transhuge(page, mm, address, &pmd, &pte, &ptl))
+                return SWAP_AGAIN;
 
-                /* go ahead even if the pmd is pmd_trans_splitting() */
-                if (pmdp_clear_flush_young_notify(vma, address, pmd))
-                        referenced++;
-                spin_unlock(ptl);
-        } else {
-                pte_t *pte;
-
-                /*
-                 * rmap might return false positives; we must filter
-                 * these out using page_check_address().
-                 */
-                pte = page_check_address(page, mm, address, &ptl, 0);
-                if (!pte)
-                        return SWAP_AGAIN;
-
-                if (vma->vm_flags & VM_LOCKED) {
-                        pte_unmap_unlock(pte, ptl);
-                        pra->vm_flags |= VM_LOCKED;
-                        return SWAP_FAIL; /* To break the loop */
-                }
+        if (vma->vm_flags & VM_LOCKED) {
+                if (pte)
+                        pte_unmap(pte);
+                spin_unlock(ptl);
+                pra->vm_flags |= VM_LOCKED;
+                return SWAP_FAIL; /* To break the loop */
+        }
 
+        if (pte) {
                 if (ptep_clear_flush_young_notify(vma, address, pte)) {
                         /*
                          * Don't treat a reference through a sequentially read
@@ -886,8 +931,15 @@ static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
                         if (likely(!(vma->vm_flags & VM_SEQ_READ)))
                                 referenced++;
                 }
-                pte_unmap_unlock(pte, ptl);
+                pte_unmap(pte);
+        } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
+                if (pmdp_clear_flush_young_notify(vma, address, pmd))
+                        referenced++;
+        } else {
+                /* unexpected pmd-mapped page? */
+                WARN_ON_ONCE(1);
         }
+        spin_unlock(ptl);
 
         if (referenced)
                 clear_page_idle(page);
@@ -935,7 +987,7 @@ int page_referenced(struct page *page,
         int ret;
         int we_locked = 0;
         struct page_referenced_arg pra = {
-                .mapcount = page_mapcount(page),
+                .mapcount = total_mapcount(page),
                 .memcg = memcg,
         };
         struct rmap_walk_control rwc = {
@@ -1124,7 +1176,7 @@ static void __page_check_anon_rmap(struct page *page,
          * over the call to page_add_new_anon_rmap.
          */
         BUG_ON(page_anon_vma(page)->root != vma->anon_vma->root);
-        BUG_ON(page->index != linear_page_index(vma, address));
+        BUG_ON(page_to_pgoff(page) != linear_page_index(vma, address));
 #endif
 }
 
@@ -1133,6 +1185,7 @@ static void __page_check_anon_rmap(struct page *page,
  * @page:      the page to add the mapping to
  * @vma:       the vm area in which the mapping is added
  * @address:   the user virtual address mapped
+ * @compound:  charge the page as compound or small page
  *
  * The caller needs to hold the pte lock, and the page must be locked in
  * the anon_vma case: to serialize mapping,index checking after setting,
@@ -1140,9 +1193,9 @@ static void __page_check_anon_rmap(struct page *page,
  * (but PageKsm is never downgraded to PageAnon).
  */
 void page_add_anon_rmap(struct page *page,
-        struct vm_area_struct *vma, unsigned long address)
+        struct vm_area_struct *vma, unsigned long address, bool compound)
 {
-        do_page_add_anon_rmap(page, vma, address, 0);
+        do_page_add_anon_rmap(page, vma, address, compound ? RMAP_COMPOUND : 0);
 }
 
 /*
@@ -1151,29 +1204,44 @@ void page_add_anon_rmap(struct page *page,
  * Everybody else should continue to use page_add_anon_rmap above.
  */
 void do_page_add_anon_rmap(struct page *page,
-        struct vm_area_struct *vma, unsigned long address, int exclusive)
+        struct vm_area_struct *vma, unsigned long address, int flags)
 {
-        int first = atomic_inc_and_test(&page->_mapcount);
+        bool compound = flags & RMAP_COMPOUND;
+        bool first;
+
+        if (compound) {
+                atomic_t *mapcount;
+                VM_BUG_ON_PAGE(!PageLocked(page), page);
+                VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+                mapcount = compound_mapcount_ptr(page);
+                first = atomic_inc_and_test(mapcount);
+        } else {
+                first = atomic_inc_and_test(&page->_mapcount);
+        }
+
         if (first) {
+                int nr = compound ? hpage_nr_pages(page) : 1;
                 /*
                  * We use the irq-unsafe __{inc|mod}_zone_page_stat because
                  * these counters are not modified in interrupt context, and
                  * pte lock(a spinlock) is held, which implies preemption
                  * disabled.
                  */
-                if (PageTransHuge(page))
+                if (compound) {
                         __inc_zone_page_state(page,
                                         NR_ANON_TRANSPARENT_HUGEPAGES);
-                __mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
-                                hpage_nr_pages(page));
+                }
+                __mod_zone_page_state(page_zone(page), NR_ANON_PAGES, nr);
         }
         if (unlikely(PageKsm(page)))
                 return;
 
         VM_BUG_ON_PAGE(!PageLocked(page), page);
+
         /* address might be in next vma when migration races vma_adjust */
         if (first)
-                __page_set_anon_rmap(page, vma, address, exclusive);
+                __page_set_anon_rmap(page, vma, address,
+                                     flags & RMAP_EXCLUSIVE);
         else
                 __page_check_anon_rmap(page, vma, address);
 }
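The flags interface above reduces to picking which mapcount gets incremented. A condensed restatement of that rule from do_page_add_anon_rmap() (a sketch for illustration; anon_rmap_inc_first is a hypothetical helper, not part of this diff):

/*
 * Sketch (not from this diff): counter selection under RMAP_COMPOUND.
 * Both counters start at -1, so atomic_inc_and_test() reports true only
 * for the first mapping of that kind.
 */
static bool anon_rmap_inc_first(struct page *page, int flags)
{
        if (flags & RMAP_COMPOUND)
                /* whole THP mapped by a PMD: bump the compound mapcount */
                return atomic_inc_and_test(compound_mapcount_ptr(page));
        /* small page, or a single THP subpage mapped by a PTE */
        return atomic_inc_and_test(&page->_mapcount);
}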
@@ -1183,21 +1251,31 @@ void do_page_add_anon_rmap(struct page *page,
  * @page:      the page to add the mapping to
  * @vma:       the vm area in which the mapping is added
  * @address:   the user virtual address mapped
+ * @compound:  charge the page as compound or small page
  *
  * Same as page_add_anon_rmap but must only be called on *new* pages.
  * This means the inc-and-test can be bypassed.
  * Page does not have to be locked.
  */
 void page_add_new_anon_rmap(struct page *page,
-        struct vm_area_struct *vma, unsigned long address)
+        struct vm_area_struct *vma, unsigned long address, bool compound)
 {
+        int nr = compound ? hpage_nr_pages(page) : 1;
+
         VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
         SetPageSwapBacked(page);
-        atomic_set(&page->_mapcount, 0); /* increment count (starts at -1) */
-        if (PageTransHuge(page))
+        if (compound) {
+                VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+                /* increment count (starts at -1) */
+                atomic_set(compound_mapcount_ptr(page), 0);
                 __inc_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
-        __mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
-                        hpage_nr_pages(page));
+        } else {
+                /* Anon THP always mapped first with PMD */
+                VM_BUG_ON_PAGE(PageTransCompound(page), page);
+                /* increment count (starts at -1) */
+                atomic_set(&page->_mapcount, 0);
+        }
+        __mod_zone_page_state(page_zone(page), NR_ANON_PAGES, nr);
         __page_set_anon_rmap(page, vma, address, 1);
 }
 
@@ -1225,12 +1303,15 @@ static void page_remove_file_rmap(struct page *page)
 
         memcg = mem_cgroup_begin_page_stat(page);
 
-        /* page still mapped by someone else? */
-        if (!atomic_add_negative(-1, &page->_mapcount))
+        /* Hugepages are not counted in NR_FILE_MAPPED for now. */
+        if (unlikely(PageHuge(page))) {
+                /* hugetlb pages are always mapped with pmds */
+                atomic_dec(compound_mapcount_ptr(page));
                 goto out;
+        }
 
-        /* Hugepages are not counted in NR_FILE_MAPPED for now. */
-        if (unlikely(PageHuge(page)))
+        /* page still mapped by someone else? */
+        if (!atomic_add_negative(-1, &page->_mapcount))
                 goto out;
 
         /*
@@ -1247,41 +1328,79 @@ out:
         mem_cgroup_end_page_stat(memcg);
 }
 
+static void page_remove_anon_compound_rmap(struct page *page)
+{
+        int i, nr;
+
+        if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
+                return;
+
+        /* Hugepages are not counted in NR_ANON_PAGES for now. */
+        if (unlikely(PageHuge(page)))
+                return;
+
+        if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
+                return;
+
+        __dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
+
+        if (TestClearPageDoubleMap(page)) {
+                /*
+                 * Subpages can be mapped with PTEs too. Check how many of
+                 * them are still mapped.
+                 */
+                for (i = 0, nr = 0; i < HPAGE_PMD_NR; i++) {
+                        if (atomic_add_negative(-1, &page[i]._mapcount))
+                                nr++;
+                }
+        } else {
+                nr = HPAGE_PMD_NR;
+        }
+
+        if (unlikely(PageMlocked(page)))
+                clear_page_mlock(page);
+
+        if (nr) {
+                __mod_zone_page_state(page_zone(page), NR_ANON_PAGES, -nr);
+                deferred_split_huge_page(page);
+        }
+}
+
 /**
  * page_remove_rmap - take down pte mapping from a page
  * @page:       page to remove mapping from
+ * @compound:   uncharge the page as compound or small page
  *
  * The caller needs to hold the pte lock.
  */
-void page_remove_rmap(struct page *page)
+void page_remove_rmap(struct page *page, bool compound)
 {
         if (!PageAnon(page)) {
+                VM_BUG_ON_PAGE(compound && !PageHuge(page), page);
                 page_remove_file_rmap(page);
                 return;
         }
 
+        if (compound)
+                return page_remove_anon_compound_rmap(page);
+
         /* page still mapped by someone else? */
         if (!atomic_add_negative(-1, &page->_mapcount))
                 return;
 
-        /* Hugepages are not counted in NR_ANON_PAGES for now. */
-        if (unlikely(PageHuge(page)))
-                return;
-
         /*
          * We use the irq-unsafe __{inc|mod}_zone_page_stat because
          * these counters are not modified in interrupt context, and
          * pte lock(a spinlock) is held, which implies preemption disabled.
          */
-        if (PageTransHuge(page))
-                __dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
-
-        __mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
-                        -hpage_nr_pages(page));
+        __dec_zone_page_state(page, NR_ANON_PAGES);
 
         if (unlikely(PageMlocked(page)))
                 clear_page_mlock(page);
 
+        if (PageTransCompound(page))
+                deferred_split_huge_page(compound_head(page));
+
         /*
          * It would be tidy to reset the PageAnon mapping here,
          * but that might overwrite a racing page_add_anon_rmap
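To make the DoubleMap accounting in page_remove_anon_compound_rmap() concrete: when the last PMD mapping of a THP goes away, only subpages that are not still PTE-mapped leave NR_ANON_PAGES. A sketch of that arithmetic (illustrative only; thp_pages_now_unmapped is hypothetical, and HPAGE_PMD_NR == 512 is assumed, i.e. x86-64 with 4K pages):

/*
 * Sketch (not from this diff): how many base pages become unmapped when
 * a THP loses its PMD mapping, per the DoubleMap branch above.
 */
static int thp_pages_now_unmapped(struct page *head, bool was_double_mapped)
{
        int i, nr = 0;

        if (!was_double_mapped)
                return HPAGE_PMD_NR;    /* no PTE mappings: all 512 subpages */

        /* only subpages whose _mapcount drops back to -1 are truly unmapped */
        for (i = 0; i < HPAGE_PMD_NR; i++)
                if (atomic_add_negative(-1, &head[i]._mapcount))
                        nr++;
        return nr;
}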
@@ -1293,6 +1412,11 @@ void page_remove_rmap(struct page *page)
          */
 }
 
+struct rmap_private {
+        enum ttu_flags flags;
+        int lazyfreed;
+};
+
 /*
  * @arg: enum ttu_flags will be passed to this argument
  */
@@ -1304,7 +1428,8 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
         pte_t pteval;
         spinlock_t *ptl;
         int ret = SWAP_AGAIN;
-        enum ttu_flags flags = (enum ttu_flags)arg;
+        struct rmap_private *rp = arg;
+        enum ttu_flags flags = rp->flags;
 
         /* munlock has nothing to gain from examining un-locked vmas */
         if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
@@ -1396,6 +1521,14 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                  * See handle_pte_fault() ...
                  */
                 VM_BUG_ON_PAGE(!PageSwapCache(page), page);
+
+                if (!PageDirty(page) && (flags & TTU_LZFREE)) {
+                        /* It's a freeable page by MADV_FREE */
+                        dec_mm_counter(mm, MM_ANONPAGES);
+                        rp->lazyfreed++;
+                        goto discard;
+                }
+
                 if (swap_duplicate(entry) < 0) {
                         set_pte_at(mm, address, pte, pteval);
                         ret = SWAP_FAIL;
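For context, TTU_LZFREE is the reclaim-side half of MADV_FREE: a clean anonymous page that userspace marked disposable is dropped here instead of being swapped out. A minimal userspace sketch (not part of this diff; mark_disposable is hypothetical, and it assumes <sys/mman.h> defines MADV_FREE, i.e. Linux 4.5+ headers):

#include <sys/mman.h>

/*
 * Sketch (not from this diff): a process marks an anonymous buffer as
 * disposable.  Until it writes to the range again, reclaim may discard
 * these pages through the TTU_LZFREE branch above instead of swapping.
 */
static void mark_disposable(void *buf, size_t len)
{
        madvise(buf, len, MADV_FREE);
}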
@@ -1416,7 +1549,8 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
         } else
                 dec_mm_counter(mm, mm_counter_file(page));
 
-        page_remove_rmap(page);
+discard:
+        page_remove_rmap(page, PageHuge(page));
         page_cache_release(page);
 
 out_unmap:
@@ -1468,9 +1602,14 @@ static int page_not_mapped(struct page *page)
 int try_to_unmap(struct page *page, enum ttu_flags flags)
 {
         int ret;
+        struct rmap_private rp = {
+                .flags = flags,
+                .lazyfreed = 0,
+        };
+
         struct rmap_walk_control rwc = {
                 .rmap_one = try_to_unmap_one,
-                .arg = (void *)flags,
+                .arg = &rp,
                 .done = page_not_mapped,
                 .anon_lock = page_lock_anon_vma_read,
         };
@@ -1490,8 +1629,11 @@ int try_to_unmap(struct page *page, enum ttu_flags flags)
 
         ret = rmap_walk(page, &rwc);
 
-        if (ret != SWAP_MLOCK && !page_mapped(page))
+        if (ret != SWAP_MLOCK && !page_mapped(page)) {
                 ret = SWAP_SUCCESS;
+                if (rp.lazyfreed && !PageDirty(page))
+                        ret = SWAP_LZFREE;
+        }
         return ret;
 }
 
@@ -1513,9 +1655,14 @@ int try_to_unmap(struct page *page, enum ttu_flags flags)
 int try_to_munlock(struct page *page)
 {
         int ret;
+        struct rmap_private rp = {
+                .flags = TTU_MUNLOCK,
+                .lazyfreed = 0,
+        };
+
         struct rmap_walk_control rwc = {
                 .rmap_one = try_to_unmap_one,
-                .arg = (void *)TTU_MUNLOCK,
+                .arg = &rp,
                 .done = page_not_mapped,
                 .anon_lock = page_lock_anon_vma_read,
 
@@ -1698,7 +1845,7 @@ void hugepage_add_anon_rmap(struct page *page,
         BUG_ON(!PageLocked(page));
         BUG_ON(!anon_vma);
         /* address might be in next vma when migration races vma_adjust */
-        first = atomic_inc_and_test(&page->_mapcount);
+        first = atomic_inc_and_test(compound_mapcount_ptr(page));
         if (first)
                 __hugepage_set_anon_rmap(page, vma, address, 0);
 }
@@ -1707,7 +1854,7 @@ void hugepage_add_new_anon_rmap(struct page *page,
                 struct vm_area_struct *vma, unsigned long address)
 {
         BUG_ON(address < vma->vm_start || address >= vma->vm_end);
-        atomic_set(&page->_mapcount, 0);
+        atomic_set(compound_mapcount_ptr(page), 0);
         __hugepage_set_anon_rmap(page, vma, address, 1);
 }
 #endif /* CONFIG_HUGETLB_PAGE */
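The two hugetlb hunks above move from the head page's _mapcount to the compound mapcount, matching the anon-THP accounting earlier in the diff. As a point of reference (an assumption about this series, not shown in this diff), compound_mapcount_ptr() resolves to a counter kept in the first tail page, roughly:

/*
 * Sketch (assumption, not part of this diff): where the compound
 * mapcount is stored in this series -- the first tail page of the
 * compound page.
 */
static inline atomic_t *compound_mapcount_ptr(struct page *page)
{
        return &page[1].compound_mapcount;
}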