Diffstat (limited to 'mm/rmap.c')
-rw-r--r--	mm/rmap.c	257
1 files changed, 220 insertions, 37 deletions
diff --git a/mm/rmap.c b/mm/rmap.c
index e8d639b16c6d..7e60df99018e 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -53,6 +53,8 @@
 
 #include <asm/tlbflush.h>
 
+#include "internal.h"
+
 struct kmem_cache *anon_vma_cachep;
 
 /**
@@ -290,6 +292,32 @@ pte_t *page_check_address(struct page *page, struct mm_struct *mm,
 	return NULL;
 }
 
+/**
+ * page_mapped_in_vma - check whether a page is really mapped in a VMA
+ * @page: the page to test
+ * @vma: the VMA to test
+ *
+ * Returns 1 if the page is mapped into the page tables of the VMA, 0
+ * if the page is not mapped into the page tables of this VMA.  Only
+ * valid for normal file or anonymous VMAs.
+ */
+static int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
+{
+	unsigned long address;
+	pte_t *pte;
+	spinlock_t *ptl;
+
+	address = vma_address(page, vma);
+	if (address == -EFAULT)		/* out of vma range */
+		return 0;
+	pte = page_check_address(page, vma->vm_mm, address, &ptl, 1);
+	if (!pte)			/* the page is not in this mm */
+		return 0;
+	pte_unmap_unlock(pte, ptl);
+
+	return 1;
+}
+
 /*
  * Subfunctions of page_referenced: page_referenced_one called
  * repeatedly from either page_referenced_anon or page_referenced_file.
@@ -311,10 +339,17 @@ static int page_referenced_one(struct page *page,
 	if (!pte)
 		goto out;
 
+	/*
+	 * Don't want to elevate referenced for mlocked page that gets this far,
+	 * in order that it progresses to try_to_unmap and is moved to the
+	 * unevictable list.
+	 */
 	if (vma->vm_flags & VM_LOCKED) {
-		referenced++;
 		*mapcount = 1;	/* break early from loop */
-	} else if (ptep_clear_flush_young_notify(vma, address, pte))
+		goto out_unmap;
+	}
+
+	if (ptep_clear_flush_young_notify(vma, address, pte))
 		referenced++;
 
 	/* Pretend the page is referenced if the task has the
@@ -323,6 +358,7 @@ static int page_referenced_one(struct page *page,
 			rwsem_is_locked(&mm->mmap_sem))
 		referenced++;
 
+out_unmap:
 	(*mapcount)--;
 	pte_unmap_unlock(pte, ptl);
 out:
@@ -412,11 +448,6 @@ static int page_referenced_file(struct page *page,
 		 */
 		if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont))
 			continue;
-		if ((vma->vm_flags & (VM_LOCKED|VM_MAYSHARE))
-				  == (VM_LOCKED|VM_MAYSHARE)) {
-			referenced++;
-			break;
-		}
 		referenced += page_referenced_one(page, vma, &mapcount);
 		if (!mapcount)
 			break;
@@ -739,11 +770,16 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 	 * If it's recently referenced (perhaps page_referenced
 	 * skipped over this mm) then we should reactivate it.
 	 */
-	if (!migration && ((vma->vm_flags & VM_LOCKED) ||
-			(ptep_clear_flush_young_notify(vma, address, pte)))) {
-		ret = SWAP_FAIL;
-		goto out_unmap;
-	}
+	if (!migration) {
+		if (vma->vm_flags & VM_LOCKED) {
+			ret = SWAP_MLOCK;
+			goto out_unmap;
+		}
+		if (ptep_clear_flush_young_notify(vma, address, pte)) {
+			ret = SWAP_FAIL;
+			goto out_unmap;
+		}
+	}
 
 	/* Nuke the page table entry. */
 	flush_cache_page(vma, address, page_to_pfn(page));
@@ -824,12 +860,17 @@ out:
  * For very sparsely populated VMAs this is a little inefficient - chances are
  * there there won't be many ptes located within the scan cluster.  In this case
  * maybe we could scan further - to the end of the pte page, perhaps.
+ *
+ * Mlocked pages:  check VM_LOCKED under mmap_sem held for read, if we can
+ * acquire it without blocking.  If vma locked, mlock the pages in the cluster,
+ * rather than unmapping them.  If we encounter the "check_page" that vmscan is
+ * trying to unmap, return SWAP_MLOCK, else default SWAP_AGAIN.
  */
 #define CLUSTER_SIZE	min(32*PAGE_SIZE, PMD_SIZE)
 #define CLUSTER_MASK	(~(CLUSTER_SIZE - 1))
 
-static void try_to_unmap_cluster(unsigned long cursor,
-	unsigned int *mapcount, struct vm_area_struct *vma)
+static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
+		struct vm_area_struct *vma, struct page *check_page)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	pgd_t *pgd;
@@ -841,6 +882,8 @@ static void try_to_unmap_cluster(unsigned long cursor,
 	struct page *page;
 	unsigned long address;
 	unsigned long end;
+	int ret = SWAP_AGAIN;
+	int locked_vma = 0;
 
 	address = (vma->vm_start + cursor) & CLUSTER_MASK;
 	end = address + CLUSTER_SIZE;
@@ -851,15 +894,26 @@ static void try_to_unmap_cluster(unsigned long cursor,
 
 	pgd = pgd_offset(mm, address);
 	if (!pgd_present(*pgd))
-		return;
+		return ret;
 
 	pud = pud_offset(pgd, address);
 	if (!pud_present(*pud))
-		return;
+		return ret;
 
 	pmd = pmd_offset(pud, address);
 	if (!pmd_present(*pmd))
-		return;
+		return ret;
+
+	/*
+	 * MLOCK_PAGES => feature is configured.
+	 * if we can acquire the mmap_sem for read, and vma is VM_LOCKED,
+	 * keep the sem while scanning the cluster for mlocking pages.
+	 */
+	if (MLOCK_PAGES && down_read_trylock(&vma->vm_mm->mmap_sem)) {
+		locked_vma = (vma->vm_flags & VM_LOCKED);
+		if (!locked_vma)
+			up_read(&vma->vm_mm->mmap_sem); /* don't need it */
+	}
 
 	pte = pte_offset_map_lock(mm, pmd, address, &ptl);
 
@@ -872,6 +926,13 @@ static void try_to_unmap_cluster(unsigned long cursor,
 		page = vm_normal_page(vma, address, *pte);
 		BUG_ON(!page || PageAnon(page));
 
+		if (locked_vma) {
+			mlock_vma_page(page);   /* no-op if already mlocked */
+			if (page == check_page)
+				ret = SWAP_MLOCK;
+			continue;	/* don't unmap */
+		}
+
 		if (ptep_clear_flush_young_notify(vma, address, pte))
 			continue;
 
@@ -893,39 +954,104 @@ static void try_to_unmap_cluster(unsigned long cursor,
 		(*mapcount)--;
 	}
 	pte_unmap_unlock(pte - 1, ptl);
+	if (locked_vma)
+		up_read(&vma->vm_mm->mmap_sem);
+	return ret;
 }
 
-static int try_to_unmap_anon(struct page *page, int migration)
+/*
+ * common handling for pages mapped in VM_LOCKED vmas
+ */
+static int try_to_mlock_page(struct page *page, struct vm_area_struct *vma)
+{
+	int mlocked = 0;
+
+	if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
+		if (vma->vm_flags & VM_LOCKED) {
+			mlock_vma_page(page);
+			mlocked++;	/* really mlocked the page */
+		}
+		up_read(&vma->vm_mm->mmap_sem);
+	}
+	return mlocked;
+}
+
+/**
+ * try_to_unmap_anon - unmap or unlock anonymous page using the object-based
+ * rmap method
+ * @page: the page to unmap/unlock
+ * @unlock:  request for unlock rather than unmap [unlikely]
+ * @migration: unmapping for migration - ignored if @unlock
+ *
+ * Find all the mappings of a page using the mapping pointer and the vma chains
+ * contained in the anon_vma struct it points to.
+ *
+ * This function is only called from try_to_unmap/try_to_munlock for
+ * anonymous pages.
+ * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
+ * where the page was found will be held for write.  So, we won't recheck
+ * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
+ * 'LOCKED.
+ */
+static int try_to_unmap_anon(struct page *page, int unlock, int migration)
 {
 	struct anon_vma *anon_vma;
 	struct vm_area_struct *vma;
+	unsigned int mlocked = 0;
 	int ret = SWAP_AGAIN;
 
+	if (MLOCK_PAGES && unlikely(unlock))
+		ret = SWAP_SUCCESS;	/* default for try_to_munlock() */
+
 	anon_vma = page_lock_anon_vma(page);
 	if (!anon_vma)
 		return ret;
 
 	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
-		ret = try_to_unmap_one(page, vma, migration);
-		if (ret == SWAP_FAIL || !page_mapped(page))
-			break;
+		if (MLOCK_PAGES && unlikely(unlock)) {
+			if (!((vma->vm_flags & VM_LOCKED) &&
+			      page_mapped_in_vma(page, vma)))
+				continue;  /* must visit all unlocked vmas */
+			ret = SWAP_MLOCK;  /* saw at least one mlocked vma */
+		} else {
+			ret = try_to_unmap_one(page, vma, migration);
+			if (ret == SWAP_FAIL || !page_mapped(page))
+				break;
+		}
+		if (ret == SWAP_MLOCK) {
+			mlocked = try_to_mlock_page(page, vma);
+			if (mlocked)
+				break;	/* stop if actually mlocked page */
+		}
 	}
 
 	page_unlock_anon_vma(anon_vma);
+
+	if (mlocked)
+		ret = SWAP_MLOCK;	/* actually mlocked the page */
+	else if (ret == SWAP_MLOCK)
+		ret = SWAP_AGAIN;	/* saw VM_LOCKED vma */
+
 	return ret;
 }
 
 /**
- * try_to_unmap_file - unmap file page using the object-based rmap method
- * @page: the page to unmap
- * @migration: migration flag
+ * try_to_unmap_file - unmap/unlock file page using the object-based rmap method
+ * @page: the page to unmap/unlock
+ * @unlock:  request for unlock rather than unmap [unlikely]
+ * @migration: unmapping for migration - ignored if @unlock
  *
  * Find all the mappings of a page using the mapping pointer and the vma chains
  * contained in the address_space struct it points to.
  *
- * This function is only called from try_to_unmap for object-based pages.
+ * This function is only called from try_to_unmap/try_to_munlock for
+ * object-based pages.
+ * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
+ * where the page was found will be held for write.  So, we won't recheck
+ * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
+ * 'LOCKED.
  */
-static int try_to_unmap_file(struct page *page, int migration)
+static int try_to_unmap_file(struct page *page, int unlock, int migration)
 {
 	struct address_space *mapping = page->mapping;
 	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
@@ -936,20 +1062,44 @@ static int try_to_unmap_file(struct page *page, int migration)
 	unsigned long max_nl_cursor = 0;
 	unsigned long max_nl_size = 0;
 	unsigned int mapcount;
+	unsigned int mlocked = 0;
+
+	if (MLOCK_PAGES && unlikely(unlock))
+		ret = SWAP_SUCCESS;	/* default for try_to_munlock() */
 
 	spin_lock(&mapping->i_mmap_lock);
 	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
-		ret = try_to_unmap_one(page, vma, migration);
-		if (ret == SWAP_FAIL || !page_mapped(page))
-			goto out;
+		if (MLOCK_PAGES && unlikely(unlock)) {
+			if (!(vma->vm_flags & VM_LOCKED))
+				continue;	/* must visit all vmas */
+			ret = SWAP_MLOCK;
+		} else {
+			ret = try_to_unmap_one(page, vma, migration);
+			if (ret == SWAP_FAIL || !page_mapped(page))
+				goto out;
+		}
+		if (ret == SWAP_MLOCK) {
+			mlocked = try_to_mlock_page(page, vma);
+			if (mlocked)
+				break;  /* stop if actually mlocked page */
+		}
 	}
 
+	if (mlocked)
+		goto out;
+
 	if (list_empty(&mapping->i_mmap_nonlinear))
 		goto out;
 
 	list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
 						shared.vm_set.list) {
-		if ((vma->vm_flags & VM_LOCKED) && !migration)
+		if (MLOCK_PAGES && unlikely(unlock)) {
+			if (!(vma->vm_flags & VM_LOCKED))
+				continue;	/* must visit all vmas */
+			ret = SWAP_MLOCK;	/* leave mlocked == 0 */
+			goto out;		/* no need to look further */
+		}
+		if (!MLOCK_PAGES && !migration && (vma->vm_flags & VM_LOCKED))
 			continue;
 		cursor = (unsigned long) vma->vm_private_data;
 		if (cursor > max_nl_cursor)
@@ -959,7 +1109,7 @@ static int try_to_unmap_file(struct page *page, int migration)
 			max_nl_size = cursor;
 	}
 
-	if (max_nl_size == 0) {	/* any nonlinears locked or reserved */
+	if (max_nl_size == 0) {	/* all nonlinears locked or reserved ? */
 		ret = SWAP_FAIL;
 		goto out;
 	}
@@ -983,12 +1133,16 @@ static int try_to_unmap_file(struct page *page, int migration)
 	do {
 		list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
 						shared.vm_set.list) {
-			if ((vma->vm_flags & VM_LOCKED) && !migration)
+			if (!MLOCK_PAGES && !migration &&
+			    (vma->vm_flags & VM_LOCKED))
 				continue;
 			cursor = (unsigned long) vma->vm_private_data;
 			while ( cursor < max_nl_cursor &&
 				cursor < vma->vm_end - vma->vm_start) {
-				try_to_unmap_cluster(cursor, &mapcount, vma);
+				ret = try_to_unmap_cluster(cursor, &mapcount,
+								vma, page);
+				if (ret == SWAP_MLOCK)
+					mlocked = 2;	/* to return below */
 				cursor += CLUSTER_SIZE;
 				vma->vm_private_data = (void *) cursor;
 				if ((int)mapcount <= 0)
@@ -1009,6 +1163,10 @@ static int try_to_unmap_file(struct page *page, int migration)
 		vma->vm_private_data = NULL;
 out:
 	spin_unlock(&mapping->i_mmap_lock);
+	if (mlocked)
+		ret = SWAP_MLOCK;	/* actually mlocked the page */
+	else if (ret == SWAP_MLOCK)
+		ret = SWAP_AGAIN;	/* saw VM_LOCKED vma */
 	return ret;
 }
 
@@ -1024,6 +1182,7 @@ out:
  * SWAP_SUCCESS	- we succeeded in removing all mappings
  * SWAP_AGAIN	- we missed a mapping, try again later
  * SWAP_FAIL	- the page is unswappable
+ * SWAP_MLOCK	- page is mlocked.
  */
 int try_to_unmap(struct page *page, int migration)
 {
@@ -1032,12 +1191,36 @@ int try_to_unmap(struct page *page, int migration)
 	BUG_ON(!PageLocked(page));
 
 	if (PageAnon(page))
-		ret = try_to_unmap_anon(page, migration);
+		ret = try_to_unmap_anon(page, 0, migration);
 	else
-		ret = try_to_unmap_file(page, migration);
-
-	if (!page_mapped(page))
+		ret = try_to_unmap_file(page, 0, migration);
+	if (ret != SWAP_MLOCK && !page_mapped(page))
 		ret = SWAP_SUCCESS;
 	return ret;
 }
 
+#ifdef CONFIG_UNEVICTABLE_LRU
+/**
+ * try_to_munlock - try to munlock a page
+ * @page: the page to be munlocked
+ *
+ * Called from munlock code.  Checks all of the VMAs mapping the page
+ * to make sure nobody else has this page mlocked. The page will be
+ * returned with PG_mlocked cleared if no other vmas have it mlocked.
+ *
+ * Return values are:
+ *
+ * SWAP_SUCCESS	- no vma's holding page mlocked.
+ * SWAP_AGAIN	- page mapped in mlocked vma -- couldn't acquire mmap sem
+ * SWAP_MLOCK	- page is now mlocked.
+ */
+int try_to_munlock(struct page *page)
+{
+	VM_BUG_ON(!PageLocked(page) || PageLRU(page));
+
+	if (PageAnon(page))
+		return try_to_unmap_anon(page, 1, 0);
+	else
+		return try_to_unmap_file(page, 1, 0);
+}
+#endif
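
Not part of the commit above: a minimal, hypothetical sketch of how a caller on the munlock path might act on the return codes that the new try_to_munlock() kernel-doc describes. Only try_to_munlock() and the SWAP_* codes come from this diff; the function name example_handle_munlock() and the policy comments are illustrative assumptions, not the kernel's actual munlock code.

/*
 * Illustrative sketch only -- not part of this patch.  Assumes the usual
 * <linux/rmap.h> declarations of try_to_munlock() and the SWAP_* codes.
 */
static void example_handle_munlock(struct page *page)	/* hypothetical caller */
{
	switch (try_to_munlock(page)) {
	case SWAP_SUCCESS:	/* no VM_LOCKED vma still maps the page */
	case SWAP_AGAIN:	/* couldn't take mmap_sem; reclaim can retry later */
		/* page may be treated as evictable again */
		break;
	case SWAP_MLOCK:	/* some vma still holds the page mlocked */
		/* leave the page on the unevictable side */
		break;
	}
}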