Diffstat (limited to 'mm/rmap.c')
-rw-r--r--   mm/rmap.c   319
1 file changed, 267 insertions, 52 deletions
@@ -53,9 +53,47 @@
 
 #include <asm/tlbflush.h>
 
-struct kmem_cache *anon_vma_cachep;
+#include "internal.h"
 
-/* This must be called under the mmap_sem. */
+static struct kmem_cache *anon_vma_cachep;
+
+static inline struct anon_vma *anon_vma_alloc(void)
+{
+	return kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
+}
+
+static inline void anon_vma_free(struct anon_vma *anon_vma)
+{
+	kmem_cache_free(anon_vma_cachep, anon_vma);
+}
+
+/**
+ * anon_vma_prepare - attach an anon_vma to a memory region
+ * @vma: the memory region in question
+ *
+ * This makes sure the memory mapping described by 'vma' has
+ * an 'anon_vma' attached to it, so that we can associate the
+ * anonymous pages mapped into it with that anon_vma.
+ *
+ * The common case will be that we already have one, but if
+ * not we either need to find an adjacent mapping that we
+ * can re-use the anon_vma from (very common when the only
+ * reason for splitting a vma has been mprotect()), or we
+ * allocate a new one.
+ *
+ * Anon-vma allocations are very subtle, because we may have
+ * optimistically looked up an anon_vma in page_lock_anon_vma()
+ * and that may actually touch the spinlock even in the newly
+ * allocated vma (it depends on RCU to make sure that the
+ * anon_vma isn't actually destroyed).
+ *
+ * As a result, we need to do proper anon_vma locking even
+ * for the new allocation. At the same time, we do not want
+ * to do any locking for the common case of already having
+ * an anon_vma.
+ *
+ * This must be called with the mmap_sem held for reading.
+ */
 int anon_vma_prepare(struct vm_area_struct *vma)
 {
 	struct anon_vma *anon_vma = vma->anon_vma;
@@ -63,20 +101,17 @@ int anon_vma_prepare(struct vm_area_struct *vma)
 	might_sleep();
 	if (unlikely(!anon_vma)) {
 		struct mm_struct *mm = vma->vm_mm;
-		struct anon_vma *allocated, *locked;
+		struct anon_vma *allocated;
 
 		anon_vma = find_mergeable_anon_vma(vma);
-		if (anon_vma) {
-			allocated = NULL;
-			locked = anon_vma;
-			spin_lock(&locked->lock);
-		} else {
+		allocated = NULL;
+		if (!anon_vma) {
 			anon_vma = anon_vma_alloc();
 			if (unlikely(!anon_vma))
 				return -ENOMEM;
 			allocated = anon_vma;
-			locked = NULL;
 		}
+		spin_lock(&anon_vma->lock);
 
 		/* page_table_lock to protect against threads */
 		spin_lock(&mm->page_table_lock);
@@ -87,8 +122,7 @@ int anon_vma_prepare(struct vm_area_struct *vma)
 		}
 		spin_unlock(&mm->page_table_lock);
 
-		if (locked)
-			spin_unlock(&locked->lock);
+		spin_unlock(&anon_vma->lock);
 		if (unlikely(allocated))
 			anon_vma_free(allocated);
 	}
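
Taken together, the hunks above make anon_vma_prepare() take the anon_vma spinlock unconditionally, whether the anon_vma was merged from a neighbouring vma or freshly allocated. A condensed sketch of the resulting flow follows; it is an editorial illustration assembled from the hunks, with the unmodified lines between them elided, not a verbatim copy of the patched function:

	anon_vma = find_mergeable_anon_vma(vma);
	allocated = NULL;
	if (!anon_vma) {
		anon_vma = anon_vma_alloc();	/* may already be visible via RCU */
		/* ... -ENOMEM handling elided ... */
		allocated = anon_vma;
	}
	spin_lock(&anon_vma->lock);		/* now taken even for the new allocation */
	spin_lock(&mm->page_table_lock);	/* serializes threads installing vma->anon_vma */
	/* ... install anon_vma into the vma if still unset, clearing 'allocated' ... */
	spin_unlock(&mm->page_table_lock);
	spin_unlock(&anon_vma->lock);
	if (unlikely(allocated))		/* lost the race: free the unused anon_vma */
		anon_vma_free(allocated);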
@@ -157,7 +191,7 @@ void __init anon_vma_init(void)
  * Getting a lock on a stable anon_vma from a page off the LRU is
  * tricky: page_lock_anon_vma rely on RCU to guard against the races.
  */
-static struct anon_vma *page_lock_anon_vma(struct page *page)
+struct anon_vma *page_lock_anon_vma(struct page *page)
 {
 	struct anon_vma *anon_vma;
 	unsigned long anon_mapping;
@@ -177,7 +211,7 @@ out:
 	return NULL;
 }
 
-static void page_unlock_anon_vma(struct anon_vma *anon_vma)
+void page_unlock_anon_vma(struct anon_vma *anon_vma)
 {
 	spin_unlock(&anon_vma->lock);
 	rcu_read_unlock();
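
Dropping `static` from page_lock_anon_vma()/page_unlock_anon_vma() lets other mm code walk an anonymous page's vma list under the anon_vma lock. A sketch of that calling pattern (the function name here is made up for illustration; the prototypes would live in a header this diff does not touch):

static int example_walk_anon_page(struct page *page)
{
	struct anon_vma *anon_vma;
	struct vm_area_struct *vma;

	anon_vma = page_lock_anon_vma(page);	/* rcu_read_lock() + spin_lock(&anon_vma->lock) */
	if (!anon_vma)
		return 0;			/* not anon, or no longer mapped anonymously */

	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
		/* per-vma work, safe against the anon_vma being freed under us */
	}

	page_unlock_anon_vma(anon_vma);		/* spin_unlock() + rcu_read_unlock() */
	return 1;
}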
@@ -268,6 +302,32 @@ pte_t *page_check_address(struct page *page, struct mm_struct *mm,
 	return NULL;
 }
 
+/**
+ * page_mapped_in_vma - check whether a page is really mapped in a VMA
+ * @page: the page to test
+ * @vma: the VMA to test
+ *
+ * Returns 1 if the page is mapped into the page tables of the VMA, 0
+ * if the page is not mapped into the page tables of this VMA. Only
+ * valid for normal file or anonymous VMAs.
+ */
+static int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
+{
+	unsigned long address;
+	pte_t *pte;
+	spinlock_t *ptl;
+
+	address = vma_address(page, vma);
+	if (address == -EFAULT)		/* out of vma range */
+		return 0;
+	pte = page_check_address(page, vma->vm_mm, address, &ptl, 1);
+	if (!pte)			/* the page is not in this mm */
+		return 0;
+	pte_unmap_unlock(pte, ptl);
+
+	return 1;
+}
+
 /*
  * Subfunctions of page_referenced: page_referenced_one called
  * repeatedly from either page_referenced_anon or page_referenced_file.
@@ -289,10 +349,17 @@ static int page_referenced_one(struct page *page,
 	if (!pte)
 		goto out;
 
+	/*
+	 * Don't want to elevate referenced for mlocked page that gets this far,
+	 * in order that it progresses to try_to_unmap and is moved to the
+	 * unevictable list.
+	 */
 	if (vma->vm_flags & VM_LOCKED) {
-		referenced++;
 		*mapcount = 1;	/* break early from loop */
-	} else if (ptep_clear_flush_young_notify(vma, address, pte))
+		goto out_unmap;
+	}
+
+	if (ptep_clear_flush_young_notify(vma, address, pte))
 		referenced++;
 
 	/* Pretend the page is referenced if the task has the
@@ -301,6 +368,7 @@ static int page_referenced_one(struct page *page,
 			rwsem_is_locked(&mm->mmap_sem))
 		referenced++;
 
+out_unmap:
 	(*mapcount)--;
 	pte_unmap_unlock(pte, ptl);
 out:
@@ -390,11 +458,6 @@ static int page_referenced_file(struct page *page,
 		 */
 		if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont))
 			continue;
-		if ((vma->vm_flags & (VM_LOCKED|VM_MAYSHARE))
-				== (VM_LOCKED|VM_MAYSHARE)) {
-			referenced++;
-			break;
-		}
 		referenced += page_referenced_one(page, vma, &mapcount);
 		if (!mapcount)
 			break;
@@ -674,8 +737,8 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma)
 			page_clear_dirty(page);
 			set_page_dirty(page);
 		}
-
-		mem_cgroup_uncharge_page(page);
+		if (PageAnon(page))
+			mem_cgroup_uncharge_page(page);
 		__dec_zone_page_state(page,
 			PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED);
 		/*
@@ -717,11 +780,16 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 	 * If it's recently referenced (perhaps page_referenced
 	 * skipped over this mm) then we should reactivate it.
 	 */
-	if (!migration && ((vma->vm_flags & VM_LOCKED) ||
-			(ptep_clear_flush_young_notify(vma, address, pte)))) {
-		ret = SWAP_FAIL;
-		goto out_unmap;
-	}
+	if (!migration) {
+		if (vma->vm_flags & VM_LOCKED) {
+			ret = SWAP_MLOCK;
+			goto out_unmap;
+		}
+		if (ptep_clear_flush_young_notify(vma, address, pte)) {
+			ret = SWAP_FAIL;
+			goto out_unmap;
+		}
+	}
 
 	/* Nuke the page table entry. */
 	flush_cache_page(vma, address, page_to_pfn(page));
@@ -802,12 +870,17 @@ out:
  * For very sparsely populated VMAs this is a little inefficient - chances are
  * there there won't be many ptes located within the scan cluster. In this case
  * maybe we could scan further - to the end of the pte page, perhaps.
+ *
+ * Mlocked pages: check VM_LOCKED under mmap_sem held for read, if we can
+ * acquire it without blocking. If vma locked, mlock the pages in the cluster,
+ * rather than unmapping them. If we encounter the "check_page" that vmscan is
+ * trying to unmap, return SWAP_MLOCK, else default SWAP_AGAIN.
  */
 #define CLUSTER_SIZE	min(32*PAGE_SIZE, PMD_SIZE)
 #define CLUSTER_MASK	(~(CLUSTER_SIZE - 1))
 
-static void try_to_unmap_cluster(unsigned long cursor,
-	unsigned int *mapcount, struct vm_area_struct *vma)
+static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
+		struct vm_area_struct *vma, struct page *check_page)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	pgd_t *pgd;
@@ -819,6 +892,8 @@ static void try_to_unmap_cluster(unsigned long cursor,
 	struct page *page;
 	unsigned long address;
 	unsigned long end;
+	int ret = SWAP_AGAIN;
+	int locked_vma = 0;
 
 	address = (vma->vm_start + cursor) & CLUSTER_MASK;
 	end = address + CLUSTER_SIZE;
@@ -829,15 +904,26 @@ static void try_to_unmap_cluster(unsigned long cursor,
 
 	pgd = pgd_offset(mm, address);
 	if (!pgd_present(*pgd))
-		return;
+		return ret;
 
 	pud = pud_offset(pgd, address);
 	if (!pud_present(*pud))
-		return;
+		return ret;
 
 	pmd = pmd_offset(pud, address);
 	if (!pmd_present(*pmd))
-		return;
+		return ret;
+
+	/*
+	 * MLOCK_PAGES => feature is configured.
+	 * if we can acquire the mmap_sem for read, and vma is VM_LOCKED,
+	 * keep the sem while scanning the cluster for mlocking pages.
+	 */
+	if (MLOCK_PAGES && down_read_trylock(&vma->vm_mm->mmap_sem)) {
+		locked_vma = (vma->vm_flags & VM_LOCKED);
+		if (!locked_vma)
+			up_read(&vma->vm_mm->mmap_sem);	/* don't need it */
+	}
 
 	pte = pte_offset_map_lock(mm, pmd, address, &ptl);
 
@@ -850,6 +936,13 @@ static void try_to_unmap_cluster(unsigned long cursor,
 		page = vm_normal_page(vma, address, *pte);
 		BUG_ON(!page || PageAnon(page));
 
+		if (locked_vma) {
+			mlock_vma_page(page);	/* no-op if already mlocked */
+			if (page == check_page)
+				ret = SWAP_MLOCK;
+			continue;	/* don't unmap */
+		}
+
 		if (ptep_clear_flush_young_notify(vma, address, pte))
 			continue;
 
@@ -871,39 +964,104 @@ static void try_to_unmap_cluster(unsigned long cursor,
 		(*mapcount)--;
 	}
 	pte_unmap_unlock(pte - 1, ptl);
+	if (locked_vma)
+		up_read(&vma->vm_mm->mmap_sem);
+	return ret;
 }
 
-static int try_to_unmap_anon(struct page *page, int migration)
+/*
+ * common handling for pages mapped in VM_LOCKED vmas
+ */
+static int try_to_mlock_page(struct page *page, struct vm_area_struct *vma)
+{
+	int mlocked = 0;
+
+	if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
+		if (vma->vm_flags & VM_LOCKED) {
+			mlock_vma_page(page);
+			mlocked++;	/* really mlocked the page */
+		}
+		up_read(&vma->vm_mm->mmap_sem);
+	}
+	return mlocked;
+}
+
+/**
+ * try_to_unmap_anon - unmap or unlock anonymous page using the object-based
+ * rmap method
+ * @page: the page to unmap/unlock
+ * @unlock: request for unlock rather than unmap [unlikely]
+ * @migration: unmapping for migration - ignored if @unlock
+ *
+ * Find all the mappings of a page using the mapping pointer and the vma chains
+ * contained in the anon_vma struct it points to.
+ *
+ * This function is only called from try_to_unmap/try_to_munlock for
+ * anonymous pages.
+ * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
+ * where the page was found will be held for write. So, we won't recheck
+ * vm_flags for that VMA. That should be OK, because that vma shouldn't be
+ * VM_LOCKED.
+ */
+static int try_to_unmap_anon(struct page *page, int unlock, int migration)
 {
 	struct anon_vma *anon_vma;
 	struct vm_area_struct *vma;
+	unsigned int mlocked = 0;
 	int ret = SWAP_AGAIN;
 
+	if (MLOCK_PAGES && unlikely(unlock))
+		ret = SWAP_SUCCESS;	/* default for try_to_munlock() */
+
 	anon_vma = page_lock_anon_vma(page);
 	if (!anon_vma)
 		return ret;
 
 	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
-		ret = try_to_unmap_one(page, vma, migration);
-		if (ret == SWAP_FAIL || !page_mapped(page))
-			break;
+		if (MLOCK_PAGES && unlikely(unlock)) {
+			if (!((vma->vm_flags & VM_LOCKED) &&
+			      page_mapped_in_vma(page, vma)))
+				continue;	/* must visit all unlocked vmas */
+			ret = SWAP_MLOCK;	/* saw at least one mlocked vma */
+		} else {
+			ret = try_to_unmap_one(page, vma, migration);
+			if (ret == SWAP_FAIL || !page_mapped(page))
+				break;
+		}
+		if (ret == SWAP_MLOCK) {
+			mlocked = try_to_mlock_page(page, vma);
+			if (mlocked)
+				break;	/* stop if actually mlocked page */
+		}
 	}
 
 	page_unlock_anon_vma(anon_vma);
+
+	if (mlocked)
+		ret = SWAP_MLOCK;	/* actually mlocked the page */
+	else if (ret == SWAP_MLOCK)
+		ret = SWAP_AGAIN;	/* saw VM_LOCKED vma */
+
 	return ret;
 }
 
 /**
- * try_to_unmap_file - unmap file page using the object-based rmap method
- * @page: the page to unmap
- * @migration: migration flag
+ * try_to_unmap_file - unmap/unlock file page using the object-based rmap method
+ * @page: the page to unmap/unlock
+ * @unlock: request for unlock rather than unmap [unlikely]
+ * @migration: unmapping for migration - ignored if @unlock
  *
  * Find all the mappings of a page using the mapping pointer and the vma chains
  * contained in the address_space struct it points to.
  *
- * This function is only called from try_to_unmap for object-based pages.
+ * This function is only called from try_to_unmap/try_to_munlock for
+ * object-based pages.
+ * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
+ * where the page was found will be held for write. So, we won't recheck
+ * vm_flags for that VMA. That should be OK, because that vma shouldn't be
+ * VM_LOCKED.
  */
-static int try_to_unmap_file(struct page *page, int migration)
+static int try_to_unmap_file(struct page *page, int unlock, int migration)
 {
 	struct address_space *mapping = page->mapping;
 	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
@@ -914,20 +1072,44 @@ static int try_to_unmap_file(struct page *page, int migration)
 	unsigned long max_nl_cursor = 0;
 	unsigned long max_nl_size = 0;
 	unsigned int mapcount;
+	unsigned int mlocked = 0;
+
+	if (MLOCK_PAGES && unlikely(unlock))
+		ret = SWAP_SUCCESS;	/* default for try_to_munlock() */
 
 	spin_lock(&mapping->i_mmap_lock);
 	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
-		ret = try_to_unmap_one(page, vma, migration);
-		if (ret == SWAP_FAIL || !page_mapped(page))
-			goto out;
+		if (MLOCK_PAGES && unlikely(unlock)) {
+			if (!(vma->vm_flags & VM_LOCKED))
+				continue;	/* must visit all vmas */
+			ret = SWAP_MLOCK;
+		} else {
+			ret = try_to_unmap_one(page, vma, migration);
+			if (ret == SWAP_FAIL || !page_mapped(page))
+				goto out;
+		}
+		if (ret == SWAP_MLOCK) {
+			mlocked = try_to_mlock_page(page, vma);
+			if (mlocked)
+				break;	/* stop if actually mlocked page */
+		}
 	}
 
+	if (mlocked)
+		goto out;
+
 	if (list_empty(&mapping->i_mmap_nonlinear))
 		goto out;
 
 	list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
 						shared.vm_set.list) {
-		if ((vma->vm_flags & VM_LOCKED) && !migration)
+		if (MLOCK_PAGES && unlikely(unlock)) {
+			if (!(vma->vm_flags & VM_LOCKED))
+				continue;	/* must visit all vmas */
+			ret = SWAP_MLOCK;	/* leave mlocked == 0 */
+			goto out;		/* no need to look further */
+		}
+		if (!MLOCK_PAGES && !migration && (vma->vm_flags & VM_LOCKED))
 			continue;
 		cursor = (unsigned long) vma->vm_private_data;
 		if (cursor > max_nl_cursor)
@@ -937,7 +1119,7 @@ static int try_to_unmap_file(struct page *page, int migration)
 			max_nl_size = cursor;
 	}
 
-	if (max_nl_size == 0) {	/* any nonlinears locked or reserved */
+	if (max_nl_size == 0) {	/* all nonlinears locked or reserved ? */
 		ret = SWAP_FAIL;
 		goto out;
 	}
@@ -961,12 +1143,16 @@ static int try_to_unmap_file(struct page *page, int migration)
 	do {
 		list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
 						shared.vm_set.list) {
-			if ((vma->vm_flags & VM_LOCKED) && !migration)
+			if (!MLOCK_PAGES && !migration &&
+			    (vma->vm_flags & VM_LOCKED))
 				continue;
 			cursor = (unsigned long) vma->vm_private_data;
 			while ( cursor < max_nl_cursor &&
 				cursor < vma->vm_end - vma->vm_start) {
-				try_to_unmap_cluster(cursor, &mapcount, vma);
+				ret = try_to_unmap_cluster(cursor, &mapcount,
+								vma, page);
+				if (ret == SWAP_MLOCK)
+					mlocked = 2;	/* to return below */
 				cursor += CLUSTER_SIZE;
 				vma->vm_private_data = (void *) cursor;
 				if ((int)mapcount <= 0)
@@ -987,6 +1173,10 @@ static int try_to_unmap_file(struct page *page, int migration)
 		vma->vm_private_data = NULL;
 out:
 	spin_unlock(&mapping->i_mmap_lock);
+	if (mlocked)
+		ret = SWAP_MLOCK;	/* actually mlocked the page */
+	else if (ret == SWAP_MLOCK)
+		ret = SWAP_AGAIN;	/* saw VM_LOCKED vma */
 	return ret;
 }
 
@@ -1002,6 +1192,7 @@ out:
  * SWAP_SUCCESS	- we succeeded in removing all mappings
  * SWAP_AGAIN	- we missed a mapping, try again later
  * SWAP_FAIL	- the page is unswappable
+ * SWAP_MLOCK	- page is mlocked.
  */
 int try_to_unmap(struct page *page, int migration)
 {
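
The new SWAP_MLOCK return value documented above is aimed at the reclaim path. A hedged sketch of how a caller such as vmscan's shrink_page_list() might dispatch on it; the label names here are placeholders for this illustration, not taken from this patch:

	switch (try_to_unmap(page, 0)) {
	case SWAP_FAIL:
		goto activate_locked;	/* unswappable: back to the active list */
	case SWAP_AGAIN:
		goto keep_locked;	/* missed a mapping: retry on a later pass */
	case SWAP_MLOCK:
		goto cull_mlocked;	/* mlocked: divert to the unevictable list */
	case SWAP_SUCCESS:
		;	/* fully unmapped: fall through and try to free the page */
	}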
@@ -1010,12 +1201,36 @@ int try_to_unmap(struct page *page, int migration)
 	BUG_ON(!PageLocked(page));
 
 	if (PageAnon(page))
-		ret = try_to_unmap_anon(page, migration);
+		ret = try_to_unmap_anon(page, 0, migration);
 	else
-		ret = try_to_unmap_file(page, migration);
-
-	if (!page_mapped(page))
+		ret = try_to_unmap_file(page, 0, migration);
+	if (ret != SWAP_MLOCK && !page_mapped(page))
 		ret = SWAP_SUCCESS;
 	return ret;
 }
 
+#ifdef CONFIG_UNEVICTABLE_LRU
+/**
+ * try_to_munlock - try to munlock a page
+ * @page: the page to be munlocked
+ *
+ * Called from munlock code. Checks all of the VMAs mapping the page
+ * to make sure nobody else has this page mlocked. The page will be
+ * returned with PG_mlocked cleared if no other vmas have it mlocked.
+ *
+ * Return values are:
+ *
+ * SWAP_SUCCESS	- no vma's holding page mlocked.
+ * SWAP_AGAIN	- page mapped in mlocked vma -- couldn't acquire mmap sem
+ * SWAP_MLOCK	- page is now mlocked.
+ */
+int try_to_munlock(struct page *page)
+{
+	VM_BUG_ON(!PageLocked(page) || PageLRU(page));
+
+	if (PageAnon(page))
+		return try_to_unmap_anon(page, 1, 0);
+	else
+		return try_to_unmap_file(page, 1, 0);
+}
+#endif
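
try_to_munlock() itself only reports what it found; clearing PG_mlocked happens in the munlock path and re-setting it happens via mlock_vma_page() in the hunks above. A hedged sketch of a caller acting on the documented return values (illustrative only, not the actual mm/mlock.c code; the function name is made up):

static void example_munlock_page(struct page *page)
{
	/* caller must hold the page lock and have isolated the page from the LRU */
	switch (try_to_munlock(page)) {
	case SWAP_SUCCESS:
		/* no VM_LOCKED vma still maps it: the page may become evictable again */
		break;
	case SWAP_AGAIN:
		/* page mapped in a mlocked vma but mmap_sem was unavailable: retry later */
		break;
	case SWAP_MLOCK:
		/* another VM_LOCKED vma re-mlocked the page: keep it unevictable */
		break;
	}
}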