Diffstat (limited to 'mm/rmap.c')
| -rw-r--r-- | mm/rmap.c | 319 |
1 files changed, 267 insertions, 52 deletions
| @@ -53,9 +53,47 @@ | |||
| 53 | 53 | ||
| 54 | #include <asm/tlbflush.h> | 54 | #include <asm/tlbflush.h> |
| 55 | 55 | ||
| 56 | struct kmem_cache *anon_vma_cachep; | 56 | #include "internal.h" |
| 57 | 57 | ||
| 58 | /* This must be called under the mmap_sem. */ | 58 | static struct kmem_cache *anon_vma_cachep; |
| 59 | |||
| 60 | static inline struct anon_vma *anon_vma_alloc(void) | ||
| 61 | { | ||
| 62 | return kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL); | ||
| 63 | } | ||
| 64 | |||
| 65 | static inline void anon_vma_free(struct anon_vma *anon_vma) | ||
| 66 | { | ||
| 67 | kmem_cache_free(anon_vma_cachep, anon_vma); | ||
| 68 | } | ||
| 69 | |||
| 70 | /** | ||
| 71 | * anon_vma_prepare - attach an anon_vma to a memory region | ||
| 72 | * @vma: the memory region in question | ||
| 73 | * | ||
| 74 | * This makes sure the memory mapping described by 'vma' has | ||
| 75 | * an 'anon_vma' attached to it, so that we can associate the | ||
| 76 | * anonymous pages mapped into it with that anon_vma. | ||
| 77 | * | ||
| 78 | * The common case will be that we already have one, but if | ||
| 79 | * not we either need to find an adjacent mapping that we | ||
| 80 | * can re-use the anon_vma from (very common when the only | ||
| 81 | * reason for splitting a vma has been mprotect()), or we | ||
| 82 | * allocate a new one. | ||
| 83 | * | ||
| 84 | * Anon-vma allocations are very subtle, because we may have | ||
| 85 | * optimistically looked up an anon_vma in page_lock_anon_vma() | ||
| 86 | * and that may actually touch the spinlock even in the newly | ||
| 87 | * allocated vma (it depends on RCU to make sure that the | ||
| 88 | * anon_vma isn't actually destroyed). | ||
| 89 | * | ||
| 90 | * As a result, we need to do proper anon_vma locking even | ||
| 91 | * for the new allocation. At the same time, we do not want | ||
| 92 | * to do any locking for the common case of already having | ||
| 93 | * an anon_vma. | ||
| 94 | * | ||
| 95 | * This must be called with the mmap_sem held for reading. | ||
| 96 | */ | ||
| 59 | int anon_vma_prepare(struct vm_area_struct *vma) | 97 | int anon_vma_prepare(struct vm_area_struct *vma) |
| 60 | { | 98 | { |
| 61 | struct anon_vma *anon_vma = vma->anon_vma; | 99 | struct anon_vma *anon_vma = vma->anon_vma; |
| @@ -63,20 +101,17 @@ int anon_vma_prepare(struct vm_area_struct *vma) | |||
| 63 | might_sleep(); | 101 | might_sleep(); |
| 64 | if (unlikely(!anon_vma)) { | 102 | if (unlikely(!anon_vma)) { |
| 65 | struct mm_struct *mm = vma->vm_mm; | 103 | struct mm_struct *mm = vma->vm_mm; |
| 66 | struct anon_vma *allocated, *locked; | 104 | struct anon_vma *allocated; |
| 67 | 105 | ||
| 68 | anon_vma = find_mergeable_anon_vma(vma); | 106 | anon_vma = find_mergeable_anon_vma(vma); |
| 69 | if (anon_vma) { | 107 | allocated = NULL; |
| 70 | allocated = NULL; | 108 | if (!anon_vma) { |
| 71 | locked = anon_vma; | ||
| 72 | spin_lock(&locked->lock); | ||
| 73 | } else { | ||
| 74 | anon_vma = anon_vma_alloc(); | 109 | anon_vma = anon_vma_alloc(); |
| 75 | if (unlikely(!anon_vma)) | 110 | if (unlikely(!anon_vma)) |
| 76 | return -ENOMEM; | 111 | return -ENOMEM; |
| 77 | allocated = anon_vma; | 112 | allocated = anon_vma; |
| 78 | locked = NULL; | ||
| 79 | } | 113 | } |
| 114 | spin_lock(&anon_vma->lock); | ||
| 80 | 115 | ||
| 81 | /* page_table_lock to protect against threads */ | 116 | /* page_table_lock to protect against threads */ |
| 82 | spin_lock(&mm->page_table_lock); | 117 | spin_lock(&mm->page_table_lock); |
| @@ -87,8 +122,7 @@ int anon_vma_prepare(struct vm_area_struct *vma) | |||
| 87 | } | 122 | } |
| 88 | spin_unlock(&mm->page_table_lock); | 123 | spin_unlock(&mm->page_table_lock); |
| 89 | 124 | ||
| 90 | if (locked) | 125 | spin_unlock(&anon_vma->lock); |
| 91 | spin_unlock(&locked->lock); | ||
| 92 | if (unlikely(allocated)) | 126 | if (unlikely(allocated)) |
| 93 | anon_vma_free(allocated); | 127 | anon_vma_free(allocated); |
| 94 | } | 128 | } |
| @@ -157,7 +191,7 @@ void __init anon_vma_init(void) | |||
| 157 | * Getting a lock on a stable anon_vma from a page off the LRU is | 191 | * Getting a lock on a stable anon_vma from a page off the LRU is |
| 158 | * tricky: page_lock_anon_vma relies on RCU to guard against the races. | 192 | * tricky: page_lock_anon_vma relies on RCU to guard against the races. |
| 159 | */ | 193 | */ |
| 160 | static struct anon_vma *page_lock_anon_vma(struct page *page) | 194 | struct anon_vma *page_lock_anon_vma(struct page *page) |
| 161 | { | 195 | { |
| 162 | struct anon_vma *anon_vma; | 196 | struct anon_vma *anon_vma; |
| 163 | unsigned long anon_mapping; | 197 | unsigned long anon_mapping; |
| @@ -177,7 +211,7 @@ out: | |||
| 177 | return NULL; | 211 | return NULL; |
| 178 | } | 212 | } |
| 179 | 213 | ||
| 180 | static void page_unlock_anon_vma(struct anon_vma *anon_vma) | 214 | void page_unlock_anon_vma(struct anon_vma *anon_vma) |
| 181 | { | 215 | { |
| 182 | spin_unlock(&anon_vma->lock); | 216 | spin_unlock(&anon_vma->lock); |
| 183 | rcu_read_unlock(); | 217 | rcu_read_unlock(); |
| @@ -268,6 +302,32 @@ pte_t *page_check_address(struct page *page, struct mm_struct *mm, | |||
| 268 | return NULL; | 302 | return NULL; |
| 269 | } | 303 | } |
| 270 | 304 | ||
| 305 | /** | ||
| 306 | * page_mapped_in_vma - check whether a page is really mapped in a VMA | ||
| 307 | * @page: the page to test | ||
| 308 | * @vma: the VMA to test | ||
| 309 | * | ||
| 310 | * Returns 1 if the page is mapped into the page tables of the VMA, 0 | ||
| 311 | * if the page is not mapped into the page tables of this VMA. Only | ||
| 312 | * valid for normal file or anonymous VMAs. | ||
| 313 | */ | ||
| 314 | static int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma) | ||
| 315 | { | ||
| 316 | unsigned long address; | ||
| 317 | pte_t *pte; | ||
| 318 | spinlock_t *ptl; | ||
| 319 | |||
| 320 | address = vma_address(page, vma); | ||
| 321 | if (address == -EFAULT) /* out of vma range */ | ||
| 322 | return 0; | ||
| 323 | pte = page_check_address(page, vma->vm_mm, address, &ptl, 1); | ||
| 324 | if (!pte) /* the page is not in this mm */ | ||
| 325 | return 0; | ||
| 326 | pte_unmap_unlock(pte, ptl); | ||
| 327 | |||
| 328 | return 1; | ||
| 329 | } | ||
| 330 | |||
| 271 | /* | 331 | /* |
| 272 | * Subfunctions of page_referenced: page_referenced_one called | 332 | * Subfunctions of page_referenced: page_referenced_one called |
| 273 | * repeatedly from either page_referenced_anon or page_referenced_file. | 333 | * repeatedly from either page_referenced_anon or page_referenced_file. |
| @@ -289,10 +349,17 @@ static int page_referenced_one(struct page *page, | |||
| 289 | if (!pte) | 349 | if (!pte) |
| 290 | goto out; | 350 | goto out; |
| 291 | 351 | ||
| 352 | /* | ||
| 353 | * Don't want to elevate referenced for mlocked page that gets this far, | ||
| 354 | * in order that it progresses to try_to_unmap and is moved to the | ||
| 355 | * unevictable list. | ||
| 356 | */ | ||
| 292 | if (vma->vm_flags & VM_LOCKED) { | 357 | if (vma->vm_flags & VM_LOCKED) { |
| 293 | referenced++; | ||
| 294 | *mapcount = 1; /* break early from loop */ | 358 | *mapcount = 1; /* break early from loop */ |
| 295 | } else if (ptep_clear_flush_young_notify(vma, address, pte)) | 359 | goto out_unmap; |
| 360 | } | ||
| 361 | |||
| 362 | if (ptep_clear_flush_young_notify(vma, address, pte)) | ||
| 296 | referenced++; | 363 | referenced++; |
| 297 | 364 | ||
| 298 | /* Pretend the page is referenced if the task has the | 365 | /* Pretend the page is referenced if the task has the |
| @@ -301,6 +368,7 @@ static int page_referenced_one(struct page *page, | |||
| 301 | rwsem_is_locked(&mm->mmap_sem)) | 368 | rwsem_is_locked(&mm->mmap_sem)) |
| 302 | referenced++; | 369 | referenced++; |
| 303 | 370 | ||
| 371 | out_unmap: | ||
| 304 | (*mapcount)--; | 372 | (*mapcount)--; |
| 305 | pte_unmap_unlock(pte, ptl); | 373 | pte_unmap_unlock(pte, ptl); |
| 306 | out: | 374 | out: |
| @@ -390,11 +458,6 @@ static int page_referenced_file(struct page *page, | |||
| 390 | */ | 458 | */ |
| 391 | if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont)) | 459 | if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont)) |
| 392 | continue; | 460 | continue; |
| 393 | if ((vma->vm_flags & (VM_LOCKED|VM_MAYSHARE)) | ||
| 394 | == (VM_LOCKED|VM_MAYSHARE)) { | ||
| 395 | referenced++; | ||
| 396 | break; | ||
| 397 | } | ||
| 398 | referenced += page_referenced_one(page, vma, &mapcount); | 461 | referenced += page_referenced_one(page, vma, &mapcount); |
| 399 | if (!mapcount) | 462 | if (!mapcount) |
| 400 | break; | 463 | break; |
| @@ -674,8 +737,8 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma) | |||
| 674 | page_clear_dirty(page); | 737 | page_clear_dirty(page); |
| 675 | set_page_dirty(page); | 738 | set_page_dirty(page); |
| 676 | } | 739 | } |
| 677 | 740 | if (PageAnon(page)) | |
| 678 | mem_cgroup_uncharge_page(page); | 741 | mem_cgroup_uncharge_page(page); |
| 679 | __dec_zone_page_state(page, | 742 | __dec_zone_page_state(page, |
| 680 | PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED); | 743 | PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED); |
| 681 | /* | 744 | /* |
| @@ -717,11 +780,16 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, | |||
| 717 | * If it's recently referenced (perhaps page_referenced | 780 | * If it's recently referenced (perhaps page_referenced |
| 718 | * skipped over this mm) then we should reactivate it. | 781 | * skipped over this mm) then we should reactivate it. |
| 719 | */ | 782 | */ |
| 720 | if (!migration && ((vma->vm_flags & VM_LOCKED) || | 783 | if (!migration) { |
| 721 | (ptep_clear_flush_young_notify(vma, address, pte)))) { | 784 | if (vma->vm_flags & VM_LOCKED) { |
| 722 | ret = SWAP_FAIL; | 785 | ret = SWAP_MLOCK; |
| 723 | goto out_unmap; | 786 | goto out_unmap; |
| 724 | } | 787 | } |
| 788 | if (ptep_clear_flush_young_notify(vma, address, pte)) { | ||
| 789 | ret = SWAP_FAIL; | ||
| 790 | goto out_unmap; | ||
| 791 | } | ||
| 792 | } | ||
| 725 | 793 | ||
| 726 | /* Nuke the page table entry. */ | 794 | /* Nuke the page table entry. */ |
| 727 | flush_cache_page(vma, address, page_to_pfn(page)); | 795 | flush_cache_page(vma, address, page_to_pfn(page)); |
| @@ -802,12 +870,17 @@ out: | |||
| 802 | * For very sparsely populated VMAs this is a little inefficient - chances are | 870 | * For very sparsely populated VMAs this is a little inefficient - chances are |
| 803 | * there won't be many ptes located within the scan cluster. In this case | 871 | * there won't be many ptes located within the scan cluster. In this case |
| 804 | * maybe we could scan further - to the end of the pte page, perhaps. | 872 | * maybe we could scan further - to the end of the pte page, perhaps. |
| 873 | * | ||
| 874 | * Mlocked pages: check VM_LOCKED under mmap_sem held for read, if we can | ||
| 875 | * acquire it without blocking. If vma locked, mlock the pages in the cluster, | ||
| 876 | * rather than unmapping them. If we encounter the "check_page" that vmscan is | ||
| 877 | * trying to unmap, return SWAP_MLOCK, else default SWAP_AGAIN. | ||
| 805 | */ | 878 | */ |
| 806 | #define CLUSTER_SIZE min(32*PAGE_SIZE, PMD_SIZE) | 879 | #define CLUSTER_SIZE min(32*PAGE_SIZE, PMD_SIZE) |
| 807 | #define CLUSTER_MASK (~(CLUSTER_SIZE - 1)) | 880 | #define CLUSTER_MASK (~(CLUSTER_SIZE - 1)) |
| 808 | 881 | ||
| 809 | static void try_to_unmap_cluster(unsigned long cursor, | 882 | static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount, |
| 810 | unsigned int *mapcount, struct vm_area_struct *vma) | 883 | struct vm_area_struct *vma, struct page *check_page) |
| 811 | { | 884 | { |
| 812 | struct mm_struct *mm = vma->vm_mm; | 885 | struct mm_struct *mm = vma->vm_mm; |
| 813 | pgd_t *pgd; | 886 | pgd_t *pgd; |
| @@ -819,6 +892,8 @@ static void try_to_unmap_cluster(unsigned long cursor, | |||
| 819 | struct page *page; | 892 | struct page *page; |
| 820 | unsigned long address; | 893 | unsigned long address; |
| 821 | unsigned long end; | 894 | unsigned long end; |
| 895 | int ret = SWAP_AGAIN; | ||
| 896 | int locked_vma = 0; | ||
| 822 | 897 | ||
| 823 | address = (vma->vm_start + cursor) & CLUSTER_MASK; | 898 | address = (vma->vm_start + cursor) & CLUSTER_MASK; |
| 824 | end = address + CLUSTER_SIZE; | 899 | end = address + CLUSTER_SIZE; |
| @@ -829,15 +904,26 @@ static void try_to_unmap_cluster(unsigned long cursor, | |||
| 829 | 904 | ||
| 830 | pgd = pgd_offset(mm, address); | 905 | pgd = pgd_offset(mm, address); |
| 831 | if (!pgd_present(*pgd)) | 906 | if (!pgd_present(*pgd)) |
| 832 | return; | 907 | return ret; |
| 833 | 908 | ||
| 834 | pud = pud_offset(pgd, address); | 909 | pud = pud_offset(pgd, address); |
| 835 | if (!pud_present(*pud)) | 910 | if (!pud_present(*pud)) |
| 836 | return; | 911 | return ret; |
| 837 | 912 | ||
| 838 | pmd = pmd_offset(pud, address); | 913 | pmd = pmd_offset(pud, address); |
| 839 | if (!pmd_present(*pmd)) | 914 | if (!pmd_present(*pmd)) |
| 840 | return; | 915 | return ret; |
| 916 | |||
| 917 | /* | ||
| 918 | * MLOCK_PAGES => feature is configured. | ||
| 919 | * if we can acquire the mmap_sem for read, and vma is VM_LOCKED, | ||
| 920 | * keep the sem while scanning the cluster for mlocking pages. | ||
| 921 | */ | ||
| 922 | if (MLOCK_PAGES && down_read_trylock(&vma->vm_mm->mmap_sem)) { | ||
| 923 | locked_vma = (vma->vm_flags & VM_LOCKED); | ||
| 924 | if (!locked_vma) | ||
| 925 | up_read(&vma->vm_mm->mmap_sem); /* don't need it */ | ||
| 926 | } | ||
| 841 | 927 | ||
| 842 | pte = pte_offset_map_lock(mm, pmd, address, &ptl); | 928 | pte = pte_offset_map_lock(mm, pmd, address, &ptl); |
| 843 | 929 | ||
| @@ -850,6 +936,13 @@ static void try_to_unmap_cluster(unsigned long cursor, | |||
| 850 | page = vm_normal_page(vma, address, *pte); | 936 | page = vm_normal_page(vma, address, *pte); |
| 851 | BUG_ON(!page || PageAnon(page)); | 937 | BUG_ON(!page || PageAnon(page)); |
| 852 | 938 | ||
| 939 | if (locked_vma) { | ||
| 940 | mlock_vma_page(page); /* no-op if already mlocked */ | ||
| 941 | if (page == check_page) | ||
| 942 | ret = SWAP_MLOCK; | ||
| 943 | continue; /* don't unmap */ | ||
| 944 | } | ||
| 945 | |||
| 853 | if (ptep_clear_flush_young_notify(vma, address, pte)) | 946 | if (ptep_clear_flush_young_notify(vma, address, pte)) |
| 854 | continue; | 947 | continue; |
| 855 | 948 | ||
| @@ -871,39 +964,104 @@ static void try_to_unmap_cluster(unsigned long cursor, | |||
| 871 | (*mapcount)--; | 964 | (*mapcount)--; |
| 872 | } | 965 | } |
| 873 | pte_unmap_unlock(pte - 1, ptl); | 966 | pte_unmap_unlock(pte - 1, ptl); |
| 967 | if (locked_vma) | ||
| 968 | up_read(&vma->vm_mm->mmap_sem); | ||
| 969 | return ret; | ||
| 874 | } | 970 | } |
| 875 | 971 | ||
| 876 | static int try_to_unmap_anon(struct page *page, int migration) | 972 | /* |
| 973 | * common handling for pages mapped in VM_LOCKED vmas | ||
| 974 | */ | ||
| 975 | static int try_to_mlock_page(struct page *page, struct vm_area_struct *vma) | ||
| 976 | { | ||
| 977 | int mlocked = 0; | ||
| 978 | |||
| 979 | if (down_read_trylock(&vma->vm_mm->mmap_sem)) { | ||
| 980 | if (vma->vm_flags & VM_LOCKED) { | ||
| 981 | mlock_vma_page(page); | ||
| 982 | mlocked++; /* really mlocked the page */ | ||
| 983 | } | ||
| 984 | up_read(&vma->vm_mm->mmap_sem); | ||
| 985 | } | ||
| 986 | return mlocked; | ||
| 987 | } | ||
| 988 | |||
| 989 | /** | ||
| 990 | * try_to_unmap_anon - unmap or unlock anonymous page using the object-based | ||
| 991 | * rmap method | ||
| 992 | * @page: the page to unmap/unlock | ||
| 993 | * @unlock: request for unlock rather than unmap [unlikely] | ||
| 994 | * @migration: unmapping for migration - ignored if @unlock | ||
| 995 | * | ||
| 996 | * Find all the mappings of a page using the mapping pointer and the vma chains | ||
| 997 | * contained in the anon_vma struct it points to. | ||
| 998 | * | ||
| 999 | * This function is only called from try_to_unmap/try_to_munlock for | ||
| 1000 | * anonymous pages. | ||
| 1001 | * When called from try_to_munlock(), the mmap_sem of the mm containing the vma | ||
| 1002 | * where the page was found will be held for write. So, we won't recheck | ||
| 1003 | * vm_flags for that VMA. That should be OK, because that vma shouldn't be | ||
| 1004 | * VM_LOCKED. | ||
| 1005 | */ | ||
| 1006 | static int try_to_unmap_anon(struct page *page, int unlock, int migration) | ||
| 877 | { | 1007 | { |
| 878 | struct anon_vma *anon_vma; | 1008 | struct anon_vma *anon_vma; |
| 879 | struct vm_area_struct *vma; | 1009 | struct vm_area_struct *vma; |
| 1010 | unsigned int mlocked = 0; | ||
| 880 | int ret = SWAP_AGAIN; | 1011 | int ret = SWAP_AGAIN; |
| 881 | 1012 | ||
| 1013 | if (MLOCK_PAGES && unlikely(unlock)) | ||
| 1014 | ret = SWAP_SUCCESS; /* default for try_to_munlock() */ | ||
| 1015 | |||
| 882 | anon_vma = page_lock_anon_vma(page); | 1016 | anon_vma = page_lock_anon_vma(page); |
| 883 | if (!anon_vma) | 1017 | if (!anon_vma) |
| 884 | return ret; | 1018 | return ret; |
| 885 | 1019 | ||
| 886 | list_for_each_entry(vma, &anon_vma->head, anon_vma_node) { | 1020 | list_for_each_entry(vma, &anon_vma->head, anon_vma_node) { |
| 887 | ret = try_to_unmap_one(page, vma, migration); | 1021 | if (MLOCK_PAGES && unlikely(unlock)) { |
| 888 | if (ret == SWAP_FAIL || !page_mapped(page)) | 1022 | if (!((vma->vm_flags & VM_LOCKED) && |
| 889 | break; | 1023 | page_mapped_in_vma(page, vma))) |
| 1024 | continue; /* must visit all unlocked vmas */ | ||
| 1025 | ret = SWAP_MLOCK; /* saw at least one mlocked vma */ | ||
| 1026 | } else { | ||
| 1027 | ret = try_to_unmap_one(page, vma, migration); | ||
| 1028 | if (ret == SWAP_FAIL || !page_mapped(page)) | ||
| 1029 | break; | ||
| 1030 | } | ||
| 1031 | if (ret == SWAP_MLOCK) { | ||
| 1032 | mlocked = try_to_mlock_page(page, vma); | ||
| 1033 | if (mlocked) | ||
| 1034 | break; /* stop if actually mlocked page */ | ||
| 1035 | } | ||
| 890 | } | 1036 | } |
| 891 | 1037 | ||
| 892 | page_unlock_anon_vma(anon_vma); | 1038 | page_unlock_anon_vma(anon_vma); |
| 1039 | |||
| 1040 | if (mlocked) | ||
| 1041 | ret = SWAP_MLOCK; /* actually mlocked the page */ | ||
| 1042 | else if (ret == SWAP_MLOCK) | ||
| 1043 | ret = SWAP_AGAIN; /* saw VM_LOCKED vma */ | ||
| 1044 | |||
| 893 | return ret; | 1045 | return ret; |
| 894 | } | 1046 | } |
| 895 | 1047 | ||
| 896 | /** | 1048 | /** |
| 897 | * try_to_unmap_file - unmap file page using the object-based rmap method | 1049 | * try_to_unmap_file - unmap/unlock file page using the object-based rmap method |
| 898 | * @page: the page to unmap | 1050 | * @page: the page to unmap/unlock |
| 899 | * @migration: migration flag | 1051 | * @unlock: request for unlock rather than unmap [unlikely] |
| 1052 | * @migration: unmapping for migration - ignored if @unlock | ||
| 900 | * | 1053 | * |
| 901 | * Find all the mappings of a page using the mapping pointer and the vma chains | 1054 | * Find all the mappings of a page using the mapping pointer and the vma chains |
| 902 | * contained in the address_space struct it points to. | 1055 | * contained in the address_space struct it points to. |
| 903 | * | 1056 | * |
| 904 | * This function is only called from try_to_unmap for object-based pages. | 1057 | * This function is only called from try_to_unmap/try_to_munlock for |
| 1058 | * object-based pages. | ||
| 1059 | * When called from try_to_munlock(), the mmap_sem of the mm containing the vma | ||
| 1060 | * where the page was found will be held for write. So, we won't recheck | ||
| 1061 | * vm_flags for that VMA. That should be OK, because that vma shouldn't be | ||
| 1062 | * VM_LOCKED. | ||
| 905 | */ | 1063 | */ |
| 906 | static int try_to_unmap_file(struct page *page, int migration) | 1064 | static int try_to_unmap_file(struct page *page, int unlock, int migration) |
| 907 | { | 1065 | { |
| 908 | struct address_space *mapping = page->mapping; | 1066 | struct address_space *mapping = page->mapping; |
| 909 | pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); | 1067 | pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); |
| @@ -914,20 +1072,44 @@ static int try_to_unmap_file(struct page *page, int migration) | |||
| 914 | unsigned long max_nl_cursor = 0; | 1072 | unsigned long max_nl_cursor = 0; |
| 915 | unsigned long max_nl_size = 0; | 1073 | unsigned long max_nl_size = 0; |
| 916 | unsigned int mapcount; | 1074 | unsigned int mapcount; |
| 1075 | unsigned int mlocked = 0; | ||
| 1076 | |||
| 1077 | if (MLOCK_PAGES && unlikely(unlock)) | ||
| 1078 | ret = SWAP_SUCCESS; /* default for try_to_munlock() */ | ||
| 917 | 1079 | ||
| 918 | spin_lock(&mapping->i_mmap_lock); | 1080 | spin_lock(&mapping->i_mmap_lock); |
| 919 | vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { | 1081 | vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { |
| 920 | ret = try_to_unmap_one(page, vma, migration); | 1082 | if (MLOCK_PAGES && unlikely(unlock)) { |
| 921 | if (ret == SWAP_FAIL || !page_mapped(page)) | 1083 | if (!(vma->vm_flags & VM_LOCKED)) |
| 922 | goto out; | 1084 | continue; /* must visit all vmas */ |
| 1085 | ret = SWAP_MLOCK; | ||
| 1086 | } else { | ||
| 1087 | ret = try_to_unmap_one(page, vma, migration); | ||
| 1088 | if (ret == SWAP_FAIL || !page_mapped(page)) | ||
| 1089 | goto out; | ||
| 1090 | } | ||
| 1091 | if (ret == SWAP_MLOCK) { | ||
| 1092 | mlocked = try_to_mlock_page(page, vma); | ||
| 1093 | if (mlocked) | ||
| 1094 | break; /* stop if actually mlocked page */ | ||
| 1095 | } | ||
| 923 | } | 1096 | } |
| 924 | 1097 | ||
| 1098 | if (mlocked) | ||
| 1099 | goto out; | ||
| 1100 | |||
| 925 | if (list_empty(&mapping->i_mmap_nonlinear)) | 1101 | if (list_empty(&mapping->i_mmap_nonlinear)) |
| 926 | goto out; | 1102 | goto out; |
| 927 | 1103 | ||
| 928 | list_for_each_entry(vma, &mapping->i_mmap_nonlinear, | 1104 | list_for_each_entry(vma, &mapping->i_mmap_nonlinear, |
| 929 | shared.vm_set.list) { | 1105 | shared.vm_set.list) { |
| 930 | if ((vma->vm_flags & VM_LOCKED) && !migration) | 1106 | if (MLOCK_PAGES && unlikely(unlock)) { |
| 1107 | if (!(vma->vm_flags & VM_LOCKED)) | ||
| 1108 | continue; /* must visit all vmas */ | ||
| 1109 | ret = SWAP_MLOCK; /* leave mlocked == 0 */ | ||
| 1110 | goto out; /* no need to look further */ | ||
| 1111 | } | ||
| 1112 | if (!MLOCK_PAGES && !migration && (vma->vm_flags & VM_LOCKED)) | ||
| 931 | continue; | 1113 | continue; |
| 932 | cursor = (unsigned long) vma->vm_private_data; | 1114 | cursor = (unsigned long) vma->vm_private_data; |
| 933 | if (cursor > max_nl_cursor) | 1115 | if (cursor > max_nl_cursor) |
| @@ -937,7 +1119,7 @@ static int try_to_unmap_file(struct page *page, int migration) | |||
| 937 | max_nl_size = cursor; | 1119 | max_nl_size = cursor; |
| 938 | } | 1120 | } |
| 939 | 1121 | ||
| 940 | if (max_nl_size == 0) { /* any nonlinears locked or reserved */ | 1122 | if (max_nl_size == 0) { /* all nonlinears locked or reserved ? */ |
| 941 | ret = SWAP_FAIL; | 1123 | ret = SWAP_FAIL; |
| 942 | goto out; | 1124 | goto out; |
| 943 | } | 1125 | } |
| @@ -961,12 +1143,16 @@ static int try_to_unmap_file(struct page *page, int migration) | |||
| 961 | do { | 1143 | do { |
| 962 | list_for_each_entry(vma, &mapping->i_mmap_nonlinear, | 1144 | list_for_each_entry(vma, &mapping->i_mmap_nonlinear, |
| 963 | shared.vm_set.list) { | 1145 | shared.vm_set.list) { |
| 964 | if ((vma->vm_flags & VM_LOCKED) && !migration) | 1146 | if (!MLOCK_PAGES && !migration && |
| 1147 | (vma->vm_flags & VM_LOCKED)) | ||
| 965 | continue; | 1148 | continue; |
| 966 | cursor = (unsigned long) vma->vm_private_data; | 1149 | cursor = (unsigned long) vma->vm_private_data; |
| 967 | while ( cursor < max_nl_cursor && | 1150 | while ( cursor < max_nl_cursor && |
| 968 | cursor < vma->vm_end - vma->vm_start) { | 1151 | cursor < vma->vm_end - vma->vm_start) { |
| 969 | try_to_unmap_cluster(cursor, &mapcount, vma); | 1152 | ret = try_to_unmap_cluster(cursor, &mapcount, |
| 1153 | vma, page); | ||
| 1154 | if (ret == SWAP_MLOCK) | ||
| 1155 | mlocked = 2; /* to return below */ | ||
| 970 | cursor += CLUSTER_SIZE; | 1156 | cursor += CLUSTER_SIZE; |
| 971 | vma->vm_private_data = (void *) cursor; | 1157 | vma->vm_private_data = (void *) cursor; |
| 972 | if ((int)mapcount <= 0) | 1158 | if ((int)mapcount <= 0) |
| @@ -987,6 +1173,10 @@ static int try_to_unmap_file(struct page *page, int migration) | |||
| 987 | vma->vm_private_data = NULL; | 1173 | vma->vm_private_data = NULL; |
| 988 | out: | 1174 | out: |
| 989 | spin_unlock(&mapping->i_mmap_lock); | 1175 | spin_unlock(&mapping->i_mmap_lock); |
| 1176 | if (mlocked) | ||
| 1177 | ret = SWAP_MLOCK; /* actually mlocked the page */ | ||
| 1178 | else if (ret == SWAP_MLOCK) | ||
| 1179 | ret = SWAP_AGAIN; /* saw VM_LOCKED vma */ | ||
| 990 | return ret; | 1180 | return ret; |
| 991 | } | 1181 | } |
| 992 | 1182 | ||
| @@ -1002,6 +1192,7 @@ out: | |||
| 1002 | * SWAP_SUCCESS - we succeeded in removing all mappings | 1192 | * SWAP_SUCCESS - we succeeded in removing all mappings |
| 1003 | * SWAP_AGAIN - we missed a mapping, try again later | 1193 | * SWAP_AGAIN - we missed a mapping, try again later |
| 1004 | * SWAP_FAIL - the page is unswappable | 1194 | * SWAP_FAIL - the page is unswappable |
| 1195 | * SWAP_MLOCK - page is mlocked. | ||
| 1005 | */ | 1196 | */ |
| 1006 | int try_to_unmap(struct page *page, int migration) | 1197 | int try_to_unmap(struct page *page, int migration) |
| 1007 | { | 1198 | { |
| @@ -1010,12 +1201,36 @@ int try_to_unmap(struct page *page, int migration) | |||
| 1010 | BUG_ON(!PageLocked(page)); | 1201 | BUG_ON(!PageLocked(page)); |
| 1011 | 1202 | ||
| 1012 | if (PageAnon(page)) | 1203 | if (PageAnon(page)) |
| 1013 | ret = try_to_unmap_anon(page, migration); | 1204 | ret = try_to_unmap_anon(page, 0, migration); |
| 1014 | else | 1205 | else |
| 1015 | ret = try_to_unmap_file(page, migration); | 1206 | ret = try_to_unmap_file(page, 0, migration); |
| 1016 | 1207 | if (ret != SWAP_MLOCK && !page_mapped(page)) | |
| 1017 | if (!page_mapped(page)) | ||
| 1018 | ret = SWAP_SUCCESS; | 1208 | ret = SWAP_SUCCESS; |
| 1019 | return ret; | 1209 | return ret; |
| 1020 | } | 1210 | } |
| 1021 | 1211 | ||
| 1212 | #ifdef CONFIG_UNEVICTABLE_LRU | ||
| 1213 | /** | ||
| 1214 | * try_to_munlock - try to munlock a page | ||
| 1215 | * @page: the page to be munlocked | ||
| 1216 | * | ||
| 1217 | * Called from munlock code. Checks all of the VMAs mapping the page | ||
| 1218 | * to make sure nobody else has this page mlocked. The page will be | ||
| 1219 | * returned with PG_mlocked cleared if no other vmas have it mlocked. | ||
| 1220 | * | ||
| 1221 | * Return values are: | ||
| 1222 | * | ||
| 1223 | * SWAP_SUCCESS - no vma's holding page mlocked. | ||
| 1224 | * SWAP_AGAIN - page mapped in mlocked vma -- couldn't acquire mmap sem | ||
| 1225 | * SWAP_MLOCK - page is now mlocked. | ||
| 1226 | */ | ||
| 1227 | int try_to_munlock(struct page *page) | ||
| 1228 | { | ||
| 1229 | VM_BUG_ON(!PageLocked(page) || PageLRU(page)); | ||
| 1230 | |||
| 1231 | if (PageAnon(page)) | ||
| 1232 | return try_to_unmap_anon(page, 1, 0); | ||
| 1233 | else | ||
| 1234 | return try_to_unmap_file(page, 1, 0); | ||
| 1235 | } | ||
| 1236 | #endif | ||
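
For orientation, a minimal caller-side sketch of the return-value contract this diff documents for try_to_unmap() and try_to_munlock(). It is not code from the patch: handle_mlocked_page() and keep_page_active() are hypothetical placeholders for whatever the reclaim path actually does with such pages, and the SWAP_MLOCK constant is assumed to come from the companion include/linux/rmap.h change, which is not part of this file's diff.

#include <linux/mm.h>
#include <linux/rmap.h>

/* Hypothetical helpers, not part of this patch. */
static void handle_mlocked_page(struct page *page);
static void keep_page_active(struct page *page);

/* Sketch of how a reclaim-side caller might branch on the new return code. */
static void example_shrink_one_page(struct page *page)
{
	switch (try_to_unmap(page, 0)) {
	case SWAP_SUCCESS:
		/* all mappings removed: safe to write back and free */
		break;
	case SWAP_AGAIN:
	case SWAP_FAIL:
		/* missed a mapping, or the page is unswappable: keep it */
		keep_page_active(page);
		break;
	case SWAP_MLOCK:
		/*
		 * New with this patch: the page is mapped by a VM_LOCKED vma
		 * and belongs on the unevictable list rather than being
		 * reclaimed. try_to_munlock() performs the reverse check
		 * (is any remaining vma still holding the page mlocked?)
		 * when an mlock goes away.
		 */
		handle_mlocked_page(page);
		break;
	}
}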
