diff options
Diffstat (limited to 'mm/memory.c')
-rw-r--r-- | mm/memory.c | 140 |
1 files changed, 94 insertions, 46 deletions
diff --git a/mm/memory.c b/mm/memory.c index 1002f473f49..fc031d68327 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -1129,12 +1129,17 @@ static inline int use_zero_page(struct vm_area_struct *vma) | |||
1129 | return !vma->vm_ops || !vma->vm_ops->fault; | 1129 | return !vma->vm_ops || !vma->vm_ops->fault; |
1130 | } | 1130 | } |
1131 | 1131 | ||
1132 | int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | 1132 | |
1133 | unsigned long start, int len, int write, int force, | 1133 | |
1134 | int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | ||
1135 | unsigned long start, int len, int flags, | ||
1134 | struct page **pages, struct vm_area_struct **vmas) | 1136 | struct page **pages, struct vm_area_struct **vmas) |
1135 | { | 1137 | { |
1136 | int i; | 1138 | int i; |
1137 | unsigned int vm_flags; | 1139 | unsigned int vm_flags = 0; |
1140 | int write = !!(flags & GUP_FLAGS_WRITE); | ||
1141 | int force = !!(flags & GUP_FLAGS_FORCE); | ||
1142 | int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS); | ||
1138 | 1143 | ||
1139 | if (len <= 0) | 1144 | if (len <= 0) |
1140 | return 0; | 1145 | return 0; |
@@ -1158,7 +1163,9 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
1158 | pud_t *pud; | 1163 | pud_t *pud; |
1159 | pmd_t *pmd; | 1164 | pmd_t *pmd; |
1160 | pte_t *pte; | 1165 | pte_t *pte; |
1161 | if (write) /* user gate pages are read-only */ | 1166 | |
1167 | /* user gate pages are read-only */ | ||
1168 | if (!ignore && write) | ||
1162 | return i ? : -EFAULT; | 1169 | return i ? : -EFAULT; |
1163 | if (pg > TASK_SIZE) | 1170 | if (pg > TASK_SIZE) |
1164 | pgd = pgd_offset_k(pg); | 1171 | pgd = pgd_offset_k(pg); |
@@ -1190,8 +1197,9 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
1190 | continue; | 1197 | continue; |
1191 | } | 1198 | } |
1192 | 1199 | ||
1193 | if (!vma || (vma->vm_flags & (VM_IO | VM_PFNMAP)) | 1200 | if (!vma || |
1194 | || !(vm_flags & vma->vm_flags)) | 1201 | (vma->vm_flags & (VM_IO | VM_PFNMAP)) || |
1202 | (!ignore && !(vm_flags & vma->vm_flags))) | ||
1195 | return i ? : -EFAULT; | 1203 | return i ? : -EFAULT; |
1196 | 1204 | ||
1197 | if (is_vm_hugetlb_page(vma)) { | 1205 | if (is_vm_hugetlb_page(vma)) { |
@@ -1266,6 +1274,23 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
1266 | } while (len); | 1274 | } while (len); |
1267 | return i; | 1275 | return i; |
1268 | } | 1276 | } |
1277 | |||
1278 | int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | ||
1279 | unsigned long start, int len, int write, int force, | ||
1280 | struct page **pages, struct vm_area_struct **vmas) | ||
1281 | { | ||
1282 | int flags = 0; | ||
1283 | |||
1284 | if (write) | ||
1285 | flags |= GUP_FLAGS_WRITE; | ||
1286 | if (force) | ||
1287 | flags |= GUP_FLAGS_FORCE; | ||
1288 | |||
1289 | return __get_user_pages(tsk, mm, | ||
1290 | start, len, flags, | ||
1291 | pages, vmas); | ||
1292 | } | ||
1293 | |||
1269 | EXPORT_SYMBOL(get_user_pages); | 1294 | EXPORT_SYMBOL(get_user_pages); |
1270 | 1295 | ||
1271 | pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr, | 1296 | pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr, |
@@ -1296,18 +1321,14 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr, | |||
1296 | pte_t *pte; | 1321 | pte_t *pte; |
1297 | spinlock_t *ptl; | 1322 | spinlock_t *ptl; |
1298 | 1323 | ||
1299 | retval = mem_cgroup_charge(page, mm, GFP_KERNEL); | ||
1300 | if (retval) | ||
1301 | goto out; | ||
1302 | |||
1303 | retval = -EINVAL; | 1324 | retval = -EINVAL; |
1304 | if (PageAnon(page)) | 1325 | if (PageAnon(page)) |
1305 | goto out_uncharge; | 1326 | goto out; |
1306 | retval = -ENOMEM; | 1327 | retval = -ENOMEM; |
1307 | flush_dcache_page(page); | 1328 | flush_dcache_page(page); |
1308 | pte = get_locked_pte(mm, addr, &ptl); | 1329 | pte = get_locked_pte(mm, addr, &ptl); |
1309 | if (!pte) | 1330 | if (!pte) |
1310 | goto out_uncharge; | 1331 | goto out; |
1311 | retval = -EBUSY; | 1332 | retval = -EBUSY; |
1312 | if (!pte_none(*pte)) | 1333 | if (!pte_none(*pte)) |
1313 | goto out_unlock; | 1334 | goto out_unlock; |
@@ -1323,8 +1344,6 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr, | |||
1323 | return retval; | 1344 | return retval; |
1324 | out_unlock: | 1345 | out_unlock: |
1325 | pte_unmap_unlock(pte, ptl); | 1346 | pte_unmap_unlock(pte, ptl); |
1326 | out_uncharge: | ||
1327 | mem_cgroup_uncharge_page(page); | ||
1328 | out: | 1347 | out: |
1329 | return retval; | 1348 | return retval; |
1330 | } | 1349 | } |
@@ -1858,6 +1877,15 @@ gotten: | |||
1858 | new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); | 1877 | new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); |
1859 | if (!new_page) | 1878 | if (!new_page) |
1860 | goto oom; | 1879 | goto oom; |
1880 | /* | ||
1881 | * Don't let another task, with possibly unlocked vma, | ||
1882 | * keep the mlocked page. | ||
1883 | */ | ||
1884 | if (vma->vm_flags & VM_LOCKED) { | ||
1885 | lock_page(old_page); /* for LRU manipulation */ | ||
1886 | clear_page_mlock(old_page); | ||
1887 | unlock_page(old_page); | ||
1888 | } | ||
1861 | cow_user_page(new_page, old_page, address, vma); | 1889 | cow_user_page(new_page, old_page, address, vma); |
1862 | __SetPageUptodate(new_page); | 1890 | __SetPageUptodate(new_page); |
1863 | 1891 | ||
@@ -1886,11 +1914,13 @@ gotten: | |||
1886 | * thread doing COW. | 1914 | * thread doing COW. |
1887 | */ | 1915 | */ |
1888 | ptep_clear_flush_notify(vma, address, page_table); | 1916 | ptep_clear_flush_notify(vma, address, page_table); |
1889 | set_pte_at(mm, address, page_table, entry); | 1917 | SetPageSwapBacked(new_page); |
1890 | update_mmu_cache(vma, address, entry); | 1918 | lru_cache_add_active_or_unevictable(new_page, vma); |
1891 | lru_cache_add_active(new_page); | ||
1892 | page_add_new_anon_rmap(new_page, vma, address); | 1919 | page_add_new_anon_rmap(new_page, vma, address); |
1893 | 1920 | ||
1921 | //TODO: is this safe? do_anonymous_page() does it this way. | ||
1922 | set_pte_at(mm, address, page_table, entry); | ||
1923 | update_mmu_cache(vma, address, entry); | ||
1894 | if (old_page) { | 1924 | if (old_page) { |
1895 | /* | 1925 | /* |
1896 | * Only after switching the pte to the new page may | 1926 | * Only after switching the pte to the new page may |
@@ -2288,16 +2318,17 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2288 | count_vm_event(PGMAJFAULT); | 2318 | count_vm_event(PGMAJFAULT); |
2289 | } | 2319 | } |
2290 | 2320 | ||
2321 | mark_page_accessed(page); | ||
2322 | |||
2323 | lock_page(page); | ||
2324 | delayacct_clear_flag(DELAYACCT_PF_SWAPIN); | ||
2325 | |||
2291 | if (mem_cgroup_charge(page, mm, GFP_KERNEL)) { | 2326 | if (mem_cgroup_charge(page, mm, GFP_KERNEL)) { |
2292 | delayacct_clear_flag(DELAYACCT_PF_SWAPIN); | ||
2293 | ret = VM_FAULT_OOM; | 2327 | ret = VM_FAULT_OOM; |
2328 | unlock_page(page); | ||
2294 | goto out; | 2329 | goto out; |
2295 | } | 2330 | } |
2296 | 2331 | ||
2297 | mark_page_accessed(page); | ||
2298 | lock_page(page); | ||
2299 | delayacct_clear_flag(DELAYACCT_PF_SWAPIN); | ||
2300 | |||
2301 | /* | 2332 | /* |
2302 | * Back out if somebody else already faulted in this pte. | 2333 | * Back out if somebody else already faulted in this pte. |
2303 | */ | 2334 | */ |
@@ -2324,7 +2355,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2324 | page_add_anon_rmap(page, vma, address); | 2355 | page_add_anon_rmap(page, vma, address); |
2325 | 2356 | ||
2326 | swap_free(entry); | 2357 | swap_free(entry); |
2327 | if (vm_swap_full()) | 2358 | if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) |
2328 | remove_exclusive_swap_page(page); | 2359 | remove_exclusive_swap_page(page); |
2329 | unlock_page(page); | 2360 | unlock_page(page); |
2330 | 2361 | ||
@@ -2382,7 +2413,8 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2382 | if (!pte_none(*page_table)) | 2413 | if (!pte_none(*page_table)) |
2383 | goto release; | 2414 | goto release; |
2384 | inc_mm_counter(mm, anon_rss); | 2415 | inc_mm_counter(mm, anon_rss); |
2385 | lru_cache_add_active(page); | 2416 | SetPageSwapBacked(page); |
2417 | lru_cache_add_active_or_unevictable(page, vma); | ||
2386 | page_add_new_anon_rmap(page, vma, address); | 2418 | page_add_new_anon_rmap(page, vma, address); |
2387 | set_pte_at(mm, address, page_table, entry); | 2419 | set_pte_at(mm, address, page_table, entry); |
2388 | 2420 | ||
@@ -2423,6 +2455,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2423 | struct page *page; | 2455 | struct page *page; |
2424 | pte_t entry; | 2456 | pte_t entry; |
2425 | int anon = 0; | 2457 | int anon = 0; |
2458 | int charged = 0; | ||
2426 | struct page *dirty_page = NULL; | 2459 | struct page *dirty_page = NULL; |
2427 | struct vm_fault vmf; | 2460 | struct vm_fault vmf; |
2428 | int ret; | 2461 | int ret; |
@@ -2463,6 +2496,18 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2463 | ret = VM_FAULT_OOM; | 2496 | ret = VM_FAULT_OOM; |
2464 | goto out; | 2497 | goto out; |
2465 | } | 2498 | } |
2499 | if (mem_cgroup_charge(page, mm, GFP_KERNEL)) { | ||
2500 | ret = VM_FAULT_OOM; | ||
2501 | page_cache_release(page); | ||
2502 | goto out; | ||
2503 | } | ||
2504 | charged = 1; | ||
2505 | /* | ||
2506 | * Don't let another task, with possibly unlocked vma, | ||
2507 | * keep the mlocked page. | ||
2508 | */ | ||
2509 | if (vma->vm_flags & VM_LOCKED) | ||
2510 | clear_page_mlock(vmf.page); | ||
2466 | copy_user_highpage(page, vmf.page, address, vma); | 2511 | copy_user_highpage(page, vmf.page, address, vma); |
2467 | __SetPageUptodate(page); | 2512 | __SetPageUptodate(page); |
2468 | } else { | 2513 | } else { |
@@ -2497,11 +2542,6 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2497 | 2542 | ||
2498 | } | 2543 | } |
2499 | 2544 | ||
2500 | if (mem_cgroup_charge(page, mm, GFP_KERNEL)) { | ||
2501 | ret = VM_FAULT_OOM; | ||
2502 | goto out; | ||
2503 | } | ||
2504 | |||
2505 | page_table = pte_offset_map_lock(mm, pmd, address, &ptl); | 2545 | page_table = pte_offset_map_lock(mm, pmd, address, &ptl); |
2506 | 2546 | ||
2507 | /* | 2547 | /* |
@@ -2520,11 +2560,11 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2520 | entry = mk_pte(page, vma->vm_page_prot); | 2560 | entry = mk_pte(page, vma->vm_page_prot); |
2521 | if (flags & FAULT_FLAG_WRITE) | 2561 | if (flags & FAULT_FLAG_WRITE) |
2522 | entry = maybe_mkwrite(pte_mkdirty(entry), vma); | 2562 | entry = maybe_mkwrite(pte_mkdirty(entry), vma); |
2523 | set_pte_at(mm, address, page_table, entry); | ||
2524 | if (anon) { | 2563 | if (anon) { |
2525 | inc_mm_counter(mm, anon_rss); | 2564 | inc_mm_counter(mm, anon_rss); |
2526 | lru_cache_add_active(page); | 2565 | SetPageSwapBacked(page); |
2527 | page_add_new_anon_rmap(page, vma, address); | 2566 | lru_cache_add_active_or_unevictable(page, vma); |
2567 | page_add_new_anon_rmap(page, vma, address); | ||
2528 | } else { | 2568 | } else { |
2529 | inc_mm_counter(mm, file_rss); | 2569 | inc_mm_counter(mm, file_rss); |
2530 | page_add_file_rmap(page); | 2570 | page_add_file_rmap(page); |
@@ -2533,11 +2573,14 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2533 | get_page(dirty_page); | 2573 | get_page(dirty_page); |
2534 | } | 2574 | } |
2535 | } | 2575 | } |
2576 | //TODO: is this safe? do_anonymous_page() does it this way. | ||
2577 | set_pte_at(mm, address, page_table, entry); | ||
2536 | 2578 | ||
2537 | /* no need to invalidate: a not-present page won't be cached */ | 2579 | /* no need to invalidate: a not-present page won't be cached */ |
2538 | update_mmu_cache(vma, address, entry); | 2580 | update_mmu_cache(vma, address, entry); |
2539 | } else { | 2581 | } else { |
2540 | mem_cgroup_uncharge_page(page); | 2582 | if (charged) |
2583 | mem_cgroup_uncharge_page(page); | ||
2541 | if (anon) | 2584 | if (anon) |
2542 | page_cache_release(page); | 2585 | page_cache_release(page); |
2543 | else | 2586 | else |
@@ -2772,19 +2815,9 @@ int make_pages_present(unsigned long addr, unsigned long end) | |||
2772 | len = DIV_ROUND_UP(end, PAGE_SIZE) - addr/PAGE_SIZE; | 2815 | len = DIV_ROUND_UP(end, PAGE_SIZE) - addr/PAGE_SIZE; |
2773 | ret = get_user_pages(current, current->mm, addr, | 2816 | ret = get_user_pages(current, current->mm, addr, |
2774 | len, write, 0, NULL, NULL); | 2817 | len, write, 0, NULL, NULL); |
2775 | if (ret < 0) { | 2818 | if (ret < 0) |
2776 | /* | ||
2777 | SUS require strange return value to mlock | ||
2778 | - invalid addr generate to ENOMEM. | ||
2779 | - out of memory should generate EAGAIN. | ||
2780 | */ | ||
2781 | if (ret == -EFAULT) | ||
2782 | ret = -ENOMEM; | ||
2783 | else if (ret == -ENOMEM) | ||
2784 | ret = -EAGAIN; | ||
2785 | return ret; | 2819 | return ret; |
2786 | } | 2820 | return ret == len ? 0 : -EFAULT; |
2787 | return ret == len ? 0 : -ENOMEM; | ||
2788 | } | 2821 | } |
2789 | 2822 | ||
2790 | #if !defined(__HAVE_ARCH_GATE_AREA) | 2823 | #if !defined(__HAVE_ARCH_GATE_AREA) |
@@ -3016,3 +3049,18 @@ void print_vma_addr(char *prefix, unsigned long ip) | |||
3016 | } | 3049 | } |
3017 | up_read(&current->mm->mmap_sem); | 3050 | up_read(&current->mm->mmap_sem); |
3018 | } | 3051 | } |
3052 | |||
3053 | #ifdef CONFIG_PROVE_LOCKING | ||
3054 | void might_fault(void) | ||
3055 | { | ||
3056 | might_sleep(); | ||
3057 | /* | ||
3058 | * it would be nicer only to annotate paths which are not under | ||
3059 | * pagefault_disable, however that requires a larger audit and | ||
3060 | * providing helpers like get_user_atomic. | ||
3061 | */ | ||
3062 | if (!in_atomic() && current->mm) | ||
3063 | might_lock_read(&current->mm->mmap_sem); | |
3064 | } | ||
3065 | EXPORT_SYMBOL(might_fault); | ||
3066 | #endif | ||