about | summary | refs | log | tree | commit | diff | stats
path: root/mm/memory.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memory.c')
-rw-r--r-- mm/memory.c 140
1 files changed, 94 insertions, 46 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 1002f473f49..fc031d68327 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1129,12 +1129,17 @@ static inline int use_zero_page(struct vm_area_struct *vma)
1129 return !vma->vm_ops || !vma->vm_ops->fault; 1129 return !vma->vm_ops || !vma->vm_ops->fault;
1130} 1130}
1131 1131
1132int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, 1132
1133 unsigned long start, int len, int write, int force, 1133
1134int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1135 unsigned long start, int len, int flags,
1134 struct page **pages, struct vm_area_struct **vmas) 1136 struct page **pages, struct vm_area_struct **vmas)
1135{ 1137{
1136 int i; 1138 int i;
1137 unsigned int vm_flags; 1139 unsigned int vm_flags = 0;
1140 int write = !!(flags & GUP_FLAGS_WRITE);
1141 int force = !!(flags & GUP_FLAGS_FORCE);
1142 int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);
1138 1143
1139 if (len <= 0) 1144 if (len <= 0)
1140 return 0; 1145 return 0;
@@ -1158,7 +1163,9 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1158 pud_t *pud; 1163 pud_t *pud;
1159 pmd_t *pmd; 1164 pmd_t *pmd;
1160 pte_t *pte; 1165 pte_t *pte;
1161 if (write) /* user gate pages are read-only */ 1166
1167 /* user gate pages are read-only */
1168 if (!ignore && write)
1162 return i ? : -EFAULT; 1169 return i ? : -EFAULT;
1163 if (pg > TASK_SIZE) 1170 if (pg > TASK_SIZE)
1164 pgd = pgd_offset_k(pg); 1171 pgd = pgd_offset_k(pg);
@@ -1190,8 +1197,9 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1190 continue; 1197 continue;
1191 } 1198 }
1192 1199
1193 if (!vma || (vma->vm_flags & (VM_IO | VM_PFNMAP)) 1200 if (!vma ||
1194 || !(vm_flags & vma->vm_flags)) 1201 (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
1202 (!ignore && !(vm_flags & vma->vm_flags)))
1195 return i ? : -EFAULT; 1203 return i ? : -EFAULT;
1196 1204
1197 if (is_vm_hugetlb_page(vma)) { 1205 if (is_vm_hugetlb_page(vma)) {
@@ -1266,6 +1274,23 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1266 } while (len); 1274 } while (len);
1267 return i; 1275 return i;
1268} 1276}
1277
/*
 * get_user_pages - compatibility wrapper around __get_user_pages().
 *
 * Keeps the (write, force) boolean-style arguments and converts them into
 * the flags word taken by __get_user_pages():
 *   write != 0  ->  GUP_FLAGS_WRITE
 *   force != 0  ->  GUP_FLAGS_FORCE
 * No other GUP_FLAGS_* bit (e.g. GUP_FLAGS_IGNORE_VMA_PERMISSIONS) is ever
 * set by this wrapper.  tsk, mm, start, len, pages and vmas are forwarded
 * unchanged, and the return value of __get_user_pages() is returned as is.
 */
1278int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1279 unsigned long start, int len, int write, int force,
1280 struct page **pages, struct vm_area_struct **vmas)
1281{
1282 int flags = 0; /* GUP_FLAGS_* bits built from write/force below */
1283
1284 if (write)
1285 flags |= GUP_FLAGS_WRITE;
1286 if (force)
1287 flags |= GUP_FLAGS_FORCE;
1288
1289 return __get_user_pages(tsk, mm, /* all real work happens in the flags-based variant */
1290 start, len, flags,
1291 pages, vmas);
1292}
1293
1269EXPORT_SYMBOL(get_user_pages); 1294EXPORT_SYMBOL(get_user_pages);
1270 1295
1271pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr, 1296pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr,
@@ -1296,18 +1321,14 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr,
1296 pte_t *pte; 1321 pte_t *pte;
1297 spinlock_t *ptl; 1322 spinlock_t *ptl;
1298 1323
1299 retval = mem_cgroup_charge(page, mm, GFP_KERNEL);
1300 if (retval)
1301 goto out;
1302
1303 retval = -EINVAL; 1324 retval = -EINVAL;
1304 if (PageAnon(page)) 1325 if (PageAnon(page))
1305 goto out_uncharge; 1326 goto out;
1306 retval = -ENOMEM; 1327 retval = -ENOMEM;
1307 flush_dcache_page(page); 1328 flush_dcache_page(page);
1308 pte = get_locked_pte(mm, addr, &ptl); 1329 pte = get_locked_pte(mm, addr, &ptl);
1309 if (!pte) 1330 if (!pte)
1310 goto out_uncharge; 1331 goto out;
1311 retval = -EBUSY; 1332 retval = -EBUSY;
1312 if (!pte_none(*pte)) 1333 if (!pte_none(*pte))
1313 goto out_unlock; 1334 goto out_unlock;
@@ -1323,8 +1344,6 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr,
1323 return retval; 1344 return retval;
1324out_unlock: 1345out_unlock:
1325 pte_unmap_unlock(pte, ptl); 1346 pte_unmap_unlock(pte, ptl);
1326out_uncharge:
1327 mem_cgroup_uncharge_page(page);
1328out: 1347out:
1329 return retval; 1348 return retval;
1330} 1349}
@@ -1858,6 +1877,15 @@ gotten:
1858 new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); 1877 new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
1859 if (!new_page) 1878 if (!new_page)
1860 goto oom; 1879 goto oom;
1880 /*
1881 * Don't let another task, with possibly unlocked vma,
1882 * keep the mlocked page.
1883 */
1884 if (vma->vm_flags & VM_LOCKED) {
1885 lock_page(old_page); /* for LRU manipulation */
1886 clear_page_mlock(old_page);
1887 unlock_page(old_page);
1888 }
1861 cow_user_page(new_page, old_page, address, vma); 1889 cow_user_page(new_page, old_page, address, vma);
1862 __SetPageUptodate(new_page); 1890 __SetPageUptodate(new_page);
1863 1891
@@ -1886,11 +1914,13 @@ gotten:
1886 * thread doing COW. 1914 * thread doing COW.
1887 */ 1915 */
1888 ptep_clear_flush_notify(vma, address, page_table); 1916 ptep_clear_flush_notify(vma, address, page_table);
1889 set_pte_at(mm, address, page_table, entry); 1917 SetPageSwapBacked(new_page);
1890 update_mmu_cache(vma, address, entry); 1918 lru_cache_add_active_or_unevictable(new_page, vma);
1891 lru_cache_add_active(new_page);
1892 page_add_new_anon_rmap(new_page, vma, address); 1919 page_add_new_anon_rmap(new_page, vma, address);
1893 1920
1921//TODO: is this safe? do_anonymous_page() does it this way.
1922 set_pte_at(mm, address, page_table, entry);
1923 update_mmu_cache(vma, address, entry);
1894 if (old_page) { 1924 if (old_page) {
1895 /* 1925 /*
1896 * Only after switching the pte to the new page may 1926 * Only after switching the pte to the new page may
@@ -2288,16 +2318,17 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2288 count_vm_event(PGMAJFAULT); 2318 count_vm_event(PGMAJFAULT);
2289 } 2319 }
2290 2320
2321 mark_page_accessed(page);
2322
2323 lock_page(page);
2324 delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
2325
2291 if (mem_cgroup_charge(page, mm, GFP_KERNEL)) { 2326 if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
2292 delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
2293 ret = VM_FAULT_OOM; 2327 ret = VM_FAULT_OOM;
2328 unlock_page(page);
2294 goto out; 2329 goto out;
2295 } 2330 }
2296 2331
2297 mark_page_accessed(page);
2298 lock_page(page);
2299 delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
2300
2301 /* 2332 /*
2302 * Back out if somebody else already faulted in this pte. 2333 * Back out if somebody else already faulted in this pte.
2303 */ 2334 */
@@ -2324,7 +2355,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2324 page_add_anon_rmap(page, vma, address); 2355 page_add_anon_rmap(page, vma, address);
2325 2356
2326 swap_free(entry); 2357 swap_free(entry);
2327 if (vm_swap_full()) 2358 if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
2328 remove_exclusive_swap_page(page); 2359 remove_exclusive_swap_page(page);
2329 unlock_page(page); 2360 unlock_page(page);
2330 2361
@@ -2382,7 +2413,8 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
2382 if (!pte_none(*page_table)) 2413 if (!pte_none(*page_table))
2383 goto release; 2414 goto release;
2384 inc_mm_counter(mm, anon_rss); 2415 inc_mm_counter(mm, anon_rss);
2385 lru_cache_add_active(page); 2416 SetPageSwapBacked(page);
2417 lru_cache_add_active_or_unevictable(page, vma);
2386 page_add_new_anon_rmap(page, vma, address); 2418 page_add_new_anon_rmap(page, vma, address);
2387 set_pte_at(mm, address, page_table, entry); 2419 set_pte_at(mm, address, page_table, entry);
2388 2420
@@ -2423,6 +2455,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2423 struct page *page; 2455 struct page *page;
2424 pte_t entry; 2456 pte_t entry;
2425 int anon = 0; 2457 int anon = 0;
2458 int charged = 0;
2426 struct page *dirty_page = NULL; 2459 struct page *dirty_page = NULL;
2427 struct vm_fault vmf; 2460 struct vm_fault vmf;
2428 int ret; 2461 int ret;
@@ -2463,6 +2496,18 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2463 ret = VM_FAULT_OOM; 2496 ret = VM_FAULT_OOM;
2464 goto out; 2497 goto out;
2465 } 2498 }
2499 if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
2500 ret = VM_FAULT_OOM;
2501 page_cache_release(page);
2502 goto out;
2503 }
2504 charged = 1;
2505 /*
2506 * Don't let another task, with possibly unlocked vma,
2507 * keep the mlocked page.
2508 */
2509 if (vma->vm_flags & VM_LOCKED)
2510 clear_page_mlock(vmf.page);
2466 copy_user_highpage(page, vmf.page, address, vma); 2511 copy_user_highpage(page, vmf.page, address, vma);
2467 __SetPageUptodate(page); 2512 __SetPageUptodate(page);
2468 } else { 2513 } else {
@@ -2497,11 +2542,6 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2497 2542
2498 } 2543 }
2499 2544
2500 if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
2501 ret = VM_FAULT_OOM;
2502 goto out;
2503 }
2504
2505 page_table = pte_offset_map_lock(mm, pmd, address, &ptl); 2545 page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
2506 2546
2507 /* 2547 /*
@@ -2520,11 +2560,11 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2520 entry = mk_pte(page, vma->vm_page_prot); 2560 entry = mk_pte(page, vma->vm_page_prot);
2521 if (flags & FAULT_FLAG_WRITE) 2561 if (flags & FAULT_FLAG_WRITE)
2522 entry = maybe_mkwrite(pte_mkdirty(entry), vma); 2562 entry = maybe_mkwrite(pte_mkdirty(entry), vma);
2523 set_pte_at(mm, address, page_table, entry);
2524 if (anon) { 2563 if (anon) {
2525 inc_mm_counter(mm, anon_rss); 2564 inc_mm_counter(mm, anon_rss);
2526 lru_cache_add_active(page); 2565 SetPageSwapBacked(page);
2527 page_add_new_anon_rmap(page, vma, address); 2566 lru_cache_add_active_or_unevictable(page, vma);
2567 page_add_new_anon_rmap(page, vma, address);
2528 } else { 2568 } else {
2529 inc_mm_counter(mm, file_rss); 2569 inc_mm_counter(mm, file_rss);
2530 page_add_file_rmap(page); 2570 page_add_file_rmap(page);
@@ -2533,11 +2573,14 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2533 get_page(dirty_page); 2573 get_page(dirty_page);
2534 } 2574 }
2535 } 2575 }
2576//TODO: is this safe? do_anonymous_page() does it this way.
2577 set_pte_at(mm, address, page_table, entry);
2536 2578
2537 /* no need to invalidate: a not-present page won't be cached */ 2579 /* no need to invalidate: a not-present page won't be cached */
2538 update_mmu_cache(vma, address, entry); 2580 update_mmu_cache(vma, address, entry);
2539 } else { 2581 } else {
2540 mem_cgroup_uncharge_page(page); 2582 if (charged)
2583 mem_cgroup_uncharge_page(page);
2541 if (anon) 2584 if (anon)
2542 page_cache_release(page); 2585 page_cache_release(page);
2543 else 2586 else
@@ -2772,19 +2815,9 @@ int make_pages_present(unsigned long addr, unsigned long end)
2772 len = DIV_ROUND_UP(end, PAGE_SIZE) - addr/PAGE_SIZE; 2815 len = DIV_ROUND_UP(end, PAGE_SIZE) - addr/PAGE_SIZE;
2773 ret = get_user_pages(current, current->mm, addr, 2816 ret = get_user_pages(current, current->mm, addr,
2774 len, write, 0, NULL, NULL); 2817 len, write, 0, NULL, NULL);
2775 if (ret < 0) { 2818 if (ret < 0)
2776 /*
2777 SUS require strange return value to mlock
2778 - invalid addr generate to ENOMEM.
2779 - out of memory should generate EAGAIN.
2780 */
2781 if (ret == -EFAULT)
2782 ret = -ENOMEM;
2783 else if (ret == -ENOMEM)
2784 ret = -EAGAIN;
2785 return ret; 2819 return ret;
2786 } 2820 return ret == len ? 0 : -EFAULT;
2787 return ret == len ? 0 : -ENOMEM;
2788} 2821}
2789 2822
2790#if !defined(__HAVE_ARCH_GATE_AREA) 2823#if !defined(__HAVE_ARCH_GATE_AREA)
@@ -3016,3 +3049,18 @@ void print_vma_addr(char *prefix, unsigned long ip)
3016 } 3049 }
3017 up_read(&current->mm->mmap_sem); 3050 up_read(&current->mm->mmap_sem);
3018} 3051}
3052
3053#ifdef CONFIG_PROVE_LOCKING
/*
 * might_fault - debugging annotation, built only under CONFIG_PROVE_LOCKING.
 *
 * Always calls might_sleep().  In addition, when the caller is not
 * in_atomic() and current has an mm, it calls might_lock_read() on
 * current->mm->mmap_sem.
 *
 * NOTE(review): presumably intended to be called from code paths that may
 * take a page fault on user memory - confirm against the callers, which are
 * not visible here.
 */
3054void might_fault(void)
3055{
3056 might_sleep();
3057 /*
3058 * it would be nicer only to annotate paths which are not under
3059 * pagefault_disable, however that requires a larger audit and
3060 * providing helpers like get_user_atomic.
3061 */
3062 if (!in_atomic() && current->mm) /* skip the mmap_sem annotation in atomic context or when current has no mm */
3063 might_lock_read(&current->mm->mmap_sem);
3064}
3065EXPORT_SYMBOL(might_fault);
3066#endif