Diffstat (limited to 'mm/memory.c')
-rw-r--r--  mm/memory.c  132
1 file changed, 86 insertions(+), 46 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index e6c99f6b5649..eee7fec3ab54 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1047,7 +1047,8 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		if (pages)
 			foll_flags |= FOLL_GET;
 		if (!write && !(vma->vm_flags & VM_LOCKED) &&
-		    (!vma->vm_ops || !vma->vm_ops->nopage))
+		    (!vma->vm_ops || (!vma->vm_ops->nopage &&
+					!vma->vm_ops->fault)))
 			foll_flags |= FOLL_ANON;
 
 		do {
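With this change, get_user_pages() may treat a vma as anonymous only when it supplies neither faulting hook. Restated as a standalone predicate (a hypothetical helper for illustration; not part of the patch):

	/* True when gup may take the FOLL_ANON fast path for this vma. */
	static inline int gup_may_assume_anon(struct vm_area_struct *vma, int write)
	{
		return !write && !(vma->vm_flags & VM_LOCKED) &&
		       (!vma->vm_ops || (!vma->vm_ops->nopage &&
					 !vma->vm_ops->fault));
	}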
@@ -2288,10 +2289,10 @@ oom:
2288} 2289}
2289 2290
2290/* 2291/*
2291 * do_no_page() tries to create a new page mapping. It aggressively 2292 * __do_fault() tries to create a new page mapping. It aggressively
2292 * tries to share with existing pages, but makes a separate copy if 2293 * tries to share with existing pages, but makes a separate copy if
2293 * the "write_access" parameter is true in order to avoid the next 2294 * the FAULT_FLAG_WRITE is set in the flags parameter in order to avoid
2294 * page fault. 2295 * the next page fault.
2295 * 2296 *
2296 * As this is called only for pages that do not currently exist, we 2297 * As this is called only for pages that do not currently exist, we
2297 * do not need to flush old virtual caches or the TLB. 2298 * do not need to flush old virtual caches or the TLB.
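The rewritten function communicates with the handler through a fault descriptor rather than a bare address and result pointer. A sketch of that descriptor as __do_fault() below uses it; the field set is inferred from the fdata assignments, and the authoritative definition belongs to this patch's header changes, which are not shown here:

	struct fault_data {
		unsigned long address;	/* page-aligned faulting address */
		pgoff_t pgoff;		/* page offset in the backing object */
		unsigned int flags;	/* FAULT_FLAG_WRITE, FAULT_FLAG_NONLINEAR */
		int type;		/* VM_FAULT_* result, filled in by the handler */
	};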
@@ -2300,64 +2301,82 @@ oom:
  * but allow concurrent faults), and pte mapped but not yet locked.
  * We return with mmap_sem still held, but pte unmapped and unlocked.
  */
-static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
+static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		unsigned long address, pte_t *page_table, pmd_t *pmd,
-		int write_access)
+		pgoff_t pgoff, unsigned int flags, pte_t orig_pte)
 {
 	spinlock_t *ptl;
-	struct page *page, *nopage_page;
+	struct page *page, *faulted_page;
 	pte_t entry;
-	int ret = VM_FAULT_MINOR;
 	int anon = 0;
 	struct page *dirty_page = NULL;
+	struct fault_data fdata;
+
+	fdata.address = address & PAGE_MASK;
+	fdata.pgoff = pgoff;
+	fdata.flags = flags;
 
 	pte_unmap(page_table);
 	BUG_ON(vma->vm_flags & VM_PFNMAP);
 
-	nopage_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret);
-	/* no page was available -- either SIGBUS, OOM or REFAULT */
-	if (unlikely(nopage_page == NOPAGE_SIGBUS))
-		return VM_FAULT_SIGBUS;
-	else if (unlikely(nopage_page == NOPAGE_OOM))
-		return VM_FAULT_OOM;
-	else if (unlikely(nopage_page == NOPAGE_REFAULT))
-		return VM_FAULT_MINOR;
+	if (likely(vma->vm_ops->fault)) {
+		fdata.type = -1;
+		faulted_page = vma->vm_ops->fault(vma, &fdata);
+		WARN_ON(fdata.type == -1);
+		if (unlikely(!faulted_page))
+			return fdata.type;
+	} else {
+		/* Legacy ->nopage path */
+		fdata.type = VM_FAULT_MINOR;
+		faulted_page = vma->vm_ops->nopage(vma, address & PAGE_MASK,
+							&fdata.type);
+		/* no page was available -- either SIGBUS or OOM */
+		if (unlikely(faulted_page == NOPAGE_SIGBUS))
+			return VM_FAULT_SIGBUS;
+		else if (unlikely(faulted_page == NOPAGE_OOM))
+			return VM_FAULT_OOM;
+	}
 
-	BUG_ON(vma->vm_flags & VM_CAN_INVALIDATE && !PageLocked(nopage_page));
 	/*
-	 * For consistency in subsequent calls, make the nopage_page always
+	 * For consistency in subsequent calls, make the faulted_page always
 	 * locked.
 	 */
 	if (unlikely(!(vma->vm_flags & VM_CAN_INVALIDATE)))
-		lock_page(nopage_page);
+		lock_page(faulted_page);
+	else
+		BUG_ON(!PageLocked(faulted_page));
 
 	/*
 	 * Should we do an early C-O-W break?
 	 */
-	page = nopage_page;
-	if (write_access) {
+	page = faulted_page;
+	if (flags & FAULT_FLAG_WRITE) {
 		if (!(vma->vm_flags & VM_SHARED)) {
+			anon = 1;
 			if (unlikely(anon_vma_prepare(vma))) {
-				ret = VM_FAULT_OOM;
-				goto out_error;
+				fdata.type = VM_FAULT_OOM;
+				goto out;
 			}
 			page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
 			if (!page) {
-				ret = VM_FAULT_OOM;
-				goto out_error;
+				fdata.type = VM_FAULT_OOM;
+				goto out;
 			}
-			copy_user_highpage(page, nopage_page, address, vma);
-			anon = 1;
+			copy_user_highpage(page, faulted_page, address, vma);
 		} else {
-			/* if the page will be shareable, see if the backing
+			/*
+			 * If the page will be shareable, see if the backing
 			 * address space wants to know that the page is about
-			 * to become writable */
+			 * to become writable
+			 */
 			if (vma->vm_ops->page_mkwrite &&
 			    vma->vm_ops->page_mkwrite(vma, page) < 0) {
-				ret = VM_FAULT_SIGBUS;
-				goto out_error;
+				fdata.type = VM_FAULT_SIGBUS;
+				anon = 1; /* no anon but release faulted_page */
+				goto out;
 			}
 		}
+
 	}
 
 	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
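For comparison, a minimal sketch of what __do_fault() above expects from a ->fault implementation: errors are reported by setting fdata->type and returning NULL, not by returning a magic NOPAGE_* value. The lookup helper here is hypothetical.

	static struct page *example_fault(struct vm_area_struct *vma,
					  struct fault_data *fdata)
	{
		/* example_lookup_page() stands in for the driver's own lookup */
		struct page *page = example_lookup_page(vma, fdata->pgoff);

		if (!page) {
			fdata->type = VM_FAULT_SIGBUS;
			return NULL;
		}
		fdata->type = VM_FAULT_MINOR;
		return page;	/* locked, if the vma sets VM_CAN_INVALIDATE */
	}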
@@ -2373,10 +2392,10 @@ static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * handle that later.
 	 */
 	/* Only go through if we didn't race with anybody else... */
-	if (likely(pte_none(*page_table))) {
+	if (likely(pte_same(*page_table, orig_pte))) {
 		flush_icache_page(vma, page);
 		entry = mk_pte(page, vma->vm_page_prot);
-		if (write_access)
+		if (flags & FAULT_FLAG_WRITE)
 			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 		set_pte_at(mm, address, page_table, entry);
 		if (anon) {
@@ -2386,7 +2405,7 @@ static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		} else {
 			inc_mm_counter(mm, file_rss);
 			page_add_file_rmap(page);
-			if (write_access) {
+			if (flags & FAULT_FLAG_WRITE) {
 				dirty_page = page;
 				get_page(dirty_page);
 			}
@@ -2399,25 +2418,42 @@ static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		if (anon)
 			page_cache_release(page);
 		else
-			anon = 1; /* not anon, but release nopage_page */
+			anon = 1; /* no anon but release faulted_page */
 	}
 
 	pte_unmap_unlock(page_table, ptl);
 
 out:
-	unlock_page(nopage_page);
+	unlock_page(faulted_page);
 	if (anon)
-		page_cache_release(nopage_page);
+		page_cache_release(faulted_page);
 	else if (dirty_page) {
 		set_page_dirty_balance(dirty_page);
 		put_page(dirty_page);
 	}
 
-	return ret;
+	return fdata.type;
+}
 
-out_error:
-	anon = 1; /* relase nopage_page */
-	goto out;
+static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+		unsigned long address, pte_t *page_table, pmd_t *pmd,
+		int write_access, pte_t orig_pte)
+{
+	pgoff_t pgoff = (((address & PAGE_MASK)
+			- vma->vm_start) >> PAGE_CACHE_SHIFT) + vma->vm_pgoff;
+	unsigned int flags = (write_access ? FAULT_FLAG_WRITE : 0);
+
+	return __do_fault(mm, vma, address, page_table, pmd, pgoff, flags, orig_pte);
+}
+
+static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+		unsigned long address, pte_t *page_table, pmd_t *pmd,
+		int write_access, pgoff_t pgoff, pte_t orig_pte)
+{
+	unsigned int flags = FAULT_FLAG_NONLINEAR |
+				(write_access ? FAULT_FLAG_WRITE : 0);
+
+	return __do_fault(mm, vma, address, page_table, pmd, pgoff, flags, orig_pte);
+}
 }
 
 /*
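A worked example of do_linear_fault()'s pgoff computation, assuming 4K pages (PAGE_CACHE_SHIFT == 12): for a vma with vm_start == 0x00400000 and vm_pgoff == 10, a fault at address 0x00402345 yields

	pgoff = ((0x00402000 - 0x00400000) >> 12) + 10 = 2 + 10 = 12

that is, page 12 of the backing object.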
@@ -2496,9 +2532,14 @@ static int do_file_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		print_bad_pte(vma, orig_pte, address);
 		return VM_FAULT_OOM;
 	}
-	/* We can then assume vm->vm_ops && vma->vm_ops->populate */
 
 	pgoff = pte_to_pgoff(orig_pte);
+
+	if (vma->vm_ops && vma->vm_ops->fault)
+		return do_nonlinear_fault(mm, vma, address, page_table, pmd,
+					write_access, pgoff, orig_pte);
+
+	/* We can then assume vm->vm_ops && vma->vm_ops->populate */
 	err = vma->vm_ops->populate(vma, address & PAGE_MASK, PAGE_SIZE,
 					vma->vm_page_prot, pgoff, 0);
 	if (err == -ENOMEM)
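Background sketch (not part of the patch): in a VM_NONLINEAR mapping, remap_file_pages() encodes the file offset in the not-present pte itself, which is why do_file_page() recovers pgoff with pte_to_pgoff() instead of deriving it from the virtual address as do_linear_fault() does.

	/*
	 * Nonlinear ptes round-trip the file offset, so for an
	 * established nonlinear entry the following holds:
	 *
	 *	pte_t pte = pgoff_to_pte(pgoff);
	 *	BUG_ON(pte_to_pgoff(pte) != pgoff);
	 */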
@@ -2532,10 +2573,9 @@ static inline int handle_pte_fault(struct mm_struct *mm,
 	if (!pte_present(entry)) {
 		if (pte_none(entry)) {
 			if (vma->vm_ops) {
-				if (vma->vm_ops->nopage)
-					return do_no_page(mm, vma, address,
-							  pte, pmd,
-							  write_access);
+				if (vma->vm_ops->fault || vma->vm_ops->nopage)
+					return do_linear_fault(mm, vma, address,
+						pte, pmd, write_access, entry);
 				if (unlikely(vma->vm_ops->nopfn))
 					return do_no_pfn(mm, vma, address, pte,
 							 pmd, write_access);
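Taken together, the pte_none() dispatch in handle_pte_fault() now resolves in this order (sketch; the do_anonymous_page() fallback sits just past the end of this hunk):

	/*
	 *	vma->vm_ops->fault or ->nopage	->  do_linear_fault()
	 *	vma->vm_ops->nopfn		->  do_no_pfn()
	 *	no applicable vm_ops		->  do_anonymous_page()
	 */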