Diffstat (limited to 'mm/memory.c')

-rw-r--r--   mm/memory.c | 109
1 file changed, 52 insertions, 57 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 7abd3899848b..23c870479b3e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1834,10 +1834,10 @@ static int unmap_mapping_range_vma(struct vm_area_struct *vma,
 
 	/*
 	 * files that support invalidating or truncating portions of the
-	 * file from under mmaped areas must set the VM_CAN_INVALIDATE flag, and
-	 * have their .nopage function return the page locked.
+	 * file from under mmaped areas must have their ->fault function
+	 * return a locked page (and FAULT_RET_LOCKED code). This provides
+	 * synchronisation against concurrent unmapping here.
 	 */
-	BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE));
 
 again:
 	restart_addr = vma->vm_truncate_count;
@@ -2306,63 +2306,62 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		pgoff_t pgoff, unsigned int flags, pte_t orig_pte)
 {
 	spinlock_t *ptl;
-	struct page *page, *faulted_page;
+	struct page *page;
 	pte_t entry;
 	int anon = 0;
 	struct page *dirty_page = NULL;
-	struct fault_data fdata;
+	struct vm_fault vmf;
+	int ret;
 
-	fdata.address = address & PAGE_MASK;
-	fdata.pgoff = pgoff;
-	fdata.flags = flags;
+	vmf.virtual_address = (void __user *)(address & PAGE_MASK);
+	vmf.pgoff = pgoff;
+	vmf.flags = flags;
+	vmf.page = NULL;
 
 	pte_unmap(page_table);
 	BUG_ON(vma->vm_flags & VM_PFNMAP);
 
 	if (likely(vma->vm_ops->fault)) {
-		fdata.type = -1;
-		faulted_page = vma->vm_ops->fault(vma, &fdata);
-		WARN_ON(fdata.type == -1);
-		if (unlikely(!faulted_page))
-			return fdata.type;
+		ret = vma->vm_ops->fault(vma, &vmf);
+		if (unlikely(ret & (VM_FAULT_ERROR | FAULT_RET_NOPAGE)))
+			return (ret & VM_FAULT_MASK);
 	} else {
 		/* Legacy ->nopage path */
-		fdata.type = VM_FAULT_MINOR;
-		faulted_page = vma->vm_ops->nopage(vma, address & PAGE_MASK,
-							&fdata.type);
+		ret = VM_FAULT_MINOR;
+		vmf.page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret);
 		/* no page was available -- either SIGBUS or OOM */
-		if (unlikely(faulted_page == NOPAGE_SIGBUS))
+		if (unlikely(vmf.page == NOPAGE_SIGBUS))
 			return VM_FAULT_SIGBUS;
-		else if (unlikely(faulted_page == NOPAGE_OOM))
+		else if (unlikely(vmf.page == NOPAGE_OOM))
 			return VM_FAULT_OOM;
 	}
 
 	/*
-	 * For consistency in subsequent calls, make the faulted_page always
+	 * For consistency in subsequent calls, make the faulted page always
 	 * locked.
 	 */
-	if (unlikely(!(vma->vm_flags & VM_CAN_INVALIDATE)))
-		lock_page(faulted_page);
+	if (unlikely(!(ret & FAULT_RET_LOCKED)))
+		lock_page(vmf.page);
 	else
-		BUG_ON(!PageLocked(faulted_page));
+		VM_BUG_ON(!PageLocked(vmf.page));
 
 	/*
 	 * Should we do an early C-O-W break?
 	 */
-	page = faulted_page;
+	page = vmf.page;
 	if (flags & FAULT_FLAG_WRITE) {
 		if (!(vma->vm_flags & VM_SHARED)) {
 			anon = 1;
 			if (unlikely(anon_vma_prepare(vma))) {
-				fdata.type = VM_FAULT_OOM;
+				ret = VM_FAULT_OOM;
 				goto out;
 			}
 			page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
 			if (!page) {
-				fdata.type = VM_FAULT_OOM;
+				ret = VM_FAULT_OOM;
 				goto out;
 			}
-			copy_user_highpage(page, faulted_page, address, vma);
+			copy_user_highpage(page, vmf.page, address, vma);
 		} else {
 			/*
 			 * If the page will be shareable, see if the backing
@@ -2372,11 +2371,23 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 			if (vma->vm_ops->page_mkwrite) {
 				unlock_page(page);
 				if (vma->vm_ops->page_mkwrite(vma, page) < 0) {
-					fdata.type = VM_FAULT_SIGBUS;
-					anon = 1; /* no anon but release faulted_page */
+					ret = VM_FAULT_SIGBUS;
+					anon = 1; /* no anon but release vmf.page */
 					goto out_unlocked;
 				}
 				lock_page(page);
+				/*
+				 * XXX: this is not quite right (racy vs
+				 * invalidate) to unlock and relock the page
+				 * like this, however a better fix requires
+				 * reworking page_mkwrite locking API, which
+				 * is better done later.
+				 */
+				if (!page->mapping) {
+					ret = VM_FAULT_MINOR;
+					anon = 1; /* no anon but release vmf.page */
+					goto out;
+				}
 			}
 		}
 
@@ -2427,16 +2438,16 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	pte_unmap_unlock(page_table, ptl);
 
 out:
-	unlock_page(faulted_page);
+	unlock_page(vmf.page);
 out_unlocked:
 	if (anon)
-		page_cache_release(faulted_page);
+		page_cache_release(vmf.page);
 	else if (dirty_page) {
 		set_page_dirty_balance(dirty_page);
 		put_page(dirty_page);
 	}
 
-	return fdata.type;
+	return (ret & VM_FAULT_MASK);
 }
 
 static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
@@ -2447,18 +2458,10 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 					- vma->vm_start) >> PAGE_CACHE_SHIFT) + vma->vm_pgoff;
 	unsigned int flags = (write_access ? FAULT_FLAG_WRITE : 0);
 
-	return __do_fault(mm, vma, address, page_table, pmd, pgoff, flags, orig_pte);
+	return __do_fault(mm, vma, address, page_table, pmd, pgoff,
+							flags, orig_pte);
 }
 
-static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
-		unsigned long address, pte_t *page_table, pmd_t *pmd,
-		int write_access, pgoff_t pgoff, pte_t orig_pte)
-{
-	unsigned int flags = FAULT_FLAG_NONLINEAR |
-				(write_access ? FAULT_FLAG_WRITE : 0);
-
-	return __do_fault(mm, vma, address, page_table, pmd, pgoff, flags, orig_pte);
-}
 
 /*
  * do_no_pfn() tries to create a new page mapping for a page without
@@ -2519,17 +2522,19 @@ static noinline int do_no_pfn(struct mm_struct *mm, struct vm_area_struct *vma,
  * but allow concurrent faults), and pte mapped but not yet locked.
  * We return with mmap_sem still held, but pte unmapped and unlocked.
  */
-static int do_file_page(struct mm_struct *mm, struct vm_area_struct *vma,
+static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		unsigned long address, pte_t *page_table, pmd_t *pmd,
 		int write_access, pte_t orig_pte)
 {
+	unsigned int flags = FAULT_FLAG_NONLINEAR |
+				(write_access ? FAULT_FLAG_WRITE : 0);
 	pgoff_t pgoff;
-	int err;
 
 	if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
 		return VM_FAULT_MINOR;
 
-	if (unlikely(!(vma->vm_flags & VM_NONLINEAR))) {
+	if (unlikely(!(vma->vm_flags & VM_NONLINEAR) ||
+			!(vma->vm_flags & VM_CAN_NONLINEAR))) {
 		/*
 		 * Page table corrupted: show pte and kill process.
 		 */
@@ -2539,18 +2544,8 @@ static int do_file_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	pgoff = pte_to_pgoff(orig_pte);
 
-	if (vma->vm_ops && vma->vm_ops->fault)
-		return do_nonlinear_fault(mm, vma, address, page_table, pmd,
-					write_access, pgoff, orig_pte);
-
-	/* We can then assume vm->vm_ops && vma->vm_ops->populate */
-	err = vma->vm_ops->populate(vma, address & PAGE_MASK, PAGE_SIZE,
-					vma->vm_page_prot, pgoff, 0);
-	if (err == -ENOMEM)
-		return VM_FAULT_OOM;
-	if (err)
-		return VM_FAULT_SIGBUS;
-	return VM_FAULT_MAJOR;
+	return __do_fault(mm, vma, address, page_table, pmd, pgoff,
+							flags, orig_pte);
 }
 
 /*
@@ -2588,7 +2583,7 @@ static inline int handle_pte_fault(struct mm_struct *mm,
 						pte, pmd, write_access);
 		}
 		if (pte_file(entry))
-			return do_file_page(mm, vma, address,
+			return do_nonlinear_fault(mm, vma, address,
 					pte, pmd, write_access, entry);
 		return do_swap_page(mm, vma, address,
 					pte, pmd, write_access, entry);
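
As converted above, __do_fault now expects a ->fault handler to fill in vmf->page and return a VM_FAULT_* code, OR-ing in FAULT_RET_LOCKED when the page comes back locked (which the unmap_mapping_range_vma comment now requires of any file that can be truncated or invalidated under an mmap). A minimal sketch of such a handler, assuming only the interface visible in this diff; example_fault and the find_get_page()-based pagecache lookup are illustrative, not part of the patch:

static int example_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct address_space *mapping = vma->vm_file->f_mapping;
	struct page *page;

	/* Look up the pagecache page backing the faulting file offset. */
	page = find_get_page(mapping, vmf->pgoff);
	if (!page)
		return VM_FAULT_SIGBUS;

	/* Lock the page so truncate/invalidate cannot pull it away. */
	lock_page(page);
	if (page->mapping != mapping) {
		/* Lost a race with truncate; a real handler would retry. */
		unlock_page(page);
		page_cache_release(page);
		return VM_FAULT_SIGBUS;
	}

	/* Hand the locked, referenced page back to __do_fault. */
	vmf->page = page;
	return VM_FAULT_MINOR | FAULT_RET_LOCKED;
}

A handler that installs its own pte would presumably leave vmf->page NULL and return FAULT_RET_NOPAGE instead, which is why __do_fault bails out early, masking with VM_FAULT_MASK, when that bit is set.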
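
The strengthened test in do_nonlinear_fault means a nonlinear pte is only serviced when the vma carries VM_CAN_NONLINEAR as well as VM_NONLINEAR; otherwise it is treated as page table corruption. The flag would presumably be set at mmap time by code that knows its ->fault copes with arbitrary pgoffs; a sketch under that assumption (example_mmap and example_vm_ops are hypothetical names):

static struct vm_operations_struct example_vm_ops = {
	.fault	= example_fault,	/* the handler sketched above */
};

static int example_mmap(struct file *file, struct vm_area_struct *vma)
{
	vma->vm_ops = &example_vm_ops;
	/* Advertise that ->fault handles nonlinear offsets. */
	vma->vm_flags |= VM_CAN_NONLINEAR;
	return 0;
}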