Diffstat (limited to 'mm/memory.c')
 -rw-r--r--  mm/memory.c  153
 1 files changed, 73 insertions, 80 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 9c6ff7fffdc8..e6c99f6b5649 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1831,6 +1831,13 @@ static int unmap_mapping_range_vma(struct vm_area_struct *vma,
         unsigned long restart_addr;
         int need_break;

+        /*
+         * files that support invalidating or truncating portions of the
+         * file from under mmaped areas must set the VM_CAN_INVALIDATE flag, and
+         * have their .nopage function return the page locked.
+         */
+        BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE));
+
 again:
         restart_addr = vma->vm_truncate_count;
         if (is_restart_addr(restart_addr) && start_addr < restart_addr) {
@@ -1959,17 +1966,8 @@ void unmap_mapping_range(struct address_space *mapping,

         spin_lock(&mapping->i_mmap_lock);

-        /* serialize i_size write against truncate_count write */
-        smp_wmb();
-        /* Protect against page faults, and endless unmapping loops */
+        /* Protect against endless unmapping loops */
         mapping->truncate_count++;
-        /*
-         * For archs where spin_lock has inclusive semantics like ia64
-         * this smp_mb() will prevent to read pagetable contents
-         * before the truncate_count increment is visible to
-         * other cpus.
-         */
-        smp_mb();
         if (unlikely(is_restart_addr(mapping->truncate_count))) {
                 if (mapping->truncate_count == 0)
                         reset_vma_truncate_counts(mapping);
@@ -2008,8 +2006,18 @@ int vmtruncate(struct inode * inode, loff_t offset)
         if (IS_SWAPFILE(inode))
                 goto out_busy;
         i_size_write(inode, offset);
+
+        /*
+         * unmap_mapping_range is called twice, first simply for efficiency
+         * so that truncate_inode_pages does fewer single-page unmaps. However
+         * after this first call, and before truncate_inode_pages finishes,
+         * it is possible for private pages to be COWed, which remain after
+         * truncate_inode_pages finishes, hence the second unmap_mapping_range
+         * call must be made for correctness.
+         */
         unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
         truncate_inode_pages(mapping, offset);
+        unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
         goto out_truncate;

 do_expand:
@@ -2049,6 +2057,7 @@ int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
         down_write(&inode->i_alloc_sem);
         unmap_mapping_range(mapping, offset, (end - offset), 1);
         truncate_inode_pages_range(mapping, offset, end);
+        unmap_mapping_range(mapping, offset, (end - offset), 1);
         inode->i_op->truncate_range(inode, offset, end);
         up_write(&inode->i_alloc_sem);
         mutex_unlock(&inode->i_mutex);
@@ -2206,7 +2215,6 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,

         /* No need to invalidate - it was non-present before */
         update_mmu_cache(vma, address, pte);
-        lazy_mmu_prot_update(pte);
 unlock:
         pte_unmap_unlock(page_table, ptl);
 out:
@@ -2297,10 +2305,8 @@ static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 int write_access)
 {
         spinlock_t *ptl;
-        struct page *new_page;
-        struct address_space *mapping = NULL;
+        struct page *page, *nopage_page;
         pte_t entry;
-        unsigned int sequence = 0;
         int ret = VM_FAULT_MINOR;
         int anon = 0;
         struct page *dirty_page = NULL;
@@ -2308,74 +2314,53 @@ static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
         pte_unmap(page_table);
         BUG_ON(vma->vm_flags & VM_PFNMAP);

-        if (vma->vm_file) {
-                mapping = vma->vm_file->f_mapping;
-                sequence = mapping->truncate_count;
-                smp_rmb(); /* serializes i_size against truncate_count */
-        }
-retry:
-        new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret);
-        /*
-         * No smp_rmb is needed here as long as there's a full
-         * spin_lock/unlock sequence inside the ->nopage callback
-         * (for the pagecache lookup) that acts as an implicit
-         * smp_mb() and prevents the i_size read to happen
-         * after the next truncate_count read.
-         */
-
+        nopage_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret);
         /* no page was available -- either SIGBUS, OOM or REFAULT */
-        if (unlikely(new_page == NOPAGE_SIGBUS))
+        if (unlikely(nopage_page == NOPAGE_SIGBUS))
                 return VM_FAULT_SIGBUS;
-        else if (unlikely(new_page == NOPAGE_OOM))
+        else if (unlikely(nopage_page == NOPAGE_OOM))
                 return VM_FAULT_OOM;
-        else if (unlikely(new_page == NOPAGE_REFAULT))
+        else if (unlikely(nopage_page == NOPAGE_REFAULT))
                 return VM_FAULT_MINOR;

+        BUG_ON(vma->vm_flags & VM_CAN_INVALIDATE && !PageLocked(nopage_page));
+        /*
+         * For consistency in subsequent calls, make the nopage_page always
+         * locked.
+         */
+        if (unlikely(!(vma->vm_flags & VM_CAN_INVALIDATE)))
+                lock_page(nopage_page);
+
         /*
          * Should we do an early C-O-W break?
          */
+        page = nopage_page;
         if (write_access) {
                 if (!(vma->vm_flags & VM_SHARED)) {
-                        struct page *page;
-
-                        if (unlikely(anon_vma_prepare(vma)))
-                                goto oom;
-                        page = alloc_page_vma(GFP_HIGHUSER_MOVABLE,
-                                                vma, address);
-                        if (!page)
-                                goto oom;
-                        copy_user_highpage(page, new_page, address, vma);
-                        page_cache_release(new_page);
-                        new_page = page;
+                        if (unlikely(anon_vma_prepare(vma))) {
+                                ret = VM_FAULT_OOM;
+                                goto out_error;
+                        }
+                        page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
+                        if (!page) {
+                                ret = VM_FAULT_OOM;
+                                goto out_error;
+                        }
+                        copy_user_highpage(page, nopage_page, address, vma);
                         anon = 1;
-
                 } else {
                         /* if the page will be shareable, see if the backing
                          * address space wants to know that the page is about
                          * to become writable */
                         if (vma->vm_ops->page_mkwrite &&
-                            vma->vm_ops->page_mkwrite(vma, new_page) < 0
-                            ) {
-                                page_cache_release(new_page);
-                                return VM_FAULT_SIGBUS;
+                            vma->vm_ops->page_mkwrite(vma, page) < 0) {
+                                ret = VM_FAULT_SIGBUS;
+                                goto out_error;
                         }
                 }
         }

         page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
-        /*
-         * For a file-backed vma, someone could have truncated or otherwise
-         * invalidated this page. If unmap_mapping_range got called,
-         * retry getting the page.
-         */
-        if (mapping && unlikely(sequence != mapping->truncate_count)) {
-                pte_unmap_unlock(page_table, ptl);
-                page_cache_release(new_page);
-                cond_resched();
-                sequence = mapping->truncate_count;
-                smp_rmb();
-                goto retry;
-        }

         /*
          * This silly early PAGE_DIRTY setting removes a race
@@ -2388,43 +2373,51 @@ retry:
          * handle that later.
          */
         /* Only go through if we didn't race with anybody else... */
-        if (pte_none(*page_table)) {
-                flush_icache_page(vma, new_page);
-                entry = mk_pte(new_page, vma->vm_page_prot);
+        if (likely(pte_none(*page_table))) {
+                flush_icache_page(vma, page);
+                entry = mk_pte(page, vma->vm_page_prot);
                 if (write_access)
                         entry = maybe_mkwrite(pte_mkdirty(entry), vma);
                 set_pte_at(mm, address, page_table, entry);
                 if (anon) {
                         inc_mm_counter(mm, anon_rss);
-                        lru_cache_add_active(new_page);
-                        page_add_new_anon_rmap(new_page, vma, address);
+                        lru_cache_add_active(page);
+                        page_add_new_anon_rmap(page, vma, address);
                 } else {
                         inc_mm_counter(mm, file_rss);
-                        page_add_file_rmap(new_page);
+                        page_add_file_rmap(page);
                         if (write_access) {
-                                dirty_page = new_page;
+                                dirty_page = page;
                                 get_page(dirty_page);
                         }
                 }
+
+                /* no need to invalidate: a not-present page won't be cached */
+                update_mmu_cache(vma, address, entry);
+                lazy_mmu_prot_update(entry);
         } else {
-                /* One of our sibling threads was faster, back out. */
-                page_cache_release(new_page);
-                goto unlock;
+                if (anon)
+                        page_cache_release(page);
+                else
+                        anon = 1; /* not anon, but release nopage_page */
         }

-        /* no need to invalidate: a not-present page shouldn't be cached */
-        update_mmu_cache(vma, address, entry);
-        lazy_mmu_prot_update(entry);
-unlock:
         pte_unmap_unlock(page_table, ptl);
-        if (dirty_page) {
+
+out:
+        unlock_page(nopage_page);
+        if (anon)
+                page_cache_release(nopage_page);
+        else if (dirty_page) {
                 set_page_dirty_balance(dirty_page);
                 put_page(dirty_page);
         }
+
         return ret;
-oom:
-        page_cache_release(new_page);
-        return VM_FAULT_OOM;
+
+out_error:
+        anon = 1; /* relase nopage_page */
+        goto out;
 }

 /*