Diffstat (limited to 'mm/filemap.c'):
-rw-r--r--  mm/filemap.c  127
1 file changed, 118 insertions(+), 9 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index 83a45d35468b..f807afda86f2 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -108,11 +108,11 @@
  */
 
 /*
- * Remove a page from the page cache and free it. Caller has to make
+ * Delete a page from the page cache and free it. Caller has to make
  * sure the page is locked and that nobody else uses it - or that usage
  * is safe. The caller must hold the mapping's tree_lock.
  */
-void __remove_from_page_cache(struct page *page)
+void __delete_from_page_cache(struct page *page)
 {
 	struct address_space *mapping = page->mapping;
 
@@ -137,7 +137,15 @@ void __remove_from_page_cache(struct page *page)
 	}
 }
 
-void remove_from_page_cache(struct page *page)
+/**
+ * delete_from_page_cache - delete page from page cache
+ * @page: the page which the kernel is trying to remove from page cache
+ *
+ * This must be called only on pages that have been verified to be in the page
+ * cache and locked. It will never put the page into the free list, the caller
+ * has a reference on the page.
+ */
+void delete_from_page_cache(struct page *page)
 {
 	struct address_space *mapping = page->mapping;
 	void (*freepage)(struct page *);
@@ -146,14 +154,15 @@ void remove_from_page_cache(struct page *page)
 
 	freepage = mapping->a_ops->freepage;
 	spin_lock_irq(&mapping->tree_lock);
-	__remove_from_page_cache(page);
+	__delete_from_page_cache(page);
 	spin_unlock_irq(&mapping->tree_lock);
 	mem_cgroup_uncharge_cache_page(page);
 
 	if (freepage)
 		freepage(page);
+	page_cache_release(page);
 }
-EXPORT_SYMBOL(remove_from_page_cache);
+EXPORT_SYMBOL(delete_from_page_cache);
 
 static int sync_page(void *word)
 {
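With this change delete_from_page_cache() drops the page cache's own reference (the added page_cache_release() call), so a caller only needs the page lock plus its own reference. A minimal caller-side sketch, assuming the standard find_get_page()/lock_page() helpers rather than anything in this patch, might look like:

	/* Illustrative sketch only: drop one page from a mapping. */
	static void drop_one_page(struct address_space *mapping, pgoff_t index)
	{
		struct page *page = find_get_page(mapping, index);	/* takes a reference */

		if (!page)
			return;
		lock_page(page);
		if (page->mapping == mapping)		/* not truncated meanwhile? */
			delete_from_page_cache(page);	/* drops the pagecache reference */
		unlock_page(page);
		page_cache_release(page);		/* drop find_get_page()'s reference */
	}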
@@ -387,6 +396,76 @@ int filemap_write_and_wait_range(struct address_space *mapping,
 EXPORT_SYMBOL(filemap_write_and_wait_range);
 
 /**
+ * replace_page_cache_page - replace a pagecache page with a new one
+ * @old: page to be replaced
+ * @new: page to replace with
+ * @gfp_mask: allocation mode
+ *
+ * This function replaces a page in the pagecache with a new one. On
+ * success it acquires the pagecache reference for the new page and
+ * drops it for the old page. Both the old and new pages must be
+ * locked. This function does not add the new page to the LRU, the
+ * caller must do that.
+ *
+ * The remove + add is atomic. The only way this function can fail is
+ * memory allocation failure.
+ */
+int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
+{
+	int error;
+	struct mem_cgroup *memcg = NULL;
+
+	VM_BUG_ON(!PageLocked(old));
+	VM_BUG_ON(!PageLocked(new));
+	VM_BUG_ON(new->mapping);
+
+	/*
+	 * This is not page migration, but prepare_migration and
+	 * end_migration does enough work for charge replacement.
+	 *
+	 * In the longer term we probably want a specialized function
+	 * for moving the charge from old to new in a more efficient
+	 * manner.
+	 */
+	error = mem_cgroup_prepare_migration(old, new, &memcg, gfp_mask);
+	if (error)
+		return error;
+
+	error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
+	if (!error) {
+		struct address_space *mapping = old->mapping;
+		void (*freepage)(struct page *);
+
+		pgoff_t offset = old->index;
+		freepage = mapping->a_ops->freepage;
+
+		page_cache_get(new);
+		new->mapping = mapping;
+		new->index = offset;
+
+		spin_lock_irq(&mapping->tree_lock);
+		__delete_from_page_cache(old);
+		error = radix_tree_insert(&mapping->page_tree, offset, new);
+		BUG_ON(error);
+		mapping->nrpages++;
+		__inc_zone_page_state(new, NR_FILE_PAGES);
+		if (PageSwapBacked(new))
+			__inc_zone_page_state(new, NR_SHMEM);
+		spin_unlock_irq(&mapping->tree_lock);
+		radix_tree_preload_end();
+		if (freepage)
+			freepage(old);
+		page_cache_release(old);
+		mem_cgroup_end_migration(memcg, old, new, true);
+	} else {
+		mem_cgroup_end_migration(memcg, old, new, false);
+	}
+
+	return error;
+}
+EXPORT_SYMBOL_GPL(replace_page_cache_page);
+
+/**
  * add_to_page_cache_locked - add a locked page to the pagecache
  * @page: page to add
  * @mapping: the page's address_space
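Since replace_page_cache_page() deliberately leaves LRU handling to the caller, a hypothetical user that swaps in a freshly allocated page could look roughly like the sketch below; the function name and the lru_cache_add_file() placement are assumptions for illustration, not part of this patch:

	/* Illustrative sketch only: both pages locked, newpage not yet in any mapping. */
	static int swap_in_new_page(struct page *oldpage, struct page *newpage)
	{
		int err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL);

		if (err)
			return err;		/* only fails on allocation failure */
		/* newpage is now in the radix tree but not on the LRU yet. */
		lru_cache_add_file(newpage);
		return 0;
	}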
@@ -621,8 +700,10 @@ int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
 		__lock_page(page);
 		return 1;
 	} else {
-		up_read(&mm->mmap_sem);
-		wait_on_page_locked(page);
+		if (!(flags & FAULT_FLAG_RETRY_NOWAIT)) {
+			up_read(&mm->mmap_sem);
+			wait_on_page_locked(page);
+		}
 		return 0;
 	}
 }
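The new FAULT_FLAG_RETRY_NOWAIT case lets a fault path ask not to sleep and not to drop mmap_sem when the page lock is contended. A sketch modelled loosely on filemap_fault(), not taken from this patch, of how a caller reacts to the 0 return:

	/* Illustrative sketch only: caller side of lock_page_or_retry(). */
	static int fault_lock_page(struct page *page, struct vm_area_struct *vma,
				   struct vm_fault *vmf)
	{
		if (!lock_page_or_retry(page, vma->vm_mm, vmf->flags)) {
			/*
			 * Lock not taken. Without FAULT_FLAG_RETRY_NOWAIT we
			 * already dropped mmap_sem and waited for the lock;
			 * with it we return at once and mmap_sem stays held.
			 * Either way the fault has to be retried.
			 */
			page_cache_release(page);
			return VM_FAULT_RETRY;
		}
		return 0;		/* page locked, fault can proceed */
	}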
@@ -782,9 +863,13 @@ repeat:
 		page = radix_tree_deref_slot((void **)pages[i]);
 		if (unlikely(!page))
 			continue;
+
+		/*
+		 * This can only trigger when the entry at index 0 moves out
+		 * of or back to the root: none yet gotten, safe to restart.
+		 */
 		if (radix_tree_deref_retry(page)) {
-			if (ret)
-				start = pages[ret-1]->index;
+			WARN_ON(start | i);
 			goto restart;
 		}
 
@@ -800,6 +885,13 @@ repeat:
 		pages[ret] = page;
 		ret++;
 	}
+
+	/*
+	 * If all entries were removed before we could secure them,
+	 * try again, because callers stop trying once 0 is returned.
+	 */
+	if (unlikely(!ret && nr_found))
+		goto restart;
 	rcu_read_unlock();
 	return ret;
 }
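The !ret && nr_found restart matters because callers treat a zero return as the end of the range. A typical scan loop, sketched here as an assumption about caller behaviour rather than code from this patch, stops as soon as find_get_pages() returns 0:

	/* Illustrative sketch only: the caller pattern that motivates the restart. */
	static void scan_mapping(struct address_space *mapping)
	{
		struct page *pages[16];
		pgoff_t index = 0;
		unsigned i, nr;

		while ((nr = find_get_pages(mapping, index, 16, pages)) != 0) {
			for (i = 0; i < nr; i++) {
				index = pages[i]->index + 1;	/* advance past this page */
				/* ... process pages[i] ... */
				page_cache_release(pages[i]);	/* drop the ref we were given */
			}
		}
		/* A spurious nr == 0 would end this scan early, hence the restart. */
	}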
@@ -834,6 +926,11 @@ repeat:
 		page = radix_tree_deref_slot((void **)pages[i]);
 		if (unlikely(!page))
 			continue;
+
+		/*
+		 * This can only trigger when the entry at index 0 moves out
+		 * of or back to the root: none yet gotten, safe to restart.
+		 */
 		if (radix_tree_deref_retry(page))
 			goto restart;
 
@@ -894,6 +991,11 @@ repeat:
 		page = radix_tree_deref_slot((void **)pages[i]);
 		if (unlikely(!page))
 			continue;
+
+		/*
+		 * This can only trigger when the entry at index 0 moves out
+		 * of or back to the root: none yet gotten, safe to restart.
+		 */
 		if (radix_tree_deref_retry(page))
 			goto restart;
 
@@ -909,6 +1011,13 @@ repeat:
 		pages[ret] = page;
 		ret++;
 	}
+
+	/*
+	 * If all entries were removed before we could secure them,
+	 * try again, because callers stop trying once 0 is returned.
+	 */
+	if (unlikely(!ret && nr_found))
+		goto restart;
 	rcu_read_unlock();
 
 	if (ret)