diff options
author | Lee Schermerhorn <lee.schermerhorn@hp.com> | 2008-10-18 23:26:52 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-10-20 11:52:31 -0400 |
commit | 64d6519dda3905dfb94d3f93c07c5f263f41813f (patch) | |
tree | 62cae88edcb858c42a5b4b568eb77801299250bb /mm | |
parent | 5344b7e648980cc2ca613ec03a56a8222ff48820 (diff) |
swap: cull unevictable pages in fault path
In the fault paths that install new anonymous pages, check whether the
page is evictable or not using lru_cache_add_active_or_unevictable(). If
the page is evictable, just add it to the active lru list [via the pagevec
cache], else add it to the unevictable list.
This "proactive" culling in the fault path mimics the handling of mlocked
pages in Nick Piggin's series to keep mlocked pages off the lru lists.
Notes:
1) This patch is optional--e.g., if one is concerned about the
additional test in the fault path. We can defer the moving of
nonreclaimable pages until when vmscan [shrink_*_list()]
encounters them. Vmscan will only need to handle such pages
once, but if there are a lot of them it could impact system
performance.
2) The 'vma' argument to page_evictable() is required to notice that
we're faulting a page into an mlock()ed vma w/o having to scan the
page's rmap in the fault path. Culling mlock()ed anon pages is
currently the only reason for this patch.
3) We can't cull swap pages in read_swap_cache_async() because the
vma argument doesn't necessarily correspond to the swap cache
offset passed in by swapin_readahead(). This could [did!] result
in mlocking pages in non-VM_LOCKED vmas if [when] we tried to
cull in this path.
4) Move set_pte_at() to after where we add page to lru to keep it
hidden from other tasks that might walk the page table.
We already do it in this order in do_anonymous_page(). And,
these are COW'd anon pages. Is this safe?
[riel@redhat.com: undo an overzealous code cleanup]
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/memory.c | 18 | ||||
-rw-r--r-- | mm/swap.c | 21 |
2 files changed, 31 insertions, 8 deletions
diff --git a/mm/memory.c b/mm/memory.c index 9fef7272fb9e..450127f4c582 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -1922,12 +1922,13 @@ gotten: | |||
1922 | * thread doing COW. | 1922 | * thread doing COW. |
1923 | */ | 1923 | */ |
1924 | ptep_clear_flush_notify(vma, address, page_table); | 1924 | ptep_clear_flush_notify(vma, address, page_table); |
1925 | set_pte_at(mm, address, page_table, entry); | ||
1926 | update_mmu_cache(vma, address, entry); | ||
1927 | SetPageSwapBacked(new_page); | 1925 | SetPageSwapBacked(new_page); |
1928 | lru_cache_add_active_anon(new_page); | 1926 | lru_cache_add_active_or_unevictable(new_page, vma); |
1929 | page_add_new_anon_rmap(new_page, vma, address); | 1927 | page_add_new_anon_rmap(new_page, vma, address); |
1930 | 1928 | ||
1929 | //TODO: is this safe? do_anonymous_page() does it this way. | ||
1930 | set_pte_at(mm, address, page_table, entry); | ||
1931 | update_mmu_cache(vma, address, entry); | ||
1931 | if (old_page) { | 1932 | if (old_page) { |
1932 | /* | 1933 | /* |
1933 | * Only after switching the pte to the new page may | 1934 | * Only after switching the pte to the new page may |
@@ -2420,7 +2421,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2420 | goto release; | 2421 | goto release; |
2421 | inc_mm_counter(mm, anon_rss); | 2422 | inc_mm_counter(mm, anon_rss); |
2422 | SetPageSwapBacked(page); | 2423 | SetPageSwapBacked(page); |
2423 | lru_cache_add_active_anon(page); | 2424 | lru_cache_add_active_or_unevictable(page, vma); |
2424 | page_add_new_anon_rmap(page, vma, address); | 2425 | page_add_new_anon_rmap(page, vma, address); |
2425 | set_pte_at(mm, address, page_table, entry); | 2426 | set_pte_at(mm, address, page_table, entry); |
2426 | 2427 | ||
@@ -2564,12 +2565,11 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2564 | entry = mk_pte(page, vma->vm_page_prot); | 2565 | entry = mk_pte(page, vma->vm_page_prot); |
2565 | if (flags & FAULT_FLAG_WRITE) | 2566 | if (flags & FAULT_FLAG_WRITE) |
2566 | entry = maybe_mkwrite(pte_mkdirty(entry), vma); | 2567 | entry = maybe_mkwrite(pte_mkdirty(entry), vma); |
2567 | set_pte_at(mm, address, page_table, entry); | ||
2568 | if (anon) { | 2568 | if (anon) { |
2569 | inc_mm_counter(mm, anon_rss); | 2569 | inc_mm_counter(mm, anon_rss); |
2570 | SetPageSwapBacked(page); | 2570 | SetPageSwapBacked(page); |
2571 | lru_cache_add_active_anon(page); | 2571 | lru_cache_add_active_or_unevictable(page, vma); |
2572 | page_add_new_anon_rmap(page, vma, address); | 2572 | page_add_new_anon_rmap(page, vma, address); |
2573 | } else { | 2573 | } else { |
2574 | inc_mm_counter(mm, file_rss); | 2574 | inc_mm_counter(mm, file_rss); |
2575 | page_add_file_rmap(page); | 2575 | page_add_file_rmap(page); |
@@ -2578,6 +2578,8 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2578 | get_page(dirty_page); | 2578 | get_page(dirty_page); |
2579 | } | 2579 | } |
2580 | } | 2580 | } |
2581 | //TODO: is this safe? do_anonymous_page() does it this way. | ||
2582 | set_pte_at(mm, address, page_table, entry); | ||
2581 | 2583 | ||
2582 | /* no need to invalidate: a not-present page won't be cached */ | 2584 | /* no need to invalidate: a not-present page won't be cached */ |
2583 | update_mmu_cache(vma, address, entry); | 2585 | update_mmu_cache(vma, address, entry); |
diff --git a/mm/swap.c b/mm/swap.c --- a/mm/swap.c +++ b/mm/swap.c | |||
@@ -31,6 +31,8 @@ | |||
31 | #include <linux/backing-dev.h> | 31 | #include <linux/backing-dev.h> |
32 | #include <linux/memcontrol.h> | 32 | #include <linux/memcontrol.h> |
33 | 33 | ||
34 | #include "internal.h" | ||
35 | |||
34 | /* How many pages do we try to swap or page in/out together? */ | 36 | /* How many pages do we try to swap or page in/out together? */ |
35 | int page_cluster; | 37 | int page_cluster; |
36 | 38 | ||
@@ -244,6 +246,25 @@ void add_page_to_unevictable_list(struct page *page) | |||
244 | spin_unlock_irq(&zone->lru_lock); | 246 | spin_unlock_irq(&zone->lru_lock); |
245 | } | 247 | } |
246 | 248 | ||
249 | /** | ||
250 | * lru_cache_add_active_or_unevictable | ||
251 | * @page: the page to be added to LRU | ||
252 | * @vma: vma in which page is mapped for determining reclaimability | ||
253 | * | ||
254 | * place @page on active or unevictable LRU list, depending on | ||
255 | * page_evictable(). Note that if the page is not evictable, | ||
256 | * it goes directly back onto its zone's unevictable list. It does | ||
257 | * NOT use a per cpu pagevec. | ||
258 | */ | ||
259 | void lru_cache_add_active_or_unevictable(struct page *page, | ||
260 | struct vm_area_struct *vma) | ||
261 | { | ||
262 | if (page_evictable(page, vma)) | ||
263 | lru_cache_add_lru(page, LRU_ACTIVE + page_is_file_cache(page)); | ||
264 | else | ||
265 | add_page_to_unevictable_list(page); | ||
266 | } | ||
267 | |||
247 | /* | 268 | /* |
248 | * Drain pages out of the cpu's pagevecs. | 269 | * Drain pages out of the cpu's pagevecs. |
249 | * Either "cpu" is the current CPU, and preemption has already been | 270 | * Either "cpu" is the current CPU, and preemption has already been |