path: root/mm/swap.c
author		Minchan Kim <minchan.kim@gmail.com>	2011-03-22 19:32:52 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2011-03-22 20:44:03 -0400
commit		315601809d124d046abd6c3ffa346d0dbd7aa29d (patch)
tree		be48e1a0053e3ada3a5c25561923f1b87f8e1719 /mm/swap.c
parent		481b4bb5e370aa69c1dc276bd08871ec01b41d2a (diff)
mm: deactivate invalidated pages
Recently there have been reports of thrashing (http://marc.info/?l=rsync&m=128885034930933&w=2) caused by backup workloads (e.g. a nightly rsync). Such a workload creates use-once pages but touches each page twice, which promotes the pages to the active list and ends up evicting working-set pages.

Some application developers would like POSIX_FADV_NOREUSE for this, but other OSes don't support it either. (http://marc.info/?l=linux-mm&m=128928979512086&w=2)

The other approach application developers take is POSIX_FADV_DONTNEED, but it has a problem: if the kernel finds a page under writeback during invalidate_mapping_pages, it cannot drop it. That makes the interface hard to use, because applications always have to sync the data before calling fadvise(..., POSIX_FADV_DONTNEED) to make sure the pages are discardable. As a result they cannot benefit from the kernel's deferred writeback and see a performance loss. (http://insights.oetiker.ch/linux/fadvise.html)

In fact, an invalidation is a very strong hint to the reclaimer: it means we will not use the page any more. So let's move a page we cannot truncate right now to the head of the inactive list. It goes to the head rather than the tail because a dirty/writeback page will be flushed sooner or later anyway; this gives the flusher threads time to write it out and avoids the single-page writeout from pageout(), which is less effective than the flusher's writeout.

Originally I reused Peter's lru_demote with some changes, so his Signed-off-by is included.

Signed-off-by: Minchan Kim <minchan.kim@gmail.com>
Reported-by: Ben Gamari <bgamari.foss@gmail.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
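For illustration only (not part of this commit), here is a minimal userspace sketch of the pattern described above: a backup-style writer that produces use-once pages and tries to drop them with POSIX_FADV_DONTNEED. The file path, buffer size, and loop count are made up; the point is that the fdatasync() is currently unavoidable, which forfeits the kernel's deferred writeback.

/*
 * Minimal sketch of the sync-then-fadvise dance (illustrative names).
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	char buf[1 << 16];
	int i;
	int fd = open("/backup/archive.tar", O_WRONLY | O_CREAT | O_TRUNC, 0644);

	if (fd < 0) {
		perror("open");
		return EXIT_FAILURE;
	}

	memset(buf, 0, sizeof(buf));		/* stand-in for real backup data */
	for (i = 0; i < 1024; i++) {		/* write use-once data */
		if (write(fd, buf, sizeof(buf)) < 0) {
			perror("write");
			close(fd);
			return EXIT_FAILURE;
		}
	}

	/*
	 * Force the dirty pages to disk first; without this, the fadvise
	 * below cannot invalidate pages that are dirty or under writeback.
	 */
	fdatasync(fd);

	/* Tell the kernel we will not reuse these pages. */
	posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);

	close(fd);
	return EXIT_SUCCESS;
}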
Diffstat (limited to 'mm/swap.c')
-rw-r--r--	mm/swap.c	78
1 files changed, 78 insertions, 0 deletions
diff --git a/mm/swap.c b/mm/swap.c
index c02f93611a8..4aea806d0d4 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -39,6 +39,7 @@ int page_cluster;
 
 static DEFINE_PER_CPU(struct pagevec[NR_LRU_LISTS], lru_add_pvecs);
 static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
+static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);
 
 /*
  * This path almost never happens for VM activity - pages are normally
@@ -347,6 +348,60 @@ void add_page_to_unevictable_list(struct page *page)
 }
 
 /*
+ * If the page can not be invalidated, it is moved to the
+ * inactive list to speed up its reclaim.  It is moved to the
+ * head of the list, rather than the tail, to give the flusher
+ * threads some time to write it out, as this is much more
+ * effective than the single-page writeout from reclaim.
+ */
+static void lru_deactivate(struct page *page, struct zone *zone)
+{
+	int lru, file;
+
+	if (!PageLRU(page) || !PageActive(page))
+		return;
+
+	/* Some processes are using the page */
+	if (page_mapped(page))
+		return;
+
+	file = page_is_file_cache(page);
+	lru = page_lru_base_type(page);
+	del_page_from_lru_list(zone, page, lru + LRU_ACTIVE);
+	ClearPageActive(page);
+	ClearPageReferenced(page);
+	add_page_to_lru_list(zone, page, lru);
+	__count_vm_event(PGDEACTIVATE);
+
+	update_page_reclaim_stat(zone, page, file, 0);
+}
+
+static void ____pagevec_lru_deactivate(struct pagevec *pvec)
+{
+	int i;
+	struct zone *zone = NULL;
+
+	for (i = 0; i < pagevec_count(pvec); i++) {
+		struct page *page = pvec->pages[i];
+		struct zone *pagezone = page_zone(page);
+
+		if (pagezone != zone) {
+			if (zone)
+				spin_unlock_irq(&zone->lru_lock);
+			zone = pagezone;
+			spin_lock_irq(&zone->lru_lock);
+		}
+		lru_deactivate(page, zone);
+	}
+	if (zone)
+		spin_unlock_irq(&zone->lru_lock);
+
+	release_pages(pvec->pages, pvec->nr, pvec->cold);
+	pagevec_reinit(pvec);
+}
+
+
+/*
  * Drain pages out of the cpu's pagevecs.
  * Either "cpu" is the current CPU, and preemption has already been
  * disabled; or "cpu" is being hot-unplugged, and is already dead.
@@ -372,6 +427,29 @@ static void drain_cpu_pagevecs(int cpu)
 		pagevec_move_tail(pvec);
 		local_irq_restore(flags);
 	}
+
+	pvec = &per_cpu(lru_deactivate_pvecs, cpu);
+	if (pagevec_count(pvec))
+		____pagevec_lru_deactivate(pvec);
+}
+
+/**
+ * deactivate_page - forcefully deactivate a page
+ * @page: page to deactivate
+ *
+ * This function hints the VM that @page is a good reclaim candidate,
+ * for example if its invalidation fails due to the page being dirty
+ * or under writeback.
+ */
+void deactivate_page(struct page *page)
+{
+	if (likely(get_page_unless_zero(page))) {
+		struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
+
+		if (!pagevec_add(pvec, page))
+			____pagevec_lru_deactivate(pvec);
+		put_cpu_var(lru_deactivate_pvecs);
+	}
 }
 
 void lru_add_drain(void)
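Since this view is limited to mm/swap.c, the caller of the new deactivate_page() is not shown here. Below is a hedged sketch of how an invalidation path (such as the one behind POSIX_FADV_DONTNEED in mm/truncate.c) could fall back to it; try_to_drop_page() is a hypothetical helper name, and deactivate_page() is assumed to be declared in linux/swap.h by the parts of the commit outside this view. Illustrative only, not part of this diff.

/* Illustrative sketch: deactivate a page that could not be invalidated. */
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/pagemap.h>

static void try_to_drop_page(struct page *page)
{
	int ret = 0;

	if (trylock_page(page)) {
		ret = invalidate_inode_page(page);
		unlock_page(page);
	}

	/*
	 * Invalidation failed: the page stays in the page cache, but the
	 * caller has declared it will not reuse it, so hint reclaim by
	 * deactivating it (it ends up at the head of the inactive list).
	 */
	if (!ret)
		deactivate_page(page);
}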