author		Minchan Kim <minchan.kim@gmail.com>	2011-03-22 19:32:52 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2011-03-22 20:44:03 -0400
commit		315601809d124d046abd6c3ffa346d0dbd7aa29d (patch)
tree		be48e1a0053e3ada3a5c25561923f1b87f8e1719
parent		481b4bb5e370aa69c1dc276bd08871ec01b41d2a (diff)
mm: deactivate invalidated pages
Recently, thrashing has been reported with backup workloads such as nightly rsync (http://marc.info/?l=rsync&m=128885034930933&w=2). The workload creates use-once pages but touches each page twice, which promotes the pages to the active list and ends up evicting the real working set.

Some application developers would like POSIX_FADV_NOREUSE for this, but other OSes don't implement it either (http://marc.info/?l=linux-mm&m=128928979512086&w=2). The alternative is POSIX_FADV_DONTNEED, but it has a problem: if the kernel finds a page under writeback during invalidate_mapping_pages, it cannot drop it. That makes the interface hard to use for application programmers, who always have to sync their data before calling fadvise(..., POSIX_FADV_DONTNEED) to make sure the pages are discardable. In the end they cannot benefit from the kernel's deferred writeback and may see a performance loss (http://insights.oetiker.ch/linux/fadvise.html).

In fact, an invalidation is a very strong hint to the reclaimer: it means we will not use the page any more. So let's move a page that cannot be truncated right now to the head of the inactive list instead. The page is moved to the head of the LRU rather than the tail because a dirty or writeback page will be flushed sooner or later anyway, and this avoids the single-page writeout from reclaim, which is much less effective than the flusher threads' writeout.

Originally, I reused Peter's lru_demote patch with some changes, so his Signed-off-by is included.

Signed-off-by: Minchan Kim <minchan.kim@gmail.com>
Reported-by: Ben Gamari <bgamari.foss@gmail.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
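[Editor's illustration, not part of this patch: a minimal sketch of the userspace workaround the message describes. An application writing use-once data currently has to fsync() before posix_fadvise(POSIX_FADV_DONTNEED), because dirty or writeback pages fail invalidate_inode_page() and the hint is otherwise lost. The file path and sizes below are made up.]

/* Illustrative only -- not part of this commit. Path and sizes are made up. */
#define _POSIX_C_SOURCE 200112L
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	static char buf[1 << 16];
	int fd, i;

	fd = open("/backup/archive.tar", O_WRONLY | O_CREAT | O_TRUNC, 0644);
	if (fd < 0)
		return 1;

	memset(buf, 'x', sizeof(buf));
	for (i = 0; i < 1024; i++)	/* ~64MB of use-once data */
		if (write(fd, buf, sizeof(buf)) < 0)
			break;

	/*
	 * Without the fsync(), most pages are still dirty or under
	 * writeback, invalidate_inode_page() fails on them, and the
	 * DONTNEED hint has no effect -- but syncing here also forfeits
	 * the kernel's deferred writeback.
	 */
	fsync(fd);
	posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);

	close(fd);
	return 0;
}

[With this patch, pages that cannot be invalidated are moved to the head of the inactive list instead of staying on the active list, so the hint is no longer silently lost.]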
-rw-r--r--	include/linux/swap.h	 1
-rw-r--r--	mm/swap.c		78
-rw-r--r--	mm/truncate.c		17
3 files changed, 91 insertions(+), 5 deletions(-)
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 4d559325d919..c335055c4253 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -215,6 +215,7 @@ extern void mark_page_accessed(struct page *);
 extern void lru_add_drain(void);
 extern int lru_add_drain_all(void);
 extern void rotate_reclaimable_page(struct page *page);
+extern void deactivate_page(struct page *page);
 extern void swap_setup(void);
 
 extern void add_page_to_unevictable_list(struct page *page);
diff --git a/mm/swap.c b/mm/swap.c
index c02f93611a84..4aea806d0d44 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -39,6 +39,7 @@ int page_cluster;
 
 static DEFINE_PER_CPU(struct pagevec[NR_LRU_LISTS], lru_add_pvecs);
 static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
+static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);
 
 /*
  * This path almost never happens for VM activity - pages are normally
@@ -347,6 +348,60 @@ void add_page_to_unevictable_list(struct page *page)
 }
 
 /*
+ * If the page can not be invalidated, it is moved to the
+ * inactive list to speed up its reclaim. It is moved to the
+ * head of the list, rather than the tail, to give the flusher
+ * threads some time to write it out, as this is much more
+ * effective than the single-page writeout from reclaim.
+ */
+static void lru_deactivate(struct page *page, struct zone *zone)
+{
+	int lru, file;
+
+	if (!PageLRU(page) || !PageActive(page))
+		return;
+
+	/* Some processes are using the page */
+	if (page_mapped(page))
+		return;
+
+	file = page_is_file_cache(page);
+	lru = page_lru_base_type(page);
+	del_page_from_lru_list(zone, page, lru + LRU_ACTIVE);
+	ClearPageActive(page);
+	ClearPageReferenced(page);
+	add_page_to_lru_list(zone, page, lru);
+	__count_vm_event(PGDEACTIVATE);
+
+	update_page_reclaim_stat(zone, page, file, 0);
+}
+
+static void ____pagevec_lru_deactivate(struct pagevec *pvec)
+{
+	int i;
+	struct zone *zone = NULL;
+
+	for (i = 0; i < pagevec_count(pvec); i++) {
+		struct page *page = pvec->pages[i];
+		struct zone *pagezone = page_zone(page);
+
+		if (pagezone != zone) {
+			if (zone)
+				spin_unlock_irq(&zone->lru_lock);
+			zone = pagezone;
+			spin_lock_irq(&zone->lru_lock);
+		}
+		lru_deactivate(page, zone);
+	}
+	if (zone)
+		spin_unlock_irq(&zone->lru_lock);
+
+	release_pages(pvec->pages, pvec->nr, pvec->cold);
+	pagevec_reinit(pvec);
+}
+
+
+/*
  * Drain pages out of the cpu's pagevecs.
  * Either "cpu" is the current CPU, and preemption has already been
  * disabled; or "cpu" is being hot-unplugged, and is already dead.
@@ -372,6 +427,29 @@ static void drain_cpu_pagevecs(int cpu)
 		pagevec_move_tail(pvec);
 		local_irq_restore(flags);
 	}
+
+	pvec = &per_cpu(lru_deactivate_pvecs, cpu);
+	if (pagevec_count(pvec))
+		____pagevec_lru_deactivate(pvec);
+}
+
+/**
+ * deactivate_page - forcefully deactivate a page
+ * @page: page to deactivate
+ *
+ * This function hints the VM that @page is a good reclaim candidate,
+ * for example if its invalidation fails due to the page being dirty
+ * or under writeback.
+ */
+void deactivate_page(struct page *page)
+{
+	if (likely(get_page_unless_zero(page))) {
+		struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
+
+		if (!pagevec_add(pvec, page))
+			____pagevec_lru_deactivate(pvec);
+		put_cpu_var(lru_deactivate_pvecs);
+	}
 }
 
 void lru_add_drain(void)
diff --git a/mm/truncate.c b/mm/truncate.c
index 3d2ae1f423dc..a95667529135 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -321,11 +321,12 @@ EXPORT_SYMBOL(truncate_inode_pages);
  * pagetables.
  */
 unsigned long invalidate_mapping_pages(struct address_space *mapping,
 		pgoff_t start, pgoff_t end)
 {
 	struct pagevec pvec;
 	pgoff_t next = start;
-	unsigned long ret = 0;
+	unsigned long ret;
+	unsigned long count = 0;
 	int i;
 
 	pagevec_init(&pvec, 0);
@@ -352,9 +353,15 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
 			if (lock_failed)
 				continue;
 
-			ret += invalidate_inode_page(page);
-
+			ret = invalidate_inode_page(page);
 			unlock_page(page);
+			/*
+			 * Invalidation is a hint that the page is no longer
+			 * of interest and try to speed up its reclaim.
+			 */
+			if (!ret)
+				deactivate_page(page);
+			count += ret;
 			if (next > end)
 				break;
 		}
@@ -362,7 +369,7 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
 		mem_cgroup_uncharge_end();
 		cond_resched();
 	}
-	return ret;
+	return count;
 }
 EXPORT_SYMBOL(invalidate_mapping_pages);
 