aboutsummaryrefslogtreecommitdiffstats
path: root/mm/page_alloc.c
diff options
context:
space:
mode:
author Martin Hicks <mort@sgi.com> 2005-06-21 20:14:41 -0400
committer Linus Torvalds <torvalds@ppc970.osdl.org> 2005-06-21 21:46:14 -0400
commit 753ee728964e5afb80c17659cc6c3a6fd0a42fe0 (patch)
tree 41c9a7700d0858c1f77c5bdaba97e5b636f69b06 /mm/page_alloc.c
parent bfbb38fb808ac23ef44472d05d9bb36edfb49ed0 (diff)
[PATCH] VM: early zone reclaim
This is the core of the (much simplified) early reclaim. The goal of this patch is to reclaim some easily-freed pages from a zone before falling back onto another zone.

One of the major uses of this is NUMA machines. With the default allocator behavior the allocator would look for memory in another zone, which might be off-node, before trying to reclaim from the current zone.

This adds a zone tuneable to enable early zone reclaim. It is selected on a per-zone basis and is turned on/off via syscall.

Adding some extra throttling on the reclaim was also required (patch 4/4). Without it, the machine would grind to a crawl when doing a "make -j" kernel build. Even with this patch the System Time is higher on average, but it seems tolerable. Here are some numbers for kernbench runs on a 2-node, 4cpu, 8Gig RAM Altix in the "make -j" run:

                        wall  user  sys  %cpu  ctx sw.  sleeps
                        ----  ----  ---  ----  -------  ------
  No patch              1009  1384  847   258   298170  504402
  w/patch, no reclaim    880  1376  667   288   254064  396745
  w/patch & reclaim     1079  1385  926   252   291625  548873

These numbers are the average of 2 runs of 3 "make -j" runs done right after system boot. Run-to-run variability for "make -j" is huge, so these numbers aren't terribly useful except to see that with reclaim the benchmark still finishes in a reasonable amount of time.

I also looked at the NUMA hit/miss stats for the "make -j" runs and the reclaim doesn't make any difference when the machine is thrashing away.

Doing a "make -j8" on a single node that is filled with page cache pages takes 700 seconds with reclaim turned on and 735 seconds without reclaim (due to remote memory accesses).

The simple zone_reclaim syscall program is at http://www.bork.org/~mort/sgi/zone_reclaim.c

Signed-off-by: Martin Hicks <mort@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r-- mm/page_alloc.c 33
1 files changed, 28 insertions, 5 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 40169f0b7e9e..3c0f69ded6b5 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -724,6 +724,14 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
724 return 1; 724 return 1;
725} 725}
726 726
727static inline int
728should_reclaim_zone(struct zone *z, unsigned int gfp_mask)
729{
730 if (!z->reclaim_pages)
731 return 0;
732 return 1;
733}
734
727/* 735/*
728 * This is the 'heart' of the zoned buddy allocator. 736 * This is the 'heart' of the zoned buddy allocator.
729 */ 737 */
@@ -760,17 +768,32 @@ __alloc_pages(unsigned int __nocast gfp_mask, unsigned int order,
760 768
761 classzone_idx = zone_idx(zones[0]); 769 classzone_idx = zone_idx(zones[0]);
762 770
763 restart: 771restart:
764 /* Go through the zonelist once, looking for a zone with enough free */ 772 /* Go through the zonelist once, looking for a zone with enough free */
765 for (i = 0; (z = zones[i]) != NULL; i++) { 773 for (i = 0; (z = zones[i]) != NULL; i++) {
766 774 int do_reclaim = should_reclaim_zone(z, gfp_mask);
767 if (!zone_watermark_ok(z, order, z->pages_low,
768 classzone_idx, 0, 0))
769 continue;
770 775
771 if (!cpuset_zone_allowed(z)) 776 if (!cpuset_zone_allowed(z))
772 continue; 777 continue;
773 778
779 /*
780 * If the zone is to attempt early page reclaim then this loop
781 * will try to reclaim pages and check the watermark a second
782 * time before giving up and falling back to the next zone.
783 */
784zone_reclaim_retry:
785 if (!zone_watermark_ok(z, order, z->pages_low,
786 classzone_idx, 0, 0)) {
787 if (!do_reclaim)
788 continue;
789 else {
790 zone_reclaim(z, gfp_mask, order);
791 /* Only try reclaim once */
792 do_reclaim = 0;
793 goto zone_reclaim_retry;
794 }
795 }
796
774 page = buffered_rmqueue(z, order, gfp_mask); 797 page = buffered_rmqueue(z, order, gfp_mask);
775 if (page) 798 if (page)
776 goto got_pg; 799 goto got_pg;