diff options
author | Mel Gorman <mel@csn.ul.ie> | 2008-04-28 05:12:16 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-04-28 11:58:18 -0400 |
commit | 54a6eb5c4765aa573a030ceeba2c14e3d2ea5706 (patch) | |
tree | 547176a090beb787722a153cf2b8b942dc0e68db /include/linux | |
parent | 18ea7e710d2452fa726814a406779188028cf1bf (diff) |
mm: use two zonelist that are filtered by GFP mask
Currently a node has two sets of zonelists, one for each zone type in the
system and a second set for GFP_THISNODE allocations. Based on the zones
allowed by a gfp mask, one of these zonelists is selected. All of these
zonelists consume memory and occupy cache lines.
This patch replaces the multiple zonelists per-node with two zonelists. The
first contains all populated zones in the system, ordered by distance, for
fallback allocations when the target/preferred node has no free pages. The
second contains all populated zones in the node suitable for GFP_THISNODE
allocations.
An iterator macro is introduced called for_each_zone_zonelist() that iterates
through each zone allowed by the GFP flags in the selected zonelist.
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include/linux')
-rw-r--r-- | include/linux/gfp.h | 13 | ||||
-rw-r--r-- | include/linux/mmzone.h | 65 |
2 files changed, 54 insertions, 24 deletions
diff --git a/include/linux/gfp.h b/include/linux/gfp.h index e865d51f1c74..e1c6064cb6c7 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h | |||
@@ -151,17 +151,26 @@ static inline enum zone_type gfp_zone(gfp_t flags) | |||
151 | * virtual kernel addresses to the allocated page(s). | 151 | * virtual kernel addresses to the allocated page(s). |
152 | */ | 152 | */ |
153 | 153 | ||
154 | static inline int gfp_zonelist(gfp_t flags) | ||
155 | { | ||
156 | if (NUMA_BUILD && unlikely(flags & __GFP_THISNODE)) | ||
157 | return 1; | ||
158 | |||
159 | return 0; | ||
160 | } | ||
161 | |||
154 | /* | 162 | /* |
155 | * We get the zone list from the current node and the gfp_mask. | 163 | * We get the zone list from the current node and the gfp_mask. |
156 | * This zone list contains a maximum of MAXNODES*MAX_NR_ZONES zones. | 164 | * This zone list contains a maximum of MAXNODES*MAX_NR_ZONES zones. |
157 | * There are many zonelists per node, two for each active zone. | 165 | * There are two zonelists per node, one for all zones with memory and |
166 | * one containing just zones from the node the zonelist belongs to. | ||
158 | * | 167 | * |
159 | * For the normal case of non-DISCONTIGMEM systems the NODE_DATA() gets | 168 | * For the normal case of non-DISCONTIGMEM systems the NODE_DATA() gets |
160 | * optimized to &contig_page_data at compile-time. | 169 | * optimized to &contig_page_data at compile-time. |
161 | */ | 170 | */ |
162 | static inline struct zonelist *node_zonelist(int nid, gfp_t flags) | 171 | static inline struct zonelist *node_zonelist(int nid, gfp_t flags) |
163 | { | 172 | { |
164 | return NODE_DATA(nid)->node_zonelists + gfp_zone(flags); | 173 | return NODE_DATA(nid)->node_zonelists + gfp_zonelist(flags); |
165 | } | 174 | } |
166 | 175 | ||
167 | #ifndef HAVE_ARCH_FREE_PAGE | 176 | #ifndef HAVE_ARCH_FREE_PAGE |
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 451eaa13bc28..d5c33a0b89e9 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h | |||
@@ -393,10 +393,10 @@ static inline int zone_is_oom_locked(const struct zone *zone) | |||
393 | * The NUMA zonelists are doubled becausse we need zonelists that restrict the | 393 | * The NUMA zonelists are doubled becausse we need zonelists that restrict the |
394 | * allocations to a single node for GFP_THISNODE. | 394 | * allocations to a single node for GFP_THISNODE. |
395 | * | 395 | * |
396 | * [0 .. MAX_NR_ZONES -1] : Zonelists with fallback | 396 | * [0] : Zonelist with fallback |
397 | * [MAZ_NR_ZONES ... MAZ_ZONELISTS -1] : No fallback (GFP_THISNODE) | 397 | * [1] : No fallback (GFP_THISNODE) |
398 | */ | 398 | */ |
399 | #define MAX_ZONELISTS (2 * MAX_NR_ZONES) | 399 | #define MAX_ZONELISTS 2 |
400 | 400 | ||
401 | 401 | ||
402 | /* | 402 | /* |
@@ -464,7 +464,7 @@ struct zonelist_cache { | |||
464 | unsigned long last_full_zap; /* when last zap'd (jiffies) */ | 464 | unsigned long last_full_zap; /* when last zap'd (jiffies) */ |
465 | }; | 465 | }; |
466 | #else | 466 | #else |
467 | #define MAX_ZONELISTS MAX_NR_ZONES | 467 | #define MAX_ZONELISTS 1 |
468 | struct zonelist_cache; | 468 | struct zonelist_cache; |
469 | #endif | 469 | #endif |
470 | 470 | ||
@@ -486,24 +486,6 @@ struct zonelist { | |||
486 | #endif | 486 | #endif |
487 | }; | 487 | }; |
488 | 488 | ||
489 | #ifdef CONFIG_NUMA | ||
490 | /* | ||
491 | * Only custom zonelists like MPOL_BIND need to be filtered as part of | ||
492 | * policies. As described in the comment for struct zonelist_cache, these | ||
493 | * zonelists will not have a zlcache so zlcache_ptr will not be set. Use | ||
494 | * that to determine if the zonelists needs to be filtered or not. | ||
495 | */ | ||
496 | static inline int alloc_should_filter_zonelist(struct zonelist *zonelist) | ||
497 | { | ||
498 | return !zonelist->zlcache_ptr; | ||
499 | } | ||
500 | #else | ||
501 | static inline int alloc_should_filter_zonelist(struct zonelist *zonelist) | ||
502 | { | ||
503 | return 0; | ||
504 | } | ||
505 | #endif /* CONFIG_NUMA */ | ||
506 | |||
507 | #ifdef CONFIG_ARCH_POPULATES_NODE_MAP | 489 | #ifdef CONFIG_ARCH_POPULATES_NODE_MAP |
508 | struct node_active_region { | 490 | struct node_active_region { |
509 | unsigned long start_pfn; | 491 | unsigned long start_pfn; |
@@ -731,6 +713,45 @@ extern struct zone *next_zone(struct zone *zone); | |||
731 | zone; \ | 713 | zone; \ |
732 | zone = next_zone(zone)) | 714 | zone = next_zone(zone)) |
733 | 715 | ||
716 | /* Returns the first zone at or below highest_zoneidx in a zonelist */ | ||
717 | static inline struct zone **first_zones_zonelist(struct zonelist *zonelist, | ||
718 | enum zone_type highest_zoneidx) | ||
719 | { | ||
720 | struct zone **z; | ||
721 | |||
722 | /* Find the first suitable zone to use for the allocation */ | ||
723 | z = zonelist->zones; | ||
724 | while (*z && zone_idx(*z) > highest_zoneidx) | ||
725 | z++; | ||
726 | |||
727 | return z; | ||
728 | } | ||
729 | |||
730 | /* Returns the next zone at or below highest_zoneidx in a zonelist */ | ||
731 | static inline struct zone **next_zones_zonelist(struct zone **z, | ||
732 | enum zone_type highest_zoneidx) | ||
733 | { | ||
734 | /* Find the next suitable zone to use for the allocation */ | ||
735 | while (*z && zone_idx(*z) > highest_zoneidx) | ||
736 | z++; | ||
737 | |||
738 | return z; | ||
739 | } | ||
740 | |||
741 | /** | ||
742 | * for_each_zone_zonelist - helper macro to iterate over valid zones in a zonelist at or below a given zone index | ||
743 | * @zone - The current zone in the iterator | ||
744 | * @z - The current pointer within zonelist->zones being iterated | ||
745 | * @zlist - The zonelist being iterated | ||
746 | * @highidx - The zone index of the highest zone to return | ||
747 | * | ||
748 | * This iterator iterates though all zones at or below a given zone index. | ||
749 | */ | ||
750 | #define for_each_zone_zonelist(zone, z, zlist, highidx) \ | ||
751 | for (z = first_zones_zonelist(zlist, highidx), zone = *z++; \ | ||
752 | zone; \ | ||
753 | z = next_zones_zonelist(z, highidx), zone = *z++) | ||
754 | |||
734 | #ifdef CONFIG_SPARSEMEM | 755 | #ifdef CONFIG_SPARSEMEM |
735 | #include <asm/sparsemem.h> | 756 | #include <asm/sparsemem.h> |
736 | #endif | 757 | #endif |