diff options
| author | Mel Gorman <mel@csn.ul.ie> | 2008-04-28 05:12:16 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-04-28 11:58:18 -0400 |
| commit | 54a6eb5c4765aa573a030ceeba2c14e3d2ea5706 (patch) | |
| tree | 547176a090beb787722a153cf2b8b942dc0e68db /include/linux | |
| parent | 18ea7e710d2452fa726814a406779188028cf1bf (diff) | |
mm: use two zonelist that are filtered by GFP mask
Currently a node has two sets of zonelists, one for each zone type in the
system and a second set for GFP_THISNODE allocations. Based on the zones
allowed by a gfp mask, one of these zonelists is selected. All of these
zonelists consume memory and occupy cache lines.
This patch replaces the multiple zonelists per-node with two zonelists. The
first contains all populated zones in the system, ordered by distance, for
fallback allocations when the target/preferred node has no free pages. The
second contains all populated zones in the node suitable for GFP_THISNODE
allocations.
An iterator macro is introduced called for_each_zone_zonelist() that iterates
through each zone allowed by the GFP flags in the selected zonelist.
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include/linux')
| -rw-r--r-- | include/linux/gfp.h | 13 | ||||
| -rw-r--r-- | include/linux/mmzone.h | 65 |
2 files changed, 54 insertions, 24 deletions
diff --git a/include/linux/gfp.h b/include/linux/gfp.h index e865d51f1c74..e1c6064cb6c7 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h | |||
| @@ -151,17 +151,26 @@ static inline enum zone_type gfp_zone(gfp_t flags) | |||
| 151 | * virtual kernel addresses to the allocated page(s). | 151 | * virtual kernel addresses to the allocated page(s). |
| 152 | */ | 152 | */ |
| 153 | 153 | ||
| 154 | static inline int gfp_zonelist(gfp_t flags) | ||
| 155 | { | ||
| 156 | if (NUMA_BUILD && unlikely(flags & __GFP_THISNODE)) | ||
| 157 | return 1; | ||
| 158 | |||
| 159 | return 0; | ||
| 160 | } | ||
| 161 | |||
| 154 | /* | 162 | /* |
| 155 | * We get the zone list from the current node and the gfp_mask. | 163 | * We get the zone list from the current node and the gfp_mask. |
| 156 | * This zone list contains a maximum of MAXNODES*MAX_NR_ZONES zones. | 164 | * This zone list contains a maximum of MAXNODES*MAX_NR_ZONES zones. |
| 157 | * There are many zonelists per node, two for each active zone. | 165 | * There are two zonelists per node, one for all zones with memory and |
| 166 | * one containing just zones from the node the zonelist belongs to. | ||
| 158 | * | 167 | * |
| 159 | * For the normal case of non-DISCONTIGMEM systems the NODE_DATA() gets | 168 | * For the normal case of non-DISCONTIGMEM systems the NODE_DATA() gets |
| 160 | * optimized to &contig_page_data at compile-time. | 169 | * optimized to &contig_page_data at compile-time. |
| 161 | */ | 170 | */ |
| 162 | static inline struct zonelist *node_zonelist(int nid, gfp_t flags) | 171 | static inline struct zonelist *node_zonelist(int nid, gfp_t flags) |
| 163 | { | 172 | { |
| 164 | return NODE_DATA(nid)->node_zonelists + gfp_zone(flags); | 173 | return NODE_DATA(nid)->node_zonelists + gfp_zonelist(flags); |
| 165 | } | 174 | } |
| 166 | 175 | ||
| 167 | #ifndef HAVE_ARCH_FREE_PAGE | 176 | #ifndef HAVE_ARCH_FREE_PAGE |
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 451eaa13bc28..d5c33a0b89e9 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h | |||
| @@ -393,10 +393,10 @@ static inline int zone_is_oom_locked(const struct zone *zone) | |||
| 393 | * The NUMA zonelists are doubled becausse we need zonelists that restrict the | 393 | * The NUMA zonelists are doubled becausse we need zonelists that restrict the |
| 394 | * allocations to a single node for GFP_THISNODE. | 394 | * allocations to a single node for GFP_THISNODE. |
| 395 | * | 395 | * |
| 396 | * [0 .. MAX_NR_ZONES -1] : Zonelists with fallback | 396 | * [0] : Zonelist with fallback |
| 397 | * [MAZ_NR_ZONES ... MAZ_ZONELISTS -1] : No fallback (GFP_THISNODE) | 397 | * [1] : No fallback (GFP_THISNODE) |
| 398 | */ | 398 | */ |
| 399 | #define MAX_ZONELISTS (2 * MAX_NR_ZONES) | 399 | #define MAX_ZONELISTS 2 |
| 400 | 400 | ||
| 401 | 401 | ||
| 402 | /* | 402 | /* |
| @@ -464,7 +464,7 @@ struct zonelist_cache { | |||
| 464 | unsigned long last_full_zap; /* when last zap'd (jiffies) */ | 464 | unsigned long last_full_zap; /* when last zap'd (jiffies) */ |
| 465 | }; | 465 | }; |
| 466 | #else | 466 | #else |
| 467 | #define MAX_ZONELISTS MAX_NR_ZONES | 467 | #define MAX_ZONELISTS 1 |
| 468 | struct zonelist_cache; | 468 | struct zonelist_cache; |
| 469 | #endif | 469 | #endif |
| 470 | 470 | ||
| @@ -486,24 +486,6 @@ struct zonelist { | |||
| 486 | #endif | 486 | #endif |
| 487 | }; | 487 | }; |
| 488 | 488 | ||
| 489 | #ifdef CONFIG_NUMA | ||
| 490 | /* | ||
| 491 | * Only custom zonelists like MPOL_BIND need to be filtered as part of | ||
| 492 | * policies. As described in the comment for struct zonelist_cache, these | ||
| 493 | * zonelists will not have a zlcache so zlcache_ptr will not be set. Use | ||
| 494 | * that to determine if the zonelists needs to be filtered or not. | ||
| 495 | */ | ||
| 496 | static inline int alloc_should_filter_zonelist(struct zonelist *zonelist) | ||
| 497 | { | ||
| 498 | return !zonelist->zlcache_ptr; | ||
| 499 | } | ||
| 500 | #else | ||
| 501 | static inline int alloc_should_filter_zonelist(struct zonelist *zonelist) | ||
| 502 | { | ||
| 503 | return 0; | ||
| 504 | } | ||
| 505 | #endif /* CONFIG_NUMA */ | ||
| 506 | |||
| 507 | #ifdef CONFIG_ARCH_POPULATES_NODE_MAP | 489 | #ifdef CONFIG_ARCH_POPULATES_NODE_MAP |
| 508 | struct node_active_region { | 490 | struct node_active_region { |
| 509 | unsigned long start_pfn; | 491 | unsigned long start_pfn; |
| @@ -731,6 +713,45 @@ extern struct zone *next_zone(struct zone *zone); | |||
| 731 | zone; \ | 713 | zone; \ |
| 732 | zone = next_zone(zone)) | 714 | zone = next_zone(zone)) |
| 733 | 715 | ||
| 716 | /* Returns the first zone at or below highest_zoneidx in a zonelist */ | ||
| 717 | static inline struct zone **first_zones_zonelist(struct zonelist *zonelist, | ||
| 718 | enum zone_type highest_zoneidx) | ||
| 719 | { | ||
| 720 | struct zone **z; | ||
| 721 | |||
| 722 | /* Find the first suitable zone to use for the allocation */ | ||
| 723 | z = zonelist->zones; | ||
| 724 | while (*z && zone_idx(*z) > highest_zoneidx) | ||
| 725 | z++; | ||
| 726 | |||
| 727 | return z; | ||
| 728 | } | ||
| 729 | |||
| 730 | /* Returns the next zone at or below highest_zoneidx in a zonelist */ | ||
| 731 | static inline struct zone **next_zones_zonelist(struct zone **z, | ||
| 732 | enum zone_type highest_zoneidx) | ||
| 733 | { | ||
| 734 | /* Find the next suitable zone to use for the allocation */ | ||
| 735 | while (*z && zone_idx(*z) > highest_zoneidx) | ||
| 736 | z++; | ||
| 737 | |||
| 738 | return z; | ||
| 739 | } | ||
| 740 | |||
| 741 | /** | ||
| 742 | * for_each_zone_zonelist - helper macro to iterate over valid zones in a zonelist at or below a given zone index | ||
| 743 | * @zone - The current zone in the iterator | ||
| 744 | * @z - The current pointer within zonelist->zones being iterated | ||
| 745 | * @zlist - The zonelist being iterated | ||
| 746 | * @highidx - The zone index of the highest zone to return | ||
| 747 | * | ||
| 748 | * This iterator iterates though all zones at or below a given zone index. | ||
| 749 | */ | ||
| 750 | #define for_each_zone_zonelist(zone, z, zlist, highidx) \ | ||
| 751 | for (z = first_zones_zonelist(zlist, highidx), zone = *z++; \ | ||
| 752 | zone; \ | ||
| 753 | z = next_zones_zonelist(z, highidx), zone = *z++) | ||
| 754 | |||
| 734 | #ifdef CONFIG_SPARSEMEM | 755 | #ifdef CONFIG_SPARSEMEM |
| 735 | #include <asm/sparsemem.h> | 756 | #include <asm/sparsemem.h> |
| 736 | #endif | 757 | #endif |
