author     Mel Gorman <mel@csn.ul.ie>                        2008-04-28 05:12:16 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>    2008-04-28 11:58:18 -0400
commit     54a6eb5c4765aa573a030ceeba2c14e3d2ea5706
tree       547176a090beb787722a153cf2b8b942dc0e68db
parent     18ea7e710d2452fa726814a406779188028cf1bf
mm: use two zonelists that are filtered by GFP mask
Currently a node has two sets of zonelists: one set containing a zonelist for
each zone type in the system, and a second set for GFP_THISNODE allocations.
Based on the zones allowed by a gfp mask, one of these zonelists is selected.
All of these zonelists consume memory and occupy cache lines.
This patch replaces the multiple zonelists per-node with two zonelists. The
first contains all populated zones in the system, ordered by distance, for
fallback allocations when the target/preferred node has no free pages. The
second contains all populated zones in the node suitable for GFP_THISNODE
allocations.
An iterator macro called for_each_zone_zonelist() is introduced that iterates
through each zone allowed by the GFP flags in the selected zonelist.
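
As an illustrative sketch (not part of the patch), this is how a caller uses
the new API: pick the node's zonelist once, then let the iterator clip it to
the zones the mask allows. The function name here is hypothetical.

	/*
	 * Hypothetical example: count the zones the allocator would consider
	 * for this mask on the local node.  gfp_zone() caps the walk, so a
	 * GFP_KERNEL caller never sees ZONE_HIGHMEM entries.
	 */
	static int nr_usable_zones(gfp_t gfp_mask)
	{
		struct zonelist *zonelist = node_zonelist(numa_node_id(), gfp_mask);
		enum zone_type high_zoneidx = gfp_zone(gfp_mask);
		struct zone **z;
		struct zone *zone;
		int nr = 0;

		for_each_zone_zonelist(zone, z, zonelist, high_zoneidx)
			nr++;	/* each zone the mask allows, in fallback order */

		return nr;
	}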
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
 arch/parisc/mm/init.c  |  11
 fs/buffer.c            |  10
 include/linux/gfp.h    |  13
 include/linux/mmzone.h |  65
 mm/hugetlb.c           |   8
 mm/oom_kill.c          |   8
 mm/page_alloc.c        | 170
 mm/slab.c              |   8
 mm/slub.c              |   8
 mm/vmscan.c            |  21
 10 files changed, 168 insertions(+), 154 deletions(-)
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index eb80f5e33d7d..9bb6136d77c2 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -603,15 +603,18 @@ void show_mem(void)
 #ifdef CONFIG_DISCONTIGMEM
 	{
 		struct zonelist *zl;
-		int i, j, k;
+		int i, j;
 
 		for (i = 0; i < npmem_ranges; i++) {
+			zl = node_zonelist(i);
 			for (j = 0; j < MAX_NR_ZONES; j++) {
-				zl = NODE_DATA(i)->node_zonelists + j;
+				struct zone **z;
+				struct zone *zone;
 
 				printk("Zone list for zone %d on node %d: ", j, i);
-				for (k = 0; zl->zones[k] != NULL; k++)
-					printk("[%d/%s] ", zone_to_nid(zl->zones[k]), zl->zones[k]->name);
+				for_each_zone_zonelist(zone, z, zl, j)
+					printk("[%d/%s] ", zone_to_nid(zone),
+							   zone->name);
 				printk("\n");
 			}
 		}
diff --git a/fs/buffer.c b/fs/buffer.c
index 71358499bc57..9b5434a80479 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -360,16 +360,18 @@ void invalidate_bdev(struct block_device *bdev)
  */
 static void free_more_memory(void)
 {
-	struct zonelist *zonelist;
+	struct zone **zones;
 	int nid;
 
 	wakeup_pdflush(1024);
 	yield();
 
 	for_each_online_node(nid) {
-		zonelist = node_zonelist(nid, GFP_NOFS);
-		if (zonelist->zones[0])
-			try_to_free_pages(zonelist, 0, GFP_NOFS);
+		zones = first_zones_zonelist(node_zonelist(nid, GFP_NOFS),
+						gfp_zone(GFP_NOFS));
+		if (*zones)
+			try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
+						GFP_NOFS);
 	}
 }
 
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index e865d51f1c74..e1c6064cb6c7 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -151,17 +151,26 @@ static inline enum zone_type gfp_zone(gfp_t flags)
  * virtual kernel addresses to the allocated page(s).
  */
 
+static inline int gfp_zonelist(gfp_t flags)
+{
+	if (NUMA_BUILD && unlikely(flags & __GFP_THISNODE))
+		return 1;
+
+	return 0;
+}
+
 /*
  * We get the zone list from the current node and the gfp_mask.
  * This zone list contains a maximum of MAXNODES*MAX_NR_ZONES zones.
- * There are many zonelists per node, two for each active zone.
+ * There are two zonelists per node, one for all zones with memory and
+ * one containing just zones from the node the zonelist belongs to.
  *
  * For the normal case of non-DISCONTIGMEM systems the NODE_DATA() gets
  * optimized to &contig_page_data at compile-time.
  */
 static inline struct zonelist *node_zonelist(int nid, gfp_t flags)
 {
-	return NODE_DATA(nid)->node_zonelists + gfp_zone(flags);
+	return NODE_DATA(nid)->node_zonelists + gfp_zonelist(flags);
 }
 
 #ifndef HAVE_ARCH_FREE_PAGE
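
To illustrate the selection (a sketch, not code from the patch): the GFP mask
now chooses which of the two per-node zonelists to use, while gfp_zone() still
bounds how high an allocation may reach within it. nid is an arbitrary online
node here.

	struct zonelist *fallback = node_zonelist(nid, GFP_KERNEL);
			/* node_zonelists[0]: all populated zones, with fallback */
	struct zonelist *thisnode = node_zonelist(nid, GFP_KERNEL | __GFP_THISNODE);
			/* node_zonelists[1] on NUMA builds: this node's zones only */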
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 451eaa13bc28..d5c33a0b89e9 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -393,10 +393,10 @@ static inline int zone_is_oom_locked(const struct zone *zone)
 * The NUMA zonelists are doubled becausse we need zonelists that restrict the
 * allocations to a single node for GFP_THISNODE.
 *
- * [0 .. MAX_NR_ZONES -1]		: Zonelists with fallback
- * [MAZ_NR_ZONES ... MAZ_ZONELISTS -1]	: No fallback (GFP_THISNODE)
+ * [0]	: Zonelist with fallback
+ * [1]	: No fallback (GFP_THISNODE)
 */
-#define MAX_ZONELISTS (2 * MAX_NR_ZONES)
+#define MAX_ZONELISTS 2
 
 
 /*
@@ -464,7 +464,7 @@ struct zonelist_cache {
 	unsigned long last_full_zap;		/* when last zap'd (jiffies) */
 };
 #else
-#define MAX_ZONELISTS MAX_NR_ZONES
+#define MAX_ZONELISTS 1
 struct zonelist_cache;
 #endif
 
@@ -486,24 +486,6 @@ struct zonelist {
 #endif
 };
 
-#ifdef CONFIG_NUMA
-/*
- * Only custom zonelists like MPOL_BIND need to be filtered as part of
- * policies. As described in the comment for struct zonelist_cache, these
- * zonelists will not have a zlcache so zlcache_ptr will not be set. Use
- * that to determine if the zonelists needs to be filtered or not.
- */
-static inline int alloc_should_filter_zonelist(struct zonelist *zonelist)
-{
-	return !zonelist->zlcache_ptr;
-}
-#else
-static inline int alloc_should_filter_zonelist(struct zonelist *zonelist)
-{
-	return 0;
-}
-#endif /* CONFIG_NUMA */
-
 #ifdef CONFIG_ARCH_POPULATES_NODE_MAP
 struct node_active_region {
 	unsigned long start_pfn;
@@ -731,6 +713,45 @@ extern struct zone *next_zone(struct zone *zone);
 	     zone;					\
 	     zone = next_zone(zone))
 
+/* Returns the first zone at or below highest_zoneidx in a zonelist */
+static inline struct zone **first_zones_zonelist(struct zonelist *zonelist,
+					enum zone_type highest_zoneidx)
+{
+	struct zone **z;
+
+	/* Find the first suitable zone to use for the allocation */
+	z = zonelist->zones;
+	while (*z && zone_idx(*z) > highest_zoneidx)
+		z++;
+
+	return z;
+}
+
+/* Returns the next zone at or below highest_zoneidx in a zonelist */
+static inline struct zone **next_zones_zonelist(struct zone **z,
+					enum zone_type highest_zoneidx)
+{
+	/* Find the next suitable zone to use for the allocation */
+	while (*z && zone_idx(*z) > highest_zoneidx)
+		z++;
+
+	return z;
+}
+
+/**
+ * for_each_zone_zonelist - helper macro to iterate over valid zones in a zonelist at or below a given zone index
+ * @zone - The current zone in the iterator
+ * @z - The current pointer within zonelist->zones being iterated
+ * @zlist - The zonelist being iterated
+ * @highidx - The zone index of the highest zone to return
+ *
+ * This iterator iterates though all zones at or below a given zone index.
+ */
+#define for_each_zone_zonelist(zone, z, zlist, highidx) \
+	for (z = first_zones_zonelist(zlist, highidx), zone = *z++;	\
+		zone;							\
+		z = next_zones_zonelist(z, highidx), zone = *z++)
+
 #ifdef CONFIG_SPARSEMEM
 #include <asm/sparsemem.h>
 #endif
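
Expanded by hand, the macro is equivalent to the following sketch; nothing here
is new API, it is just the iterator unrolled for illustration:

	/* Hand-expanded for_each_zone_zonelist(zone, z, zlist, highidx) */
	z = first_zones_zonelist(zlist, highidx);
	for (zone = *z++; zone; z = next_zones_zonelist(z, highidx), zone = *z++) {
		/* the body only ever sees zones with zone_idx(zone) <= highidx */
	}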
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 51c9e2c01640..ddd141cad77f 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -97,11 +97,11 @@ static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma,
 	struct mempolicy *mpol;
 	struct zonelist *zonelist = huge_zonelist(vma, address,
 					htlb_alloc_mask, &mpol);
-	struct zone **z;
+	struct zone *zone, **z;
 
-	for (z = zonelist->zones; *z; z++) {
-		nid = zone_to_nid(*z);
-		if (cpuset_zone_allowed_softwall(*z, htlb_alloc_mask) &&
+	for_each_zone_zonelist(zone, z, zonelist, MAX_NR_ZONES - 1) {
+		nid = zone_to_nid(zone);
+		if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask) &&
 		    !list_empty(&hugepage_freelists[nid])) {
 			page = list_entry(hugepage_freelists[nid].next,
 					  struct page, lru);
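
Note the MAX_NR_ZONES - 1 argument: it is the highest possible zone index, so
nothing is filtered and the iterator walks the entire zonelist (huge_zonelist()
has already chosen a suitable list for the mempolicy and htlb_alloc_mask). A
sketch of the idiom, with a hypothetical body:

	/* Illustrative: visit every zone in a zonelist, unfiltered */
	for_each_zone_zonelist(zone, z, zonelist, MAX_NR_ZONES - 1)
		printk("[%d/%s] ", zone_to_nid(zone), zone->name);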
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index beb592fe9389..2c93502cfcb4 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -175,12 +175,14 @@ static inline enum oom_constraint constrained_alloc(struct zonelist *zonelist,
 						    gfp_t gfp_mask)
 {
 #ifdef CONFIG_NUMA
+	struct zone *zone;
 	struct zone **z;
+	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
 	nodemask_t nodes = node_states[N_HIGH_MEMORY];
 
-	for (z = zonelist->zones; *z; z++)
-		if (cpuset_zone_allowed_softwall(*z, gfp_mask))
-			node_clear(zone_to_nid(*z), nodes);
+	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx)
+		if (cpuset_zone_allowed_softwall(zone, gfp_mask))
+			node_clear(zone_to_nid(zone), nodes);
 		else
 			return CONSTRAINT_CPUSET;
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 187efd47a446..4ccb8651cf22 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1378,42 +1378,29 @@ static void zlc_mark_zone_full(struct zonelist *zonelist, struct zone **z)
  */
 static struct page *
 get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
-		struct zonelist *zonelist, int alloc_flags)
+		struct zonelist *zonelist, int high_zoneidx, int alloc_flags)
 {
 	struct zone **z;
 	struct page *page = NULL;
-	int classzone_idx = zone_idx(zonelist->zones[0]);
+	int classzone_idx;
 	struct zone *zone, *preferred_zone;
 	nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */
 	int zlc_active = 0;		/* set if using zonelist_cache */
 	int did_zlc_setup = 0;		/* just call zlc_setup() one time */
-	enum zone_type highest_zoneidx = -1; /* Gets set for policy zonelists */
+
+	z = first_zones_zonelist(zonelist, high_zoneidx);
+	classzone_idx = zone_idx(*z);
+	preferred_zone = *z;
 
 zonelist_scan:
 	/*
 	 * Scan zonelist, looking for a zone with enough free.
 	 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
 	 */
-	z = zonelist->zones;
-	preferred_zone = *z;
-
-	do {
-		/*
-		 * In NUMA, this could be a policy zonelist which contains
-		 * zones that may not be allowed by the current gfp_mask.
-		 * Check the zone is allowed by the current flags
-		 */
-		if (unlikely(alloc_should_filter_zonelist(zonelist))) {
-			if (highest_zoneidx == -1)
-				highest_zoneidx = gfp_zone(gfp_mask);
-			if (zone_idx(*z) > highest_zoneidx)
-				continue;
-		}
-
+	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
 		if (NUMA_BUILD && zlc_active &&
 			!zlc_zone_worth_trying(zonelist, z, allowednodes))
 				continue;
-		zone = *z;
 		if ((alloc_flags & ALLOC_CPUSET) &&
 			!cpuset_zone_allowed_softwall(zone, gfp_mask))
 				goto try_next_zone;
@@ -1447,7 +1434,7 @@ try_next_zone:
 			zlc_active = 1;
 			did_zlc_setup = 1;
 		}
-	} while (*(++z) != NULL);
+	}
 
 	if (unlikely(NUMA_BUILD && page == NULL && zlc_active)) {
 		/* Disable zlc cache for second zonelist scan */
@@ -1465,6 +1452,7 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order,
 		struct zonelist *zonelist)
 {
 	const gfp_t wait = gfp_mask & __GFP_WAIT;
+	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
 	struct zone **z;
 	struct page *page;
 	struct reclaim_state reclaim_state;
@@ -1490,7 +1478,7 @@ restart:
 	}
 
 	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
-				zonelist, ALLOC_WMARK_LOW|ALLOC_CPUSET);
+			zonelist, high_zoneidx, ALLOC_WMARK_LOW|ALLOC_CPUSET);
 	if (page)
 		goto got_pg;
 
@@ -1534,7 +1522,8 @@ restart:
 	 * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
 	 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
 	 */
-	page = get_page_from_freelist(gfp_mask, order, zonelist, alloc_flags);
+	page = get_page_from_freelist(gfp_mask, order, zonelist,
+						high_zoneidx, alloc_flags);
 	if (page)
 		goto got_pg;
 
@@ -1547,7 +1536,7 @@ rebalance:
 nofail_alloc:
 			/* go through the zonelist yet again, ignoring mins */
 			page = get_page_from_freelist(gfp_mask, order,
-				zonelist, ALLOC_NO_WATERMARKS);
+				zonelist, high_zoneidx, ALLOC_NO_WATERMARKS);
 			if (page)
 				goto got_pg;
 			if (gfp_mask & __GFP_NOFAIL) {
@@ -1582,7 +1571,7 @@ nofail_alloc:
 
 	if (likely(did_some_progress)) {
 		page = get_page_from_freelist(gfp_mask, order,
-						zonelist, alloc_flags);
+					zonelist, high_zoneidx, alloc_flags);
 		if (page)
 			goto got_pg;
 	} else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
@@ -1598,7 +1587,7 @@ nofail_alloc:
 		 * under heavy pressure.
 		 */
 		page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
-			zonelist, ALLOC_WMARK_HIGH|ALLOC_CPUSET);
+			zonelist, high_zoneidx, ALLOC_WMARK_HIGH|ALLOC_CPUSET);
 		if (page) {
 			clear_zonelist_oom(zonelist);
 			goto got_pg;
@@ -1713,14 +1702,15 @@ EXPORT_SYMBOL(free_pages);
 
 static unsigned int nr_free_zone_pages(int offset)
 {
+	struct zone **z;
+	struct zone *zone;
+
 	/* Just pick one node, since fallback list is circular */
 	unsigned int sum = 0;
 
 	struct zonelist *zonelist = node_zonelist(numa_node_id(), GFP_KERNEL);
-	struct zone **zonep = zonelist->zones;
-	struct zone *zone;
 
-	for (zone = *zonep++; zone; zone = *zonep++) {
+	for_each_zone_zonelist(zone, z, zonelist, offset) {
 		unsigned long size = zone->present_pages;
 		unsigned long high = zone->pages_high;
 		if (size > high)
@@ -2078,17 +2068,15 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask)
  */
 static void build_zonelists_in_node_order(pg_data_t *pgdat, int node)
 {
-	enum zone_type i;
 	int j;
 	struct zonelist *zonelist;
 
-	for (i = 0; i < MAX_NR_ZONES; i++) {
-		zonelist = pgdat->node_zonelists + i;
-		for (j = 0; zonelist->zones[j] != NULL; j++)
-			;
-		j = build_zonelists_node(NODE_DATA(node), zonelist, j, i);
-		zonelist->zones[j] = NULL;
-	}
+	zonelist = &pgdat->node_zonelists[0];
+	for (j = 0; zonelist->zones[j] != NULL; j++)
+		;
+	j = build_zonelists_node(NODE_DATA(node), zonelist, j,
+							MAX_NR_ZONES - 1);
+	zonelist->zones[j] = NULL;
 }
 
 /*
@@ -2096,15 +2084,12 @@ static void build_zonelists_in_node_order(pg_data_t *pgdat, int node)
  */
 static void build_thisnode_zonelists(pg_data_t *pgdat)
 {
-	enum zone_type i;
 	int j;
 	struct zonelist *zonelist;
 
-	for (i = 0; i < MAX_NR_ZONES; i++) {
-		zonelist = pgdat->node_zonelists + MAX_NR_ZONES + i;
-		j = build_zonelists_node(pgdat, zonelist, 0, i);
-		zonelist->zones[j] = NULL;
-	}
+	zonelist = &pgdat->node_zonelists[1];
+	j = build_zonelists_node(pgdat, zonelist, 0, MAX_NR_ZONES - 1);
+	zonelist->zones[j] = NULL;
 }
 
 /*
@@ -2117,27 +2102,24 @@ static int node_order[MAX_NUMNODES];
 
 static void build_zonelists_in_zone_order(pg_data_t *pgdat, int nr_nodes)
 {
-	enum zone_type i;
 	int pos, j, node;
 	int zone_type;		/* needs to be signed */
 	struct zone *z;
 	struct zonelist *zonelist;
 
-	for (i = 0; i < MAX_NR_ZONES; i++) {
-		zonelist = pgdat->node_zonelists + i;
-		pos = 0;
-		for (zone_type = i; zone_type >= 0; zone_type--) {
-			for (j = 0; j < nr_nodes; j++) {
-				node = node_order[j];
-				z = &NODE_DATA(node)->node_zones[zone_type];
-				if (populated_zone(z)) {
-					zonelist->zones[pos++] = z;
-					check_highest_zone(zone_type);
-				}
+	zonelist = &pgdat->node_zonelists[0];
+	pos = 0;
+	for (zone_type = MAX_NR_ZONES - 1; zone_type >= 0; zone_type--) {
+		for (j = 0; j < nr_nodes; j++) {
+			node = node_order[j];
+			z = &NODE_DATA(node)->node_zones[zone_type];
+			if (populated_zone(z)) {
+				zonelist->zones[pos++] = z;
+				check_highest_zone(zone_type);
 			}
 		}
-		zonelist->zones[pos] = NULL;
 	}
+	zonelist->zones[pos] = NULL;
 }
 
 static int default_zonelist_order(void)
@@ -2264,19 +2246,15 @@ static void build_zonelists(pg_data_t *pgdat)
 /* Construct the zonelist performance cache - see further mmzone.h */
 static void build_zonelist_cache(pg_data_t *pgdat)
 {
-	int i;
-
-	for (i = 0; i < MAX_NR_ZONES; i++) {
-		struct zonelist *zonelist;
-		struct zonelist_cache *zlc;
-		struct zone **z;
+	struct zonelist *zonelist;
+	struct zonelist_cache *zlc;
+	struct zone **z;
 
-		zonelist = pgdat->node_zonelists + i;
-		zonelist->zlcache_ptr = zlc = &zonelist->zlcache;
-		bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST);
-		for (z = zonelist->zones; *z; z++)
-			zlc->z_to_n[z - zonelist->zones] = zone_to_nid(*z);
-	}
+	zonelist = &pgdat->node_zonelists[0];
+	zonelist->zlcache_ptr = zlc = &zonelist->zlcache;
+	bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST);
+	for (z = zonelist->zones; *z; z++)
+		zlc->z_to_n[z - zonelist->zones] = zone_to_nid(*z);
 }
 
 
@@ -2290,45 +2268,43 @@ static void set_zonelist_order(void)
 static void build_zonelists(pg_data_t *pgdat)
 {
 	int node, local_node;
-	enum zone_type i,j;
+	enum zone_type j;
+	struct zonelist *zonelist;
 
 	local_node = pgdat->node_id;
-	for (i = 0; i < MAX_NR_ZONES; i++) {
-		struct zonelist *zonelist;
-
-		zonelist = pgdat->node_zonelists + i;
 
-		j = build_zonelists_node(pgdat, zonelist, 0, i);
-		/*
-		 * Now we build the zonelist so that it contains the zones
-		 * of all the other nodes.
-		 * We don't want to pressure a particular node, so when
-		 * building the zones for node N, we make sure that the
-		 * zones coming right after the local ones are those from
-		 * node N+1 (modulo N)
-		 */
-		for (node = local_node + 1; node < MAX_NUMNODES; node++) {
-			if (!node_online(node))
-				continue;
-			j = build_zonelists_node(NODE_DATA(node), zonelist, j, i);
-		}
-		for (node = 0; node < local_node; node++) {
-			if (!node_online(node))
-				continue;
-			j = build_zonelists_node(NODE_DATA(node), zonelist, j, i);
-		}
-
-		zonelist->zones[j] = NULL;
+	zonelist = &pgdat->node_zonelists[0];
+	j = build_zonelists_node(pgdat, zonelist, 0, MAX_NR_ZONES - 1);
+
+	/*
+	 * Now we build the zonelist so that it contains the zones
+	 * of all the other nodes.
+	 * We don't want to pressure a particular node, so when
+	 * building the zones for node N, we make sure that the
+	 * zones coming right after the local ones are those from
+	 * node N+1 (modulo N)
+	 */
+	for (node = local_node + 1; node < MAX_NUMNODES; node++) {
+		if (!node_online(node))
+			continue;
+		j = build_zonelists_node(NODE_DATA(node), zonelist, j,
+							MAX_NR_ZONES - 1);
 	}
+	for (node = 0; node < local_node; node++) {
+		if (!node_online(node))
+			continue;
+		j = build_zonelists_node(NODE_DATA(node), zonelist, j,
+							MAX_NR_ZONES - 1);
+	}
+
+	zonelist->zones[j] = NULL;
 }
 
 /* non-NUMA variant of zonelist performance cache - just NULL zlcache_ptr */
 static void build_zonelist_cache(pg_data_t *pgdat)
 {
-	int i;
-
-	for (i = 0; i < MAX_NR_ZONES; i++)
-		pgdat->node_zonelists[i].zlcache_ptr = NULL;
+	pgdat->node_zonelists[0].zlcache_ptr = NULL;
+	pgdat->node_zonelists[1].zlcache_ptr = NULL;
 }
 
 #endif /* CONFIG_NUMA */
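
To make the net effect concrete, here is a sketch (not from the patch) of what
the two zonelists might hold on a hypothetical two-node machine whose nodes
each have DMA and Normal zones, assuming node-ordered fallback:

	/*
	 * Hypothetical node 0 after build_zonelists()/build_thisnode_zonelists():
	 *
	 *   node_zonelists[0].zones: N0_Normal, N0_DMA, N1_Normal, N1_DMA, NULL
	 *   node_zonelists[1].zones: N0_Normal, N0_DMA, NULL	(GFP_THISNODE)
	 *
	 * get_page_from_freelist() now clips this single fallback list with
	 * high_zoneidx instead of picking one of MAX_NR_ZONES per-type lists.
	 */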
diff --git a/mm/slab.c b/mm/slab.c
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3243,6 +3243,8 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
 	struct zonelist *zonelist;
 	gfp_t local_flags;
 	struct zone **z;
+	struct zone *zone;
+	enum zone_type high_zoneidx = gfp_zone(flags);
 	void *obj = NULL;
 	int nid;
 
@@ -3257,10 +3259,10 @@ retry:
 	 * Look through allowed nodes for objects available
 	 * from existing per node queues.
	 */
-	for (z = zonelist->zones; *z && !obj; z++) {
-		nid = zone_to_nid(*z);
+	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
+		nid = zone_to_nid(zone);
 
-		if (cpuset_zone_allowed_hardwall(*z, flags) &&
+		if (cpuset_zone_allowed_hardwall(zone, flags) &&
 			cache->nodelists[nid] &&
 			cache->nodelists[nid]->free_objects)
 				obj = ____cache_alloc_node(cache,
diff --git a/mm/slub.c b/mm/slub.c
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1285,6 +1285,8 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
 #ifdef CONFIG_NUMA
 	struct zonelist *zonelist;
 	struct zone **z;
+	struct zone *zone;
+	enum zone_type high_zoneidx = gfp_zone(flags);
 	struct page *page;
 
 	/*
@@ -1310,12 +1312,12 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
 		return NULL;
 
 	zonelist = node_zonelist(slab_node(current->mempolicy), flags);
-	for (z = zonelist->zones; *z; z++) {
+	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
 		struct kmem_cache_node *n;
 
-		n = get_node(s, zone_to_nid(*z));
+		n = get_node(s, zone_to_nid(zone));
 
-		if (n && cpuset_zone_allowed_hardwall(*z, flags) &&
+		if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
 				n->nr_partial > MIN_PARTIAL) {
 			page = get_partial_node(n);
 			if (page)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index ef8551e0d2d0..0515b8f44894 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1249,15 +1249,13 @@ static unsigned long shrink_zone(int priority, struct zone *zone,
 static unsigned long shrink_zones(int priority, struct zonelist *zonelist,
 					struct scan_control *sc)
 {
+	enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask);
 	unsigned long nr_reclaimed = 0;
-	struct zone **zones = zonelist->zones;
-	int i;
-
+	struct zone **z;
+	struct zone *zone;
 
 	sc->all_unreclaimable = 1;
-	for (i = 0; zones[i] != NULL; i++) {
-		struct zone *zone = zones[i];
-
+	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
 		if (!populated_zone(zone))
 			continue;
 		/*
@@ -1311,8 +1309,9 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 	unsigned long nr_reclaimed = 0;
 	struct reclaim_state *reclaim_state = current->reclaim_state;
 	unsigned long lru_pages = 0;
-	struct zone **zones = zonelist->zones;
-	int i;
+	struct zone **z;
+	struct zone *zone;
+	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
 
 	if (scan_global_lru(sc))
 		count_vm_event(ALLOCSTALL);
@@ -1320,8 +1319,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 	 * mem_cgroup will not do shrink_slab.
 	 */
 	if (scan_global_lru(sc)) {
-		for (i = 0; zones[i] != NULL; i++) {
-			struct zone *zone = zones[i];
+		for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
 
 			if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
 				continue;
@@ -1385,8 +1383,7 @@ out:
 		priority = 0;
 
 	if (scan_global_lru(sc)) {
-		for (i = 0; zones[i] != NULL; i++) {
-			struct zone *zone = zones[i];
+		for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
 
 			if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
 				continue;