author		Mel Gorman <mel@csn.ul.ie>	2008-04-28 05:12:16 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2008-04-28 11:58:18 -0400
commit		54a6eb5c4765aa573a030ceeba2c14e3d2ea5706 (patch)
tree		547176a090beb787722a153cf2b8b942dc0e68db
parent		18ea7e710d2452fa726814a406779188028cf1bf (diff)
mm: use two zonelist that are filtered by GFP mask
Currently a node has two sets of zonelists, one for each zone type in the system and a second set for GFP_THISNODE allocations. Based on the zones allowed by a gfp mask, one of these zonelists is selected. All of these zonelists consume memory and occupy cache lines.

This patch replaces the multiple zonelists per node with two zonelists. The first contains all populated zones in the system, ordered by distance, for fallback allocations when the target/preferred node has no free pages. The second contains all populated zones in the node suitable for GFP_THISNODE allocations.

An iterator macro called for_each_zone_zonelist() is introduced that iterates through each zone allowed by the GFP flags in the selected zonelist.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
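As a rough usage sketch (a hypothetical helper, not part of this patch; the function name and the NR_FREE_PAGES accounting are purely illustrative), a caller could combine node_zonelist(), gfp_zone() and the new iterator like this:

	#include <linux/gfp.h>
	#include <linux/mmzone.h>
	#include <linux/vmstat.h>

	/*
	 * Hypothetical example: sum the free pages in every zone that an
	 * allocation with 'gfp_mask' starting from node 'nid' may fall
	 * back to, using the new filtered-zonelist API.
	 */
	static unsigned long nr_free_fallback_pages(int nid, gfp_t gfp_mask)
	{
		struct zonelist *zonelist = node_zonelist(nid, gfp_mask);
		enum zone_type high_zoneidx = gfp_zone(gfp_mask);
		struct zone **z;
		struct zone *zone;
		unsigned long free = 0;

		/* Visits only zones at or below high_zoneidx, in fallback order */
		for_each_zone_zonelist(zone, z, zonelist, high_zoneidx)
			free += zone_page_state(zone, NR_FREE_PAGES);

		return free;
	}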
-rw-r--r--	arch/parisc/mm/init.c	11
-rw-r--r--	fs/buffer.c	10
-rw-r--r--	include/linux/gfp.h	13
-rw-r--r--	include/linux/mmzone.h	65
-rw-r--r--	mm/hugetlb.c	8
-rw-r--r--	mm/oom_kill.c	8
-rw-r--r--	mm/page_alloc.c	170
-rw-r--r--	mm/slab.c	8
-rw-r--r--	mm/slub.c	8
-rw-r--r--	mm/vmscan.c	21
10 files changed, 168 insertions, 154 deletions
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index eb80f5e33d7d..9bb6136d77c2 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -603,15 +603,18 @@ void show_mem(void)
 #ifdef CONFIG_DISCONTIGMEM
 	{
 		struct zonelist *zl;
-		int i, j, k;
+		int i, j;
 
 		for (i = 0; i < npmem_ranges; i++) {
+			zl = node_zonelist(i, 0);
 			for (j = 0; j < MAX_NR_ZONES; j++) {
-				zl = NODE_DATA(i)->node_zonelists + j;
+				struct zone **z;
+				struct zone *zone;
 
 				printk("Zone list for zone %d on node %d: ", j, i);
-				for (k = 0; zl->zones[k] != NULL; k++)
-					printk("[%d/%s] ", zone_to_nid(zl->zones[k]), zl->zones[k]->name);
+				for_each_zone_zonelist(zone, z, zl, j)
+					printk("[%d/%s] ", zone_to_nid(zone),
+							zone->name);
 				printk("\n");
 			}
 		}
diff --git a/fs/buffer.c b/fs/buffer.c
index 71358499bc57..9b5434a80479 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -360,16 +360,18 @@ void invalidate_bdev(struct block_device *bdev)
  */
 static void free_more_memory(void)
 {
-	struct zonelist *zonelist;
+	struct zone **zones;
 	int nid;
 
 	wakeup_pdflush(1024);
 	yield();
 
 	for_each_online_node(nid) {
-		zonelist = node_zonelist(nid, GFP_NOFS);
-		if (zonelist->zones[0])
-			try_to_free_pages(zonelist, 0, GFP_NOFS);
+		zones = first_zones_zonelist(node_zonelist(nid, GFP_NOFS),
+						gfp_zone(GFP_NOFS));
+		if (*zones)
+			try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
+						GFP_NOFS);
 	}
 }
 
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index e865d51f1c74..e1c6064cb6c7 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -151,17 +151,26 @@ static inline enum zone_type gfp_zone(gfp_t flags)
  * virtual kernel addresses to the allocated page(s).
  */
 
+static inline int gfp_zonelist(gfp_t flags)
+{
+	if (NUMA_BUILD && unlikely(flags & __GFP_THISNODE))
+		return 1;
+
+	return 0;
+}
+
 /*
  * We get the zone list from the current node and the gfp_mask.
  * This zone list contains a maximum of MAXNODES*MAX_NR_ZONES zones.
- * There are many zonelists per node, two for each active zone.
+ * There are two zonelists per node, one for all zones with memory and
+ * one containing just zones from the node the zonelist belongs to.
  *
  * For the normal case of non-DISCONTIGMEM systems the NODE_DATA() gets
  * optimized to &contig_page_data at compile-time.
  */
 static inline struct zonelist *node_zonelist(int nid, gfp_t flags)
 {
-	return NODE_DATA(nid)->node_zonelists + gfp_zone(flags);
+	return NODE_DATA(nid)->node_zonelists + gfp_zonelist(flags);
 }
 
 #ifndef HAVE_ARCH_FREE_PAGE
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 451eaa13bc28..d5c33a0b89e9 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -393,10 +393,10 @@ static inline int zone_is_oom_locked(const struct zone *zone)
  * The NUMA zonelists are doubled becausse we need zonelists that restrict the
  * allocations to a single node for GFP_THISNODE.
  *
- * [0 .. MAX_NR_ZONES -1] 		: Zonelists with fallback
- * [MAZ_NR_ZONES ... MAZ_ZONELISTS -1]	: No fallback (GFP_THISNODE)
+ * [0]	: Zonelist with fallback
+ * [1]	: No fallback (GFP_THISNODE)
  */
-#define MAX_ZONELISTS (2 * MAX_NR_ZONES)
+#define MAX_ZONELISTS 2
 
 
 /*
@@ -464,7 +464,7 @@ struct zonelist_cache {
 	unsigned long last_full_zap;	/* when last zap'd (jiffies) */
 };
 #else
-#define MAX_ZONELISTS MAX_NR_ZONES
+#define MAX_ZONELISTS 1
 struct zonelist_cache;
 #endif
 
@@ -486,24 +486,6 @@ struct zonelist {
 #endif
 };
 
-#ifdef CONFIG_NUMA
-/*
- * Only custom zonelists like MPOL_BIND need to be filtered as part of
- * policies. As described in the comment for struct zonelist_cache, these
- * zonelists will not have a zlcache so zlcache_ptr will not be set. Use
- * that to determine if the zonelists needs to be filtered or not.
- */
-static inline int alloc_should_filter_zonelist(struct zonelist *zonelist)
-{
-	return !zonelist->zlcache_ptr;
-}
-#else
-static inline int alloc_should_filter_zonelist(struct zonelist *zonelist)
-{
-	return 0;
-}
-#endif /* CONFIG_NUMA */
-
 #ifdef CONFIG_ARCH_POPULATES_NODE_MAP
 struct node_active_region {
 	unsigned long start_pfn;
@@ -731,6 +713,45 @@ extern struct zone *next_zone(struct zone *zone);
 		zone;					\
 		zone = next_zone(zone))
 
+/* Returns the first zone at or below highest_zoneidx in a zonelist */
+static inline struct zone **first_zones_zonelist(struct zonelist *zonelist,
+					enum zone_type highest_zoneidx)
+{
+	struct zone **z;
+
+	/* Find the first suitable zone to use for the allocation */
+	z = zonelist->zones;
+	while (*z && zone_idx(*z) > highest_zoneidx)
+		z++;
+
+	return z;
+}
+
+/* Returns the next zone at or below highest_zoneidx in a zonelist */
+static inline struct zone **next_zones_zonelist(struct zone **z,
+					enum zone_type highest_zoneidx)
+{
+	/* Find the next suitable zone to use for the allocation */
+	while (*z && zone_idx(*z) > highest_zoneidx)
+		z++;
+
+	return z;
+}
+
+/**
+ * for_each_zone_zonelist - helper macro to iterate over valid zones in a zonelist at or below a given zone index
+ * @zone - The current zone in the iterator
+ * @z - The current pointer within zonelist->zones being iterated
+ * @zlist - The zonelist being iterated
+ * @highidx - The zone index of the highest zone to return
+ *
+ * This iterator iterates though all zones at or below a given zone index.
+ */
+#define for_each_zone_zonelist(zone, z, zlist, highidx) \
+	for (z = first_zones_zonelist(zlist, highidx), zone = *z++;	\
+		zone;							\
+		z = next_zones_zonelist(z, highidx), zone = *z++)
+
 #ifdef CONFIG_SPARSEMEM
 #include <asm/sparsemem.h>
 #endif
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 51c9e2c01640..ddd141cad77f 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -97,11 +97,11 @@ static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma,
 	struct mempolicy *mpol;
 	struct zonelist *zonelist = huge_zonelist(vma, address,
 					htlb_alloc_mask, &mpol);
-	struct zone **z;
+	struct zone *zone, **z;
 
-	for (z = zonelist->zones; *z; z++) {
-		nid = zone_to_nid(*z);
-		if (cpuset_zone_allowed_softwall(*z, htlb_alloc_mask) &&
+	for_each_zone_zonelist(zone, z, zonelist, MAX_NR_ZONES - 1) {
+		nid = zone_to_nid(zone);
+		if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask) &&
 		    !list_empty(&hugepage_freelists[nid])) {
 			page = list_entry(hugepage_freelists[nid].next,
 					struct page, lru);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index beb592fe9389..2c93502cfcb4 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -175,12 +175,14 @@ static inline enum oom_constraint constrained_alloc(struct zonelist *zonelist,
 						    gfp_t gfp_mask)
 {
 #ifdef CONFIG_NUMA
+	struct zone *zone;
 	struct zone **z;
+	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
 	nodemask_t nodes = node_states[N_HIGH_MEMORY];
 
-	for (z = zonelist->zones; *z; z++)
-		if (cpuset_zone_allowed_softwall(*z, gfp_mask))
-			node_clear(zone_to_nid(*z), nodes);
+	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx)
+		if (cpuset_zone_allowed_softwall(zone, gfp_mask))
+			node_clear(zone_to_nid(zone), nodes);
 		else
 			return CONSTRAINT_CPUSET;
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 187efd47a446..4ccb8651cf22 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1378,42 +1378,29 @@ static void zlc_mark_zone_full(struct zonelist *zonelist, struct zone **z)
  */
 static struct page *
 get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
-		struct zonelist *zonelist, int alloc_flags)
+		struct zonelist *zonelist, int high_zoneidx, int alloc_flags)
 {
 	struct zone **z;
 	struct page *page = NULL;
-	int classzone_idx = zone_idx(zonelist->zones[0]);
+	int classzone_idx;
 	struct zone *zone, *preferred_zone;
 	nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */
 	int zlc_active = 0;		/* set if using zonelist_cache */
 	int did_zlc_setup = 0;		/* just call zlc_setup() one time */
-	enum zone_type highest_zoneidx = -1; /* Gets set for policy zonelists */
+
+	z = first_zones_zonelist(zonelist, high_zoneidx);
+	classzone_idx = zone_idx(*z);
+	preferred_zone = *z;
 
 zonelist_scan:
 	/*
 	 * Scan zonelist, looking for a zone with enough free.
 	 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
 	 */
-	z = zonelist->zones;
-	preferred_zone = *z;
-
-	do {
-		/*
-		 * In NUMA, this could be a policy zonelist which contains
-		 * zones that may not be allowed by the current gfp_mask.
-		 * Check the zone is allowed by the current flags
-		 */
-		if (unlikely(alloc_should_filter_zonelist(zonelist))) {
-			if (highest_zoneidx == -1)
-				highest_zoneidx = gfp_zone(gfp_mask);
-			if (zone_idx(*z) > highest_zoneidx)
-				continue;
-		}
-
+	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
 		if (NUMA_BUILD && zlc_active &&
 			!zlc_zone_worth_trying(zonelist, z, allowednodes))
 				continue;
-		zone = *z;
 		if ((alloc_flags & ALLOC_CPUSET) &&
 			!cpuset_zone_allowed_softwall(zone, gfp_mask))
 				goto try_next_zone;
@@ -1447,7 +1434,7 @@ try_next_zone:
 			zlc_active = 1;
 			did_zlc_setup = 1;
 		}
-	} while (*(++z) != NULL);
+	}
 
 	if (unlikely(NUMA_BUILD && page == NULL && zlc_active)) {
 		/* Disable zlc cache for second zonelist scan */
@@ -1465,6 +1452,7 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order,
 		struct zonelist *zonelist)
 {
 	const gfp_t wait = gfp_mask & __GFP_WAIT;
+	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
 	struct zone **z;
 	struct page *page;
 	struct reclaim_state reclaim_state;
@@ -1490,7 +1478,7 @@ restart:
 	}
 
 	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
-			zonelist, ALLOC_WMARK_LOW|ALLOC_CPUSET);
+			zonelist, high_zoneidx, ALLOC_WMARK_LOW|ALLOC_CPUSET);
 	if (page)
 		goto got_pg;
 
@@ -1534,7 +1522,8 @@ restart:
 	 * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
 	 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
 	 */
-	page = get_page_from_freelist(gfp_mask, order, zonelist, alloc_flags);
+	page = get_page_from_freelist(gfp_mask, order, zonelist,
+						high_zoneidx, alloc_flags);
 	if (page)
 		goto got_pg;
 
@@ -1547,7 +1536,7 @@ rebalance:
 nofail_alloc:
 			/* go through the zonelist yet again, ignoring mins */
 			page = get_page_from_freelist(gfp_mask, order,
-				zonelist, ALLOC_NO_WATERMARKS);
+				zonelist, high_zoneidx, ALLOC_NO_WATERMARKS);
 			if (page)
 				goto got_pg;
 			if (gfp_mask & __GFP_NOFAIL) {
@@ -1582,7 +1571,7 @@ nofail_alloc:
 
 	if (likely(did_some_progress)) {
 		page = get_page_from_freelist(gfp_mask, order,
-						zonelist, alloc_flags);
+					zonelist, high_zoneidx, alloc_flags);
 		if (page)
 			goto got_pg;
 	} else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
@@ -1598,7 +1587,7 @@ nofail_alloc:
 		 * under heavy pressure.
 		 */
 		page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
-				zonelist, ALLOC_WMARK_HIGH|ALLOC_CPUSET);
+			zonelist, high_zoneidx, ALLOC_WMARK_HIGH|ALLOC_CPUSET);
 		if (page) {
 			clear_zonelist_oom(zonelist);
 			goto got_pg;
@@ -1713,14 +1702,15 @@ EXPORT_SYMBOL(free_pages);
 
 static unsigned int nr_free_zone_pages(int offset)
 {
+	struct zone **z;
+	struct zone *zone;
+
 	/* Just pick one node, since fallback list is circular */
 	unsigned int sum = 0;
 
 	struct zonelist *zonelist = node_zonelist(numa_node_id(), GFP_KERNEL);
-	struct zone **zonep = zonelist->zones;
-	struct zone *zone;
 
-	for (zone = *zonep++; zone; zone = *zonep++) {
+	for_each_zone_zonelist(zone, z, zonelist, offset) {
 		unsigned long size = zone->present_pages;
 		unsigned long high = zone->pages_high;
 		if (size > high)
@@ -2078,17 +2068,15 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask)
  */
 static void build_zonelists_in_node_order(pg_data_t *pgdat, int node)
 {
-	enum zone_type i;
 	int j;
 	struct zonelist *zonelist;
 
-	for (i = 0; i < MAX_NR_ZONES; i++) {
-		zonelist = pgdat->node_zonelists + i;
-		for (j = 0; zonelist->zones[j] != NULL; j++)
-			;
-		j = build_zonelists_node(NODE_DATA(node), zonelist, j, i);
-		zonelist->zones[j] = NULL;
-	}
+	zonelist = &pgdat->node_zonelists[0];
+	for (j = 0; zonelist->zones[j] != NULL; j++)
+		;
+	j = build_zonelists_node(NODE_DATA(node), zonelist, j,
+							MAX_NR_ZONES - 1);
+	zonelist->zones[j] = NULL;
 }
 
 /*
@@ -2096,15 +2084,12 @@ static void build_zonelists_in_node_order(pg_data_t *pgdat, int node)
  */
 static void build_thisnode_zonelists(pg_data_t *pgdat)
 {
-	enum zone_type i;
 	int j;
 	struct zonelist *zonelist;
 
-	for (i = 0; i < MAX_NR_ZONES; i++) {
-		zonelist = pgdat->node_zonelists + MAX_NR_ZONES + i;
-		j = build_zonelists_node(pgdat, zonelist, 0, i);
-		zonelist->zones[j] = NULL;
-	}
+	zonelist = &pgdat->node_zonelists[1];
+	j = build_zonelists_node(pgdat, zonelist, 0, MAX_NR_ZONES - 1);
+	zonelist->zones[j] = NULL;
 }
 
 /*
@@ -2117,27 +2102,24 @@ static int node_order[MAX_NUMNODES];
 
 static void build_zonelists_in_zone_order(pg_data_t *pgdat, int nr_nodes)
 {
-	enum zone_type i;
 	int pos, j, node;
 	int zone_type;		/* needs to be signed */
 	struct zone *z;
 	struct zonelist *zonelist;
 
-	for (i = 0; i < MAX_NR_ZONES; i++) {
-		zonelist = pgdat->node_zonelists + i;
-		pos = 0;
-		for (zone_type = i; zone_type >= 0; zone_type--) {
-			for (j = 0; j < nr_nodes; j++) {
-				node = node_order[j];
-				z = &NODE_DATA(node)->node_zones[zone_type];
-				if (populated_zone(z)) {
-					zonelist->zones[pos++] = z;
-					check_highest_zone(zone_type);
-				}
+	zonelist = &pgdat->node_zonelists[0];
+	pos = 0;
+	for (zone_type = MAX_NR_ZONES - 1; zone_type >= 0; zone_type--) {
+		for (j = 0; j < nr_nodes; j++) {
+			node = node_order[j];
+			z = &NODE_DATA(node)->node_zones[zone_type];
+			if (populated_zone(z)) {
+				zonelist->zones[pos++] = z;
+				check_highest_zone(zone_type);
 			}
 		}
-		zonelist->zones[pos] = NULL;
 	}
+	zonelist->zones[pos] = NULL;
 }
 
 static int default_zonelist_order(void)
@@ -2264,19 +2246,15 @@ static void build_zonelists(pg_data_t *pgdat)
 /* Construct the zonelist performance cache - see further mmzone.h */
 static void build_zonelist_cache(pg_data_t *pgdat)
 {
-	int i;
-
-	for (i = 0; i < MAX_NR_ZONES; i++) {
-		struct zonelist *zonelist;
-		struct zonelist_cache *zlc;
-		struct zone **z;
+	struct zonelist *zonelist;
+	struct zonelist_cache *zlc;
+	struct zone **z;
 
-		zonelist = pgdat->node_zonelists + i;
-		zonelist->zlcache_ptr = zlc = &zonelist->zlcache;
-		bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST);
-		for (z = zonelist->zones; *z; z++)
-			zlc->z_to_n[z - zonelist->zones] = zone_to_nid(*z);
-	}
+	zonelist = &pgdat->node_zonelists[0];
+	zonelist->zlcache_ptr = zlc = &zonelist->zlcache;
+	bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST);
+	for (z = zonelist->zones; *z; z++)
+		zlc->z_to_n[z - zonelist->zones] = zone_to_nid(*z);
 }
 
 
@@ -2290,45 +2268,43 @@ static void set_zonelist_order(void)
 static void build_zonelists(pg_data_t *pgdat)
 {
 	int node, local_node;
-	enum zone_type i,j;
+	enum zone_type j;
+	struct zonelist *zonelist;
 
 	local_node = pgdat->node_id;
-	for (i = 0; i < MAX_NR_ZONES; i++) {
-		struct zonelist *zonelist;
 
-		zonelist = pgdat->node_zonelists + i;
+	zonelist = &pgdat->node_zonelists[0];
+	j = build_zonelists_node(pgdat, zonelist, 0, MAX_NR_ZONES - 1);
 
-		j = build_zonelists_node(pgdat, zonelist, 0, i);
-		/*
-		 * Now we build the zonelist so that it contains the zones
-		 * of all the other nodes.
-		 * We don't want to pressure a particular node, so when
-		 * building the zones for node N, we make sure that the
-		 * zones coming right after the local ones are those from
-		 * node N+1 (modulo N)
-		 */
-		for (node = local_node + 1; node < MAX_NUMNODES; node++) {
-			if (!node_online(node))
-				continue;
-			j = build_zonelists_node(NODE_DATA(node), zonelist, j, i);
-		}
-		for (node = 0; node < local_node; node++) {
-			if (!node_online(node))
-				continue;
-			j = build_zonelists_node(NODE_DATA(node), zonelist, j, i);
-		}
-
-		zonelist->zones[j] = NULL;
+	/*
+	 * Now we build the zonelist so that it contains the zones
+	 * of all the other nodes.
+	 * We don't want to pressure a particular node, so when
+	 * building the zones for node N, we make sure that the
+	 * zones coming right after the local ones are those from
+	 * node N+1 (modulo N)
+	 */
+	for (node = local_node + 1; node < MAX_NUMNODES; node++) {
+		if (!node_online(node))
+			continue;
+		j = build_zonelists_node(NODE_DATA(node), zonelist, j,
+							MAX_NR_ZONES - 1);
 	}
+	for (node = 0; node < local_node; node++) {
+		if (!node_online(node))
+			continue;
+		j = build_zonelists_node(NODE_DATA(node), zonelist, j,
+							MAX_NR_ZONES - 1);
+	}
+
+	zonelist->zones[j] = NULL;
 }
 
 /* non-NUMA variant of zonelist performance cache - just NULL zlcache_ptr */
 static void build_zonelist_cache(pg_data_t *pgdat)
 {
-	int i;
-
-	for (i = 0; i < MAX_NR_ZONES; i++)
-		pgdat->node_zonelists[i].zlcache_ptr = NULL;
+	pgdat->node_zonelists[0].zlcache_ptr = NULL;
+	pgdat->node_zonelists[1].zlcache_ptr = NULL;
 }
 
 #endif /* CONFIG_NUMA */
diff --git a/mm/slab.c b/mm/slab.c
index 5488c54b1172..29851841da62 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3243,6 +3243,8 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
 	struct zonelist *zonelist;
 	gfp_t local_flags;
 	struct zone **z;
+	struct zone *zone;
+	enum zone_type high_zoneidx = gfp_zone(flags);
 	void *obj = NULL;
 	int nid;
 
@@ -3257,10 +3259,10 @@ retry:
 	 * Look through allowed nodes for objects available
 	 * from existing per node queues.
 	 */
-	for (z = zonelist->zones; *z && !obj; z++) {
-		nid = zone_to_nid(*z);
+	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
+		nid = zone_to_nid(zone);
 
-		if (cpuset_zone_allowed_hardwall(*z, flags) &&
+		if (cpuset_zone_allowed_hardwall(zone, flags) &&
 			cache->nodelists[nid] &&
 			cache->nodelists[nid]->free_objects)
 				obj = ____cache_alloc_node(cache,
diff --git a/mm/slub.c b/mm/slub.c
index 19ebbfb20689..80d20cc1c0f8 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1285,6 +1285,8 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
 #ifdef CONFIG_NUMA
 	struct zonelist *zonelist;
 	struct zone **z;
+	struct zone *zone;
+	enum zone_type high_zoneidx = gfp_zone(flags);
 	struct page *page;
 
 	/*
@@ -1310,12 +1312,12 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
 		return NULL;
 
 	zonelist = node_zonelist(slab_node(current->mempolicy), flags);
-	for (z = zonelist->zones; *z; z++) {
+	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
 		struct kmem_cache_node *n;
 
-		n = get_node(s, zone_to_nid(*z));
+		n = get_node(s, zone_to_nid(zone));
 
-		if (n && cpuset_zone_allowed_hardwall(*z, flags) &&
+		if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
 				n->nr_partial > MIN_PARTIAL) {
 			page = get_partial_node(n);
 			if (page)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index ef8551e0d2d0..0515b8f44894 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1249,15 +1249,13 @@ static unsigned long shrink_zone(int priority, struct zone *zone,
 static unsigned long shrink_zones(int priority, struct zonelist *zonelist,
 					struct scan_control *sc)
 {
+	enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask);
 	unsigned long nr_reclaimed = 0;
-	struct zone **zones = zonelist->zones;
-	int i;
-
+	struct zone **z;
+	struct zone *zone;
 
 	sc->all_unreclaimable = 1;
-	for (i = 0; zones[i] != NULL; i++) {
-		struct zone *zone = zones[i];
-
+	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
 		if (!populated_zone(zone))
 			continue;
 		/*
@@ -1311,8 +1309,9 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 	unsigned long nr_reclaimed = 0;
 	struct reclaim_state *reclaim_state = current->reclaim_state;
 	unsigned long lru_pages = 0;
-	struct zone **zones = zonelist->zones;
-	int i;
+	struct zone **z;
+	struct zone *zone;
+	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
 
 	if (scan_global_lru(sc))
 		count_vm_event(ALLOCSTALL);
@@ -1320,8 +1319,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 	 * mem_cgroup will not do shrink_slab.
 	 */
 	if (scan_global_lru(sc)) {
-		for (i = 0; zones[i] != NULL; i++) {
-			struct zone *zone = zones[i];
+		for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
 
 			if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
 				continue;
@@ -1385,8 +1383,7 @@ out:
 		priority = 0;
 
 	if (scan_global_lru(sc)) {
-		for (i = 0; zones[i] != NULL; i++) {
-			struct zone *zone = zones[i];
+		for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
 
 			if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
 				continue;