author     Mel Gorman <mel@csn.ul.ie>  2008-04-28 05:12:17 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2008-04-28 11:58:18 -0400
commit     dd1a239f6f2d4d3eedd318583ec319aa145b324c
tree       aff4224c96b5e2e67588c3946858a724863eeaf9
parent     54a6eb5c4765aa573a030ceeba2c14e3d2ea5706
mm: have zonelist contain structs with both a zone pointer and zone_idx
Filtering zonelists requires very frequent use of zone_idx(). This is costly
as it involves a lookup of another structure and a subtraction operation. As
the zone_idx is often required, it should be quickly accessible. The node idx
could also be stored here if it was found that accessing zone->node is
significant, which may be the case on workloads where nodemasks are heavily
used.

This patch introduces a struct zoneref to store a zone pointer and a zone
index. The zonelist then consists of an array of these struct zonerefs which
are looked up as necessary. Helpers are given for accessing the zone index as
well as the node index.

[kamezawa.hiroyu@jp.fujitsu.com: Suggested struct zoneref instead of embedding information in pointers]
[hugh@veritas.com: mm-have-zonelist: fix memcg ooms]
[hugh@veritas.com: just return do_try_to_free_pages]
[hugh@veritas.com: do_try_to_free_pages gfp_mask redundant]
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Christoph Lameter <clameter@sgi.com>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
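The cost being avoided is easiest to see in miniature. Below is a small
standalone sketch of the idea that compiles in userspace; only struct zoneref
and the shape of zoneref_set_zone() mirror the patch, while the stub
struct zone, the explicit idx argument and the two-entry list are hypothetical
scaffolding (the kernel helper computes zone_idx(zone) itself):

/* Standalone sketch: cache the zone index next to the zone pointer so
 * readers filtering a zonelist never dereference struct zone. */
#include <stdio.h>

struct zone {
	int node;			/* owning node id (stub) */
};

struct zoneref {			/* mirrors include/linux/mmzone.h */
	struct zone *zone;		/* pointer to actual zone */
	int zone_idx;			/* cached zone_idx(zone) */
};

static void zoneref_set_zone(struct zone *zone, int idx, struct zoneref *ref)
{
	ref->zone = zone;
	ref->zone_idx = idx;		/* kernel version derives this itself */
}

int main(void)
{
	struct zone normal = { .node = 0 }, dma = { .node = 0 };
	struct zoneref refs[3];

	zoneref_set_zone(&normal, 2, &refs[0]);	/* e.g. ZONE_NORMAL */
	zoneref_set_zone(&dma, 0, &refs[1]);	/* e.g. ZONE_DMA */
	refs[2].zone = NULL;			/* NULL-delimited, as before */
	refs[2].zone_idx = 0;

	for (struct zoneref *z = refs; z->zone; z++)	/* no pointer chasing */
		printf("zone_idx=%d node=%d\n", z->zone_idx, z->zone->node);
	return 0;
}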
-rw-r--r--  arch/parisc/mm/init.c     2
-rw-r--r--  fs/buffer.c               6
-rw-r--r--  include/linux/mmzone.h   64
-rw-r--r--  include/linux/oom.h       4
-rw-r--r--  kernel/cpuset.c           4
-rw-r--r--  mm/hugetlb.c              3
-rw-r--r--  mm/mempolicy.c           36
-rw-r--r--  mm/oom_kill.c            45
-rw-r--r--  mm/page_alloc.c          68
-rw-r--r--  mm/slab.c                 2
-rw-r--r--  mm/slub.c                 2
-rw-r--r--  mm/vmscan.c              22
12 files changed, 158 insertions, 100 deletions
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index 9bb6136d77c2..1f012843150f 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -608,7 +608,7 @@ void show_mem(void)
 	for (i = 0; i < npmem_ranges; i++) {
 		zl = node_zonelist(i);
 		for (j = 0; j < MAX_NR_ZONES; j++) {
-			struct zone **z;
+			struct zoneref *z;
 			struct zone *zone;
 
 			printk("Zone list for zone %d on node %d: ", j, i);
diff --git a/fs/buffer.c b/fs/buffer.c
index 9b5434a80479..ac84cd13075d 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -360,16 +360,16 @@ void invalidate_bdev(struct block_device *bdev)
  */
 static void free_more_memory(void)
 {
-	struct zone **zones;
+	struct zoneref *zrefs;
 	int nid;
 
 	wakeup_pdflush(1024);
 	yield();
 
 	for_each_online_node(nid) {
-		zones = first_zones_zonelist(node_zonelist(nid, GFP_NOFS),
+		zrefs = first_zones_zonelist(node_zonelist(nid, GFP_NOFS),
 						gfp_zone(GFP_NOFS));
-		if (*zones)
+		if (zrefs->zone)
 			try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
 						GFP_NOFS);
 	}
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index d5c33a0b89e9..d34b4c290017 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -469,6 +469,15 @@ struct zonelist_cache;
 #endif
 
 /*
+ * This struct contains information about a zone in a zonelist. It is stored
+ * here to avoid dereferences into large structures and lookups of tables
+ */
+struct zoneref {
+	struct zone *zone;	/* Pointer to actual zone */
+	int zone_idx;		/* zone_idx(zoneref->zone) */
+};
+
+/*
  * One allocation request operates on a zonelist. A zonelist
  * is a list of zones, the first one is the 'goal' of the
  * allocation, the other zones are fallback zones, in decreasing
@@ -476,11 +485,18 @@ struct zonelist_cache;
  *
  * If zlcache_ptr is not NULL, then it is just the address of zlcache,
  * as explained above. If zlcache_ptr is NULL, there is no zlcache.
+ *
+ * To speed the reading of the zonelist, the zonerefs contain the zone index
+ * of the entry being read. Helper functions to access information given
+ * a struct zoneref are
+ *
+ * zonelist_zone()	- Return the struct zone * for an entry in _zonerefs
+ * zonelist_zone_idx()	- Return the index of the zone for an entry
+ * zonelist_node_idx()	- Return the index of the node for an entry
  */
-
 struct zonelist {
 	struct zonelist_cache *zlcache_ptr;	// NULL or &zlcache
-	struct zone *zones[MAX_ZONES_PER_ZONELIST + 1];	// NULL delimited
+	struct zoneref _zonerefs[MAX_ZONES_PER_ZONELIST + 1];
 #ifdef CONFIG_NUMA
 	struct zonelist_cache zlcache;	// optional ...
 #endif
@@ -713,26 +729,52 @@ extern struct zone *next_zone(struct zone *zone);
 		zone;					\
 		zone = next_zone(zone))
 
+static inline struct zone *zonelist_zone(struct zoneref *zoneref)
+{
+	return zoneref->zone;
+}
+
+static inline int zonelist_zone_idx(struct zoneref *zoneref)
+{
+	return zoneref->zone_idx;
+}
+
+static inline int zonelist_node_idx(struct zoneref *zoneref)
+{
+#ifdef CONFIG_NUMA
+	/* zone_to_nid not available in this context */
+	return zoneref->zone->node;
+#else
+	return 0;
+#endif /* CONFIG_NUMA */
+}
+
+static inline void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref)
+{
+	zoneref->zone = zone;
+	zoneref->zone_idx = zone_idx(zone);
+}
+
 /* Returns the first zone at or below highest_zoneidx in a zonelist */
-static inline struct zone **first_zones_zonelist(struct zonelist *zonelist,
+static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist,
 					enum zone_type highest_zoneidx)
 {
-	struct zone **z;
+	struct zoneref *z;
 
 	/* Find the first suitable zone to use for the allocation */
-	z = zonelist->zones;
-	while (*z && zone_idx(*z) > highest_zoneidx)
+	z = zonelist->_zonerefs;
+	while (zonelist_zone_idx(z) > highest_zoneidx)
 		z++;
 
 	return z;
 }
 
 /* Returns the next zone at or below highest_zoneidx in a zonelist */
-static inline struct zone **next_zones_zonelist(struct zone **z,
+static inline struct zoneref *next_zones_zonelist(struct zoneref *z,
 					enum zone_type highest_zoneidx)
 {
 	/* Find the next suitable zone to use for the allocation */
-	while (*z && zone_idx(*z) > highest_zoneidx)
+	while (zonelist_zone_idx(z) > highest_zoneidx)
 		z++;
 
 	return z;
@@ -748,9 +790,11 @@ static inline struct zone **next_zones_zonelist(struct zone **z,
  * This iterator iterates though all zones at or below a given zone index.
  */
 #define for_each_zone_zonelist(zone, z, zlist, highidx) \
-	for (z = first_zones_zonelist(zlist, highidx), zone = *z++;	\
+	for (z = first_zones_zonelist(zlist, highidx),			\
+			zone = zonelist_zone(z++);			\
 		zone;							\
-		z = next_zones_zonelist(z, highidx), zone = *z++)
+		z = next_zones_zonelist(z, highidx),			\
+			zone = zonelist_zone(z++))
 
 #ifdef CONFIG_SPARSEMEM
 #include <asm/sparsemem.h>
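One subtlety above: the old loops tested *z && zone_idx(*z) > highest_zoneidx,
while the new ones drop the NULL check entirely. That works because the NULL
terminator carries zone_idx == 0, which can never exceed highest_zoneidx, so
the scan always halts on it. Here is a userspace re-creation of the iterator
contract, where only the helper names and the sentinel convention follow the
patch and everything else (stub types, the test list, main) is illustrative:

#include <stdio.h>

struct zone { int nid; };
struct zoneref { struct zone *zone; int zone_idx; };

/* Same loop as the patched helper: the terminator's zone_idx of 0 is
 * never above highest_zoneidx, so no explicit NULL test is needed. */
static struct zoneref *next_zones_zonelist(struct zoneref *z, int highest_zoneidx)
{
	while (z->zone_idx > highest_zoneidx)
		z++;
	return z;
}

static struct zoneref *first_zones_zonelist(struct zoneref *zrefs, int highest_zoneidx)
{
	return next_zones_zonelist(zrefs, highest_zoneidx);
}

/* (z++)->zone stands in for zonelist_zone(z++) */
#define for_each_zone_zonelist(zone, z, zrefs, highidx)			\
	for (z = first_zones_zonelist(zrefs, highidx),			\
			zone = (z++)->zone;				\
		zone;							\
		z = next_zones_zonelist(z, highidx),			\
			zone = (z++)->zone)

int main(void)
{
	struct zone a = { 0 }, b = { 0 }, c = { 1 };
	/* zonelists are built highest zone first and NULL-terminated */
	struct zoneref list[] = { { &a, 3 }, { &b, 2 }, { &c, 1 }, { NULL, 0 } };
	struct zoneref *z;
	struct zone *zone;

	for_each_zone_zonelist(zone, z, list, 2)	/* skips zone_idx 3 */
		printf("visited zone_idx=%d on node %d\n", z[-1].zone_idx, zone->nid);
	return 0;
}

This sentinel is also why every site in the patch that writes the NULL
terminator clears zone_idx to 0 as well: a stale index larger than
highest_zoneidx there would carry the scan past the end of the array.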
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 3852436b652a..a7979baf1e39 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -23,8 +23,8 @@ enum oom_constraint {
 	CONSTRAINT_MEMORY_POLICY,
 };
 
-extern int try_set_zone_oom(struct zonelist *zonelist);
-extern void clear_zonelist_oom(struct zonelist *zonelist);
+extern int try_set_zone_oom(struct zonelist *zonelist, gfp_t gfp_flags);
+extern void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags);
 
 extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order);
 extern int register_oom_notifier(struct notifier_block *nb);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 8b35fbd8292f..a220b13cbfaf 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1967,8 +1967,8 @@ int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl)
 {
 	int i;
 
-	for (i = 0; zl->zones[i]; i++) {
-		int nid = zone_to_nid(zl->zones[i]);
+	for (i = 0; zl->_zonerefs[i].zone; i++) {
+		int nid = zonelist_node_idx(&zl->_zonerefs[i]);
 
 		if (node_isset(nid, current->mems_allowed))
 			return 1;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ddd141cad77f..4bced0d705ca 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -97,7 +97,8 @@ static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma,
 	struct mempolicy *mpol;
 	struct zonelist *zonelist = huge_zonelist(vma, address,
 					htlb_alloc_mask, &mpol);
-	struct zone *zone, **z;
+	struct zone *zone;
+	struct zoneref *z;
 
 	for_each_zone_zonelist(zone, z, zonelist, MAX_NR_ZONES - 1) {
 		nid = zone_to_nid(zone);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 5d20bf44062f..90193a2a915b 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -186,7 +186,7 @@ static struct zonelist *bind_zonelist(nodemask_t *nodes)
 	for_each_node_mask(nd, *nodes) {
 		struct zone *z = &NODE_DATA(nd)->node_zones[k];
 		if (z->present_pages > 0)
-			zl->zones[num++] = z;
+			zoneref_set_zone(z, &zl->_zonerefs[num++]);
 	}
 	if (k == 0)
 		break;
@@ -196,7 +196,8 @@ static struct zonelist *bind_zonelist(nodemask_t *nodes)
 		kfree(zl);
 		return ERR_PTR(-EINVAL);
 	}
-	zl->zones[num] = NULL;
+	zl->_zonerefs[num].zone = NULL;
+	zl->_zonerefs[num].zone_idx = 0;
 	return zl;
 }
 
@@ -504,9 +505,11 @@ static void get_zonemask(struct mempolicy *p, nodemask_t *nodes)
 	nodes_clear(*nodes);
 	switch (p->policy) {
 	case MPOL_BIND:
-		for (i = 0; p->v.zonelist->zones[i]; i++)
-			node_set(zone_to_nid(p->v.zonelist->zones[i]),
-					*nodes);
+		for (i = 0; p->v.zonelist->_zonerefs[i].zone; i++) {
+			struct zoneref *zref;
+			zref = &p->v.zonelist->_zonerefs[i];
+			node_set(zonelist_node_idx(zref), *nodes);
+		}
 		break;
 	case MPOL_DEFAULT:
 		break;
@@ -1212,12 +1215,13 @@ unsigned slab_node(struct mempolicy *policy)
 	case MPOL_INTERLEAVE:
 		return interleave_nodes(policy);
 
-	case MPOL_BIND:
+	case MPOL_BIND: {
 		/*
 		 * Follow bind policy behavior and start allocation at the
 		 * first node.
 		 */
-		return zone_to_nid(policy->v.zonelist->zones[0]);
+		return zonelist_node_idx(policy->v.zonelist->_zonerefs);
+	}
 
 	case MPOL_PREFERRED:
 		if (policy->v.preferred_node >= 0)
@@ -1323,7 +1327,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
 
 	zl = node_zonelist(nid, gfp);
 	page = __alloc_pages(gfp, order, zl);
-	if (page && page_zone(page) == zl->zones[0])
+	if (page && page_zone(page) == zonelist_zone(&zl->_zonerefs[0]))
 		inc_zone_page_state(page, NUMA_INTERLEAVE_HIT);
 	return page;
 }
@@ -1463,10 +1467,14 @@ int __mpol_equal(struct mempolicy *a, struct mempolicy *b)
 		return a->v.preferred_node == b->v.preferred_node;
 	case MPOL_BIND: {
 		int i;
-		for (i = 0; a->v.zonelist->zones[i]; i++)
-			if (a->v.zonelist->zones[i] != b->v.zonelist->zones[i])
+		for (i = 0; a->v.zonelist->_zonerefs[i].zone; i++) {
+			struct zone *za, *zb;
+			za = zonelist_zone(&a->v.zonelist->_zonerefs[i]);
+			zb = zonelist_zone(&b->v.zonelist->_zonerefs[i]);
+			if (za != zb)
 				return 0;
-		return b->v.zonelist->zones[i] == NULL;
+		}
+		return b->v.zonelist->_zonerefs[i].zone == NULL;
 	}
 	default:
 		BUG();
@@ -1785,12 +1793,12 @@ static void mpol_rebind_policy(struct mempolicy *pol,
 		break;
 	case MPOL_BIND: {
 		nodemask_t nodes;
-		struct zone **z;
+		struct zoneref *z;
 		struct zonelist *zonelist;
 
 		nodes_clear(nodes);
-		for (z = pol->v.zonelist->zones; *z; z++)
-			node_set(zone_to_nid(*z), nodes);
+		for (z = pol->v.zonelist->_zonerefs; z->zone; z++)
+			node_set(zonelist_node_idx(z), nodes);
 		nodes_remap(tmp, nodes, *mpolmask, *newmask);
 		nodes = tmp;
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 2c93502cfcb4..e41504aa5da9 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -176,7 +176,7 @@ static inline enum oom_constraint constrained_alloc(struct zonelist *zonelist,
 {
 #ifdef CONFIG_NUMA
 	struct zone *zone;
-	struct zone **z;
+	struct zoneref *z;
 	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
 	nodemask_t nodes = node_states[N_HIGH_MEMORY];
 
@@ -462,29 +462,29 @@ EXPORT_SYMBOL_GPL(unregister_oom_notifier);
  * if a parallel OOM killing is already taking place that includes a zone in
  * the zonelist. Otherwise, locks all zones in the zonelist and returns 1.
  */
-int try_set_zone_oom(struct zonelist *zonelist)
+int try_set_zone_oom(struct zonelist *zonelist, gfp_t gfp_mask)
 {
-	struct zone **z;
+	struct zoneref *z;
+	struct zone *zone;
 	int ret = 1;
 
-	z = zonelist->zones;
-
 	spin_lock(&zone_scan_mutex);
-	do {
-		if (zone_is_oom_locked(*z)) {
+	for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
+		if (zone_is_oom_locked(zone)) {
 			ret = 0;
 			goto out;
 		}
-	} while (*(++z) != NULL);
+	}
+
+	for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
+		/*
+		 * Lock each zone in the zonelist under zone_scan_mutex so a
+		 * parallel invocation of try_set_zone_oom() doesn't succeed
+		 * when it shouldn't.
+		 */
+		zone_set_flag(zone, ZONE_OOM_LOCKED);
+	}
 
-	/*
-	 * Lock each zone in the zonelist under zone_scan_mutex so a parallel
-	 * invocation of try_set_zone_oom() doesn't succeed when it shouldn't.
-	 */
-	z = zonelist->zones;
-	do {
-		zone_set_flag(*z, ZONE_OOM_LOCKED);
-	} while (*(++z) != NULL);
 out:
 	spin_unlock(&zone_scan_mutex);
 	return ret;
@@ -495,16 +495,15 @@ out:
  * allocation attempts with zonelists containing them may now recall the OOM
  * killer, if necessary.
  */
-void clear_zonelist_oom(struct zonelist *zonelist)
+void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask)
 {
-	struct zone **z;
-
-	z = zonelist->zones;
+	struct zoneref *z;
+	struct zone *zone;
 
 	spin_lock(&zone_scan_mutex);
-	do {
-		zone_clear_flag(*z, ZONE_OOM_LOCKED);
-	} while (*(++z) != NULL);
+	for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
+		zone_clear_flag(zone, ZONE_OOM_LOCKED);
+	}
 	spin_unlock(&zone_scan_mutex);
 }
 
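The new gfp_mask parameter matters because try_set_zone_oom() and
clear_zonelist_oom() must walk the same gfp_zone()-filtered view of the
zonelist, or a lock taken by one side could be missed by the other. Here is a
userspace sketch of that check-then-lock/unlock pairing; the array-based list,
the field names and main() are stand-ins rather than the kernel API, and the
real code additionally holds zone_scan_mutex across both passes:

#include <stdbool.h>
#include <stdio.h>

struct zone {
	bool oom_locked;	/* stands in for ZONE_OOM_LOCKED */
	int zone_idx;
};

/* Pass 1: fail if any in-scope zone is already locked.
 * Pass 2: lock every in-scope zone. */
static bool try_set_zone_oom(struct zone *zones, int n, int highest_zoneidx)
{
	for (int i = 0; i < n; i++)
		if (zones[i].zone_idx <= highest_zoneidx && zones[i].oom_locked)
			return false;
	for (int i = 0; i < n; i++)
		if (zones[i].zone_idx <= highest_zoneidx)
			zones[i].oom_locked = true;
	return true;
}

static void clear_zonelist_oom(struct zone *zones, int n, int highest_zoneidx)
{
	for (int i = 0; i < n; i++)
		if (zones[i].zone_idx <= highest_zoneidx)
			zones[i].oom_locked = false;
}

int main(void)
{
	struct zone zones[] = { { false, 2 }, { false, 1 }, { false, 0 } };

	/* Caller filters with the same index both times, as the kernel
	 * now does with gfp_zone(gfp_mask) on both sides. */
	if (try_set_zone_oom(zones, 3, 1)) {
		puts("OOM scope locked; killer would run here");
		clear_zonelist_oom(zones, 3, 1);
	}
	return 0;
}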
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4ccb8651cf22..6d94d04ea784 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1317,7 +1317,7 @@ static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags)
  * We are low on memory in the second scan, and should leave no stone
  * unturned looking for a free page.
  */
-static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zone **z,
+static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zoneref *z,
 						nodemask_t *allowednodes)
 {
 	struct zonelist_cache *zlc;	/* cached zonelist speedup info */
@@ -1328,7 +1328,7 @@ static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zone **z,
 	if (!zlc)
 		return 1;
 
-	i = z - zonelist->zones;
+	i = z - zonelist->_zonerefs;
 	n = zlc->z_to_n[i];
 
 	/* This zone is worth trying if it is allowed but not full */
@@ -1340,7 +1340,7 @@ static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zone **z,
  * zlc->fullzones, so that subsequent attempts to allocate a page
  * from that zone don't waste time re-examining it.
  */
-static void zlc_mark_zone_full(struct zonelist *zonelist, struct zone **z)
+static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
 {
 	struct zonelist_cache *zlc;	/* cached zonelist speedup info */
 	int i;				/* index of *z in zonelist zones */
@@ -1349,7 +1349,7 @@ static void zlc_mark_zone_full(struct zonelist *zonelist, struct zone **z)
 	if (!zlc)
 		return;
 
-	i = z - zonelist->zones;
+	i = z - zonelist->_zonerefs;
 
 	set_bit(i, zlc->fullzones);
 }
@@ -1361,13 +1361,13 @@ static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags)
 	return NULL;
 }
 
-static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zone **z,
+static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zoneref *z,
 				nodemask_t *allowednodes)
 {
 	return 1;
 }
 
-static void zlc_mark_zone_full(struct zonelist *zonelist, struct zone **z)
+static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
 {
 }
 #endif	/* CONFIG_NUMA */
@@ -1380,7 +1380,7 @@ static struct page *
 get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
 		struct zonelist *zonelist, int high_zoneidx, int alloc_flags)
 {
-	struct zone **z;
+	struct zoneref *z;
 	struct page *page = NULL;
 	int classzone_idx;
 	struct zone *zone, *preferred_zone;
@@ -1389,8 +1389,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
 	int did_zlc_setup = 0;		/* just call zlc_setup() one time */
 
 	z = first_zones_zonelist(zonelist, high_zoneidx);
-	classzone_idx = zone_idx(*z);
-	preferred_zone = *z;
+	classzone_idx = zonelist_zone_idx(z);
+	preferred_zone = zonelist_zone(z);
 
 zonelist_scan:
 	/*
@@ -1453,7 +1453,8 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order,
 {
 	const gfp_t wait = gfp_mask & __GFP_WAIT;
 	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
-	struct zone **z;
+	struct zoneref *z;
+	struct zone *zone;
 	struct page *page;
 	struct reclaim_state reclaim_state;
 	struct task_struct *p = current;
@@ -1467,9 +1468,9 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order,
 		return NULL;
 
 restart:
-	z = zonelist->zones;  /* the list of zones suitable for gfp_mask */
+	z = zonelist->_zonerefs;  /* the list of zones suitable for gfp_mask */
 
-	if (unlikely(*z == NULL)) {
+	if (unlikely(!z->zone)) {
 		/*
 		 * Happens if we have an empty zonelist as a result of
 		 * GFP_THISNODE being used on a memoryless node
@@ -1493,8 +1494,8 @@ restart:
 	if (NUMA_BUILD && (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
 		goto nopage;
 
-	for (z = zonelist->zones; *z; z++)
-		wakeup_kswapd(*z, order);
+	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx)
+		wakeup_kswapd(zone, order);
 
 	/*
 	 * OK, we're below the kswapd watermark and have kicked background
@@ -1575,7 +1576,7 @@ nofail_alloc:
 		if (page)
 			goto got_pg;
 	} else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
-		if (!try_set_zone_oom(zonelist)) {
+		if (!try_set_zone_oom(zonelist, gfp_mask)) {
 			schedule_timeout_uninterruptible(1);
 			goto restart;
 		}
@@ -1589,18 +1590,18 @@ nofail_alloc:
 		page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
 			zonelist, high_zoneidx, ALLOC_WMARK_HIGH|ALLOC_CPUSET);
 		if (page) {
-			clear_zonelist_oom(zonelist);
+			clear_zonelist_oom(zonelist, gfp_mask);
 			goto got_pg;
 		}
 
 		/* The OOM killer will not help higher order allocs so fail */
 		if (order > PAGE_ALLOC_COSTLY_ORDER) {
-			clear_zonelist_oom(zonelist);
+			clear_zonelist_oom(zonelist, gfp_mask);
 			goto nopage;
 		}
 
 		out_of_memory(zonelist, gfp_mask, order);
-		clear_zonelist_oom(zonelist);
+		clear_zonelist_oom(zonelist, gfp_mask);
 		goto restart;
 	}
 
@@ -1702,7 +1703,7 @@ EXPORT_SYMBOL(free_pages);
 
 static unsigned int nr_free_zone_pages(int offset)
 {
-	struct zone **z;
+	struct zoneref *z;
 	struct zone *zone;
 
 	/* Just pick one node, since fallback list is circular */
@@ -1896,7 +1897,8 @@ static int build_zonelists_node(pg_data_t *pgdat, struct zonelist *zonelist,
 		zone_type--;
 		zone = pgdat->node_zones + zone_type;
 		if (populated_zone(zone)) {
-			zonelist->zones[nr_zones++] = zone;
+			zoneref_set_zone(zone,
+				&zonelist->_zonerefs[nr_zones++]);
 			check_highest_zone(zone_type);
 		}
 
@@ -2072,11 +2074,12 @@ static void build_zonelists_in_node_order(pg_data_t *pgdat, int node)
 	struct zonelist *zonelist;
 
 	zonelist = &pgdat->node_zonelists[0];
-	for (j = 0; zonelist->zones[j] != NULL; j++)
+	for (j = 0; zonelist->_zonerefs[j].zone != NULL; j++)
 		;
 	j = build_zonelists_node(NODE_DATA(node), zonelist, j,
 							MAX_NR_ZONES - 1);
-	zonelist->zones[j] = NULL;
+	zonelist->_zonerefs[j].zone = NULL;
+	zonelist->_zonerefs[j].zone_idx = 0;
 }
 
 /*
@@ -2089,7 +2092,8 @@ static void build_thisnode_zonelists(pg_data_t *pgdat)
 
 	zonelist = &pgdat->node_zonelists[1];
 	j = build_zonelists_node(pgdat, zonelist, 0, MAX_NR_ZONES - 1);
-	zonelist->zones[j] = NULL;
+	zonelist->_zonerefs[j].zone = NULL;
+	zonelist->_zonerefs[j].zone_idx = 0;
 }
 
 /*
@@ -2114,12 +2118,14 @@ static void build_zonelists_in_zone_order(pg_data_t *pgdat, int nr_nodes)
 			node = node_order[j];
 			z = &NODE_DATA(node)->node_zones[zone_type];
 			if (populated_zone(z)) {
-				zonelist->zones[pos++] = z;
+				zoneref_set_zone(z,
+					&zonelist->_zonerefs[pos++]);
 				check_highest_zone(zone_type);
 			}
 		}
 	}
-	zonelist->zones[pos] = NULL;
+	zonelist->_zonerefs[pos].zone = NULL;
+	zonelist->_zonerefs[pos].zone_idx = 0;
 }
 
 static int default_zonelist_order(void)
@@ -2196,7 +2202,8 @@ static void build_zonelists(pg_data_t *pgdat)
 	/* initialize zonelists */
 	for (i = 0; i < MAX_ZONELISTS; i++) {
 		zonelist = pgdat->node_zonelists + i;
-		zonelist->zones[0] = NULL;
+		zonelist->_zonerefs[0].zone = NULL;
+		zonelist->_zonerefs[0].zone_idx = 0;
 	}
 
 	/* NUMA-aware ordering of nodes */
@@ -2248,13 +2255,13 @@ static void build_zonelist_cache(pg_data_t *pgdat)
 {
 	struct zonelist *zonelist;
 	struct zonelist_cache *zlc;
-	struct zone **z;
+	struct zoneref *z;
 
 	zonelist = &pgdat->node_zonelists[0];
 	zonelist->zlcache_ptr = zlc = &zonelist->zlcache;
 	bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST);
-	for (z = zonelist->zones; *z; z++)
-		zlc->z_to_n[z - zonelist->zones] = zone_to_nid(*z);
+	for (z = zonelist->_zonerefs; z->zone; z++)
+		zlc->z_to_n[z - zonelist->_zonerefs] = zonelist_node_idx(z);
 }
 
 
@@ -2297,7 +2304,8 @@ static void build_zonelists(pg_data_t *pgdat)
 							MAX_NR_ZONES - 1);
 	}
 
-	zonelist->zones[j] = NULL;
+	zonelist->_zonerefs[j].zone = NULL;
+	zonelist->_zonerefs[j].zone_idx = 0;
 }
 
 /* non-NUMA variant of zonelist performance cache - just NULL zlcache_ptr */
diff --git a/mm/slab.c b/mm/slab.c
index 29851841da62..7bc4a136846e 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3242,7 +3242,7 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
 {
 	struct zonelist *zonelist;
 	gfp_t local_flags;
-	struct zone **z;
+	struct zoneref *z;
 	struct zone *zone;
 	enum zone_type high_zoneidx = gfp_zone(flags);
 	void *obj = NULL;
diff --git a/mm/slub.c b/mm/slub.c
index 80d20cc1c0f8..48fff83a1e9d 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1284,7 +1284,7 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
 {
 #ifdef CONFIG_NUMA
 	struct zonelist *zonelist;
-	struct zone **z;
+	struct zoneref *z;
 	struct zone *zone;
 	enum zone_type high_zoneidx = gfp_zone(flags);
 	struct page *page;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 0515b8f44894..eceac9f9032f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1251,7 +1251,7 @@ static unsigned long shrink_zones(int priority, struct zonelist *zonelist,
 {
 	enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask);
 	unsigned long nr_reclaimed = 0;
-	struct zone **z;
+	struct zoneref *z;
 	struct zone *zone;
 
 	sc->all_unreclaimable = 1;
@@ -1301,7 +1301,7 @@ static unsigned long shrink_zones(int priority, struct zonelist *zonelist,
  * allocation attempt will fail.
  */
 static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
-					gfp_t gfp_mask, struct scan_control *sc)
+					struct scan_control *sc)
 {
 	int priority;
 	int ret = 0;
@@ -1309,9 +1309,9 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 	unsigned long nr_reclaimed = 0;
 	struct reclaim_state *reclaim_state = current->reclaim_state;
 	unsigned long lru_pages = 0;
-	struct zone **z;
+	struct zoneref *z;
 	struct zone *zone;
-	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
+	enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask);
 
 	if (scan_global_lru(sc))
 		count_vm_event(ALLOCSTALL);
@@ -1339,7 +1339,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 		 * over limit cgroups
 		 */
 		if (scan_global_lru(sc)) {
-			shrink_slab(sc->nr_scanned, gfp_mask, lru_pages);
+			shrink_slab(sc->nr_scanned, sc->gfp_mask, lru_pages);
 			if (reclaim_state) {
 				nr_reclaimed += reclaim_state->reclaimed_slab;
 				reclaim_state->reclaimed_slab = 0;
@@ -1410,7 +1410,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 		.isolate_pages = isolate_pages_global,
 	};
 
-	return do_try_to_free_pages(zonelist, gfp_mask, &sc);
+	return do_try_to_free_pages(zonelist, &sc);
 }
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
@@ -1419,7 +1419,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 						gfp_t gfp_mask)
 {
 	struct scan_control sc = {
-		.gfp_mask = gfp_mask,
 		.may_writepage = !laptop_mode,
 		.may_swap = 1,
 		.swap_cluster_max = SWAP_CLUSTER_MAX,
@@ -1429,12 +1428,11 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 		.isolate_pages = mem_cgroup_isolate_pages,
 	};
 	struct zonelist *zonelist;
-	int target_zone = gfp_zone(GFP_HIGHUSER_MOVABLE);
 
-	zonelist = &NODE_DATA(numa_node_id())->node_zonelists[target_zone];
-	if (do_try_to_free_pages(zonelist, sc.gfp_mask, &sc))
-		return 1;
-	return 0;
+	sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
+			(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
+	zonelist = NODE_DATA(numa_node_id())->node_zonelists;
+	return do_try_to_free_pages(zonelist, &sc);
 }
 #endif
 