diff options
| author | Mel Gorman <mel@csn.ul.ie> | 2008-04-28 05:12:16 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-04-28 11:58:18 -0400 |
| commit | 54a6eb5c4765aa573a030ceeba2c14e3d2ea5706 (patch) | |
| tree | 547176a090beb787722a153cf2b8b942dc0e68db | |
| parent | 18ea7e710d2452fa726814a406779188028cf1bf (diff) | |
mm: use two zonelists that are filtered by GFP mask
Currently a node has two sets of zonelists, one for each zone type in the
system and a second set for GFP_THISNODE allocations. Based on the zones
allowed by a gfp mask, one of these zonelists is selected. All of these
zonelists consume memory and occupy cache lines.
This patch replaces the multiple zonelists per-node with two zonelists. The
first contains all populated zones in the system, ordered by distance, for
fallback allocations when the target/preferred node has no free pages. The
second contains all populated zones in the node suitable for GFP_THISNODE
allocations.
An iterator macro is introduced called for_each_zone_zonelist() that iterates
through each zone allowed by the GFP flags in the selected zonelist.
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
| -rw-r--r-- | arch/parisc/mm/init.c | 11 | ||||
| -rw-r--r-- | fs/buffer.c | 10 | ||||
| -rw-r--r-- | include/linux/gfp.h | 13 | ||||
| -rw-r--r-- | include/linux/mmzone.h | 65 | ||||
| -rw-r--r-- | mm/hugetlb.c | 8 | ||||
| -rw-r--r-- | mm/oom_kill.c | 8 | ||||
| -rw-r--r-- | mm/page_alloc.c | 170 | ||||
| -rw-r--r-- | mm/slab.c | 8 | ||||
| -rw-r--r-- | mm/slub.c | 8 | ||||
| -rw-r--r-- | mm/vmscan.c | 21 |
10 files changed, 168 insertions, 154 deletions
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index eb80f5e33d7d..9bb6136d77c2 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c | |||
| @@ -603,15 +603,18 @@ void show_mem(void) | |||
| 603 | #ifdef CONFIG_DISCONTIGMEM | 603 | #ifdef CONFIG_DISCONTIGMEM |
| 604 | { | 604 | { |
| 605 | struct zonelist *zl; | 605 | struct zonelist *zl; |
| 606 | int i, j, k; | 606 | int i, j; |
| 607 | 607 | ||
| 608 | for (i = 0; i < npmem_ranges; i++) { | 608 | for (i = 0; i < npmem_ranges; i++) { |
| 609 | zl = node_zonelist(i); | ||
| 609 | for (j = 0; j < MAX_NR_ZONES; j++) { | 610 | for (j = 0; j < MAX_NR_ZONES; j++) { |
| 610 | zl = NODE_DATA(i)->node_zonelists + j; | 611 | struct zone **z; |
| 612 | struct zone *zone; | ||
| 611 | 613 | ||
| 612 | printk("Zone list for zone %d on node %d: ", j, i); | 614 | printk("Zone list for zone %d on node %d: ", j, i); |
| 613 | for (k = 0; zl->zones[k] != NULL; k++) | 615 | for_each_zone_zonelist(zone, z, zl, j) |
| 614 | printk("[%d/%s] ", zone_to_nid(zl->zones[k]), zl->zones[k]->name); | 616 | printk("[%d/%s] ", zone_to_nid(zone), |
| 617 | zone->name); | ||
| 615 | printk("\n"); | 618 | printk("\n"); |
| 616 | } | 619 | } |
| 617 | } | 620 | } |
diff --git a/fs/buffer.c b/fs/buffer.c index 71358499bc57..9b5434a80479 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
| @@ -360,16 +360,18 @@ void invalidate_bdev(struct block_device *bdev) | |||
| 360 | */ | 360 | */ |
| 361 | static void free_more_memory(void) | 361 | static void free_more_memory(void) |
| 362 | { | 362 | { |
| 363 | struct zonelist *zonelist; | 363 | struct zone **zones; |
| 364 | int nid; | 364 | int nid; |
| 365 | 365 | ||
| 366 | wakeup_pdflush(1024); | 366 | wakeup_pdflush(1024); |
| 367 | yield(); | 367 | yield(); |
| 368 | 368 | ||
| 369 | for_each_online_node(nid) { | 369 | for_each_online_node(nid) { |
| 370 | zonelist = node_zonelist(nid, GFP_NOFS); | 370 | zones = first_zones_zonelist(node_zonelist(nid, GFP_NOFS), |
| 371 | if (zonelist->zones[0]) | 371 | gfp_zone(GFP_NOFS)); |
| 372 | try_to_free_pages(zonelist, 0, GFP_NOFS); | 372 | if (*zones) |
| 373 | try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0, | ||
| 374 | GFP_NOFS); | ||
| 373 | } | 375 | } |
| 374 | } | 376 | } |
| 375 | 377 | ||
diff --git a/include/linux/gfp.h b/include/linux/gfp.h index e865d51f1c74..e1c6064cb6c7 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h | |||
| @@ -151,17 +151,26 @@ static inline enum zone_type gfp_zone(gfp_t flags) | |||
| 151 | * virtual kernel addresses to the allocated page(s). | 151 | * virtual kernel addresses to the allocated page(s). |
| 152 | */ | 152 | */ |
| 153 | 153 | ||
| 154 | static inline int gfp_zonelist(gfp_t flags) | ||
| 155 | { | ||
| 156 | if (NUMA_BUILD && unlikely(flags & __GFP_THISNODE)) | ||
| 157 | return 1; | ||
| 158 | |||
| 159 | return 0; | ||
| 160 | } | ||
| 161 | |||
| 154 | /* | 162 | /* |
| 155 | * We get the zone list from the current node and the gfp_mask. | 163 | * We get the zone list from the current node and the gfp_mask. |
| 156 | * This zone list contains a maximum of MAXNODES*MAX_NR_ZONES zones. | 164 | * This zone list contains a maximum of MAXNODES*MAX_NR_ZONES zones. |
| 157 | * There are many zonelists per node, two for each active zone. | 165 | * There are two zonelists per node, one for all zones with memory and |
| 166 | * one containing just zones from the node the zonelist belongs to. | ||
| 158 | * | 167 | * |
| 159 | * For the normal case of non-DISCONTIGMEM systems the NODE_DATA() gets | 168 | * For the normal case of non-DISCONTIGMEM systems the NODE_DATA() gets |
| 160 | * optimized to &contig_page_data at compile-time. | 169 | * optimized to &contig_page_data at compile-time. |
| 161 | */ | 170 | */ |
| 162 | static inline struct zonelist *node_zonelist(int nid, gfp_t flags) | 171 | static inline struct zonelist *node_zonelist(int nid, gfp_t flags) |
| 163 | { | 172 | { |
| 164 | return NODE_DATA(nid)->node_zonelists + gfp_zone(flags); | 173 | return NODE_DATA(nid)->node_zonelists + gfp_zonelist(flags); |
| 165 | } | 174 | } |
| 166 | 175 | ||
| 167 | #ifndef HAVE_ARCH_FREE_PAGE | 176 | #ifndef HAVE_ARCH_FREE_PAGE |
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 451eaa13bc28..d5c33a0b89e9 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h | |||
| @@ -393,10 +393,10 @@ static inline int zone_is_oom_locked(const struct zone *zone) | |||
| 393 | * The NUMA zonelists are doubled becausse we need zonelists that restrict the | 393 | * The NUMA zonelists are doubled becausse we need zonelists that restrict the |
| 394 | * allocations to a single node for GFP_THISNODE. | 394 | * allocations to a single node for GFP_THISNODE. |
| 395 | * | 395 | * |
| 396 | * [0 .. MAX_NR_ZONES -1] : Zonelists with fallback | 396 | * [0] : Zonelist with fallback |
| 397 | * [MAZ_NR_ZONES ... MAZ_ZONELISTS -1] : No fallback (GFP_THISNODE) | 397 | * [1] : No fallback (GFP_THISNODE) |
| 398 | */ | 398 | */ |
| 399 | #define MAX_ZONELISTS (2 * MAX_NR_ZONES) | 399 | #define MAX_ZONELISTS 2 |
| 400 | 400 | ||
| 401 | 401 | ||
| 402 | /* | 402 | /* |
| @@ -464,7 +464,7 @@ struct zonelist_cache { | |||
| 464 | unsigned long last_full_zap; /* when last zap'd (jiffies) */ | 464 | unsigned long last_full_zap; /* when last zap'd (jiffies) */ |
| 465 | }; | 465 | }; |
| 466 | #else | 466 | #else |
| 467 | #define MAX_ZONELISTS MAX_NR_ZONES | 467 | #define MAX_ZONELISTS 1 |
| 468 | struct zonelist_cache; | 468 | struct zonelist_cache; |
| 469 | #endif | 469 | #endif |
| 470 | 470 | ||
| @@ -486,24 +486,6 @@ struct zonelist { | |||
| 486 | #endif | 486 | #endif |
| 487 | }; | 487 | }; |
| 488 | 488 | ||
| 489 | #ifdef CONFIG_NUMA | ||
| 490 | /* | ||
| 491 | * Only custom zonelists like MPOL_BIND need to be filtered as part of | ||
| 492 | * policies. As described in the comment for struct zonelist_cache, these | ||
| 493 | * zonelists will not have a zlcache so zlcache_ptr will not be set. Use | ||
| 494 | * that to determine if the zonelists needs to be filtered or not. | ||
| 495 | */ | ||
| 496 | static inline int alloc_should_filter_zonelist(struct zonelist *zonelist) | ||
| 497 | { | ||
| 498 | return !zonelist->zlcache_ptr; | ||
| 499 | } | ||
| 500 | #else | ||
| 501 | static inline int alloc_should_filter_zonelist(struct zonelist *zonelist) | ||
| 502 | { | ||
| 503 | return 0; | ||
| 504 | } | ||
| 505 | #endif /* CONFIG_NUMA */ | ||
| 506 | |||
| 507 | #ifdef CONFIG_ARCH_POPULATES_NODE_MAP | 489 | #ifdef CONFIG_ARCH_POPULATES_NODE_MAP |
| 508 | struct node_active_region { | 490 | struct node_active_region { |
| 509 | unsigned long start_pfn; | 491 | unsigned long start_pfn; |
| @@ -731,6 +713,45 @@ extern struct zone *next_zone(struct zone *zone); | |||
| 731 | zone; \ | 713 | zone; \ |
| 732 | zone = next_zone(zone)) | 714 | zone = next_zone(zone)) |
| 733 | 715 | ||
| 716 | /* Returns the first zone at or below highest_zoneidx in a zonelist */ | ||
| 717 | static inline struct zone **first_zones_zonelist(struct zonelist *zonelist, | ||
| 718 | enum zone_type highest_zoneidx) | ||
| 719 | { | ||
| 720 | struct zone **z; | ||
| 721 | |||
| 722 | /* Find the first suitable zone to use for the allocation */ | ||
| 723 | z = zonelist->zones; | ||
| 724 | while (*z && zone_idx(*z) > highest_zoneidx) | ||
| 725 | z++; | ||
| 726 | |||
| 727 | return z; | ||
| 728 | } | ||
| 729 | |||
| 730 | /* Returns the next zone at or below highest_zoneidx in a zonelist */ | ||
| 731 | static inline struct zone **next_zones_zonelist(struct zone **z, | ||
| 732 | enum zone_type highest_zoneidx) | ||
| 733 | { | ||
| 734 | /* Find the next suitable zone to use for the allocation */ | ||
| 735 | while (*z && zone_idx(*z) > highest_zoneidx) | ||
| 736 | z++; | ||
| 737 | |||
| 738 | return z; | ||
| 739 | } | ||
| 740 | |||
| 741 | /** | ||
| 742 | * for_each_zone_zonelist - helper macro to iterate over valid zones in a zonelist at or below a given zone index | ||
| 743 | * @zone - The current zone in the iterator | ||
| 744 | * @z - The current pointer within zonelist->zones being iterated | ||
| 745 | * @zlist - The zonelist being iterated | ||
| 746 | * @highidx - The zone index of the highest zone to return | ||
| 747 | * | ||
| 748 | * This iterator iterates though all zones at or below a given zone index. | ||
| 749 | */ | ||
| 750 | #define for_each_zone_zonelist(zone, z, zlist, highidx) \ | ||
| 751 | for (z = first_zones_zonelist(zlist, highidx), zone = *z++; \ | ||
| 752 | zone; \ | ||
| 753 | z = next_zones_zonelist(z, highidx), zone = *z++) | ||
| 754 | |||
| 734 | #ifdef CONFIG_SPARSEMEM | 755 | #ifdef CONFIG_SPARSEMEM |
| 735 | #include <asm/sparsemem.h> | 756 | #include <asm/sparsemem.h> |
| 736 | #endif | 757 | #endif |
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 51c9e2c01640..ddd141cad77f 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
| @@ -97,11 +97,11 @@ static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma, | |||
| 97 | struct mempolicy *mpol; | 97 | struct mempolicy *mpol; |
| 98 | struct zonelist *zonelist = huge_zonelist(vma, address, | 98 | struct zonelist *zonelist = huge_zonelist(vma, address, |
| 99 | htlb_alloc_mask, &mpol); | 99 | htlb_alloc_mask, &mpol); |
| 100 | struct zone **z; | 100 | struct zone *zone, **z; |
| 101 | 101 | ||
| 102 | for (z = zonelist->zones; *z; z++) { | 102 | for_each_zone_zonelist(zone, z, zonelist, MAX_NR_ZONES - 1) { |
| 103 | nid = zone_to_nid(*z); | 103 | nid = zone_to_nid(zone); |
| 104 | if (cpuset_zone_allowed_softwall(*z, htlb_alloc_mask) && | 104 | if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask) && |
| 105 | !list_empty(&hugepage_freelists[nid])) { | 105 | !list_empty(&hugepage_freelists[nid])) { |
| 106 | page = list_entry(hugepage_freelists[nid].next, | 106 | page = list_entry(hugepage_freelists[nid].next, |
| 107 | struct page, lru); | 107 | struct page, lru); |
diff --git a/mm/oom_kill.c b/mm/oom_kill.c index beb592fe9389..2c93502cfcb4 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c | |||
| @@ -175,12 +175,14 @@ static inline enum oom_constraint constrained_alloc(struct zonelist *zonelist, | |||
| 175 | gfp_t gfp_mask) | 175 | gfp_t gfp_mask) |
| 176 | { | 176 | { |
| 177 | #ifdef CONFIG_NUMA | 177 | #ifdef CONFIG_NUMA |
| 178 | struct zone *zone; | ||
| 178 | struct zone **z; | 179 | struct zone **z; |
| 180 | enum zone_type high_zoneidx = gfp_zone(gfp_mask); | ||
| 179 | nodemask_t nodes = node_states[N_HIGH_MEMORY]; | 181 | nodemask_t nodes = node_states[N_HIGH_MEMORY]; |
| 180 | 182 | ||
| 181 | for (z = zonelist->zones; *z; z++) | 183 | for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) |
| 182 | if (cpuset_zone_allowed_softwall(*z, gfp_mask)) | 184 | if (cpuset_zone_allowed_softwall(zone, gfp_mask)) |
| 183 | node_clear(zone_to_nid(*z), nodes); | 185 | node_clear(zone_to_nid(zone), nodes); |
| 184 | else | 186 | else |
| 185 | return CONSTRAINT_CPUSET; | 187 | return CONSTRAINT_CPUSET; |
| 186 | 188 | ||
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 187efd47a446..4ccb8651cf22 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
| @@ -1378,42 +1378,29 @@ static void zlc_mark_zone_full(struct zonelist *zonelist, struct zone **z) | |||
| 1378 | */ | 1378 | */ |
| 1379 | static struct page * | 1379 | static struct page * |
| 1380 | get_page_from_freelist(gfp_t gfp_mask, unsigned int order, | 1380 | get_page_from_freelist(gfp_t gfp_mask, unsigned int order, |
| 1381 | struct zonelist *zonelist, int alloc_flags) | 1381 | struct zonelist *zonelist, int high_zoneidx, int alloc_flags) |
| 1382 | { | 1382 | { |
| 1383 | struct zone **z; | 1383 | struct zone **z; |
| 1384 | struct page *page = NULL; | 1384 | struct page *page = NULL; |
| 1385 | int classzone_idx = zone_idx(zonelist->zones[0]); | 1385 | int classzone_idx; |
| 1386 | struct zone *zone, *preferred_zone; | 1386 | struct zone *zone, *preferred_zone; |
| 1387 | nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */ | 1387 | nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */ |
| 1388 | int zlc_active = 0; /* set if using zonelist_cache */ | 1388 | int zlc_active = 0; /* set if using zonelist_cache */ |
| 1389 | int did_zlc_setup = 0; /* just call zlc_setup() one time */ | 1389 | int did_zlc_setup = 0; /* just call zlc_setup() one time */ |
| 1390 | enum zone_type highest_zoneidx = -1; /* Gets set for policy zonelists */ | 1390 | |
| 1391 | z = first_zones_zonelist(zonelist, high_zoneidx); | ||
| 1392 | classzone_idx = zone_idx(*z); | ||
| 1393 | preferred_zone = *z; | ||
| 1391 | 1394 | ||
| 1392 | zonelist_scan: | 1395 | zonelist_scan: |
| 1393 | /* | 1396 | /* |
| 1394 | * Scan zonelist, looking for a zone with enough free. | 1397 | * Scan zonelist, looking for a zone with enough free. |
| 1395 | * See also cpuset_zone_allowed() comment in kernel/cpuset.c. | 1398 | * See also cpuset_zone_allowed() comment in kernel/cpuset.c. |
| 1396 | */ | 1399 | */ |
| 1397 | z = zonelist->zones; | 1400 | for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { |
| 1398 | preferred_zone = *z; | ||
| 1399 | |||
| 1400 | do { | ||
| 1401 | /* | ||
| 1402 | * In NUMA, this could be a policy zonelist which contains | ||
| 1403 | * zones that may not be allowed by the current gfp_mask. | ||
| 1404 | * Check the zone is allowed by the current flags | ||
| 1405 | */ | ||
| 1406 | if (unlikely(alloc_should_filter_zonelist(zonelist))) { | ||
| 1407 | if (highest_zoneidx == -1) | ||
| 1408 | highest_zoneidx = gfp_zone(gfp_mask); | ||
| 1409 | if (zone_idx(*z) > highest_zoneidx) | ||
| 1410 | continue; | ||
| 1411 | } | ||
| 1412 | |||
| 1413 | if (NUMA_BUILD && zlc_active && | 1401 | if (NUMA_BUILD && zlc_active && |
| 1414 | !zlc_zone_worth_trying(zonelist, z, allowednodes)) | 1402 | !zlc_zone_worth_trying(zonelist, z, allowednodes)) |
| 1415 | continue; | 1403 | continue; |
| 1416 | zone = *z; | ||
| 1417 | if ((alloc_flags & ALLOC_CPUSET) && | 1404 | if ((alloc_flags & ALLOC_CPUSET) && |
| 1418 | !cpuset_zone_allowed_softwall(zone, gfp_mask)) | 1405 | !cpuset_zone_allowed_softwall(zone, gfp_mask)) |
| 1419 | goto try_next_zone; | 1406 | goto try_next_zone; |
| @@ -1447,7 +1434,7 @@ try_next_zone: | |||
| 1447 | zlc_active = 1; | 1434 | zlc_active = 1; |
| 1448 | did_zlc_setup = 1; | 1435 | did_zlc_setup = 1; |
| 1449 | } | 1436 | } |
| 1450 | } while (*(++z) != NULL); | 1437 | } |
| 1451 | 1438 | ||
| 1452 | if (unlikely(NUMA_BUILD && page == NULL && zlc_active)) { | 1439 | if (unlikely(NUMA_BUILD && page == NULL && zlc_active)) { |
| 1453 | /* Disable zlc cache for second zonelist scan */ | 1440 | /* Disable zlc cache for second zonelist scan */ |
| @@ -1465,6 +1452,7 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order, | |||
| 1465 | struct zonelist *zonelist) | 1452 | struct zonelist *zonelist) |
| 1466 | { | 1453 | { |
| 1467 | const gfp_t wait = gfp_mask & __GFP_WAIT; | 1454 | const gfp_t wait = gfp_mask & __GFP_WAIT; |
| 1455 | enum zone_type high_zoneidx = gfp_zone(gfp_mask); | ||
| 1468 | struct zone **z; | 1456 | struct zone **z; |
| 1469 | struct page *page; | 1457 | struct page *page; |
| 1470 | struct reclaim_state reclaim_state; | 1458 | struct reclaim_state reclaim_state; |
| @@ -1490,7 +1478,7 @@ restart: | |||
| 1490 | } | 1478 | } |
| 1491 | 1479 | ||
| 1492 | page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order, | 1480 | page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order, |
| 1493 | zonelist, ALLOC_WMARK_LOW|ALLOC_CPUSET); | 1481 | zonelist, high_zoneidx, ALLOC_WMARK_LOW|ALLOC_CPUSET); |
| 1494 | if (page) | 1482 | if (page) |
| 1495 | goto got_pg; | 1483 | goto got_pg; |
| 1496 | 1484 | ||
| @@ -1534,7 +1522,8 @@ restart: | |||
| 1534 | * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc. | 1522 | * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc. |
| 1535 | * See also cpuset_zone_allowed() comment in kernel/cpuset.c. | 1523 | * See also cpuset_zone_allowed() comment in kernel/cpuset.c. |
| 1536 | */ | 1524 | */ |
| 1537 | page = get_page_from_freelist(gfp_mask, order, zonelist, alloc_flags); | 1525 | page = get_page_from_freelist(gfp_mask, order, zonelist, |
| 1526 | high_zoneidx, alloc_flags); | ||
| 1538 | if (page) | 1527 | if (page) |
| 1539 | goto got_pg; | 1528 | goto got_pg; |
| 1540 | 1529 | ||
| @@ -1547,7 +1536,7 @@ rebalance: | |||
| 1547 | nofail_alloc: | 1536 | nofail_alloc: |
| 1548 | /* go through the zonelist yet again, ignoring mins */ | 1537 | /* go through the zonelist yet again, ignoring mins */ |
| 1549 | page = get_page_from_freelist(gfp_mask, order, | 1538 | page = get_page_from_freelist(gfp_mask, order, |
| 1550 | zonelist, ALLOC_NO_WATERMARKS); | 1539 | zonelist, high_zoneidx, ALLOC_NO_WATERMARKS); |
| 1551 | if (page) | 1540 | if (page) |
| 1552 | goto got_pg; | 1541 | goto got_pg; |
| 1553 | if (gfp_mask & __GFP_NOFAIL) { | 1542 | if (gfp_mask & __GFP_NOFAIL) { |
| @@ -1582,7 +1571,7 @@ nofail_alloc: | |||
| 1582 | 1571 | ||
| 1583 | if (likely(did_some_progress)) { | 1572 | if (likely(did_some_progress)) { |
| 1584 | page = get_page_from_freelist(gfp_mask, order, | 1573 | page = get_page_from_freelist(gfp_mask, order, |
| 1585 | zonelist, alloc_flags); | 1574 | zonelist, high_zoneidx, alloc_flags); |
| 1586 | if (page) | 1575 | if (page) |
| 1587 | goto got_pg; | 1576 | goto got_pg; |
| 1588 | } else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) { | 1577 | } else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) { |
| @@ -1598,7 +1587,7 @@ nofail_alloc: | |||
| 1598 | * under heavy pressure. | 1587 | * under heavy pressure. |
| 1599 | */ | 1588 | */ |
| 1600 | page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order, | 1589 | page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order, |
| 1601 | zonelist, ALLOC_WMARK_HIGH|ALLOC_CPUSET); | 1590 | zonelist, high_zoneidx, ALLOC_WMARK_HIGH|ALLOC_CPUSET); |
| 1602 | if (page) { | 1591 | if (page) { |
| 1603 | clear_zonelist_oom(zonelist); | 1592 | clear_zonelist_oom(zonelist); |
| 1604 | goto got_pg; | 1593 | goto got_pg; |
| @@ -1713,14 +1702,15 @@ EXPORT_SYMBOL(free_pages); | |||
| 1713 | 1702 | ||
| 1714 | static unsigned int nr_free_zone_pages(int offset) | 1703 | static unsigned int nr_free_zone_pages(int offset) |
| 1715 | { | 1704 | { |
| 1705 | struct zone **z; | ||
| 1706 | struct zone *zone; | ||
| 1707 | |||
| 1716 | /* Just pick one node, since fallback list is circular */ | 1708 | /* Just pick one node, since fallback list is circular */ |
| 1717 | unsigned int sum = 0; | 1709 | unsigned int sum = 0; |
| 1718 | 1710 | ||
| 1719 | struct zonelist *zonelist = node_zonelist(numa_node_id(), GFP_KERNEL); | 1711 | struct zonelist *zonelist = node_zonelist(numa_node_id(), GFP_KERNEL); |
| 1720 | struct zone **zonep = zonelist->zones; | ||
| 1721 | struct zone *zone; | ||
| 1722 | 1712 | ||
| 1723 | for (zone = *zonep++; zone; zone = *zonep++) { | 1713 | for_each_zone_zonelist(zone, z, zonelist, offset) { |
| 1724 | unsigned long size = zone->present_pages; | 1714 | unsigned long size = zone->present_pages; |
| 1725 | unsigned long high = zone->pages_high; | 1715 | unsigned long high = zone->pages_high; |
| 1726 | if (size > high) | 1716 | if (size > high) |
| @@ -2078,17 +2068,15 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask) | |||
| 2078 | */ | 2068 | */ |
| 2079 | static void build_zonelists_in_node_order(pg_data_t *pgdat, int node) | 2069 | static void build_zonelists_in_node_order(pg_data_t *pgdat, int node) |
| 2080 | { | 2070 | { |
| 2081 | enum zone_type i; | ||
| 2082 | int j; | 2071 | int j; |
| 2083 | struct zonelist *zonelist; | 2072 | struct zonelist *zonelist; |
| 2084 | 2073 | ||
| 2085 | for (i = 0; i < MAX_NR_ZONES; i++) { | 2074 | zonelist = &pgdat->node_zonelists[0]; |
| 2086 | zonelist = pgdat->node_zonelists + i; | 2075 | for (j = 0; zonelist->zones[j] != NULL; j++) |
| 2087 | for (j = 0; zonelist->zones[j] != NULL; j++) | 2076 | ; |
| 2088 | ; | 2077 | j = build_zonelists_node(NODE_DATA(node), zonelist, j, |
| 2089 | j = build_zonelists_node(NODE_DATA(node), zonelist, j, i); | 2078 | MAX_NR_ZONES - 1); |
| 2090 | zonelist->zones[j] = NULL; | 2079 | zonelist->zones[j] = NULL; |
| 2091 | } | ||
| 2092 | } | 2080 | } |
| 2093 | 2081 | ||
| 2094 | /* | 2082 | /* |
| @@ -2096,15 +2084,12 @@ static void build_zonelists_in_node_order(pg_data_t *pgdat, int node) | |||
| 2096 | */ | 2084 | */ |
| 2097 | static void build_thisnode_zonelists(pg_data_t *pgdat) | 2085 | static void build_thisnode_zonelists(pg_data_t *pgdat) |
| 2098 | { | 2086 | { |
| 2099 | enum zone_type i; | ||
| 2100 | int j; | 2087 | int j; |
| 2101 | struct zonelist *zonelist; | 2088 | struct zonelist *zonelist; |
| 2102 | 2089 | ||
| 2103 | for (i = 0; i < MAX_NR_ZONES; i++) { | 2090 | zonelist = &pgdat->node_zonelists[1]; |
| 2104 | zonelist = pgdat->node_zonelists + MAX_NR_ZONES + i; | 2091 | j = build_zonelists_node(pgdat, zonelist, 0, MAX_NR_ZONES - 1); |
| 2105 | j = build_zonelists_node(pgdat, zonelist, 0, i); | 2092 | zonelist->zones[j] = NULL; |
| 2106 | zonelist->zones[j] = NULL; | ||
| 2107 | } | ||
| 2108 | } | 2093 | } |
| 2109 | 2094 | ||
| 2110 | /* | 2095 | /* |
| @@ -2117,27 +2102,24 @@ static int node_order[MAX_NUMNODES]; | |||
| 2117 | 2102 | ||
| 2118 | static void build_zonelists_in_zone_order(pg_data_t *pgdat, int nr_nodes) | 2103 | static void build_zonelists_in_zone_order(pg_data_t *pgdat, int nr_nodes) |
| 2119 | { | 2104 | { |
| 2120 | enum zone_type i; | ||
| 2121 | int pos, j, node; | 2105 | int pos, j, node; |
| 2122 | int zone_type; /* needs to be signed */ | 2106 | int zone_type; /* needs to be signed */ |
| 2123 | struct zone *z; | 2107 | struct zone *z; |
| 2124 | struct zonelist *zonelist; | 2108 | struct zonelist *zonelist; |
| 2125 | 2109 | ||
| 2126 | for (i = 0; i < MAX_NR_ZONES; i++) { | 2110 | zonelist = &pgdat->node_zonelists[0]; |
| 2127 | zonelist = pgdat->node_zonelists + i; | 2111 | pos = 0; |
| 2128 | pos = 0; | 2112 | for (zone_type = MAX_NR_ZONES - 1; zone_type >= 0; zone_type--) { |
| 2129 | for (zone_type = i; zone_type >= 0; zone_type--) { | 2113 | for (j = 0; j < nr_nodes; j++) { |
| 2130 | for (j = 0; j < nr_nodes; j++) { | 2114 | node = node_order[j]; |
| 2131 | node = node_order[j]; | 2115 | z = &NODE_DATA(node)->node_zones[zone_type]; |
| 2132 | z = &NODE_DATA(node)->node_zones[zone_type]; | 2116 | if (populated_zone(z)) { |
| 2133 | if (populated_zone(z)) { | 2117 | zonelist->zones[pos++] = z; |
| 2134 | zonelist->zones[pos++] = z; | 2118 | check_highest_zone(zone_type); |
| 2135 | check_highest_zone(zone_type); | ||
| 2136 | } | ||
| 2137 | } | 2119 | } |
| 2138 | } | 2120 | } |
| 2139 | zonelist->zones[pos] = NULL; | ||
| 2140 | } | 2121 | } |
| 2122 | zonelist->zones[pos] = NULL; | ||
| 2141 | } | 2123 | } |
| 2142 | 2124 | ||
| 2143 | static int default_zonelist_order(void) | 2125 | static int default_zonelist_order(void) |
| @@ -2264,19 +2246,15 @@ static void build_zonelists(pg_data_t *pgdat) | |||
| 2264 | /* Construct the zonelist performance cache - see further mmzone.h */ | 2246 | /* Construct the zonelist performance cache - see further mmzone.h */ |
| 2265 | static void build_zonelist_cache(pg_data_t *pgdat) | 2247 | static void build_zonelist_cache(pg_data_t *pgdat) |
| 2266 | { | 2248 | { |
| 2267 | int i; | 2249 | struct zonelist *zonelist; |
| 2268 | 2250 | struct zonelist_cache *zlc; | |
| 2269 | for (i = 0; i < MAX_NR_ZONES; i++) { | 2251 | struct zone **z; |
| 2270 | struct zonelist *zonelist; | ||
| 2271 | struct zonelist_cache *zlc; | ||
| 2272 | struct zone **z; | ||
| 2273 | 2252 | ||
| 2274 | zonelist = pgdat->node_zonelists + i; | 2253 | zonelist = &pgdat->node_zonelists[0]; |
| 2275 | zonelist->zlcache_ptr = zlc = &zonelist->zlcache; | 2254 | zonelist->zlcache_ptr = zlc = &zonelist->zlcache; |
| 2276 | bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST); | 2255 | bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST); |
| 2277 | for (z = zonelist->zones; *z; z++) | 2256 | for (z = zonelist->zones; *z; z++) |
| 2278 | zlc->z_to_n[z - zonelist->zones] = zone_to_nid(*z); | 2257 | zlc->z_to_n[z - zonelist->zones] = zone_to_nid(*z); |
| 2279 | } | ||
| 2280 | } | 2258 | } |
| 2281 | 2259 | ||
| 2282 | 2260 | ||
| @@ -2290,45 +2268,43 @@ static void set_zonelist_order(void) | |||
| 2290 | static void build_zonelists(pg_data_t *pgdat) | 2268 | static void build_zonelists(pg_data_t *pgdat) |
| 2291 | { | 2269 | { |
| 2292 | int node, local_node; | 2270 | int node, local_node; |
| 2293 | enum zone_type i,j; | 2271 | enum zone_type j; |
| 2272 | struct zonelist *zonelist; | ||
| 2294 | 2273 | ||
| 2295 | local_node = pgdat->node_id; | 2274 | local_node = pgdat->node_id; |
| 2296 | for (i = 0; i < MAX_NR_ZONES; i++) { | ||
| 2297 | struct zonelist *zonelist; | ||
| 2298 | 2275 | ||
| 2299 | zonelist = pgdat->node_zonelists + i; | 2276 | zonelist = &pgdat->node_zonelists[0]; |
| 2277 | j = build_zonelists_node(pgdat, zonelist, 0, MAX_NR_ZONES - 1); | ||
| 2300 | 2278 | ||
| 2301 | j = build_zonelists_node(pgdat, zonelist, 0, i); | 2279 | /* |
| 2302 | /* | 2280 | * Now we build the zonelist so that it contains the zones |
| 2303 | * Now we build the zonelist so that it contains the zones | 2281 | * of all the other nodes. |
| 2304 | * of all the other nodes. | 2282 | * We don't want to pressure a particular node, so when |
| 2305 | * We don't want to pressure a particular node, so when | 2283 | * building the zones for node N, we make sure that the |
| 2306 | * building the zones for node N, we make sure that the | 2284 | * zones coming right after the local ones are those from |
| 2307 | * zones coming right after the local ones are those from | 2285 | * node N+1 (modulo N) |
| 2308 | * node N+1 (modulo N) | 2286 | */ |
| 2309 | */ | 2287 | for (node = local_node + 1; node < MAX_NUMNODES; node++) { |
| 2310 | for (node = local_node + 1; node < MAX_NUMNODES; node++) { | 2288 | if (!node_online(node)) |
| 2311 | if (!node_online(node)) | 2289 | continue; |
| 2312 | continue; | 2290 | j = build_zonelists_node(NODE_DATA(node), zonelist, j, |
| 2313 | j = build_zonelists_node(NODE_DATA(node), zonelist, j, i); | 2291 | MAX_NR_ZONES - 1); |
| 2314 | } | ||
| 2315 | for (node = 0; node < local_node; node++) { | ||
| 2316 | if (!node_online(node)) | ||
| 2317 | continue; | ||
| 2318 | j = build_zonelists_node(NODE_DATA(node), zonelist, j, i); | ||
| 2319 | } | ||
| 2320 | |||
| 2321 | zonelist->zones[j] = NULL; | ||
| 2322 | } | 2292 | } |
| 2293 | for (node = 0; node < local_node; node++) { | ||
| 2294 | if (!node_online(node)) | ||
| 2295 | continue; | ||
| 2296 | j = build_zonelists_node(NODE_DATA(node), zonelist, j, | ||
| 2297 | MAX_NR_ZONES - 1); | ||
| 2298 | } | ||
| 2299 | |||
| 2300 | zonelist->zones[j] = NULL; | ||
| 2323 | } | 2301 | } |
| 2324 | 2302 | ||
| 2325 | /* non-NUMA variant of zonelist performance cache - just NULL zlcache_ptr */ | 2303 | /* non-NUMA variant of zonelist performance cache - just NULL zlcache_ptr */ |
| 2326 | static void build_zonelist_cache(pg_data_t *pgdat) | 2304 | static void build_zonelist_cache(pg_data_t *pgdat) |
| 2327 | { | 2305 | { |
| 2328 | int i; | 2306 | pgdat->node_zonelists[0].zlcache_ptr = NULL; |
| 2329 | 2307 | pgdat->node_zonelists[1].zlcache_ptr = NULL; | |
| 2330 | for (i = 0; i < MAX_NR_ZONES; i++) | ||
| 2331 | pgdat->node_zonelists[i].zlcache_ptr = NULL; | ||
| 2332 | } | 2308 | } |
| 2333 | 2309 | ||
| 2334 | #endif /* CONFIG_NUMA */ | 2310 | #endif /* CONFIG_NUMA */ |
| @@ -3243,6 +3243,8 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) | |||
| 3243 | struct zonelist *zonelist; | 3243 | struct zonelist *zonelist; |
| 3244 | gfp_t local_flags; | 3244 | gfp_t local_flags; |
| 3245 | struct zone **z; | 3245 | struct zone **z; |
| 3246 | struct zone *zone; | ||
| 3247 | enum zone_type high_zoneidx = gfp_zone(flags); | ||
| 3246 | void *obj = NULL; | 3248 | void *obj = NULL; |
| 3247 | int nid; | 3249 | int nid; |
| 3248 | 3250 | ||
| @@ -3257,10 +3259,10 @@ retry: | |||
| 3257 | * Look through allowed nodes for objects available | 3259 | * Look through allowed nodes for objects available |
| 3258 | * from existing per node queues. | 3260 | * from existing per node queues. |
| 3259 | */ | 3261 | */ |
| 3260 | for (z = zonelist->zones; *z && !obj; z++) { | 3262 | for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { |
| 3261 | nid = zone_to_nid(*z); | 3263 | nid = zone_to_nid(zone); |
| 3262 | 3264 | ||
| 3263 | if (cpuset_zone_allowed_hardwall(*z, flags) && | 3265 | if (cpuset_zone_allowed_hardwall(zone, flags) && |
| 3264 | cache->nodelists[nid] && | 3266 | cache->nodelists[nid] && |
| 3265 | cache->nodelists[nid]->free_objects) | 3267 | cache->nodelists[nid]->free_objects) |
| 3266 | obj = ____cache_alloc_node(cache, | 3268 | obj = ____cache_alloc_node(cache, |
| @@ -1285,6 +1285,8 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags) | |||
| 1285 | #ifdef CONFIG_NUMA | 1285 | #ifdef CONFIG_NUMA |
| 1286 | struct zonelist *zonelist; | 1286 | struct zonelist *zonelist; |
| 1287 | struct zone **z; | 1287 | struct zone **z; |
| 1288 | struct zone *zone; | ||
| 1289 | enum zone_type high_zoneidx = gfp_zone(flags); | ||
| 1288 | struct page *page; | 1290 | struct page *page; |
| 1289 | 1291 | ||
| 1290 | /* | 1292 | /* |
| @@ -1310,12 +1312,12 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags) | |||
| 1310 | return NULL; | 1312 | return NULL; |
| 1311 | 1313 | ||
| 1312 | zonelist = node_zonelist(slab_node(current->mempolicy), flags); | 1314 | zonelist = node_zonelist(slab_node(current->mempolicy), flags); |
| 1313 | for (z = zonelist->zones; *z; z++) { | 1315 | for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { |
| 1314 | struct kmem_cache_node *n; | 1316 | struct kmem_cache_node *n; |
| 1315 | 1317 | ||
| 1316 | n = get_node(s, zone_to_nid(*z)); | 1318 | n = get_node(s, zone_to_nid(zone)); |
| 1317 | 1319 | ||
| 1318 | if (n && cpuset_zone_allowed_hardwall(*z, flags) && | 1320 | if (n && cpuset_zone_allowed_hardwall(zone, flags) && |
| 1319 | n->nr_partial > MIN_PARTIAL) { | 1321 | n->nr_partial > MIN_PARTIAL) { |
| 1320 | page = get_partial_node(n); | 1322 | page = get_partial_node(n); |
| 1321 | if (page) | 1323 | if (page) |
diff --git a/mm/vmscan.c b/mm/vmscan.c index ef8551e0d2d0..0515b8f44894 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
| @@ -1249,15 +1249,13 @@ static unsigned long shrink_zone(int priority, struct zone *zone, | |||
| 1249 | static unsigned long shrink_zones(int priority, struct zonelist *zonelist, | 1249 | static unsigned long shrink_zones(int priority, struct zonelist *zonelist, |
| 1250 | struct scan_control *sc) | 1250 | struct scan_control *sc) |
| 1251 | { | 1251 | { |
| 1252 | enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask); | ||
| 1252 | unsigned long nr_reclaimed = 0; | 1253 | unsigned long nr_reclaimed = 0; |
| 1253 | struct zone **zones = zonelist->zones; | 1254 | struct zone **z; |
| 1254 | int i; | 1255 | struct zone *zone; |
| 1255 | |||
| 1256 | 1256 | ||
| 1257 | sc->all_unreclaimable = 1; | 1257 | sc->all_unreclaimable = 1; |
| 1258 | for (i = 0; zones[i] != NULL; i++) { | 1258 | for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { |
| 1259 | struct zone *zone = zones[i]; | ||
| 1260 | |||
| 1261 | if (!populated_zone(zone)) | 1259 | if (!populated_zone(zone)) |
| 1262 | continue; | 1260 | continue; |
| 1263 | /* | 1261 | /* |
| @@ -1311,8 +1309,9 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, | |||
| 1311 | unsigned long nr_reclaimed = 0; | 1309 | unsigned long nr_reclaimed = 0; |
| 1312 | struct reclaim_state *reclaim_state = current->reclaim_state; | 1310 | struct reclaim_state *reclaim_state = current->reclaim_state; |
| 1313 | unsigned long lru_pages = 0; | 1311 | unsigned long lru_pages = 0; |
| 1314 | struct zone **zones = zonelist->zones; | 1312 | struct zone **z; |
| 1315 | int i; | 1313 | struct zone *zone; |
| 1314 | enum zone_type high_zoneidx = gfp_zone(gfp_mask); | ||
| 1316 | 1315 | ||
| 1317 | if (scan_global_lru(sc)) | 1316 | if (scan_global_lru(sc)) |
| 1318 | count_vm_event(ALLOCSTALL); | 1317 | count_vm_event(ALLOCSTALL); |
| @@ -1320,8 +1319,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, | |||
| 1320 | * mem_cgroup will not do shrink_slab. | 1319 | * mem_cgroup will not do shrink_slab. |
| 1321 | */ | 1320 | */ |
| 1322 | if (scan_global_lru(sc)) { | 1321 | if (scan_global_lru(sc)) { |
| 1323 | for (i = 0; zones[i] != NULL; i++) { | 1322 | for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { |
| 1324 | struct zone *zone = zones[i]; | ||
| 1325 | 1323 | ||
| 1326 | if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) | 1324 | if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) |
| 1327 | continue; | 1325 | continue; |
| @@ -1385,8 +1383,7 @@ out: | |||
| 1385 | priority = 0; | 1383 | priority = 0; |
| 1386 | 1384 | ||
| 1387 | if (scan_global_lru(sc)) { | 1385 | if (scan_global_lru(sc)) { |
| 1388 | for (i = 0; zones[i] != NULL; i++) { | 1386 | for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { |
| 1389 | struct zone *zone = zones[i]; | ||
| 1390 | 1387 | ||
| 1391 | if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) | 1388 | if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) |
| 1392 | continue; | 1389 | continue; |
