path: root/include/linux/mmzone.h
Diffstat (limited to 'include/linux/mmzone.h')
-rw-r--r--  include/linux/mmzone.h | 104
1 file changed, 13 insertions(+), 91 deletions(-)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index d94347737292..e23a9e704536 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -37,10 +37,10 @@
 
 enum {
 	MIGRATE_UNMOVABLE,
-	MIGRATE_RECLAIMABLE,
 	MIGRATE_MOVABLE,
+	MIGRATE_RECLAIMABLE,
 	MIGRATE_PCPTYPES,	/* the number of types on the pcp lists */
-	MIGRATE_RESERVE = MIGRATE_PCPTYPES,
+	MIGRATE_HIGHATOMIC = MIGRATE_PCPTYPES,
 #ifdef CONFIG_CMA
 	/*
 	 * MIGRATE_CMA migration type is designed to mimic the way
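
The comment in the hunk above calls MIGRATE_PCPTYPES "the number of types on the pcp lists": every migratetype declared before it gets a per-cpu free list, and the new MIGRATE_HIGHATOMIC aliases that count, so it falls outside the per-cpu range. Below is a minimal stand-alone sketch of that bound check; the enum is copied from the hunk, while goes_to_pcp_list() and main() are invented for illustration and are not kernel code.

/* Illustrative only -- a stand-alone mock, not kernel code. */
#include <stdbool.h>
#include <stdio.h>

enum {
	MIGRATE_UNMOVABLE,
	MIGRATE_MOVABLE,
	MIGRATE_RECLAIMABLE,
	MIGRATE_PCPTYPES,	/* the number of types on the pcp lists */
	MIGRATE_HIGHATOMIC = MIGRATE_PCPTYPES,
};

/* A type is eligible for a per-cpu list only if it is one of the first
 * MIGRATE_PCPTYPES entries; MIGRATE_HIGHATOMIC shares the count's value
 * and therefore is not. */
static bool goes_to_pcp_list(int migratetype)
{
	return migratetype < MIGRATE_PCPTYPES;
}

int main(void)
{
	printf("MOVABLE on pcp list:    %d\n", goes_to_pcp_list(MIGRATE_MOVABLE));
	printf("HIGHATOMIC on pcp list: %d\n", goes_to_pcp_list(MIGRATE_HIGHATOMIC));
	return 0;
}
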
@@ -334,13 +334,16 @@ struct zone {
 	/* zone watermarks, access with *_wmark_pages(zone) macros */
 	unsigned long watermark[NR_WMARK];
 
+	unsigned long nr_reserved_highatomic;
+
 	/*
-	 * We don't know if the memory that we're going to allocate will be freeable
-	 * or/and it will be released eventually, so to avoid totally wasting several
-	 * GB of ram we must reserve some of the lower zone memory (otherwise we risk
-	 * to run OOM on the lower zones despite there's tons of freeable ram
-	 * on the higher zones). This array is recalculated at runtime if the
-	 * sysctl_lowmem_reserve_ratio sysctl changes.
+	 * We don't know if the memory that we're going to allocate will be
+	 * freeable or/and it will be released eventually, so to avoid totally
+	 * wasting several GB of ram we must reserve some of the lower zone
+	 * memory (otherwise we risk to run OOM on the lower zones despite
+	 * there being tons of freeable ram on the higher zones). This array is
+	 * recalculated at runtime if the sysctl_lowmem_reserve_ratio sysctl
+	 * changes.
 	 */
 	long lowmem_reserve[MAX_NR_ZONES];
 
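
The comment in this hunk describes lowmem_reserve[] as memory held back in lower zones so that allocations which could have been served from higher zones cannot exhaust them. Below is a simplified stand-alone model of how such a per-classzone reserve might be stacked on top of a watermark check; struct zone_model, zone_can_allocate() and the numbers are illustrative inventions, not the kernel's actual watermark code.

/* Illustrative only -- a simplified model, not __zone_watermark_ok(). */
#include <stdbool.h>
#include <stdio.h>

#define MAX_NR_ZONES 4

struct zone_model {
	unsigned long free_pages;
	unsigned long watermark;		/* e.g. the low watermark */
	long lowmem_reserve[MAX_NR_ZONES];	/* indexed by classzone_idx */
};

/* Assumption: the reserve for the requested classzone is added on top of
 * the normal watermark before the zone agrees to serve the allocation. */
static bool zone_can_allocate(const struct zone_model *z, int classzone_idx)
{
	return z->free_pages > z->watermark + z->lowmem_reserve[classzone_idx];
}

int main(void)
{
	/* A low "DMA-like" zone keeps a large reserve against requests that
	 * targeted the highest zone but fell back to it. */
	struct zone_model low = {
		.free_pages = 2000,
		.watermark = 128,
		.lowmem_reserve = { 0, 256, 1024, 4096 },
	};

	printf("request aimed at zone 0: %d\n", zone_can_allocate(&low, 0));
	printf("request aimed at zone 3: %d\n", zone_can_allocate(&low, 3));
	return 0;
}
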
@@ -429,12 +432,6 @@ struct zone {
 
 	const char		*name;
 
-	/*
-	 * Number of MIGRATE_RESERVE page block. To maintain for just
-	 * optimization. Protected by zone->lock.
-	 */
-	int			nr_migrate_reserve_block;
-
 #ifdef CONFIG_MEMORY_ISOLATION
 	/*
 	 * Number of isolated pageblock. It is used to solve incorrect
@@ -589,75 +586,8 @@ static inline bool zone_is_empty(struct zone *zone)
  * [1]	: No fallback (__GFP_THISNODE)
  */
 #define MAX_ZONELISTS 2
-
-
-/*
- * We cache key information from each zonelist for smaller cache
- * footprint when scanning for free pages in get_page_from_freelist().
- *
- * 1) The BITMAP fullzones tracks which zones in a zonelist have come
- *    up short of free memory since the last time (last_fullzone_zap)
- *    we zero'd fullzones.
- * 2) The array z_to_n[] maps each zone in the zonelist to its node
- *    id, so that we can efficiently evaluate whether that node is
- *    set in the current tasks mems_allowed.
- *
- * Both fullzones and z_to_n[] are one-to-one with the zonelist,
- * indexed by a zones offset in the zonelist zones[] array.
- *
- * The get_page_from_freelist() routine does two scans.  During the
- * first scan, we skip zones whose corresponding bit in 'fullzones'
- * is set or whose corresponding node in current->mems_allowed (which
- * comes from cpusets) is not set.  During the second scan, we bypass
- * this zonelist_cache, to ensure we look methodically at each zone.
- *
- * Once per second, we zero out (zap) fullzones, forcing us to
- * reconsider nodes that might have regained more free memory.
- * The field last_full_zap is the time we last zapped fullzones.
- *
- * This mechanism reduces the amount of time we waste repeatedly
- * reexaming zones for free memory when they just came up low on
- * memory momentarilly ago.
- *
- * The zonelist_cache struct members logically belong in struct
- * zonelist.  However, the mempolicy zonelists constructed for
- * MPOL_BIND are intentionally variable length (and usually much
- * shorter).  A general purpose mechanism for handling structs with
- * multiple variable length members is more mechanism than we want
- * here.  We resort to some special case hackery instead.
- *
- * The MPOL_BIND zonelists don't need this zonelist_cache (in good
- * part because they are shorter), so we put the fixed length stuff
- * at the front of the zonelist struct, ending in a variable length
- * zones[], as is needed by MPOL_BIND.
- *
- * Then we put the optional zonelist cache on the end of the zonelist
- * struct.  This optional stuff is found by a 'zlcache_ptr' pointer in
- * the fixed length portion at the front of the struct.  This pointer
- * both enables us to find the zonelist cache, and in the case of
- * MPOL_BIND zonelists, (which will just set the zlcache_ptr to NULL)
- * to know that the zonelist cache is not there.
- *
- * The end result is that struct zonelists come in two flavors:
- *  1) The full, fixed length version, shown below, and
- *  2) The custom zonelists for MPOL_BIND.
- * The custom MPOL_BIND zonelists have a NULL zlcache_ptr and no zlcache.
- *
- * Even though there may be multiple CPU cores on a node modifying
- * fullzones or last_full_zap in the same zonelist_cache at the same
- * time, we don't lock it.  This is just hint data - if it is wrong now
- * and then, the allocator will still function, perhaps a bit slower.
- */
-
-
-struct zonelist_cache {
-	unsigned short z_to_n[MAX_ZONES_PER_ZONELIST];		/* zone->nid */
-	DECLARE_BITMAP(fullzones, MAX_ZONES_PER_ZONELIST);	/* zone full? */
-	unsigned long last_full_zap;		/* when last zap'd (jiffies) */
-};
 #else
 #define MAX_ZONELISTS 1
-struct zonelist_cache;
 #endif
 
 /*
@@ -675,9 +605,6 @@ struct zoneref {
  * allocation, the other zones are fallback zones, in decreasing
  * priority.
  *
- * If zlcache_ptr is not NULL, then it is just the address of zlcache,
- * as explained above.  If zlcache_ptr is NULL, there is no zlcache.
- *
  * To speed the reading of the zonelist, the zonerefs contain the zone index
  * of the entry being read. Helper functions to access information given
  * a struct zoneref are
@@ -687,11 +614,7 @@ struct zoneref {
  * zonelist_node_idx()	- Return the index of the node for an entry
  */
 struct zonelist {
-	struct zonelist_cache *zlcache_ptr;		     // NULL or &zlcache
 	struct zoneref _zonerefs[MAX_ZONES_PER_ZONELIST + 1];
-#ifdef CONFIG_NUMA
-	struct zonelist_cache zlcache;			     // optional ...
-#endif
 };
 
 #ifndef CONFIG_DISCONTIGMEM
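
The surrounding comment explains that each zoneref stores the zone index next to the zone pointer, so readers of the zonelist can get the index without touching the zone itself, and it names zonelist_zone(), zonelist_zone_idx() and zonelist_node_idx() as the accessors. Below is a stand-alone mock of that idea; the structs, field names and data are invented for illustration, and only the helper names follow the comment.

/* Illustrative only -- a mock of the zoneref idea, not kernel code. */
#include <stdio.h>

struct zone_mock {
	int node;
	const char *name;
};

struct zoneref_mock {
	struct zone_mock *zone;	/* the zone this entry refers to */
	int zone_idx;		/* cached index, read without *zone */
};

static struct zone_mock *zonelist_zone(struct zoneref_mock *z)	{ return z->zone; }
static int zonelist_zone_idx(struct zoneref_mock *z)		{ return z->zone_idx; }
static int zonelist_node_idx(struct zoneref_mock *z)		{ return z->zone->node; }

int main(void)
{
	struct zone_mock normal = { .node = 0, .name = "Normal" };
	struct zoneref_mock ref = { .zone = &normal, .zone_idx = 2 };

	printf("%s: idx=%d node=%d\n", zonelist_zone(&ref)->name,
	       zonelist_zone_idx(&ref), zonelist_node_idx(&ref));
	return 0;
}
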
@@ -817,14 +740,13 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx);
 bool zone_watermark_ok(struct zone *z, unsigned int order,
 		unsigned long mark, int classzone_idx, int alloc_flags);
 bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
-		unsigned long mark, int classzone_idx, int alloc_flags);
+		unsigned long mark, int classzone_idx);
 enum memmap_context {
 	MEMMAP_EARLY,
 	MEMMAP_HOTPLUG,
 };
 extern int init_currently_empty_zone(struct zone *zone, unsigned long start_pfn,
-				     unsigned long size,
-				     enum memmap_context context);
+				     unsigned long size);
 
 extern void lruvec_init(struct lruvec *lruvec);
 