Diffstat (limited to 'include/linux/mmzone.h')
 -rw-r--r--	include/linux/mmzone.h	178
 1 file changed, 121 insertions(+), 57 deletions(-)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index d6120fa69116..59855b8718a0 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -46,6 +46,29 @@ struct zone_padding {
 #define ZONE_PADDING(name)
 #endif
 
+enum zone_stat_item {
+	NR_ANON_PAGES,	/* Mapped anonymous pages */
+	NR_FILE_MAPPED,	/* pagecache pages mapped into pagetables.
+			   only modified from process context */
+	NR_FILE_PAGES,
+	NR_SLAB_RECLAIMABLE,
+	NR_SLAB_UNRECLAIMABLE,
+	NR_PAGETABLE,	/* used for pagetables */
+	NR_FILE_DIRTY,
+	NR_WRITEBACK,
+	NR_UNSTABLE_NFS,	/* NFS unstable pages */
+	NR_BOUNCE,
+	NR_VMSCAN_WRITE,
+#ifdef CONFIG_NUMA
+	NUMA_HIT,		/* allocated in intended node */
+	NUMA_MISS,		/* allocated in non intended node */
+	NUMA_FOREIGN,		/* was intended here, hit elsewhere */
+	NUMA_INTERLEAVE_HIT,	/* interleaver preferred this zone */
+	NUMA_LOCAL,		/* allocation from local node */
+	NUMA_OTHER,		/* allocation from other node */
+#endif
+	NR_VM_ZONE_STAT_ITEMS };
+
 struct per_cpu_pages {
 	int count;		/* number of pages in the list */
 	int high;		/* high watermark, emptying needed */
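The new enum gives every per-zone counter a stable index, which is what lets generic code iterate over the statistics instead of naming one struct field per counter. As a rough illustration only (none of this is in the diff), a /proc/vmstat-style dump reduces to a loop over the items; the vmstat_text[] name table and the global_page_state() reader are assumptions here, modelled on the helpers the rest of this series puts in mm/vmstat.c and include/linux/vmstat.h:

	#include <linux/seq_file.h>

	/* Sketch only: iterate all zone counters generically.
	 * vmstat_text[] and global_page_state() are assumed from the
	 * companion vmstat patches, not from this diff. */
	static void dump_zone_stats(struct seq_file *m)
	{
		int i;

		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
			seq_printf(m, "%s %lu\n", vmstat_text[i],
				   global_page_state(i));
	}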
@@ -55,13 +78,9 @@ struct per_cpu_pages {
 
 struct per_cpu_pageset {
 	struct per_cpu_pages pcp[2];	/* 0: hot.  1: cold */
-#ifdef CONFIG_NUMA
-	unsigned long numa_hit;		/* allocated in intended node */
-	unsigned long numa_miss;	/* allocated in non intended node */
-	unsigned long numa_foreign;	/* was intended here, hit elsewhere */
-	unsigned long interleave_hit;	/* interleaver prefered this zone */
-	unsigned long local_node;	/* allocation from local node */
-	unsigned long other_node;	/* allocation from other node */
+#ifdef CONFIG_SMP
+	s8 stat_threshold;
+	s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS];
 #endif
 } ____cacheline_aligned_in_smp;
 
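The six NUMA counters disappear from the pageset because they are now ordinary zone_stat_items; what replaces them is one small signed per-CPU delta per counter plus a threshold. Updates stay CPU-local and cheap until a delta overflows the threshold, at which point it is folded into the zone-wide counter. A minimal sketch of that fold, in the spirit of __mod_zone_page_state() in mm/vmstat.c (simplified: preemption and irq handling omitted):

	/* Sketch of the per-CPU differential update; the real version
	 * is __mod_zone_page_state() in mm/vmstat.c. */
	static inline void mod_zone_state_sketch(struct zone *zone,
					enum zone_stat_item item, int delta)
	{
		struct per_cpu_pageset *p = zone_pcp(zone, smp_processor_id());
		s8 *diff = p->vm_stat_diff + item;

		*diff += delta;		/* common case: CPU-local, no atomics */
		if (unlikely(*diff > p->stat_threshold ||
			     *diff < -p->stat_threshold)) {
			atomic_long_add(*diff, &zone->vm_stat[item]);
			*diff = 0;	/* rare case: fold into zone->vm_stat */
		}
	}

The signed s8 matters: deltas can go negative between folds, so a value read from zone->vm_stat alone can be transiently off by up to NR_CPUS * stat_threshold.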
@@ -71,53 +90,68 @@ struct per_cpu_pageset {
 #define zone_pcp(__z, __cpu) (&(__z)->pageset[(__cpu)])
 #endif
 
-#define ZONE_DMA		0
-#define ZONE_DMA32		1
-#define ZONE_NORMAL		2
-#define ZONE_HIGHMEM		3
-
-#define MAX_NR_ZONES		4	/* Sync this with ZONES_SHIFT */
-#define ZONES_SHIFT		2	/* ceil(log2(MAX_NR_ZONES)) */
-
+enum zone_type {
+	/*
+	 * ZONE_DMA is used when there are devices that are not able
+	 * to do DMA to all of addressable memory (ZONE_NORMAL). Then we
+	 * carve out the portion of memory that is needed for these devices.
+	 * The range is arch specific.
+	 *
+	 * Some examples
+	 *
+	 * Architecture		Limit
+	 * ---------------------------
+	 * parisc, ia64, sparc	<4G
+	 * s390			<2G
+	 * arm26		<48M
+	 * arm			Various
+	 * alpha		Unlimited or 0-16MB.
+	 *
+	 * i386, x86_64 and multiple other arches
+	 *			<16M.
+	 */
+	ZONE_DMA,
+#ifdef CONFIG_ZONE_DMA32
+	/*
+	 * x86_64 needs two ZONE_DMAs because it supports devices that are
+	 * only able to do DMA to the lower 16M but also 32 bit devices that
+	 * can only do DMA areas below 4G.
+	 */
+	ZONE_DMA32,
+#endif
+	/*
+	 * Normal addressable memory is in ZONE_NORMAL. DMA operations can be
+	 * performed on pages in ZONE_NORMAL if the DMA devices support
+	 * transfers to all addressable memory.
+	 */
+	ZONE_NORMAL,
+#ifdef CONFIG_HIGHMEM
+	/*
+	 * A memory area that is only addressable by the kernel through
+	 * mapping portions into its own address space. This is for example
+	 * used by i386 to allow the kernel to address the memory beyond
+	 * 900MB. The kernel will set up special mappings (page
+	 * table entries on i386) for each page that the kernel needs to
+	 * access.
+	 */
+	ZONE_HIGHMEM,
+#endif
+	MAX_NR_ZONES
+};
 
 /*
  * When a memory allocation must conform to specific limitations (such
  * as being suitable for DMA) the caller will pass in hints to the
  * allocator in the gfp_mask, in the zone modifier bits.  These bits
  * are used to select a priority ordered list of memory zones which
- * match the requested limits. GFP_ZONEMASK defines which bits within
- * the gfp_mask should be considered as zone modifiers.  Each valid
- * combination of the zone modifier bits has a corresponding list
- * of zones (in node_zonelists).  Thus for two zone modifiers there
- * will be a maximum of 4 (2 ** 2) zonelists, for 3 modifiers there will
- * be 8 (2 ** 3) zonelists.  GFP_ZONETYPES defines the number of possible
- * combinations of zone modifiers in "zone modifier space".
- *
- * As an optimisation any zone modifier bits which are only valid when
- * no other zone modifier bits are set (loners) should be placed in
- * the highest order bits of this field.  This allows us to reduce the
- * extent of the zonelists thus saving space.  For example in the case
- * of three zone modifier bits, we could require up to eight zonelists.
- * If the left most zone modifier is a "loner" then the highest valid
- * zonelist would be four allowing us to allocate only five zonelists.
- * Use the first form for GFP_ZONETYPES when the left most bit is not
- * a "loner", otherwise use the second.
- *
- * NOTE! Make sure this matches the zones in <linux/gfp.h>
+ * match the requested limits. See gfp_zone() in include/linux/gfp.h
  */
-#define GFP_ZONEMASK	0x07
-/* #define GFP_ZONETYPES	(GFP_ZONEMASK + 1) */	/* Non-loner */
-#define GFP_ZONETYPES	((GFP_ZONEMASK + 1) / 2 + 1)	/* Loner */
 
-/*
- * On machines where it is needed (eg PCs) we divide physical memory
- * into multiple physical zones. On a 32bit PC we have 4 zones:
- *
- * ZONE_DMA	  < 16 MB	ISA DMA capable memory
- * ZONE_DMA32	     0 MB	Empty
- * ZONE_NORMAL	16-896 MB	direct mapped by the kernel
- * ZONE_HIGHMEM	 > 896 MB	only page cache and user processes
- */
+#if !defined(CONFIG_ZONE_DMA32) && !defined(CONFIG_HIGHMEM)
+#define ZONES_SHIFT 1
+#else
+#define ZONES_SHIFT 2
+#endif
 
 struct zone {
 	/* Fields commonly accessed by the page allocator */
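With zones now an enum whose members can be compiled out, the old "zone modifier space" machinery (GFP_ZONEMASK/GFP_ZONETYPES and the loner-bit optimisation) is gone: there is exactly one zonelist per zone type, and gfp_zone() maps the modifier bits in a gfp_mask straight to an enum zone_type. ZONES_SHIFT follows the same logic: with ZONE_DMA32 and ZONE_HIGHMEM both configured out only two zones remain and one bit suffices, otherwise up to four zones need two bits. A sketch of the mapping the comment above points to (see include/linux/gfp.h for the authoritative gfp_zone()):

	/* Sketch of gfp_zone(); the real one lives in include/linux/gfp.h. */
	static inline enum zone_type gfp_zone(gfp_t flags)
	{
		if (flags & __GFP_DMA)
			return ZONE_DMA;
	#ifdef CONFIG_ZONE_DMA32
		if (flags & __GFP_DMA32)
			return ZONE_DMA32;
	#endif
	#ifdef CONFIG_HIGHMEM
		if (flags & __GFP_HIGHMEM)
			return ZONE_HIGHMEM;
	#endif
		return ZONE_NORMAL;
	}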
@@ -134,6 +168,12 @@ struct zone {
 	unsigned long		lowmem_reserve[MAX_NR_ZONES];
 
 #ifdef CONFIG_NUMA
+	int node;
+	/*
+	 * zone reclaim becomes active if more unmapped pages exist.
+	 */
+	unsigned long		min_unmapped_pages;
+	unsigned long		min_slab_pages;
 	struct per_cpu_pageset	*pageset[NR_CPUS];
 #else
 	struct per_cpu_pageset	pageset[NR_CPUS];
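min_unmapped_pages and min_slab_pages give zone reclaim explicit per-zone floors, replacing the jiffies-based back-off that the next hunk removes: reclaim is only attempted while the zone still holds more unmapped pagecache (or reclaimable slab) than its floor. A hedged sketch of the kind of gate zone_reclaim() in mm/vmscan.c can now apply, using zone_page_state() as sketched after the next hunk:

	/* Sketch only; the authoritative check is in mm/vmscan.c. */
	static int zone_reclaim_useful(struct zone *zone)
	{
		unsigned long unmapped = zone_page_state(zone, NR_FILE_PAGES) -
					 zone_page_state(zone, NR_FILE_MAPPED);

		return unmapped > zone->min_unmapped_pages ||
		       zone_page_state(zone, NR_SLAB_RECLAIMABLE) >
						zone->min_slab_pages;
	}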
@@ -165,12 +205,8 @@ struct zone {
 	/* A count of how many reclaimers are scanning this zone */
 	atomic_t		reclaim_in_progress;
 
-	/*
-	 * timestamp (in jiffies) of the last zone reclaim that did not
-	 * result in freeing of pages. This is used to avoid repeated scans
-	 * if all memory in the zone is in use.
-	 */
-	unsigned long		last_unsuccessful_zone_reclaim;
+	/* Zone statistics */
+	atomic_long_t		vm_stat[NR_VM_ZONE_STAT_ITEMS];
 
 	/*
 	 * prev_priority holds the scanning priority for this zone.  It is
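All per-zone statistics now live in a single vm_stat[] array of atomic_long_t indexed by enum zone_stat_item. Reading one back is a plain atomic read, clamped at zero because unfolded per-CPU deltas can make the global value transiently negative. A minimal sketch of the reader, modelled on the zone_page_state() helper the companion patches add to include/linux/vmstat.h:

	/* Sketch; an equivalent zone_page_state() is added to
	 * include/linux/vmstat.h by the companion patches. */
	static inline unsigned long zone_page_state(struct zone *zone,
						enum zone_stat_item item)
	{
		long x = atomic_long_read(&zone->vm_stat[item]);

	#ifdef CONFIG_SMP
		if (x < 0)	/* per-CPU deltas not yet folded in */
			x = 0;
	#endif
		return x;
	}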
@@ -249,7 +285,6 @@ struct zone {
 	char			*name;
 } ____cacheline_internodealigned_in_smp;
 
-
 /*
  * The "priority" of VM scanning is how much of the queues we will scan in one
  * go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the
@@ -272,6 +307,18 @@ struct zonelist {
 	struct zone *zones[MAX_NUMNODES * MAX_NR_ZONES + 1]; // NULL delimited
 };
 
+#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
+struct node_active_region {
+	unsigned long start_pfn;
+	unsigned long end_pfn;
+	int nid;
+};
+#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
+
+#ifndef CONFIG_DISCONTIGMEM
+/* The array of struct pages - for discontigmem use pgdat->lmem_map */
+extern struct page *mem_map;
+#endif
 
 /*
  * The pg_data_t structure is used in machines with CONFIG_DISCONTIGMEM
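struct node_active_region is the building block of the new arch-independent zone sizing: an architecture no longer computes zone boundaries itself, it just registers its physical memory ranges and lets the core derive node and zone extents. A sketch of how boot code might use it, assuming the add_active_range()/free_area_init_nodes() interface from the same patch series (the PFN values are made up for illustration):

	/* Sketch, assuming the registration API from this patch series. */
	static void __init example_arch_zone_setup(void)
	{
		unsigned long max_zone_pfns[MAX_NR_ZONES] = { 0 };

		/* Register the real memory ranges present on node 0. */
		add_active_range(0, 0, 0x1000);		/* 0 - 16MB */
		add_active_range(0, 0x1800, 0x20000);	/* 24MB - 512MB */

		/* Let the core size ZONE_DMA and ZONE_NORMAL from them. */
		max_zone_pfns[ZONE_DMA] = 0x1000;
		max_zone_pfns[ZONE_NORMAL] = 0x20000;
		free_area_init_nodes(max_zone_pfns);
	}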
@@ -287,7 +334,7 @@ struct zonelist {
 struct bootmem_data;
 typedef struct pglist_data {
 	struct zone node_zones[MAX_NR_ZONES];
-	struct zonelist node_zonelists[GFP_ZONETYPES];
+	struct zonelist node_zonelists[MAX_NR_ZONES];
 	int nr_zones;
 #ifdef CONFIG_FLAT_NODE_MEM_MAP
 	struct page	*node_mem_map;
@@ -356,12 +403,16 @@ static inline int populated_zone(struct zone *zone)
 	return (!!zone->present_pages);
 }
 
-static inline int is_highmem_idx(int idx)
+static inline int is_highmem_idx(enum zone_type idx)
 {
+#ifdef CONFIG_HIGHMEM
 	return (idx == ZONE_HIGHMEM);
+#else
+	return 0;
+#endif
 }
 
-static inline int is_normal_idx(int idx)
+static inline int is_normal_idx(enum zone_type idx)
 {
 	return (idx == ZONE_NORMAL);
 }
@@ -374,7 +425,11 @@ static inline int is_normal_idx(enum zone_type idx)
  */
 static inline int is_highmem(struct zone *zone)
 {
+#ifdef CONFIG_HIGHMEM
 	return zone == zone->zone_pgdat->node_zones + ZONE_HIGHMEM;
+#else
+	return 0;
+#endif
 }
 
 static inline int is_normal(struct zone *zone)
@@ -384,7 +439,11 @@ static inline int is_normal(struct zone *zone)
 
 static inline int is_dma32(struct zone *zone)
 {
+#ifdef CONFIG_ZONE_DMA32
 	return zone == zone->zone_pgdat->node_zones + ZONE_DMA32;
+#else
+	return 0;
+#endif
 }
 
 static inline int is_dma(struct zone *zone)
@@ -402,6 +461,10 @@ int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *,
 			void __user *, size_t *, loff_t *);
 int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file *,
 			void __user *, size_t *, loff_t *);
+int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
+			struct file *, void __user *, size_t *, loff_t *);
+int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
+			struct file *, void __user *, size_t *, loff_t *);
 
 #include <linux/topology.h>
 /* Returns the number of the current Node. */
@@ -469,7 +532,8 @@ extern struct zone *next_zone(struct zone *zone);
 
 #endif
 
-#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
+#if !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) && \
+	!defined(CONFIG_ARCH_POPULATES_NODE_MAP)
 #define early_pfn_to_nid(nid)  (0UL)
 #endif
 
