Diffstat (limited to 'include/linux/mmzone.h')
 -rw-r--r--	include/linux/mmzone.h	178
 1 file changed, 121 insertions(+), 57 deletions(-)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index d6120fa6911..59855b8718a 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -46,6 +46,29 @@ struct zone_padding {
 #define ZONE_PADDING(name)
 #endif
 
+enum zone_stat_item {
+	NR_ANON_PAGES,	/* Mapped anonymous pages */
+	NR_FILE_MAPPED,	/* pagecache pages mapped into pagetables.
+			   only modified from process context */
+	NR_FILE_PAGES,
+	NR_SLAB_RECLAIMABLE,
+	NR_SLAB_UNRECLAIMABLE,
+	NR_PAGETABLE,	/* used for pagetables */
+	NR_FILE_DIRTY,
+	NR_WRITEBACK,
+	NR_UNSTABLE_NFS,	/* NFS unstable pages */
+	NR_BOUNCE,
+	NR_VMSCAN_WRITE,
+#ifdef CONFIG_NUMA
+	NUMA_HIT,		/* allocated in intended node */
+	NUMA_MISS,		/* allocated in non intended node */
+	NUMA_FOREIGN,		/* was intended here, hit elsewhere */
+	NUMA_INTERLEAVE_HIT,	/* interleaver preferred this zone */
+	NUMA_LOCAL,		/* allocation from local node */
+	NUMA_OTHER,		/* allocation from other node */
+#endif
+	NR_VM_ZONE_STAT_ITEMS };
+
 struct per_cpu_pages {
 	int count;		/* number of pages in the list */
 	int high;		/* high watermark, emptying needed */
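The enum gives every zone counter a stable index into the vm_stat arrays added further down in this patch. A rough sketch of how such an index is consumed on the read side, modeled on the helpers in include/linux/vmstat.h (illustrative only, not part of this diff):

	static inline unsigned long zone_page_state(struct zone *zone,
						enum zone_stat_item item)
	{
		long x = atomic_long_read(&zone->vm_stat[item]);

		/* Per-cpu deltas not yet folded back can briefly drive
		 * the global counter negative; clamp for consumers. */
		if (x < 0)
			x = 0;
		return x;
	}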
@@ -55,13 +78,9 @@ struct per_cpu_pages {
 
 struct per_cpu_pageset {
 	struct per_cpu_pages pcp[2];	/* 0: hot.  1: cold */
-#ifdef CONFIG_NUMA
-	unsigned long numa_hit;		/* allocated in intended node */
-	unsigned long numa_miss;	/* allocated in non intended node */
-	unsigned long numa_foreign;	/* was intended here, hit elsewhere */
-	unsigned long interleave_hit;	/* interleaver prefered this zone */
-	unsigned long local_node;	/* allocation from local node */
-	unsigned long other_node;	/* allocation from other node */
+#ifdef CONFIG_SMP
+	s8 stat_threshold;
+	s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS];
 #endif
 } ____cacheline_aligned_in_smp;
 
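The two s8 fields replace the old NUMA-only counters with a generic per-cpu delta scheme: each CPU batches small updates in vm_stat_diff[] and folds them into the zone-wide atomic only once they cross stat_threshold, so the hot path touches nothing but cache-local state. A minimal sketch of the update side, modeled on __mod_zone_page_state() in mm/vmstat.c (illustrative; assumes the caller has preemption disabled):

	static inline void mod_zone_state(struct zone *zone,
					enum zone_stat_item item, int delta)
	{
		struct per_cpu_pageset *p = zone_pcp(zone, smp_processor_id());
		s8 *diff = p->vm_stat_diff + item;

		*diff += delta;
		/* Fold into the global counter only when the batched
		 * delta outgrows the threshold. */
		if (*diff > p->stat_threshold || *diff < -p->stat_threshold) {
			atomic_long_add(*diff, &zone->vm_stat[item]);
			*diff = 0;
		}
	}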
@@ -71,53 +90,68 @@ struct per_cpu_pageset {
 #define zone_pcp(__z, __cpu) (&(__z)->pageset[(__cpu)])
 #endif
 
-#define ZONE_DMA		0
-#define ZONE_DMA32		1
-#define ZONE_NORMAL		2
-#define ZONE_HIGHMEM		3
-
-#define MAX_NR_ZONES		4	/* Sync this with ZONES_SHIFT */
-#define ZONES_SHIFT		2	/* ceil(log2(MAX_NR_ZONES)) */
-
+enum zone_type {
+	/*
+	 * ZONE_DMA is used when there are devices that are not able
+	 * to do DMA to all of addressable memory (ZONE_NORMAL). Then we
+	 * carve out the portion of memory that is needed for these devices.
+	 * The range is arch specific.
+	 *
+	 * Some examples
+	 *
+	 * Architecture		Limit
+	 * ---------------------------
+	 * parisc, ia64, sparc	<4G
+	 * s390			<2G
+	 * arm26		<48M
+	 * arm			Various
+	 * alpha		Unlimited or 0-16MB.
+	 *
+	 * i386, x86_64 and multiple other arches
+	 *			<16M.
+	 */
+	ZONE_DMA,
+#ifdef CONFIG_ZONE_DMA32
+	/*
+	 * x86_64 needs two ZONE_DMAs because it supports devices that are
+	 * only able to do DMA to the lower 16M but also 32 bit devices that
+	 * can only do DMA areas below 4G.
+	 */
+	ZONE_DMA32,
+#endif
+	/*
+	 * Normal addressable memory is in ZONE_NORMAL. DMA operations can be
+	 * performed on pages in ZONE_NORMAL if the DMA devices support
+	 * transfers to all addressable memory.
+	 */
+	ZONE_NORMAL,
+#ifdef CONFIG_HIGHMEM
+	/*
+	 * A memory area that is only addressable by the kernel through
+	 * mapping portions into its own address space. This is for example
+	 * used by i386 to allow the kernel to address the memory beyond
+	 * 900MB. The kernel will set up special mappings (page
+	 * table entries on i386) for each page that the kernel needs to
+	 * access.
+	 */
+	ZONE_HIGHMEM,
+#endif
+	MAX_NR_ZONES
+};
 
 /*
  * When a memory allocation must conform to specific limitations (such
  * as being suitable for DMA) the caller will pass in hints to the
  * allocator in the gfp_mask, in the zone modifier bits.  These bits
  * are used to select a priority ordered list of memory zones which
- * match the requested limits. GFP_ZONEMASK defines which bits within
- * the gfp_mask should be considered as zone modifiers.  Each valid
- * combination of the zone modifier bits has a corresponding list
- * of zones (in node_zonelists).  Thus for two zone modifiers there
- * will be a maximum of 4 (2 ** 2) zonelists, for 3 modifiers there will
- * be 8 (2 ** 3) zonelists.  GFP_ZONETYPES defines the number of possible
- * combinations of zone modifiers in "zone modifier space".
- *
- * As an optimisation any zone modifier bits which are only valid when
- * no other zone modifier bits are set (loners) should be placed in
- * the highest order bits of this field.  This allows us to reduce the
- * extent of the zonelists thus saving space.  For example in the case
- * of three zone modifier bits, we could require up to eight zonelists.
- * If the left most zone modifier is a "loner" then the highest valid
- * zonelist would be four allowing us to allocate only five zonelists.
- * Use the first form for GFP_ZONETYPES when the left most bit is not
- * a "loner", otherwise use the second.
- *
- * NOTE! Make sure this matches the zones in <linux/gfp.h>
+ * match the requested limits. See gfp_zone() in include/linux/gfp.h
  */
-#define GFP_ZONEMASK	0x07
-/* #define GFP_ZONETYPES	(GFP_ZONEMASK + 1) */		/* Non-loner */
-#define GFP_ZONETYPES	((GFP_ZONEMASK + 1) / 2 + 1)		/* Loner */
 
-/*
- * On machines where it is needed (eg PCs) we divide physical memory
- * into multiple physical zones. On a 32bit PC we have 4 zones:
- *
- * ZONE_DMA	  < 16 MB	ISA DMA capable memory
- * ZONE_DMA32	     0 MB	Empty
- * ZONE_NORMAL	16-896 MB	direct mapped by the kernel
- * ZONE_HIGHMEM	 > 896 MB	only page cache and user processes
- */
+#if !defined(CONFIG_ZONE_DMA32) && !defined(CONFIG_HIGHMEM)
+#define ZONES_SHIFT 1
+#else
+#define ZONES_SHIFT 2
+#endif
 
 struct zone {
 	/* Fields commonly accessed by the page allocator */
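With the zones enumerated, MAX_NR_ZONES comes out as 2, 3 or 4 depending on CONFIG_ZONE_DMA32 and CONFIG_HIGHMEM, and the conditional ZONES_SHIFT keeps tracking ceil(log2(MAX_NR_ZONES)): one bit suffices only when both options are off and just ZONE_DMA and ZONE_NORMAL exist. An illustrative compile-time check of that invariant (not part of the patch; BUILD_BUG_ON comes from linux/kernel.h):

	static inline void zones_shift_fits(void)
	{
		/* Zone indices are packed into ZONES_SHIFT bits of
		 * page->flags, so every zone must be representable. */
		BUILD_BUG_ON(MAX_NR_ZONES > (1 << ZONES_SHIFT));
	}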
@@ -134,6 +168,12 @@ struct zone {
 	unsigned long		lowmem_reserve[MAX_NR_ZONES];
 
 #ifdef CONFIG_NUMA
+	int node;
+	/*
+	 * zone reclaim becomes active if more unmapped pages exist.
+	 */
+	unsigned long		min_unmapped_pages;
+	unsigned long		min_slab_pages;
 	struct per_cpu_pageset	*pageset[NR_CPUS];
 #else
 	struct per_cpu_pageset	pageset[NR_CPUS];
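The new NUMA fields feed zone reclaim: node caches the node id the zone belongs to, and min_unmapped_pages / min_slab_pages are the floors below which reclaim stops bothering. A rough sketch of the gate they enable, modeled on zone_reclaim() in mm/vmscan.c (the exact expression there may differ; zone_page_state() as sketched earlier):

	static int zone_reclaim_worthwhile(struct zone *zone)
	{
		unsigned long unmapped =
			zone_page_state(zone, NR_FILE_PAGES) -
			zone_page_state(zone, NR_FILE_MAPPED);

		/* Only reclaim while the easily-freed, unmapped
		 * pagecache remains above the configured floor. */
		return unmapped > zone->min_unmapped_pages;
	}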
@@ -165,12 +205,8 @@ struct zone {
 	/* A count of how many reclaimers are scanning this zone */
 	atomic_t		reclaim_in_progress;
 
-	/*
-	 * timestamp (in jiffies) of the last zone reclaim that did not
-	 * result in freeing of pages. This is used to avoid repeated scans
-	 * if all memory in the zone is in use.
-	 */
-	unsigned long		last_unsuccessful_zone_reclaim;
+	/* Zone statistics */
+	atomic_long_t		vm_stat[NR_VM_ZONE_STAT_ITEMS];
 
 	/*
 	 * prev_priority holds the scanning priority for this zone.  It is
@@ -249,7 +285,6 @@ struct zone {
 	char			*name;
 } ____cacheline_internodealigned_in_smp;
 
-
 /*
  * The "priority" of VM scanning is how much of the queues we will scan in one
  * go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the
@@ -272,6 +307,18 @@ struct zonelist {
 	struct zone *zones[MAX_NUMNODES * MAX_NR_ZONES + 1]; // NULL delimited
 };
 
+#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
+struct node_active_region {
+	unsigned long start_pfn;
+	unsigned long end_pfn;
+	int nid;
+};
+#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
+
+#ifndef CONFIG_DISCONTIGMEM
+/* The array of struct pages - for discontigmem use pgdat->lmem_map */
+extern struct page *mem_map;
+#endif
 
 /*
  * The pg_data_t structure is used in machines with CONFIG_DISCONTIGMEM
@@ -287,7 +334,7 @@ struct zonelist {
 struct bootmem_data;
 typedef struct pglist_data {
 	struct zone node_zones[MAX_NR_ZONES];
-	struct zonelist node_zonelists[GFP_ZONETYPES];
+	struct zonelist node_zonelists[MAX_NR_ZONES];
 	int nr_zones;
 #ifdef CONFIG_FLAT_NODE_MEM_MAP
 	struct page *node_mem_map;
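node_zonelists[] can shrink from GFP_ZONETYPES to MAX_NR_ZONES entries because gfp_zone() now yields an enum zone_type that indexes the array directly: one zonelist per highest-allowed zone, no loner-bit arithmetic. A hypothetical lookup sketching the intended use (gfp_zone() lives in include/linux/gfp.h, NODE_DATA() in the arch headers):

	static inline struct zonelist *node_zonelist_sketch(int nid, gfp_t flags)
	{
		/* gfp_zone() maps the zone-modifier bits of flags to
		 * the highest zone the allocation may use. */
		return NODE_DATA(nid)->node_zonelists + gfp_zone(flags);
	}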
@@ -356,12 +403,16 @@ static inline int populated_zone(struct zone *zone)
 	return (!!zone->present_pages);
 }
 
-static inline int is_highmem_idx(int idx)
+static inline int is_highmem_idx(enum zone_type idx)
 {
+#ifdef CONFIG_HIGHMEM
 	return (idx == ZONE_HIGHMEM);
+#else
+	return 0;
+#endif
 }
 
-static inline int is_normal_idx(int idx)
+static inline int is_normal_idx(enum zone_type idx)
 {
 	return (idx == ZONE_NORMAL);
 }
@@ -374,7 +425,11 @@ static inline int is_normal_idx(int idx)
  */
 static inline int is_highmem(struct zone *zone)
 {
+#ifdef CONFIG_HIGHMEM
 	return zone == zone->zone_pgdat->node_zones + ZONE_HIGHMEM;
+#else
+	return 0;
+#endif
 }
 
 static inline int is_normal(struct zone *zone)
@@ -384,7 +439,11 @@ static inline int is_normal(struct zone *zone)
 
 static inline int is_dma32(struct zone *zone)
 {
+#ifdef CONFIG_ZONE_DMA32
 	return zone == zone->zone_pgdat->node_zones + ZONE_DMA32;
+#else
+	return 0;
+#endif
 }
 
 static inline int is_dma(struct zone *zone)
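Note the pattern in the helpers above: when the config option is off, the stub returns a constant 0 instead of disappearing, so callers need no #ifdef of their own and the compiler discards the dead branch. A hypothetical caller, purely for illustration:

	static unsigned long dma32_present_pages(struct zone *zone)
	{
		/* With !CONFIG_ZONE_DMA32, is_dma32() is constant 0 and
		 * this branch is compiled away entirely. */
		if (is_dma32(zone))
			return zone->present_pages;
		return 0;
	}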
@@ -402,6 +461,10 @@ int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *,
 			void __user *, size_t *, loff_t *);
 int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file *,
 			void __user *, size_t *, loff_t *);
+int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
+			struct file *, void __user *, size_t *, loff_t *);
+int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
+			struct file *, void __user *, size_t *, loff_t *);
 
 #include <linux/topology.h>
 /* Returns the number of the current Node. */
@@ -469,7 +532,8 @@ extern struct zone *next_zone(struct zone *zone);
 
 #endif
 
-#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
+#if !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) && \
+	!defined(CONFIG_ARCH_POPULATES_NODE_MAP)
 #define early_pfn_to_nid(nid)	(0UL)
 #endif
 