diff options
author | Christoph Lameter <clameter@sgi.com> | 2006-06-30 04:55:44 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-06-30 14:25:36 -0400 |
commit | ca889e6c45e0b112cb2ca9d35afc66297519b5d5 (patch) | |
tree | 0a5efdec2a61540204d34bcbf56dc691d8f9c391 | |
parent | bab1846a0582f627f5ec22aa2dc5f4f3e82e8176 (diff) |
[PATCH] Use Zoned VM Counters for NUMA statistics
The NUMA statistics are really event counters. But they are kept per node,
and so far they have needed special treatment through additional fields in
the per-cpu pageset (pcp) structure. We can now use the per-zone nature of
the zoned VM counters to implement them.
This will shrink the size of the pcp structure on NUMA systems. We will
have some room to add additional per zone counters that will all still fit
in the same cacheline.
Bits  Prior pcp size        Size after patch     We can add
------------------------------------------------------------------
64    128 bytes (16 words)  80 bytes (10 words)  48 bytes
32     76 bytes (19 words)  56 bytes (14 words)   8 bytes (64 byte cacheline)
                                                 72 bytes (128 byte cacheline)
Remove the special statistics for numa and replace them with zoned vm
counters. This has the side effect that global sums of these events now
show up in /proc/vmstat.
Also take the opportunity to move the zone_statistics() function from
page_alloc.c into vmstat.c.
Discussions:
V2 http://marc.theaimsgroup.com/?t=115048227000002&r=1&w=2
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Acked-by: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | drivers/base/node.c | 34 | ||||
-rw-r--r-- | include/linux/mmzone.h | 17 | ||||
-rw-r--r-- | include/linux/vmstat.h | 10 | ||||
-rw-r--r-- | mm/mempolicy.c | 6 | ||||
-rw-r--r-- | mm/page_alloc.c | 23 | ||||
-rw-r--r-- | mm/vmstat.c | 73 |
6 files changed, 73 insertions, 90 deletions
diff --git a/drivers/base/node.c b/drivers/base/node.c index 772eadac57a7..d7de1753e094 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c | |||
@@ -94,28 +94,6 @@ static SYSDEV_ATTR(meminfo, S_IRUGO, node_read_meminfo, NULL); | |||
94 | 94 | ||
95 | static ssize_t node_read_numastat(struct sys_device * dev, char * buf) | 95 | static ssize_t node_read_numastat(struct sys_device * dev, char * buf) |
96 | { | 96 | { |
97 | unsigned long numa_hit, numa_miss, interleave_hit, numa_foreign; | ||
98 | unsigned long local_node, other_node; | ||
99 | int i, cpu; | ||
100 | pg_data_t *pg = NODE_DATA(dev->id); | ||
101 | numa_hit = 0; | ||
102 | numa_miss = 0; | ||
103 | interleave_hit = 0; | ||
104 | numa_foreign = 0; | ||
105 | local_node = 0; | ||
106 | other_node = 0; | ||
107 | for (i = 0; i < MAX_NR_ZONES; i++) { | ||
108 | struct zone *z = &pg->node_zones[i]; | ||
109 | for_each_online_cpu(cpu) { | ||
110 | struct per_cpu_pageset *ps = zone_pcp(z,cpu); | ||
111 | numa_hit += ps->numa_hit; | ||
112 | numa_miss += ps->numa_miss; | ||
113 | numa_foreign += ps->numa_foreign; | ||
114 | interleave_hit += ps->interleave_hit; | ||
115 | local_node += ps->local_node; | ||
116 | other_node += ps->other_node; | ||
117 | } | ||
118 | } | ||
119 | return sprintf(buf, | 97 | return sprintf(buf, |
120 | "numa_hit %lu\n" | 98 | "numa_hit %lu\n" |
121 | "numa_miss %lu\n" | 99 | "numa_miss %lu\n" |
@@ -123,12 +101,12 @@ static ssize_t node_read_numastat(struct sys_device * dev, char * buf) | |||
123 | "interleave_hit %lu\n" | 101 | "interleave_hit %lu\n" |
124 | "local_node %lu\n" | 102 | "local_node %lu\n" |
125 | "other_node %lu\n", | 103 | "other_node %lu\n", |
126 | numa_hit, | 104 | node_page_state(dev->id, NUMA_HIT), |
127 | numa_miss, | 105 | node_page_state(dev->id, NUMA_MISS), |
128 | numa_foreign, | 106 | node_page_state(dev->id, NUMA_FOREIGN), |
129 | interleave_hit, | 107 | node_page_state(dev->id, NUMA_INTERLEAVE_HIT), |
130 | local_node, | 108 | node_page_state(dev->id, NUMA_LOCAL), |
131 | other_node); | 109 | node_page_state(dev->id, NUMA_OTHER)); |
132 | } | 110 | } |
133 | static SYSDEV_ATTR(numastat, S_IRUGO, node_read_numastat, NULL); | 111 | static SYSDEV_ATTR(numastat, S_IRUGO, node_read_numastat, NULL); |
134 | 112 | ||
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 2dbeec1d2874..27e748eb72b0 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h | |||
@@ -57,6 +57,14 @@ enum zone_stat_item { | |||
57 | NR_WRITEBACK, | 57 | NR_WRITEBACK, |
58 | NR_UNSTABLE_NFS, /* NFS unstable pages */ | 58 | NR_UNSTABLE_NFS, /* NFS unstable pages */ |
59 | NR_BOUNCE, | 59 | NR_BOUNCE, |
60 | #ifdef CONFIG_NUMA | ||
61 | NUMA_HIT, /* allocated in intended node */ | ||
62 | NUMA_MISS, /* allocated in non intended node */ | ||
63 | NUMA_FOREIGN, /* was intended here, hit elsewhere */ | ||
64 | NUMA_INTERLEAVE_HIT, /* interleaver preferred this zone */ | ||
65 | NUMA_LOCAL, /* allocation from local node */ | ||
66 | NUMA_OTHER, /* allocation from other node */ | ||
67 | #endif | ||
60 | NR_VM_ZONE_STAT_ITEMS }; | 68 | NR_VM_ZONE_STAT_ITEMS }; |
61 | 69 | ||
62 | struct per_cpu_pages { | 70 | struct per_cpu_pages { |
@@ -71,15 +79,6 @@ struct per_cpu_pageset { | |||
71 | #ifdef CONFIG_SMP | 79 | #ifdef CONFIG_SMP |
72 | s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS]; | 80 | s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS]; |
73 | #endif | 81 | #endif |
74 | |||
75 | #ifdef CONFIG_NUMA | ||
76 | unsigned long numa_hit; /* allocated in intended node */ | ||
77 | unsigned long numa_miss; /* allocated in non intended node */ | ||
78 | unsigned long numa_foreign; /* was intended here, hit elsewhere */ | ||
79 | unsigned long interleave_hit; /* interleaver prefered this zone */ | ||
80 | unsigned long local_node; /* allocation from local node */ | ||
81 | unsigned long other_node; /* allocation from other node */ | ||
82 | #endif | ||
83 | } ____cacheline_aligned_in_smp; | 82 | } ____cacheline_aligned_in_smp; |
84 | 83 | ||
85 | #ifdef CONFIG_NUMA | 84 | #ifdef CONFIG_NUMA |
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 5fad1613e7d6..16173b63ee67 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h | |||
@@ -173,9 +173,15 @@ static inline unsigned long node_page_state(int node, | |||
173 | #endif | 173 | #endif |
174 | zone_page_state(&zones[ZONE_DMA], item); | 174 | zone_page_state(&zones[ZONE_DMA], item); |
175 | } | 175 | } |
176 | |||
177 | extern void zone_statistics(struct zonelist *, struct zone *); | ||
178 | |||
176 | #else | 179 | #else |
180 | |||
177 | #define node_page_state(node, item) global_page_state(item) | 181 | #define node_page_state(node, item) global_page_state(item) |
178 | #endif | 182 | #define zone_statistics(_zl,_z) do { } while (0) |
183 | |||
184 | #endif /* CONFIG_NUMA */ | ||
179 | 185 | ||
180 | #define __add_zone_page_state(__z, __i, __d) \ | 186 | #define __add_zone_page_state(__z, __i, __d) \ |
181 | __mod_zone_page_state(__z, __i, __d) | 187 | __mod_zone_page_state(__z, __i, __d) |
@@ -190,6 +196,8 @@ static inline void zap_zone_vm_stats(struct zone *zone) | |||
190 | memset(zone->vm_stat, 0, sizeof(zone->vm_stat)); | 196 | memset(zone->vm_stat, 0, sizeof(zone->vm_stat)); |
191 | } | 197 | } |
192 | 198 | ||
199 | extern void inc_zone_state(struct zone *, enum zone_stat_item); | ||
200 | |||
193 | #ifdef CONFIG_SMP | 201 | #ifdef CONFIG_SMP |
194 | void __mod_zone_page_state(struct zone *, enum zone_stat_item item, int); | 202 | void __mod_zone_page_state(struct zone *, enum zone_stat_item item, int); |
195 | void __inc_zone_page_state(struct page *, enum zone_stat_item); | 203 | void __inc_zone_page_state(struct page *, enum zone_stat_item); |
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 6b9740bbf4c0..e07e27e846a2 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
@@ -1209,10 +1209,8 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order, | |||
1209 | 1209 | ||
1210 | zl = NODE_DATA(nid)->node_zonelists + gfp_zone(gfp); | 1210 | zl = NODE_DATA(nid)->node_zonelists + gfp_zone(gfp); |
1211 | page = __alloc_pages(gfp, order, zl); | 1211 | page = __alloc_pages(gfp, order, zl); |
1212 | if (page && page_zone(page) == zl->zones[0]) { | 1212 | if (page && page_zone(page) == zl->zones[0]) |
1213 | zone_pcp(zl->zones[0],get_cpu())->interleave_hit++; | 1213 | inc_zone_page_state(page, NUMA_INTERLEAVE_HIT); |
1214 | put_cpu(); | ||
1215 | } | ||
1216 | return page; | 1214 | return page; |
1217 | } | 1215 | } |
1218 | 1216 | ||
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6aa2c31f513b..d61671260f92 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -709,27 +709,6 @@ void drain_local_pages(void) | |||
709 | } | 709 | } |
710 | #endif /* CONFIG_PM */ | 710 | #endif /* CONFIG_PM */ |
711 | 711 | ||
712 | static void zone_statistics(struct zonelist *zonelist, struct zone *z, int cpu) | ||
713 | { | ||
714 | #ifdef CONFIG_NUMA | ||
715 | pg_data_t *pg = z->zone_pgdat; | ||
716 | pg_data_t *orig = zonelist->zones[0]->zone_pgdat; | ||
717 | struct per_cpu_pageset *p; | ||
718 | |||
719 | p = zone_pcp(z, cpu); | ||
720 | if (pg == orig) { | ||
721 | p->numa_hit++; | ||
722 | } else { | ||
723 | p->numa_miss++; | ||
724 | zone_pcp(zonelist->zones[0], cpu)->numa_foreign++; | ||
725 | } | ||
726 | if (pg == NODE_DATA(numa_node_id())) | ||
727 | p->local_node++; | ||
728 | else | ||
729 | p->other_node++; | ||
730 | #endif | ||
731 | } | ||
732 | |||
733 | /* | 712 | /* |
734 | * Free a 0-order page | 713 | * Free a 0-order page |
735 | */ | 714 | */ |
@@ -827,7 +806,7 @@ again: | |||
827 | } | 806 | } |
828 | 807 | ||
829 | __mod_page_state_zone(zone, pgalloc, 1 << order); | 808 | __mod_page_state_zone(zone, pgalloc, 1 << order); |
830 | zone_statistics(zonelist, zone, cpu); | 809 | zone_statistics(zonelist, zone); |
831 | local_irq_restore(flags); | 810 | local_irq_restore(flags); |
832 | put_cpu(); | 811 | put_cpu(); |
833 | 812 | ||
diff --git a/mm/vmstat.c b/mm/vmstat.c index 06a6d1052198..ee7f89666250 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c | |||
@@ -185,9 +185,8 @@ EXPORT_SYMBOL(mod_zone_page_state); | |||
185 | * in between and therefore the atomicity vs. interrupt cannot be exploited | 185 | * in between and therefore the atomicity vs. interrupt cannot be exploited |
186 | * in a useful way here. | 186 | * in a useful way here. |
187 | */ | 187 | */ |
188 | void __inc_zone_page_state(struct page *page, enum zone_stat_item item) | 188 | static void __inc_zone_state(struct zone *zone, enum zone_stat_item item) |
189 | { | 189 | { |
190 | struct zone *zone = page_zone(page); | ||
191 | s8 *p = diff_pointer(zone, item); | 190 | s8 *p = diff_pointer(zone, item); |
192 | 191 | ||
193 | (*p)++; | 192 | (*p)++; |
@@ -197,6 +196,11 @@ void __inc_zone_page_state(struct page *page, enum zone_stat_item item) | |||
197 | *p = 0; | 196 | *p = 0; |
198 | } | 197 | } |
199 | } | 198 | } |
199 | |||
200 | void __inc_zone_page_state(struct page *page, enum zone_stat_item item) | ||
201 | { | ||
202 | __inc_zone_state(page_zone(page), item); | ||
203 | } | ||
200 | EXPORT_SYMBOL(__inc_zone_page_state); | 204 | EXPORT_SYMBOL(__inc_zone_page_state); |
201 | 205 | ||
202 | void __dec_zone_page_state(struct page *page, enum zone_stat_item item) | 206 | void __dec_zone_page_state(struct page *page, enum zone_stat_item item) |
@@ -213,22 +217,23 @@ void __dec_zone_page_state(struct page *page, enum zone_stat_item item) | |||
213 | } | 217 | } |
214 | EXPORT_SYMBOL(__dec_zone_page_state); | 218 | EXPORT_SYMBOL(__dec_zone_page_state); |
215 | 219 | ||
220 | void inc_zone_state(struct zone *zone, enum zone_stat_item item) | ||
221 | { | ||
222 | unsigned long flags; | ||
223 | |||
224 | local_irq_save(flags); | ||
225 | __inc_zone_state(zone, item); | ||
226 | local_irq_restore(flags); | ||
227 | } | ||
228 | |||
216 | void inc_zone_page_state(struct page *page, enum zone_stat_item item) | 229 | void inc_zone_page_state(struct page *page, enum zone_stat_item item) |
217 | { | 230 | { |
218 | unsigned long flags; | 231 | unsigned long flags; |
219 | struct zone *zone; | 232 | struct zone *zone; |
220 | s8 *p; | ||
221 | 233 | ||
222 | zone = page_zone(page); | 234 | zone = page_zone(page); |
223 | local_irq_save(flags); | 235 | local_irq_save(flags); |
224 | p = diff_pointer(zone, item); | 236 | __inc_zone_state(zone, item); |
225 | |||
226 | (*p)++; | ||
227 | |||
228 | if (unlikely(*p > STAT_THRESHOLD)) { | ||
229 | zone_page_state_add(*p, zone, item); | ||
230 | *p = 0; | ||
231 | } | ||
232 | local_irq_restore(flags); | 237 | local_irq_restore(flags); |
233 | } | 238 | } |
234 | EXPORT_SYMBOL(inc_zone_page_state); | 239 | EXPORT_SYMBOL(inc_zone_page_state); |
@@ -297,6 +302,28 @@ EXPORT_SYMBOL(refresh_vm_stats); | |||
297 | 302 | ||
298 | #endif | 303 | #endif |
299 | 304 | ||
305 | #ifdef CONFIG_NUMA | ||
306 | /* | ||
307 | * zonelist = the list of zones passed to the allocator | ||
308 | * z = the zone from which the allocation occurred. | ||
309 | * | ||
310 | * Must be called with interrupts disabled. | ||
311 | */ | ||
312 | void zone_statistics(struct zonelist *zonelist, struct zone *z) | ||
313 | { | ||
314 | if (z->zone_pgdat == zonelist->zones[0]->zone_pgdat) { | ||
315 | __inc_zone_state(z, NUMA_HIT); | ||
316 | } else { | ||
317 | __inc_zone_state(z, NUMA_MISS); | ||
318 | __inc_zone_state(zonelist->zones[0], NUMA_FOREIGN); | ||
319 | } | ||
320 | if (z->zone_pgdat == NODE_DATA(numa_node_id())) | ||
321 | __inc_zone_state(z, NUMA_LOCAL); | ||
322 | else | ||
323 | __inc_zone_state(z, NUMA_OTHER); | ||
324 | } | ||
325 | #endif | ||
326 | |||
300 | #ifdef CONFIG_PROC_FS | 327 | #ifdef CONFIG_PROC_FS |
301 | 328 | ||
302 | #include <linux/seq_file.h> | 329 | #include <linux/seq_file.h> |
@@ -369,6 +396,15 @@ static char *vmstat_text[] = { | |||
369 | "nr_unstable", | 396 | "nr_unstable", |
370 | "nr_bounce", | 397 | "nr_bounce", |
371 | 398 | ||
399 | #ifdef CONFIG_NUMA | ||
400 | "numa_hit", | ||
401 | "numa_miss", | ||
402 | "numa_foreign", | ||
403 | "numa_interleave", | ||
404 | "numa_local", | ||
405 | "numa_other", | ||
406 | #endif | ||
407 | |||
372 | /* Event counters */ | 408 | /* Event counters */ |
373 | "pgpgin", | 409 | "pgpgin", |
374 | "pgpgout", | 410 | "pgpgout", |
@@ -490,21 +526,6 @@ static int zoneinfo_show(struct seq_file *m, void *arg) | |||
490 | pageset->pcp[j].high, | 526 | pageset->pcp[j].high, |
491 | pageset->pcp[j].batch); | 527 | pageset->pcp[j].batch); |
492 | } | 528 | } |
493 | #ifdef CONFIG_NUMA | ||
494 | seq_printf(m, | ||
495 | "\n numa_hit: %lu" | ||
496 | "\n numa_miss: %lu" | ||
497 | "\n numa_foreign: %lu" | ||
498 | "\n interleave_hit: %lu" | ||
499 | "\n local_node: %lu" | ||
500 | "\n other_node: %lu", | ||
501 | pageset->numa_hit, | ||
502 | pageset->numa_miss, | ||
503 | pageset->numa_foreign, | ||
504 | pageset->interleave_hit, | ||
505 | pageset->local_node, | ||
506 | pageset->other_node); | ||
507 | #endif | ||
508 | } | 529 | } |
509 | seq_printf(m, | 530 | seq_printf(m, |
510 | "\n all_unreclaimable: %u" | 531 | "\n all_unreclaimable: %u" |