aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristoph Lameter <clameter@sgi.com>2006-06-30 04:55:44 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2006-06-30 14:25:36 -0400
commitca889e6c45e0b112cb2ca9d35afc66297519b5d5 (patch)
tree0a5efdec2a61540204d34bcbf56dc691d8f9c391
parentbab1846a0582f627f5ec22aa2dc5f4f3e82e8176 (diff)
[PATCH] Use Zoned VM Counters for NUMA statistics
The numa statistics are really event counters. But they are per node and so we have had special treatment for these counters through additional fields on the pcp structure. We can now use the per zone nature of the zoned VM counters to realize these.

This will shrink the size of the pcp structure on NUMA systems. We will have some room to add additional per zone counters that will all still fit in the same cacheline.

    Bits  Prior pcp size        Size after patch     We can add
    ------------------------------------------------------------------
    64    128 bytes (16 words)  80 bytes (10 words)  48
    32    76 bytes (19 words)   56 bytes (14 words)  8 (64 byte cacheline)
                                                     72 (128 byte)

Remove the special statistics for numa and replace them with zoned vm counters. This has the side effect that global sums of these events now show up in /proc/vmstat.

Also take the opportunity to move the zone_statistics() function from page_alloc.c into vmstat.c.

Discussions: V2 http://marc.theaimsgroup.com/?t=115048227000002&r=1&w=2

Signed-off-by: Christoph Lameter <clameter@sgi.com> Acked-by: Andi Kleen <ak@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--drivers/base/node.c34
-rw-r--r--include/linux/mmzone.h17
-rw-r--r--include/linux/vmstat.h10
-rw-r--r--mm/mempolicy.c6
-rw-r--r--mm/page_alloc.c23
-rw-r--r--mm/vmstat.c73
6 files changed, 73 insertions, 90 deletions
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 772eadac57a7..d7de1753e094 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -94,28 +94,6 @@ static SYSDEV_ATTR(meminfo, S_IRUGO, node_read_meminfo, NULL);
94 94
95static ssize_t node_read_numastat(struct sys_device * dev, char * buf) 95static ssize_t node_read_numastat(struct sys_device * dev, char * buf)
96{ 96{
97 unsigned long numa_hit, numa_miss, interleave_hit, numa_foreign;
98 unsigned long local_node, other_node;
99 int i, cpu;
100 pg_data_t *pg = NODE_DATA(dev->id);
101 numa_hit = 0;
102 numa_miss = 0;
103 interleave_hit = 0;
104 numa_foreign = 0;
105 local_node = 0;
106 other_node = 0;
107 for (i = 0; i < MAX_NR_ZONES; i++) {
108 struct zone *z = &pg->node_zones[i];
109 for_each_online_cpu(cpu) {
110 struct per_cpu_pageset *ps = zone_pcp(z,cpu);
111 numa_hit += ps->numa_hit;
112 numa_miss += ps->numa_miss;
113 numa_foreign += ps->numa_foreign;
114 interleave_hit += ps->interleave_hit;
115 local_node += ps->local_node;
116 other_node += ps->other_node;
117 }
118 }
119 return sprintf(buf, 97 return sprintf(buf,
120 "numa_hit %lu\n" 98 "numa_hit %lu\n"
121 "numa_miss %lu\n" 99 "numa_miss %lu\n"
@@ -123,12 +101,12 @@ static ssize_t node_read_numastat(struct sys_device * dev, char * buf)
123 "interleave_hit %lu\n" 101 "interleave_hit %lu\n"
124 "local_node %lu\n" 102 "local_node %lu\n"
125 "other_node %lu\n", 103 "other_node %lu\n",
126 numa_hit, 104 node_page_state(dev->id, NUMA_HIT),
127 numa_miss, 105 node_page_state(dev->id, NUMA_MISS),
128 numa_foreign, 106 node_page_state(dev->id, NUMA_FOREIGN),
129 interleave_hit, 107 node_page_state(dev->id, NUMA_INTERLEAVE_HIT),
130 local_node, 108 node_page_state(dev->id, NUMA_LOCAL),
131 other_node); 109 node_page_state(dev->id, NUMA_OTHER));
132} 110}
133static SYSDEV_ATTR(numastat, S_IRUGO, node_read_numastat, NULL); 111static SYSDEV_ATTR(numastat, S_IRUGO, node_read_numastat, NULL);
134 112
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 2dbeec1d2874..27e748eb72b0 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -57,6 +57,14 @@ enum zone_stat_item {
57 NR_WRITEBACK, 57 NR_WRITEBACK,
58 NR_UNSTABLE_NFS, /* NFS unstable pages */ 58 NR_UNSTABLE_NFS, /* NFS unstable pages */
59 NR_BOUNCE, 59 NR_BOUNCE,
60#ifdef CONFIG_NUMA
61 NUMA_HIT, /* allocated in intended node */
62 NUMA_MISS, /* allocated in non intended node */
63 NUMA_FOREIGN, /* was intended here, hit elsewhere */
64 NUMA_INTERLEAVE_HIT, /* interleaver preferred this zone */
65 NUMA_LOCAL, /* allocation from local node */
66 NUMA_OTHER, /* allocation from other node */
67#endif
60 NR_VM_ZONE_STAT_ITEMS }; 68 NR_VM_ZONE_STAT_ITEMS };
61 69
62struct per_cpu_pages { 70struct per_cpu_pages {
@@ -71,15 +79,6 @@ struct per_cpu_pageset {
71#ifdef CONFIG_SMP 79#ifdef CONFIG_SMP
72 s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS]; 80 s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS];
73#endif 81#endif
74
75#ifdef CONFIG_NUMA
76 unsigned long numa_hit; /* allocated in intended node */
77 unsigned long numa_miss; /* allocated in non intended node */
78 unsigned long numa_foreign; /* was intended here, hit elsewhere */
79 unsigned long interleave_hit; /* interleaver prefered this zone */
80 unsigned long local_node; /* allocation from local node */
81 unsigned long other_node; /* allocation from other node */
82#endif
83} ____cacheline_aligned_in_smp; 82} ____cacheline_aligned_in_smp;
84 83
85#ifdef CONFIG_NUMA 84#ifdef CONFIG_NUMA
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 5fad1613e7d6..16173b63ee67 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -173,9 +173,15 @@ static inline unsigned long node_page_state(int node,
173#endif 173#endif
174 zone_page_state(&zones[ZONE_DMA], item); 174 zone_page_state(&zones[ZONE_DMA], item);
175} 175}
176
177extern void zone_statistics(struct zonelist *, struct zone *);
178
176#else 179#else
180
177#define node_page_state(node, item) global_page_state(item) 181#define node_page_state(node, item) global_page_state(item)
178#endif 182#define zone_statistics(_zl,_z) do { } while (0)
183
184#endif /* CONFIG_NUMA */
179 185
180#define __add_zone_page_state(__z, __i, __d) \ 186#define __add_zone_page_state(__z, __i, __d) \
181 __mod_zone_page_state(__z, __i, __d) 187 __mod_zone_page_state(__z, __i, __d)
@@ -190,6 +196,8 @@ static inline void zap_zone_vm_stats(struct zone *zone)
190 memset(zone->vm_stat, 0, sizeof(zone->vm_stat)); 196 memset(zone->vm_stat, 0, sizeof(zone->vm_stat));
191} 197}
192 198
199extern void inc_zone_state(struct zone *, enum zone_stat_item);
200
193#ifdef CONFIG_SMP 201#ifdef CONFIG_SMP
194void __mod_zone_page_state(struct zone *, enum zone_stat_item item, int); 202void __mod_zone_page_state(struct zone *, enum zone_stat_item item, int);
195void __inc_zone_page_state(struct page *, enum zone_stat_item); 203void __inc_zone_page_state(struct page *, enum zone_stat_item);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 6b9740bbf4c0..e07e27e846a2 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1209,10 +1209,8 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
1209 1209
1210 zl = NODE_DATA(nid)->node_zonelists + gfp_zone(gfp); 1210 zl = NODE_DATA(nid)->node_zonelists + gfp_zone(gfp);
1211 page = __alloc_pages(gfp, order, zl); 1211 page = __alloc_pages(gfp, order, zl);
1212 if (page && page_zone(page) == zl->zones[0]) { 1212 if (page && page_zone(page) == zl->zones[0])
1213 zone_pcp(zl->zones[0],get_cpu())->interleave_hit++; 1213 inc_zone_page_state(page, NUMA_INTERLEAVE_HIT);
1214 put_cpu();
1215 }
1216 return page; 1214 return page;
1217} 1215}
1218 1216
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6aa2c31f513b..d61671260f92 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -709,27 +709,6 @@ void drain_local_pages(void)
709} 709}
710#endif /* CONFIG_PM */ 710#endif /* CONFIG_PM */
711 711
712static void zone_statistics(struct zonelist *zonelist, struct zone *z, int cpu)
713{
714#ifdef CONFIG_NUMA
715 pg_data_t *pg = z->zone_pgdat;
716 pg_data_t *orig = zonelist->zones[0]->zone_pgdat;
717 struct per_cpu_pageset *p;
718
719 p = zone_pcp(z, cpu);
720 if (pg == orig) {
721 p->numa_hit++;
722 } else {
723 p->numa_miss++;
724 zone_pcp(zonelist->zones[0], cpu)->numa_foreign++;
725 }
726 if (pg == NODE_DATA(numa_node_id()))
727 p->local_node++;
728 else
729 p->other_node++;
730#endif
731}
732
733/* 712/*
734 * Free a 0-order page 713 * Free a 0-order page
735 */ 714 */
@@ -827,7 +806,7 @@ again:
827 } 806 }
828 807
829 __mod_page_state_zone(zone, pgalloc, 1 << order); 808 __mod_page_state_zone(zone, pgalloc, 1 << order);
830 zone_statistics(zonelist, zone, cpu); 809 zone_statistics(zonelist, zone);
831 local_irq_restore(flags); 810 local_irq_restore(flags);
832 put_cpu(); 811 put_cpu();
833 812
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 06a6d1052198..ee7f89666250 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -185,9 +185,8 @@ EXPORT_SYMBOL(mod_zone_page_state);
185 * in between and therefore the atomicity vs. interrupt cannot be exploited 185 * in between and therefore the atomicity vs. interrupt cannot be exploited
186 * in a useful way here. 186 * in a useful way here.
187 */ 187 */
188void __inc_zone_page_state(struct page *page, enum zone_stat_item item) 188static void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
189{ 189{
190 struct zone *zone = page_zone(page);
191 s8 *p = diff_pointer(zone, item); 190 s8 *p = diff_pointer(zone, item);
192 191
193 (*p)++; 192 (*p)++;
@@ -197,6 +196,11 @@ void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
197 *p = 0; 196 *p = 0;
198 } 197 }
199} 198}
199
200void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
201{
202 __inc_zone_state(page_zone(page), item);
203}
200EXPORT_SYMBOL(__inc_zone_page_state); 204EXPORT_SYMBOL(__inc_zone_page_state);
201 205
202void __dec_zone_page_state(struct page *page, enum zone_stat_item item) 206void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
@@ -213,22 +217,23 @@ void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
213} 217}
214EXPORT_SYMBOL(__dec_zone_page_state); 218EXPORT_SYMBOL(__dec_zone_page_state);
215 219
220void inc_zone_state(struct zone *zone, enum zone_stat_item item)
221{
222 unsigned long flags;
223
224 local_irq_save(flags);
225 __inc_zone_state(zone, item);
226 local_irq_restore(flags);
227}
228
216void inc_zone_page_state(struct page *page, enum zone_stat_item item) 229void inc_zone_page_state(struct page *page, enum zone_stat_item item)
217{ 230{
218 unsigned long flags; 231 unsigned long flags;
219 struct zone *zone; 232 struct zone *zone;
220 s8 *p;
221 233
222 zone = page_zone(page); 234 zone = page_zone(page);
223 local_irq_save(flags); 235 local_irq_save(flags);
224 p = diff_pointer(zone, item); 236 __inc_zone_state(zone, item);
225
226 (*p)++;
227
228 if (unlikely(*p > STAT_THRESHOLD)) {
229 zone_page_state_add(*p, zone, item);
230 *p = 0;
231 }
232 local_irq_restore(flags); 237 local_irq_restore(flags);
233} 238}
234EXPORT_SYMBOL(inc_zone_page_state); 239EXPORT_SYMBOL(inc_zone_page_state);
@@ -297,6 +302,28 @@ EXPORT_SYMBOL(refresh_vm_stats);
297 302
298#endif 303#endif
299 304
305#ifdef CONFIG_NUMA
306/*
307 * zonelist = the list of zones passed to the allocator
308 * z = the zone from which the allocation occurred.
309 *
310 * Must be called with interrupts disabled.
311 */
312void zone_statistics(struct zonelist *zonelist, struct zone *z)
313{
314 if (z->zone_pgdat == zonelist->zones[0]->zone_pgdat) {
315 __inc_zone_state(z, NUMA_HIT);
316 } else {
317 __inc_zone_state(z, NUMA_MISS);
318 __inc_zone_state(zonelist->zones[0], NUMA_FOREIGN);
319 }
320 if (z->zone_pgdat == NODE_DATA(numa_node_id()))
321 __inc_zone_state(z, NUMA_LOCAL);
322 else
323 __inc_zone_state(z, NUMA_OTHER);
324}
325#endif
326
300#ifdef CONFIG_PROC_FS 327#ifdef CONFIG_PROC_FS
301 328
302#include <linux/seq_file.h> 329#include <linux/seq_file.h>
@@ -369,6 +396,15 @@ static char *vmstat_text[] = {
369 "nr_unstable", 396 "nr_unstable",
370 "nr_bounce", 397 "nr_bounce",
371 398
399#ifdef CONFIG_NUMA
400 "numa_hit",
401 "numa_miss",
402 "numa_foreign",
403 "numa_interleave",
404 "numa_local",
405 "numa_other",
406#endif
407
372 /* Event counters */ 408 /* Event counters */
373 "pgpgin", 409 "pgpgin",
374 "pgpgout", 410 "pgpgout",
@@ -490,21 +526,6 @@ static int zoneinfo_show(struct seq_file *m, void *arg)
490 pageset->pcp[j].high, 526 pageset->pcp[j].high,
491 pageset->pcp[j].batch); 527 pageset->pcp[j].batch);
492 } 528 }
493#ifdef CONFIG_NUMA
494 seq_printf(m,
495 "\n numa_hit: %lu"
496 "\n numa_miss: %lu"
497 "\n numa_foreign: %lu"
498 "\n interleave_hit: %lu"
499 "\n local_node: %lu"
500 "\n other_node: %lu",
501 pageset->numa_hit,
502 pageset->numa_miss,
503 pageset->numa_foreign,
504 pageset->interleave_hit,
505 pageset->local_node,
506 pageset->other_node);
507#endif
508 } 529 }
509 seq_printf(m, 530 seq_printf(m,
510 "\n all_unreclaimable: %u" 531 "\n all_unreclaimable: %u"