author		Christoph Lameter <clameter@sgi.com>	2006-06-30 04:55:45 -0400
committer	Linus Torvalds <torvalds@g5.osdl.org>	2006-06-30 14:25:36 -0400
commit		f8891e5e1f93a128c3900f82035e8541357896a7 (patch)
tree		97b078ac97970962b17c85d39fd64cb48dc01168 /mm/vmstat.c
parent		ca889e6c45e0b112cb2ca9d35afc66297519b5d5 (diff)
[PATCH] Light weight event counters
The remaining counters in page_state after the zoned VM counter patches have
been applied are all just for show in /proc/vmstat.  They have no essential
function for the VM.

We use a simple increment of per cpu variables.  In order to avoid the most
severe races we disable preempt.  Preempt does not prevent the race between an
increment and an interrupt handler incrementing the same statistics counter.
However, that race is exceedingly rare: we may lose one increment or so, and
there is no requirement (at least not in the kernel) that the vm event
counters be accurate.

In the non preempt case this results in a simple increment for each counter.
For many architectures the compiler reduces this to a single instruction.
That single instruction is atomic for i386 and x86_64, so even the rare race
condition in an interrupt is avoided for both architectures in most cases.

The patchset also adds an off switch for embedded systems that allows
building Linux kernels without these counters.

The implementation of these counters is through inline code that hopefully
results in only a single increment instruction being emitted (i386, x86_64)
or in the increment being hidden through instruction-level parallelism (EPIC
architectures such as ia64 can get that done).

Benefits:
- VM event counter operations usually reduce to a single inline instruction
  on i386 and x86_64.
- No interrupt disable; only preempt disable for the preempt case.
  Preempt disable can also be avoided by moving the counter into a spinlock.
- Handling is similar to zoned VM counters.
- Simple and easily extendable.
- Can be omitted to reduce memory use on embedded systems.

References:
RFC     http://marc.theaimsgroup.com/?l=linux-kernel&m=113512330605497&w=2
RFC     http://marc.theaimsgroup.com/?l=linux-kernel&m=114988082814934&w=2
local_t http://marc.theaimsgroup.com/?l=linux-kernel&m=114991748606690&w=2
V2      http://marc.theaimsgroup.com/?t=115014808400007&r=1&w=2
V3      http://marc.theaimsgroup.com/?l=linux-kernel&m=115024767022346&w=2
V4      http://marc.theaimsgroup.com/?l=linux-kernel&m=115047968808926&w=2

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
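Note: the increment side itself lives in include/linux/vmstat.h and is therefore
not part of the mm/vmstat.c diff below. A minimal sketch of the mechanism the
message describes (a plain per-CPU increment guarded only by preempt disable),
using identifiers from this patch; enum vm_event_item and count_vm_event() are
assumed from context, and the exact header code may differ in detail:

	/* Sketch only; the authoritative definitions are in include/linux/vmstat.h. */
	struct vm_event_state {
		unsigned long event[NR_VM_EVENT_ITEMS];
	};

	DECLARE_PER_CPU(struct vm_event_state, vm_event_states);

	static inline void count_vm_events(enum vm_event_item item, long delta)
	{
		/* get_cpu_var() disables preemption; interrupts stay enabled, so a
		 * concurrent interrupt can, rarely, cost one increment. */
		get_cpu_var(vm_event_states).event[item] += delta;
		put_cpu();
	}

	static inline void count_vm_event(enum vm_event_item item)
	{
		count_vm_events(item, 1);
	}

In a non-preempt build the body collapses to a single memory increment, which on
i386 and x86_64 is one instruction and therefore safe against local interrupts,
as the commit message notes.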
Diffstat (limited to 'mm/vmstat.c')
-rw-r--r--	mm/vmstat.c	171
1 file changed, 89 insertions(+), 82 deletions(-)
diff --git a/mm/vmstat.c b/mm/vmstat.c
index ee7f89666250..73b83d67bab6 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -13,66 +13,6 @@
 #include <linux/mm.h>
 #include <linux/module.h>
 
-/*
- * Accumulate the page_state information across all CPUs.
- * The result is unavoidably approximate - it can change
- * during and after execution of this function.
- */
-DEFINE_PER_CPU(struct page_state, page_states) = {0};
-
-static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask)
-{
-	unsigned cpu;
-
-	memset(ret, 0, nr * sizeof(unsigned long));
-	cpus_and(*cpumask, *cpumask, cpu_online_map);
-
-	for_each_cpu_mask(cpu, *cpumask) {
-		unsigned long *in;
-		unsigned long *out;
-		unsigned off;
-		unsigned next_cpu;
-
-		in = (unsigned long *)&per_cpu(page_states, cpu);
-
-		next_cpu = next_cpu(cpu, *cpumask);
-		if (likely(next_cpu < NR_CPUS))
-			prefetch(&per_cpu(page_states, next_cpu));
-
-		out = (unsigned long *)ret;
-		for (off = 0; off < nr; off++)
-			*out++ += *in++;
-	}
-}
-
-void get_full_page_state(struct page_state *ret)
-{
-	cpumask_t mask = CPU_MASK_ALL;
-
-	__get_page_state(ret, sizeof(*ret) / sizeof(unsigned long), &mask);
-}
-
-void __mod_page_state_offset(unsigned long offset, unsigned long delta)
-{
-	void *ptr;
-
-	ptr = &__get_cpu_var(page_states);
-	*(unsigned long *)(ptr + offset) += delta;
-}
-EXPORT_SYMBOL(__mod_page_state_offset);
-
-void mod_page_state_offset(unsigned long offset, unsigned long delta)
-{
-	unsigned long flags;
-	void *ptr;
-
-	local_irq_save(flags);
-	ptr = &__get_cpu_var(page_states);
-	*(unsigned long *)(ptr + offset) += delta;
-	local_irq_restore(flags);
-}
-EXPORT_SYMBOL(mod_page_state_offset);
-
 void __get_zone_counts(unsigned long *active, unsigned long *inactive,
 			unsigned long *free, struct pglist_data *pgdat)
 {
@@ -106,6 +46,63 @@ void get_zone_counts(unsigned long *active,
 	}
 }
 
+#ifdef CONFIG_VM_EVENT_COUNTERS
+DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
+EXPORT_PER_CPU_SYMBOL(vm_event_states);
+
+static void sum_vm_events(unsigned long *ret, cpumask_t *cpumask)
+{
+	int cpu = 0;
+	int i;
+
+	memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));
+
+	cpu = first_cpu(*cpumask);
+	while (cpu < NR_CPUS) {
+		struct vm_event_state *this = &per_cpu(vm_event_states, cpu);
+
+		cpu = next_cpu(cpu, *cpumask);
+
+		if (cpu < NR_CPUS)
+			prefetch(&per_cpu(vm_event_states, cpu));
+
+
+		for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
+			ret[i] += this->event[i];
+	}
+}
+
+/*
+ * Accumulate the vm event counters across all CPUs.
+ * The result is unavoidably approximate - it can change
+ * during and after execution of this function.
+*/
+void all_vm_events(unsigned long *ret)
+{
+	sum_vm_events(ret, &cpu_online_map);
+}
+
+#ifdef CONFIG_HOTPLUG
+/*
+ * Fold the foreign cpu events into our own.
+ *
+ * This is adding to the events on one processor
+ * but keeps the global counts constant.
+ */
+void vm_events_fold_cpu(int cpu)
+{
+	struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
+	int i;
+
+	for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
+		count_vm_events(i, fold_state->event[i]);
+		fold_state->event[i] = 0;
+	}
+}
+#endif /* CONFIG_HOTPLUG */
+
+#endif /* CONFIG_VM_EVENT_COUNTERS */
+
 /*
  * Manage combined zone based / global counters
  *
@@ -405,16 +402,16 @@ static char *vmstat_text[] = {
 	"numa_other",
 #endif
 
-	/* Event counters */
+#ifdef CONFIG_VM_EVENT_COUNTERS
 	"pgpgin",
 	"pgpgout",
 	"pswpin",
 	"pswpout",
 
-	"pgalloc_high",
-	"pgalloc_normal",
-	"pgalloc_dma32",
 	"pgalloc_dma",
+	"pgalloc_dma32",
+	"pgalloc_normal",
+	"pgalloc_high",
 
 	"pgfree",
 	"pgactivate",
@@ -423,25 +420,25 @@ static char *vmstat_text[] = {
 	"pgfault",
 	"pgmajfault",
 
-	"pgrefill_high",
-	"pgrefill_normal",
-	"pgrefill_dma32",
 	"pgrefill_dma",
+	"pgrefill_dma32",
+	"pgrefill_normal",
+	"pgrefill_high",
 
-	"pgsteal_high",
-	"pgsteal_normal",
-	"pgsteal_dma32",
 	"pgsteal_dma",
+	"pgsteal_dma32",
+	"pgsteal_normal",
+	"pgsteal_high",
 
-	"pgscan_kswapd_high",
-	"pgscan_kswapd_normal",
-	"pgscan_kswapd_dma32",
 	"pgscan_kswapd_dma",
+	"pgscan_kswapd_dma32",
+	"pgscan_kswapd_normal",
+	"pgscan_kswapd_high",
 
-	"pgscan_direct_high",
-	"pgscan_direct_normal",
-	"pgscan_direct_dma32",
 	"pgscan_direct_dma",
+	"pgscan_direct_dma32",
+	"pgscan_direct_normal",
+	"pgscan_direct_high",
 
 	"pginodesteal",
 	"slabs_scanned",
@@ -451,6 +448,7 @@ static char *vmstat_text[] = {
 	"allocstall",
 
 	"pgrotated",
+#endif
 };
 
 /*
@@ -553,23 +551,32 @@ struct seq_operations zoneinfo_op = {
 static void *vmstat_start(struct seq_file *m, loff_t *pos)
 {
 	unsigned long *v;
-	struct page_state *ps;
+#ifdef CONFIG_VM_EVENT_COUNTERS
+	unsigned long *e;
+#endif
 	int i;
 
 	if (*pos >= ARRAY_SIZE(vmstat_text))
 		return NULL;
 
+#ifdef CONFIG_VM_EVENT_COUNTERS
 	v = kmalloc(NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long)
-			+ sizeof(*ps), GFP_KERNEL);
+			+ sizeof(struct vm_event_state), GFP_KERNEL);
+#else
+	v = kmalloc(NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long),
+			GFP_KERNEL);
+#endif
 	m->private = v;
 	if (!v)
 		return ERR_PTR(-ENOMEM);
 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
 		v[i] = global_page_state(i);
-	ps = (struct page_state *)(v + NR_VM_ZONE_STAT_ITEMS);
-	get_full_page_state(ps);
-	ps->pgpgin /= 2;		/* sectors -> kbytes */
-	ps->pgpgout /= 2;
+#ifdef CONFIG_VM_EVENT_COUNTERS
+	e = v + NR_VM_ZONE_STAT_ITEMS;
+	all_vm_events(e);
+	e[PGPGIN] /= 2;		/* sectors -> kbytes */
+	e[PGPGOUT] /= 2;
+#endif
 	return v + *pos;
 }
 
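A reader outside this file follows the same pattern as the new vmstat_start()
above: sum everything into an array of NR_VM_EVENT_ITEMS entries via
all_vm_events(). A hypothetical consumer, not part of this commit and assuming
CONFIG_VM_EVENT_COUNTERS=y:

	#include <linux/vmstat.h>
	#include <linux/kernel.h>

	/* Hypothetical example: print two event counters after folding the
	 * per-cpu state of all online CPUs. Values are approximate by design. */
	static void report_paging_events(void)
	{
		unsigned long events[NR_VM_EVENT_ITEMS];

		all_vm_events(events);
		printk(KERN_INFO "pgpgin=%lu pgpgout=%lu\n",
		       events[PGPGIN], events[PGPGOUT]);
	}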