aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVassili Karpov <av1474@comtv.ru>2007-02-28 23:13:45 -0500
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-03-01 17:53:39 -0500
commit48dba8ab9b93c3b6b57946bd45ae013402b0b054 (patch)
tree778ecc093541ba14f70bfdeb8f4e79a77d95f2a6
parent7355690ead6d61f6344072ae61060f985060da29 (diff)
[PATCH] Documentation: CPU load calculation description
Describes how/when the information exported to `/proc/stat' is calculated, and possible problems with this approach. Signed-off-by: Vassili Karpov <av1474@comtv.ru> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--Documentation/cpu-load.txt113
1 files changed, 113 insertions, 0 deletions
diff --git a/Documentation/cpu-load.txt b/Documentation/cpu-load.txt
new file mode 100644
index 000000000000..287224e57cfc
--- /dev/null
+++ b/Documentation/cpu-load.txt
@@ -0,0 +1,113 @@
1CPU load
2--------
3
4Linux exports various bits of information via `/proc/stat' and
5`/proc/uptime' that userland tools, such as top(1), use to calculate
6the average time the system spent in a particular state, for example:
7
8 $ iostat
9 Linux 2.6.18.3-exp (linmac) 02/20/2007
10
11 avg-cpu: %user %nice %system %iowait %steal %idle
12 10.01 0.00 2.92 5.44 0.00 81.63
13
14 ...
15
16Here the system thinks that over the default sampling period the
17system spent 10.01% of the time doing work in user space, 2.92% in the
18kernel, and was overall 81.63% of the time idle.
19
20In most cases the `/proc/stat' information reflects reality quite
21closely; however, due to the nature of how/when the kernel collects
22this data, sometimes it cannot be trusted at all.
23
24So how is this information collected? Whenever a timer interrupt is
25signalled, the kernel looks at what kind of task was running at that
26moment and increments the counter that corresponds to this task's
27kind/state. The problem with this is that the system could have
28switched between various states multiple times between two timer
29interrupts, yet the counter is incremented only for the last state.
30
31
32Example
33-------
34
35If we imagine the system with one task that periodically burns cycles
36in the following manner:
37
38 time line between two timer interrupts
39|--------------------------------------|
40 ^ ^
41 |_ something begins working |
42 |_ something goes to sleep
43 (only to be awakened quite soon)
44
45In the above situation the system will be 0% loaded according to
46`/proc/stat' (since the timer interrupt will always happen when the
47system is executing the idle handler), but in reality the load is
48closer to 99%.
49
50One can imagine many more situations where this behavior of the kernel
51will lead to quite erratic information inside `/proc/stat'.
52
53
54/* gcc -o hog smallhog.c */
55#include <time.h>
56#include <limits.h>
57#include <signal.h>
58#include <sys/time.h>
59#define HIST 10
60
/* Raised by the signal handler; hog () clears it on entry and polls it. */
static volatile sig_atomic_t stop;

/*
 * Signal handler: its only job is to raise the `stop' flag.
 * The signal number is not needed and is explicitly discarded.
 */
static void sighandler (int signr)
{
    stop = 1;
    (void) signr;
}
68static unsigned long hog (unsigned long niters)
69{
70 stop = 0;
71 while (!stop && --niters);
72 return niters;
73}
74int main (void)
75{
76 int i;
77 struct itimerval it = { .it_interval = { .tv_sec = 0, .tv_usec = 1 },
78 .it_value = { .tv_sec = 0, .tv_usec = 1 } };
79 sigset_t set;
80 unsigned long v[HIST];
81 double tmp = 0.0;
82 unsigned long n;
83 signal (SIGALRM, &sighandler);
84 setitimer (ITIMER_REAL, &it, NULL);
85
86 hog (ULONG_MAX);
87 for (i = 0; i < HIST; ++i) v[i] = ULONG_MAX - hog (ULONG_MAX);
88 for (i = 0; i < HIST; ++i) tmp += v[i];
89 tmp /= HIST;
90 n = tmp - (tmp / 3.0);
91
92 sigemptyset (&set);
93 sigaddset (&set, SIGALRM);
94
95 for (;;) {
96 hog (n);
97 sigwait (&set, &i);
98 }
99 return 0;
100}
101
102
103References
104----------
105
106http://lkml.org/lkml/2007/2/12/6
107Documentation/filesystems/proc.txt (1.8)
108
109
110Thanks
111------
112
113Con Kolivas, Pavel Machek