aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJay Lan <jlan@sgi.com>2006-10-01 02:29:00 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2006-10-01 03:39:29 -0400
commitdb5fed26b2e0beed939b773dd5896077a1794d65 (patch)
treebe7630821744aae53b2431383ef0b304a87f1268
parent8f0ab5147951267134612570604cf8341901a80c (diff)
[PATCH] csa accounting taskstats update
ChangeLog: Feedback from Andrew Morton: - define TS_COMM_LEN to 32 - change acct_stimexpd field of task_struct to be of cputime_t, which is to be used to save the tsk->stime of last timer interrupt update. - a new Documentation/accounting/taskstats-struct.txt to describe fields of taskstats struct. Feedback from Balbir Singh: - keep the stime of a task to be zero when both stime and utime are zero as recorded in task_struct. Misc: - convert accumulated RSS/VM from platform dependent pages-ticks to MBytes-usecs in the kernel Cc: Shailabh Nagar <nagar@watson.ibm.com> Cc: Balbir Singh <balbir@in.ibm.com> Cc: Jes Sorensen <jes@sgi.com> Cc: Chris Sturtivant <csturtiv@sgi.com> Cc: Tony Ernst <tee@sgi.com> Cc: Guillaume Thouvenin <guillaume.thouvenin@bull.net> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--Documentation/accounting/taskstats-struct.txt161
-rw-r--r--include/linux/sched.h2
-rw-r--r--include/linux/taskstats.h40
-rw-r--r--kernel/tsacct.c25
4 files changed, 207 insertions, 21 deletions
diff --git a/Documentation/accounting/taskstats-struct.txt b/Documentation/accounting/taskstats-struct.txt
new file mode 100644
index 000000000000..661c797eaf79
--- /dev/null
+++ b/Documentation/accounting/taskstats-struct.txt
@@ -0,0 +1,161 @@
1The struct taskstats
2--------------------
3
4This document contains an explanation of the struct taskstats fields.
5
6There are three different groups of fields in the struct taskstats:
7
81) Common and basic accounting fields
9 If CONFIG_TASKSTATS is set, the taskstats interface is enabled and
10 the common fields and basic accounting fields are collected for
11 delivery at do_exit() of a task.
122) Delay accounting fields
13 These fields are placed between
14 /* Delay accounting fields start */
15 and
16 /* Delay accounting fields end */
17 Their values are collected if CONFIG_TASK_DELAY_ACCT is set.
183) Extended accounting fields
19 These fields are placed between
20 /* Extended accounting fields start */
21 and
22 /* Extended accounting fields end */
23 Their values are collected if CONFIG_TASK_XACCT is set.
24
25Future extension should add fields to the end of the taskstats struct, and
26should not change the relative position of each field within the struct.
27
28
29struct taskstats {
30
311) Common and basic accounting fields:
32 /* The version number of this struct. This field is always set to
33 * TASKSTATS_VERSION, which is defined in <linux/taskstats.h>.
34 * Each time the struct is changed, the value should be incremented.
35 */
36 __u16 version;
37
38 /* The exit code of a task. */
39 __u32 ac_exitcode; /* Exit status */
40
41 /* The accounting flags of a task as defined in <linux/acct.h>
42 * Defined values are AFORK, ASU, ACOMPAT, ACORE, and AXSIG.
43 */
44 __u8 ac_flag; /* Record flags */
45
46 /* The value of task_nice() of a task. */
47 __u8 ac_nice; /* task_nice */
48
49 /* The name of the command that started this task. */
50 char ac_comm[TS_COMM_LEN]; /* Command name */
51
52 /* The scheduling discipline as set in task->policy field. */
53 __u8 ac_sched; /* Scheduling discipline */
54
55 __u8 ac_pad[3];
56 __u32 ac_uid; /* User ID */
57 __u32 ac_gid; /* Group ID */
58 __u32 ac_pid; /* Process ID */
59 __u32 ac_ppid; /* Parent process ID */
60
61 /* The time when a task begins, in [secs] since 1970. */
62 __u32 ac_btime; /* Begin time [sec since 1970] */
63
64 /* The elapsed time of a task, in [usec]. */
65 __u64 ac_etime; /* Elapsed time [usec] */
66
67 /* The user CPU time of a task, in [usec]. */
68 __u64 ac_utime; /* User CPU time [usec] */
69
70 /* The system CPU time of a task, in [usec]. */
71 __u64 ac_stime; /* System CPU time [usec] */
72
73 /* The minor page fault count of a task, as set in task->min_flt. */
74 __u64 ac_minflt; /* Minor Page Fault Count */
75
76 /* The major page fault count of a task, as set in task->maj_flt. */
77 __u64 ac_majflt; /* Major Page Fault Count */
78
79
802) Delay accounting fields:
81 /* Delay accounting fields start
82 *
83 * All values, until the comment "Delay accounting fields end" are
84 * available only if delay accounting is enabled, even though the last
85 * few fields are not delays
86 *
87 * xxx_count is the number of delay values recorded
88 * xxx_delay_total is the corresponding cumulative delay in nanoseconds
89 *
90 * xxx_delay_total wraps around to zero on overflow
91 * xxx_count incremented regardless of overflow
92 */
93
94 /* Delay waiting for cpu, while runnable
95 * count, delay_total NOT updated atomically
96 */
97 __u64 cpu_count;
98 __u64 cpu_delay_total;
99
100 /* Following four fields atomically updated using task->delays->lock */
101
102 /* Delay waiting for synchronous block I/O to complete
103 * does not account for delays in I/O submission
104 */
105 __u64 blkio_count;
106 __u64 blkio_delay_total;
107
108 /* Delay waiting for page fault I/O (swap in only) */
109 __u64 swapin_count;
110 __u64 swapin_delay_total;
111
112 /* cpu "wall-clock" running time
113 * On some architectures, value will adjust for cpu time stolen
114 * from the kernel in involuntary waits due to virtualization.
115 * Value is cumulative, in nanoseconds, without a corresponding count
116 * and wraps around to zero silently on overflow
117 */
118 __u64 cpu_run_real_total;
119
120 /* cpu "virtual" running time
121 * Uses time intervals seen by the kernel i.e. no adjustment
122 * for kernel's involuntary waits due to virtualization.
123 * Value is cumulative, in nanoseconds, without a corresponding count
124 * and wraps around to zero silently on overflow
125 */
126 __u64 cpu_run_virtual_total;
127 /* Delay accounting fields end */
128 /* version 1 ends here */
129
130
1313) Extended accounting fields
132 /* Extended accounting fields start */
133
134 /* Accumulated RSS usage in duration of a task, in MBytes-usecs.
135 * The current rss usage is added to this counter every time
136 * a tick is charged to a task's system time. So, at the end we
137 * will have memory usage multiplied by system time. Thus an
138 * average usage per system time unit can be calculated.
139 */
140 __u64 coremem; /* accumulated RSS usage in MB-usec */
141
142 /* Accumulated virtual memory usage in duration of a task.
143 * Same as coremem above except that we keep track of VM usage.
144 */
145 __u64 virtmem; /* accumulated VM usage in MB-usec */
146
147 /* High watermark of RSS usage in duration of a task, in KBytes. */
148 __u64 hiwater_rss; /* High-watermark of RSS usage */
149
150 /* High watermark of VM usage in duration of a task, in KBytes. */
151 __u64 hiwater_vm; /* High-water virtual memory usage */
152
153 /* The following four fields are I/O statistics of a task. */
154 __u64 read_char; /* bytes read */
155 __u64 write_char; /* bytes written */
156 __u64 read_syscalls; /* read syscalls */
157 __u64 write_syscalls; /* write syscalls */
158
159 /* Extended accounting fields end */
160
161}
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4ddeb0f982fb..7ef899c47c29 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -984,7 +984,7 @@ struct task_struct {
984#if defined(CONFIG_TASK_XACCT) 984#if defined(CONFIG_TASK_XACCT)
985 u64 acct_rss_mem1; /* accumulated rss usage */ 985 u64 acct_rss_mem1; /* accumulated rss usage */
986 u64 acct_vm_mem1; /* accumulated virtual memory usage */ 986 u64 acct_vm_mem1; /* accumulated virtual memory usage */
987 clock_t acct_stimexpd; /* clock_t-converted stime since last update */ 987 cputime_t acct_stimexpd;/* stime since last update */
988#endif 988#endif
989#ifdef CONFIG_NUMA 989#ifdef CONFIG_NUMA
990 struct mempolicy *mempolicy; 990 struct mempolicy *mempolicy;
diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h
index 3d2c304886b0..45248806ae9c 100644
--- a/include/linux/taskstats.h
+++ b/include/linux/taskstats.h
@@ -32,14 +32,21 @@
32 32
33 33
34#define TASKSTATS_VERSION 2 34#define TASKSTATS_VERSION 2
35#define TS_COMM_LEN 16 /* should sync up with TASK_COMM_LEN 35#define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN
36 * in linux/sched.h */ 36 * in linux/sched.h */
37 37
38struct taskstats { 38struct taskstats {
39 39
40 /* Version 1 */ 40 /* The version number of this struct. This field is always set to
41 * TASKSTATS_VERSION, which is defined in <linux/taskstats.h>.
42 * Each time the struct is changed, the value should be incremented.
43 */
41 __u16 version; 44 __u16 version;
42 __u32 ac_exitcode; /* Exit status */ 45 __u32 ac_exitcode; /* Exit status */
46
47 /* The accounting flags of a task as defined in <linux/acct.h>
48 * Defined values are AFORK, ASU, ACOMPAT, ACORE, and AXSIG.
49 */
43 __u8 ac_flag; /* Record flags */ 50 __u8 ac_flag; /* Record flags */
44 __u8 ac_nice; /* task_nice */ 51 __u8 ac_nice; /* task_nice */
45 52
@@ -104,15 +111,30 @@ struct taskstats {
104 __u64 ac_etime; /* Elapsed time [usec] */ 111 __u64 ac_etime; /* Elapsed time [usec] */
105 __u64 ac_utime; /* User CPU time [usec] */ 112 __u64 ac_utime; /* User CPU time [usec] */
106 __u64 ac_stime; /* System CPU time [usec] */ 113 __u64 ac_stime; /* System CPU time [usec] */
107 __u64 ac_minflt; /* Minor Page Fault */ 114 __u64 ac_minflt; /* Minor Page Fault Count */
108 __u64 ac_majflt; /* Major Page Fault */ 115 __u64 ac_majflt; /* Major Page Fault Count */
109 /* Basic Accounting Fields end */ 116 /* Basic Accounting Fields end */
110 117
111 /* Extended accounting fields start */ 118 /* Extended accounting fields start */
112 __u64 acct_rss_mem1; /* accumulated rss usage */ 119 /* Accumulated RSS usage in duration of a task, in MBytes-usecs.
113 __u64 acct_vm_mem1; /* accumulated virtual memory usage */ 120 * The current rss usage is added to this counter every time
114 __u64 hiwater_rss; /* High-watermark of RSS usage */ 121 * a tick is charged to a task's system time. So, at the end we
115 __u64 hiwater_vm; /* High-water virtual memory usage */ 122 * will have memory usage multiplied by system time. Thus an
123 * average usage per system time unit can be calculated.
124 */
125 __u64 coremem; /* accumulated RSS usage in MB-usec */
126 /* Accumulated virtual memory usage in duration of a task.
127 * Same as coremem above except that we keep track of VM usage.
128 */
129 __u64 virtmem; /* accumulated VM usage in MB-usec */
130
131 /* High watermark of RSS and virtual memory usage in duration of
132 * a task, in KBytes.
133 */
134 __u64 hiwater_rss; /* High-watermark of RSS usage, in KB */
135 __u64 hiwater_vm; /* High-water VM usage, in KB */
136
137 /* The following four fields are I/O statistics of a task. */
116 __u64 read_char; /* bytes read */ 138 __u64 read_char; /* bytes read */
117 __u64 write_char; /* bytes written */ 139 __u64 write_char; /* bytes written */
118 __u64 read_syscalls; /* read syscalls */ 140 __u64 read_syscalls; /* read syscalls */
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index 47c71daa416f..db443221ba5b 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -20,6 +20,7 @@
20#include <linux/sched.h> 20#include <linux/sched.h>
21#include <linux/tsacct_kern.h> 21#include <linux/tsacct_kern.h>
22#include <linux/acct.h> 22#include <linux/acct.h>
23#include <linux/jiffies.h>
23 24
24 25
25#define USEC_PER_TICK (USEC_PER_SEC/HZ) 26#define USEC_PER_TICK (USEC_PER_SEC/HZ)
@@ -62,33 +63,35 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk)
62 stats->ac_stime = cputime_to_msecs(tsk->stime) * USEC_PER_MSEC; 63 stats->ac_stime = cputime_to_msecs(tsk->stime) * USEC_PER_MSEC;
63 stats->ac_minflt = tsk->min_flt; 64 stats->ac_minflt = tsk->min_flt;
64 stats->ac_majflt = tsk->maj_flt; 65 stats->ac_majflt = tsk->maj_flt;
65 /* Each process gets a minimum of one usec cpu time */
66 if ((stats->ac_utime == 0) && (stats->ac_stime == 0)) {
67 stats->ac_stime = 1;
68 }
69 66
70 strncpy(stats->ac_comm, tsk->comm, sizeof(stats->ac_comm)); 67 strncpy(stats->ac_comm, tsk->comm, sizeof(stats->ac_comm));
71} 68}
72 69
73 70
74#ifdef CONFIG_TASK_XACCT 71#ifdef CONFIG_TASK_XACCT
72
73#define KB 1024
74#define MB (1024*KB)
75/* 75/*
76 * fill in extended accounting fields 76 * fill in extended accounting fields
77 */ 77 */
78void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) 78void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
79{ 79{
80 stats->acct_rss_mem1 = p->acct_rss_mem1; 80 /* convert pages-jiffies to Mbyte-usec */
81 stats->acct_vm_mem1 = p->acct_vm_mem1; 81 stats->coremem = jiffies_to_usecs(p->acct_rss_mem1) * PAGE_SIZE / MB;
82 stats->virtmem = jiffies_to_usecs(p->acct_vm_mem1) * PAGE_SIZE / MB;
82 if (p->mm) { 83 if (p->mm) {
83 stats->hiwater_rss = p->mm->hiwater_rss; 84 /* adjust to KB unit */
84 stats->hiwater_vm = p->mm->hiwater_vm; 85 stats->hiwater_rss = p->mm->hiwater_rss * PAGE_SIZE / KB;
86 stats->hiwater_vm = p->mm->hiwater_vm * PAGE_SIZE / KB;
85 } 87 }
86 stats->read_char = p->rchar; 88 stats->read_char = p->rchar;
87 stats->write_char = p->wchar; 89 stats->write_char = p->wchar;
88 stats->read_syscalls = p->syscr; 90 stats->read_syscalls = p->syscr;
89 stats->write_syscalls = p->syscw; 91 stats->write_syscalls = p->syscw;
90} 92}
91 93#undef KB
94#undef MB
92 95
93/** 96/**
94 * acct_update_integrals - update mm integral fields in task_struct 97 * acct_update_integrals - update mm integral fields in task_struct
@@ -97,8 +100,8 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
97void acct_update_integrals(struct task_struct *tsk) 100void acct_update_integrals(struct task_struct *tsk)
98{ 101{
99 if (likely(tsk->mm)) { 102 if (likely(tsk->mm)) {
100 long delta = 103 long delta = cputime_to_jiffies(
101 cputime_to_jiffies(tsk->stime) - tsk->acct_stimexpd; 104 cputime_sub(tsk->stime, tsk->acct_stimexpd));
102 105
103 if (delta == 0) 106 if (delta == 0)
104 return; 107 return;