diff options
-rw-r--r-- | Documentation/accounting/taskstats-struct.txt | 161 | ||||
-rw-r--r-- | include/linux/sched.h | 2 | ||||
-rw-r--r-- | include/linux/taskstats.h | 40 | ||||
-rw-r--r-- | kernel/tsacct.c | 25 |
4 files changed, 207 insertions, 21 deletions
diff --git a/Documentation/accounting/taskstats-struct.txt b/Documentation/accounting/taskstats-struct.txt new file mode 100644 index 000000000000..661c797eaf79 --- /dev/null +++ b/Documentation/accounting/taskstats-struct.txt | |||
@@ -0,0 +1,161 @@ | |||
1 | The struct taskstats | ||
2 | -------------------- | ||
3 | |||
4 | This document contains an explanation of the struct taskstats fields. | ||
5 | |||
6 | There are three different groups of fields in the struct taskstats: | ||
7 | |||
8 | 1) Common and basic accounting fields | ||
9 | If CONFIG_TASKSTATS is set, the taskstats interface is enabled and | ||
10 | the common fields and basic accounting fields are collected for | ||
11 | delivery at do_exit() of a task. | ||
12 | 2) Delay accounting fields | ||
13 | These fields are placed between | ||
14 | /* Delay accounting fields start */ | ||
15 | and | ||
16 | /* Delay accounting fields end */ | ||
17 | Their values are collected if CONFIG_TASK_DELAY_ACCT is set. | ||
18 | 3) Extended accounting fields | ||
19 | These fields are placed between | ||
20 | /* Extended accounting fields start */ | ||
21 | and | ||
22 | /* Extended accounting fields end */ | ||
23 | Their values are collected if CONFIG_TASK_XACCT is set. | ||
24 | |||
25 | Future extensions should add fields to the end of the taskstats struct, and | ||
26 | should not change the relative position of each field within the struct. | ||
27 | |||
28 | |||
29 | struct taskstats { | ||
30 | |||
31 | 1) Common and basic accounting fields: | ||
32 | /* The version number of this struct. This field is always set to | ||
33 | * TASKSTATS_VERSION, which is defined in <linux/taskstats.h>. | ||
34 | * Each time the struct is changed, the value should be incremented. | ||
35 | */ | ||
36 | __u16 version; | ||
37 | |||
38 | /* The exit code of a task. */ | ||
39 | __u32 ac_exitcode; /* Exit status */ | ||
40 | |||
41 | /* The accounting flags of a task as defined in <linux/acct.h> | ||
42 | * Defined values are AFORK, ASU, ACOMPAT, ACORE, and AXSIG. | ||
43 | */ | ||
44 | __u8 ac_flag; /* Record flags */ | ||
45 | |||
46 | /* The value of task_nice() of a task. */ | ||
47 | __u8 ac_nice; /* task_nice */ | ||
48 | |||
49 | /* The name of the command that started this task. */ | ||
50 | char ac_comm[TS_COMM_LEN]; /* Command name */ | ||
51 | |||
52 | /* The scheduling discipline as set in task->policy field. */ | ||
53 | __u8 ac_sched; /* Scheduling discipline */ | ||
54 | |||
55 | __u8 ac_pad[3]; | ||
56 | __u32 ac_uid; /* User ID */ | ||
57 | __u32 ac_gid; /* Group ID */ | ||
58 | __u32 ac_pid; /* Process ID */ | ||
59 | __u32 ac_ppid; /* Parent process ID */ | ||
60 | |||
61 | /* The time when a task begins, in [secs] since 1970. */ | ||
62 | __u32 ac_btime; /* Begin time [sec since 1970] */ | ||
63 | |||
64 | /* The elapsed time of a task, in [usec]. */ | ||
65 | __u64 ac_etime; /* Elapsed time [usec] */ | ||
66 | |||
67 | /* The user CPU time of a task, in [usec]. */ | ||
68 | __u64 ac_utime; /* User CPU time [usec] */ | ||
69 | |||
70 | /* The system CPU time of a task, in [usec]. */ | ||
71 | __u64 ac_stime; /* System CPU time [usec] */ | ||
72 | |||
73 | /* The minor page fault count of a task, as set in task->min_flt. */ | ||
74 | __u64 ac_minflt; /* Minor Page Fault Count */ | ||
75 | |||
76 | /* The major page fault count of a task, as set in task->maj_flt. */ | ||
77 | __u64 ac_majflt; /* Major Page Fault Count */ | ||
78 | |||
79 | |||
80 | 2) Delay accounting fields: | ||
81 | /* Delay accounting fields start | ||
82 | * | ||
83 | * All values, until the comment "Delay accounting fields end" are | ||
84 | * available only if delay accounting is enabled, even though the last | ||
85 | * few fields are not delays | ||
86 | * | ||
87 | * xxx_count is the number of delay values recorded | ||
88 | * xxx_delay_total is the corresponding cumulative delay in nanoseconds | ||
89 | * | ||
90 | * xxx_delay_total wraps around to zero on overflow | ||
91 | * xxx_count incremented regardless of overflow | ||
92 | */ | ||
93 | |||
94 | /* Delay waiting for cpu, while runnable | ||
95 | * count, delay_total NOT updated atomically | ||
96 | */ | ||
97 | __u64 cpu_count; | ||
98 | __u64 cpu_delay_total; | ||
99 | |||
100 | /* Following four fields atomically updated using task->delays->lock */ | ||
101 | |||
102 | /* Delay waiting for synchronous block I/O to complete | ||
103 | * does not account for delays in I/O submission | ||
104 | */ | ||
105 | __u64 blkio_count; | ||
106 | __u64 blkio_delay_total; | ||
107 | |||
108 | /* Delay waiting for page fault I/O (swap in only) */ | ||
109 | __u64 swapin_count; | ||
110 | __u64 swapin_delay_total; | ||
111 | |||
112 | /* cpu "wall-clock" running time | ||
113 | * On some architectures, value will adjust for cpu time stolen | ||
114 | * from the kernel in involuntary waits due to virtualization. | ||
115 | * Value is cumulative, in nanoseconds, without a corresponding count | ||
116 | * and wraps around to zero silently on overflow | ||
117 | */ | ||
118 | __u64 cpu_run_real_total; | ||
119 | |||
120 | /* cpu "virtual" running time | ||
121 | * Uses time intervals seen by the kernel i.e. no adjustment | ||
122 | * for kernel's involuntary waits due to virtualization. | ||
123 | * Value is cumulative, in nanoseconds, without a corresponding count | ||
124 | * and wraps around to zero silently on overflow | ||
125 | */ | ||
126 | __u64 cpu_run_virtual_total; | ||
127 | /* Delay accounting fields end */ | ||
128 | /* version 1 ends here */ | ||
129 | |||
130 | |||
131 | 3) Extended accounting fields: | ||
132 | /* Extended accounting fields start */ | ||
133 | |||
134 | /* Accumulated RSS usage in duration of a task, in MBytes-usecs. | ||
135 | * The current rss usage is added to this counter every time | ||
136 | * a tick is charged to a task's system time. So, at the end we | ||
137 | * will have memory usage multiplied by system time. Thus an | ||
138 | * average usage per system time unit can be calculated. | ||
139 | */ | ||
140 | __u64 coremem; /* accumulated RSS usage in MB-usec */ | ||
141 | |||
142 | /* Accumulated virtual memory usage in duration of a task. | ||
143 | * Same as coremem above except that we keep track of VM usage. | ||
144 | */ | ||
145 | __u64 virtmem; /* accumulated VM usage in MB-usec */ | ||
146 | |||
147 | /* High watermark of RSS usage in duration of a task, in KBytes. */ | ||
148 | __u64 hiwater_rss; /* High-watermark of RSS usage */ | ||
149 | |||
150 | /* High watermark of VM usage in duration of a task, in KBytes. */ | ||
151 | __u64 hiwater_vm; /* High-water virtual memory usage */ | ||
152 | |||
153 | /* The following four fields are I/O statistics of a task. */ | ||
154 | __u64 read_char; /* bytes read */ | ||
155 | __u64 write_char; /* bytes written */ | ||
156 | __u64 read_syscalls; /* read syscalls */ | ||
157 | __u64 write_syscalls; /* write syscalls */ | ||
158 | |||
159 | /* Extended accounting fields end */ | ||
160 | |||
161 | } | ||
diff --git a/include/linux/sched.h b/include/linux/sched.h index 4ddeb0f982fb..7ef899c47c29 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -984,7 +984,7 @@ struct task_struct { | |||
984 | #if defined(CONFIG_TASK_XACCT) | 984 | #if defined(CONFIG_TASK_XACCT) |
985 | u64 acct_rss_mem1; /* accumulated rss usage */ | 985 | u64 acct_rss_mem1; /* accumulated rss usage */ |
986 | u64 acct_vm_mem1; /* accumulated virtual memory usage */ | 986 | u64 acct_vm_mem1; /* accumulated virtual memory usage */ |
987 | clock_t acct_stimexpd; /* clock_t-converted stime since last update */ | 987 | cputime_t acct_stimexpd;/* stime since last update */ |
988 | #endif | 988 | #endif |
989 | #ifdef CONFIG_NUMA | 989 | #ifdef CONFIG_NUMA |
990 | struct mempolicy *mempolicy; | 990 | struct mempolicy *mempolicy; |
diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h index 3d2c304886b0..45248806ae9c 100644 --- a/include/linux/taskstats.h +++ b/include/linux/taskstats.h | |||
@@ -32,14 +32,21 @@ | |||
32 | 32 | ||
33 | 33 | ||
34 | #define TASKSTATS_VERSION 2 | 34 | #define TASKSTATS_VERSION 2 |
35 | #define TS_COMM_LEN 16 /* should sync up with TASK_COMM_LEN | 35 | #define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN |
36 | * in linux/sched.h */ | 36 | * in linux/sched.h */ |
37 | 37 | ||
38 | struct taskstats { | 38 | struct taskstats { |
39 | 39 | ||
40 | /* Version 1 */ | 40 | /* The version number of this struct. This field is always set to |
41 | * TASKSTATS_VERSION, which is defined in <linux/taskstats.h>. | ||
42 | * Each time the struct is changed, the value should be incremented. | ||
43 | */ | ||
41 | __u16 version; | 44 | __u16 version; |
42 | __u32 ac_exitcode; /* Exit status */ | 45 | __u32 ac_exitcode; /* Exit status */ |
46 | |||
47 | /* The accounting flags of a task as defined in <linux/acct.h> | ||
48 | * Defined values are AFORK, ASU, ACOMPAT, ACORE, and AXSIG. | ||
49 | */ | ||
43 | __u8 ac_flag; /* Record flags */ | 50 | __u8 ac_flag; /* Record flags */ |
44 | __u8 ac_nice; /* task_nice */ | 51 | __u8 ac_nice; /* task_nice */ |
45 | 52 | ||
@@ -104,15 +111,30 @@ struct taskstats { | |||
104 | __u64 ac_etime; /* Elapsed time [usec] */ | 111 | __u64 ac_etime; /* Elapsed time [usec] */ |
105 | __u64 ac_utime; /* User CPU time [usec] */ | 112 | __u64 ac_utime; /* User CPU time [usec] */ |
106 | __u64 ac_stime; /* SYstem CPU time [usec] */ | 113 | __u64 ac_stime; /* SYstem CPU time [usec] */ |
107 | __u64 ac_minflt; /* Minor Page Fault */ | 114 | __u64 ac_minflt; /* Minor Page Fault Count */ |
108 | __u64 ac_majflt; /* Major Page Fault */ | 115 | __u64 ac_majflt; /* Major Page Fault Count */ |
109 | /* Basic Accounting Fields end */ | 116 | /* Basic Accounting Fields end */ |
110 | 117 | ||
111 | /* Extended accounting fields start */ | 118 | /* Extended accounting fields start */ |
112 | __u64 acct_rss_mem1; /* accumulated rss usage */ | 119 | /* Accumulated RSS usage in duration of a task, in MBytes-usecs. |
113 | __u64 acct_vm_mem1; /* accumulated virtual memory usage */ | 120 | * The current rss usage is added to this counter every time |
114 | __u64 hiwater_rss; /* High-watermark of RSS usage */ | 121 | * a tick is charged to a task's system time. So, at the end we |
115 | __u64 hiwater_vm; /* High-water virtual memory usage */ | 122 | * will have memory usage multiplied by system time. Thus an |
123 | * average usage per system time unit can be calculated. | ||
124 | */ | ||
125 | __u64 coremem; /* accumulated RSS usage in MB-usec */ | ||
126 | /* Accumulated virtual memory usage in duration of a task. | ||
127 | * Same as coremem above except that we keep track of VM usage. | ||
128 | */ | ||
129 | __u64 virtmem; /* accumulated VM usage in MB-usec */ | ||
130 | |||
131 | /* High watermark of RSS and virtual memory usage in duration of | ||
132 | * a task, in KBytes. | ||
133 | */ | ||
134 | __u64 hiwater_rss; /* High-watermark of RSS usage, in KB */ | ||
135 | __u64 hiwater_vm; /* High-water VM usage, in KB */ | ||
136 | |||
137 | /* The following four fields are I/O statistics of a task. */ | ||
116 | __u64 read_char; /* bytes read */ | 138 | __u64 read_char; /* bytes read */ |
117 | __u64 write_char; /* bytes written */ | 139 | __u64 write_char; /* bytes written */ |
118 | __u64 read_syscalls; /* read syscalls */ | 140 | __u64 read_syscalls; /* read syscalls */ |
diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 47c71daa416f..db443221ba5b 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/sched.h> | 20 | #include <linux/sched.h> |
21 | #include <linux/tsacct_kern.h> | 21 | #include <linux/tsacct_kern.h> |
22 | #include <linux/acct.h> | 22 | #include <linux/acct.h> |
23 | #include <linux/jiffies.h> | ||
23 | 24 | ||
24 | 25 | ||
25 | #define USEC_PER_TICK (USEC_PER_SEC/HZ) | 26 | #define USEC_PER_TICK (USEC_PER_SEC/HZ) |
@@ -62,33 +63,35 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk) | |||
62 | stats->ac_stime = cputime_to_msecs(tsk->stime) * USEC_PER_MSEC; | 63 | stats->ac_stime = cputime_to_msecs(tsk->stime) * USEC_PER_MSEC; |
63 | stats->ac_minflt = tsk->min_flt; | 64 | stats->ac_minflt = tsk->min_flt; |
64 | stats->ac_majflt = tsk->maj_flt; | 65 | stats->ac_majflt = tsk->maj_flt; |
65 | /* Each process gets a minimum of one usec cpu time */ | ||
66 | if ((stats->ac_utime == 0) && (stats->ac_stime == 0)) { | ||
67 | stats->ac_stime = 1; | ||
68 | } | ||
69 | 66 | ||
70 | strncpy(stats->ac_comm, tsk->comm, sizeof(stats->ac_comm)); | 67 | strncpy(stats->ac_comm, tsk->comm, sizeof(stats->ac_comm)); |
71 | } | 68 | } |
72 | 69 | ||
73 | 70 | ||
74 | #ifdef CONFIG_TASK_XACCT | 71 | #ifdef CONFIG_TASK_XACCT |
72 | |||
73 | #define KB 1024 | ||
74 | #define MB (1024*KB) | ||
75 | /* | 75 | /* |
76 | * fill in extended accounting fields | 76 | * fill in extended accounting fields |
77 | */ | 77 | */ |
78 | void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) | 78 | void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) |
79 | { | 79 | { |
80 | stats->acct_rss_mem1 = p->acct_rss_mem1; | 80 | /* convert pages-jiffies to Mbyte-usec */ |
81 | stats->acct_vm_mem1 = p->acct_vm_mem1; | 81 | stats->coremem = jiffies_to_usecs(p->acct_rss_mem1) * PAGE_SIZE / MB; |
82 | stats->virtmem = jiffies_to_usecs(p->acct_vm_mem1) * PAGE_SIZE / MB; | ||
82 | if (p->mm) { | 83 | if (p->mm) { |
83 | stats->hiwater_rss = p->mm->hiwater_rss; | 84 | /* adjust to KB unit */ |
84 | stats->hiwater_vm = p->mm->hiwater_vm; | 85 | stats->hiwater_rss = p->mm->hiwater_rss * PAGE_SIZE / KB; |
86 | stats->hiwater_vm = p->mm->hiwater_vm * PAGE_SIZE / KB; | ||
85 | } | 87 | } |
86 | stats->read_char = p->rchar; | 88 | stats->read_char = p->rchar; |
87 | stats->write_char = p->wchar; | 89 | stats->write_char = p->wchar; |
88 | stats->read_syscalls = p->syscr; | 90 | stats->read_syscalls = p->syscr; |
89 | stats->write_syscalls = p->syscw; | 91 | stats->write_syscalls = p->syscw; |
90 | } | 92 | } |
91 | 93 | #undef KB | |
94 | #undef MB | ||
92 | 95 | ||
93 | /** | 96 | /** |
94 | * acct_update_integrals - update mm integral fields in task_struct | 97 | * acct_update_integrals - update mm integral fields in task_struct |
@@ -97,8 +100,8 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) | |||
97 | void acct_update_integrals(struct task_struct *tsk) | 100 | void acct_update_integrals(struct task_struct *tsk) |
98 | { | 101 | { |
99 | if (likely(tsk->mm)) { | 102 | if (likely(tsk->mm)) { |
100 | long delta = | 103 | long delta = cputime_to_jiffies( |
101 | cputime_to_jiffies(tsk->stime) - tsk->acct_stimexpd; | 104 | cputime_sub(tsk->stime, tsk->acct_stimexpd)); |
102 | 105 | ||
103 | if (delta == 0) | 106 | if (delta == 0) |
104 | return; | 107 | return; |