diff options
| -rw-r--r-- | Documentation/accounting/taskstats-struct.txt | 161 | ||||
| -rw-r--r-- | include/linux/sched.h | 2 | ||||
| -rw-r--r-- | include/linux/taskstats.h | 40 | ||||
| -rw-r--r-- | kernel/tsacct.c | 25 |
4 files changed, 207 insertions, 21 deletions
diff --git a/Documentation/accounting/taskstats-struct.txt b/Documentation/accounting/taskstats-struct.txt new file mode 100644 index 000000000000..661c797eaf79 --- /dev/null +++ b/Documentation/accounting/taskstats-struct.txt | |||
| @@ -0,0 +1,161 @@ | |||
| 1 | The struct taskstats | ||
| 2 | -------------------- | ||
| 3 | |||
| 4 | This document contains an explanation of the struct taskstats fields. | ||
| 5 | |||
| 6 | There are three different groups of fields in the struct taskstats: | ||
| 7 | |||
| 8 | 1) Common and basic accounting fields | ||
| 9 | If CONFIG_TASKSTATS is set, the taskstats interface is enabled and | ||
| 10 | the common fields and basic accounting fields are collected for | ||
| 11 | delivery at do_exit() of a task. | ||
| 12 | 2) Delay accounting fields | ||
| 13 | These fields are placed between | ||
| 14 | /* Delay accounting fields start */ | ||
| 15 | and | ||
| 16 | /* Delay accounting fields end */ | ||
| 17 | Their values are collected if CONFIG_TASK_DELAY_ACCT is set. | ||
| 18 | 3) Extended accounting fields | ||
| 19 | These fields are placed between | ||
| 20 | /* Extended accounting fields start */ | ||
| 21 | and | ||
| 22 | /* Extended accounting fields end */ | ||
| 23 | Their values are collected if CONFIG_TASK_XACCT is set. | ||
| 24 | |||
| 25 | Future extension should add fields to the end of the taskstats struct, and | ||
| 26 | should not change the relative position of each field within the struct. | ||
| 27 | |||
| 28 | |||
| 29 | struct taskstats { | ||
| 30 | |||
| 31 | 1) Common and basic accounting fields: | ||
| 32 | /* The version number of this struct. This field is always set to | ||
| 33 | * TASKSTATS_VERSION, which is defined in <linux/taskstats.h>. | ||
| 34 | * Each time the struct is changed, the value should be incremented. | ||
| 35 | */ | ||
| 36 | __u16 version; | ||
| 37 | |||
| 38 | /* The exit code of a task. */ | ||
| 39 | __u32 ac_exitcode; /* Exit status */ | ||
| 40 | |||
| 41 | /* The accounting flags of a task as defined in <linux/acct.h> | ||
| 42 | * Defined values are AFORK, ASU, ACOMPAT, ACORE, and AXSIG. | ||
| 43 | */ | ||
| 44 | __u8 ac_flag; /* Record flags */ | ||
| 45 | |||
| 46 | /* The value of task_nice() of a task. */ | ||
| 47 | __u8 ac_nice; /* task_nice */ | ||
| 48 | |||
| 49 | /* The name of the command that started this task. */ | ||
| 50 | char ac_comm[TS_COMM_LEN]; /* Command name */ | ||
| 51 | |||
| 52 | /* The scheduling discipline as set in task->policy field. */ | ||
| 53 | __u8 ac_sched; /* Scheduling discipline */ | ||
| 54 | |||
| 55 | __u8 ac_pad[3]; | ||
| 56 | __u32 ac_uid; /* User ID */ | ||
| 57 | __u32 ac_gid; /* Group ID */ | ||
| 58 | __u32 ac_pid; /* Process ID */ | ||
| 59 | __u32 ac_ppid; /* Parent process ID */ | ||
| 60 | |||
| 61 | /* The time when a task begins, in [secs] since 1970. */ | ||
| 62 | __u32 ac_btime; /* Begin time [sec since 1970] */ | ||
| 63 | |||
| 64 | /* The elapsed time of a task, in [usec]. */ | ||
| 65 | __u64 ac_etime; /* Elapsed time [usec] */ | ||
| 66 | |||
| 67 | /* The user CPU time of a task, in [usec]. */ | ||
| 68 | __u64 ac_utime; /* User CPU time [usec] */ | ||
| 69 | |||
| 70 | /* The system CPU time of a task, in [usec]. */ | ||
| 71 | __u64 ac_stime; /* System CPU time [usec] */ | ||
| 72 | |||
| 73 | /* The minor page fault count of a task, as set in task->min_flt. */ | ||
| 74 | __u64 ac_minflt; /* Minor Page Fault Count */ | ||
| 75 | |||
| 76 | /* The major page fault count of a task, as set in task->maj_flt. */ | ||
| 77 | __u64 ac_majflt; /* Major Page Fault Count */ | ||
| 78 | |||
| 79 | |||
| 80 | 2) Delay accounting fields: | ||
| 81 | /* Delay accounting fields start | ||
| 82 | * | ||
| 83 | * All values, until the comment "Delay accounting fields end" are | ||
| 84 | * available only if delay accounting is enabled, even though the last | ||
| 85 | * few fields are not delays | ||
| 86 | * | ||
| 87 | * xxx_count is the number of delay values recorded | ||
| 88 | * xxx_delay_total is the corresponding cumulative delay in nanoseconds | ||
| 89 | * | ||
| 90 | * xxx_delay_total wraps around to zero on overflow | ||
| 91 | * xxx_count incremented regardless of overflow | ||
| 92 | */ | ||
| 93 | |||
| 94 | /* Delay waiting for cpu, while runnable | ||
| 95 | * count, delay_total NOT updated atomically | ||
| 96 | */ | ||
| 97 | __u64 cpu_count; | ||
| 98 | __u64 cpu_delay_total; | ||
| 99 | |||
| 100 | /* Following four fields atomically updated using task->delays->lock */ | ||
| 101 | |||
| 102 | /* Delay waiting for synchronous block I/O to complete | ||
| 103 | * does not account for delays in I/O submission | ||
| 104 | */ | ||
| 105 | __u64 blkio_count; | ||
| 106 | __u64 blkio_delay_total; | ||
| 107 | |||
| 108 | /* Delay waiting for page fault I/O (swap in only) */ | ||
| 109 | __u64 swapin_count; | ||
| 110 | __u64 swapin_delay_total; | ||
| 111 | |||
| 112 | /* cpu "wall-clock" running time | ||
| 113 | * On some architectures, value will adjust for cpu time stolen | ||
| 114 | * from the kernel in involuntary waits due to virtualization. | ||
| 115 | * Value is cumulative, in nanoseconds, without a corresponding count | ||
| 116 | * and wraps around to zero silently on overflow | ||
| 117 | */ | ||
| 118 | __u64 cpu_run_real_total; | ||
| 119 | |||
| 120 | /* cpu "virtual" running time | ||
| 121 | * Uses time intervals seen by the kernel i.e. no adjustment | ||
| 122 | * for kernel's involuntary waits due to virtualization. | ||
| 123 | * Value is cumulative, in nanoseconds, without a corresponding count | ||
| 124 | * and wraps around to zero silently on overflow | ||
| 125 | */ | ||
| 126 | __u64 cpu_run_virtual_total; | ||
| 127 | /* Delay accounting fields end */ | ||
| 128 | /* version 1 ends here */ | ||
| 129 | |||
| 130 | |||
| 131 | 3) Extended accounting fields: | ||
| 132 | /* Extended accounting fields start */ | ||
| 133 | |||
| 134 | /* Accumulated RSS usage in duration of a task, in MBytes-usecs. | ||
| 135 | * The current rss usage is added to this counter every time | ||
| 136 | * a tick is charged to a task's system time. So, at the end we | ||
| 137 | * will have memory usage multiplied by system time. Thus an | ||
| 138 | * average usage per system time unit can be calculated. | ||
| 139 | */ | ||
| 140 | __u64 coremem; /* accumulated RSS usage in MB-usec */ | ||
| 141 | |||
| 142 | /* Accumulated virtual memory usage in duration of a task. | ||
| 143 | * Same as coremem above except that we keep track of VM usage. | ||
| 144 | */ | ||
| 145 | __u64 virtmem; /* accumulated VM usage in MB-usec */ | ||
| 146 | |||
| 147 | /* High watermark of RSS usage in duration of a task, in KBytes. */ | ||
| 148 | __u64 hiwater_rss; /* High-watermark of RSS usage */ | ||
| 149 | |||
| 150 | /* High watermark of VM usage in duration of a task, in KBytes. */ | ||
| 151 | __u64 hiwater_vm; /* High-water virtual memory usage */ | ||
| 152 | |||
| 153 | /* The following four fields are I/O statistics of a task. */ | ||
| 154 | __u64 read_char; /* bytes read */ | ||
| 155 | __u64 write_char; /* bytes written */ | ||
| 156 | __u64 read_syscalls; /* read syscalls */ | ||
| 157 | __u64 write_syscalls; /* write syscalls */ | ||
| 158 | |||
| 159 | /* Extended accounting fields end */ | ||
| 160 | |||
| 161 | } | ||
diff --git a/include/linux/sched.h b/include/linux/sched.h index 4ddeb0f982fb..7ef899c47c29 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
| @@ -984,7 +984,7 @@ struct task_struct { | |||
| 984 | #if defined(CONFIG_TASK_XACCT) | 984 | #if defined(CONFIG_TASK_XACCT) |
| 985 | u64 acct_rss_mem1; /* accumulated rss usage */ | 985 | u64 acct_rss_mem1; /* accumulated rss usage */ |
| 986 | u64 acct_vm_mem1; /* accumulated virtual memory usage */ | 986 | u64 acct_vm_mem1; /* accumulated virtual memory usage */ |
| 987 | clock_t acct_stimexpd; /* clock_t-converted stime since last update */ | 987 | cputime_t acct_stimexpd;/* stime since last update */ |
| 988 | #endif | 988 | #endif |
| 989 | #ifdef CONFIG_NUMA | 989 | #ifdef CONFIG_NUMA |
| 990 | struct mempolicy *mempolicy; | 990 | struct mempolicy *mempolicy; |
diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h index 3d2c304886b0..45248806ae9c 100644 --- a/include/linux/taskstats.h +++ b/include/linux/taskstats.h | |||
| @@ -32,14 +32,21 @@ | |||
| 32 | 32 | ||
| 33 | 33 | ||
| 34 | #define TASKSTATS_VERSION 2 | 34 | #define TASKSTATS_VERSION 2 |
| 35 | #define TS_COMM_LEN 16 /* should sync up with TASK_COMM_LEN | 35 | #define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN |
| 36 | * in linux/sched.h */ | 36 | * in linux/sched.h */ |
| 37 | 37 | ||
| 38 | struct taskstats { | 38 | struct taskstats { |
| 39 | 39 | ||
| 40 | /* Version 1 */ | 40 | /* The version number of this struct. This field is always set to |
| 41 | * TASKSTATS_VERSION, which is defined in <linux/taskstats.h>. | ||
| 42 | * Each time the struct is changed, the value should be incremented. | ||
| 43 | */ | ||
| 41 | __u16 version; | 44 | __u16 version; |
| 42 | __u32 ac_exitcode; /* Exit status */ | 45 | __u32 ac_exitcode; /* Exit status */ |
| 46 | |||
| 47 | /* The accounting flags of a task as defined in <linux/acct.h> | ||
| 48 | * Defined values are AFORK, ASU, ACOMPAT, ACORE, and AXSIG. | ||
| 49 | */ | ||
| 43 | __u8 ac_flag; /* Record flags */ | 50 | __u8 ac_flag; /* Record flags */ |
| 44 | __u8 ac_nice; /* task_nice */ | 51 | __u8 ac_nice; /* task_nice */ |
| 45 | 52 | ||
| @@ -104,15 +111,30 @@ struct taskstats { | |||
| 104 | __u64 ac_etime; /* Elapsed time [usec] */ | 111 | __u64 ac_etime; /* Elapsed time [usec] */ |
| 105 | __u64 ac_utime; /* User CPU time [usec] */ | 112 | __u64 ac_utime; /* User CPU time [usec] */ |
| 106 | __u64 ac_stime; /* SYstem CPU time [usec] */ | 113 | __u64 ac_stime; /* SYstem CPU time [usec] */ |
| 107 | __u64 ac_minflt; /* Minor Page Fault */ | 114 | __u64 ac_minflt; /* Minor Page Fault Count */ |
| 108 | __u64 ac_majflt; /* Major Page Fault */ | 115 | __u64 ac_majflt; /* Major Page Fault Count */ |
| 109 | /* Basic Accounting Fields end */ | 116 | /* Basic Accounting Fields end */ |
| 110 | 117 | ||
| 111 | /* Extended accounting fields start */ | 118 | /* Extended accounting fields start */ |
| 112 | __u64 acct_rss_mem1; /* accumulated rss usage */ | 119 | /* Accumulated RSS usage in duration of a task, in MBytes-usecs. |
| 113 | __u64 acct_vm_mem1; /* accumulated virtual memory usage */ | 120 | * The current rss usage is added to this counter every time |
| 114 | __u64 hiwater_rss; /* High-watermark of RSS usage */ | 121 | * a tick is charged to a task's system time. So, at the end we |
| 115 | __u64 hiwater_vm; /* High-water virtual memory usage */ | 122 | * will have memory usage multiplied by system time. Thus an |
| 123 | * average usage per system time unit can be calculated. | ||
| 124 | */ | ||
| 125 | __u64 coremem; /* accumulated RSS usage in MB-usec */ | ||
| 126 | /* Accumulated virtual memory usage in duration of a task. | ||
| 127 | * Same as coremem above except that we keep track of VM usage. | ||
| 128 | */ | ||
| 129 | __u64 virtmem; /* accumulated VM usage in MB-usec */ | ||
| 130 | |||
| 131 | /* High watermark of RSS and virtual memory usage in duration of | ||
| 132 | * a task, in KBytes. | ||
| 133 | */ | ||
| 134 | __u64 hiwater_rss; /* High-watermark of RSS usage, in KB */ | ||
| 135 | __u64 hiwater_vm; /* High-water VM usage, in KB */ | ||
| 136 | |||
| 137 | /* The following four fields are I/O statistics of a task. */ | ||
| 116 | __u64 read_char; /* bytes read */ | 138 | __u64 read_char; /* bytes read */ |
| 117 | __u64 write_char; /* bytes written */ | 139 | __u64 write_char; /* bytes written */ |
| 118 | __u64 read_syscalls; /* read syscalls */ | 140 | __u64 read_syscalls; /* read syscalls */ |
diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 47c71daa416f..db443221ba5b 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | #include <linux/sched.h> | 20 | #include <linux/sched.h> |
| 21 | #include <linux/tsacct_kern.h> | 21 | #include <linux/tsacct_kern.h> |
| 22 | #include <linux/acct.h> | 22 | #include <linux/acct.h> |
| 23 | #include <linux/jiffies.h> | ||
| 23 | 24 | ||
| 24 | 25 | ||
| 25 | #define USEC_PER_TICK (USEC_PER_SEC/HZ) | 26 | #define USEC_PER_TICK (USEC_PER_SEC/HZ) |
| @@ -62,33 +63,35 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk) | |||
| 62 | stats->ac_stime = cputime_to_msecs(tsk->stime) * USEC_PER_MSEC; | 63 | stats->ac_stime = cputime_to_msecs(tsk->stime) * USEC_PER_MSEC; |
| 63 | stats->ac_minflt = tsk->min_flt; | 64 | stats->ac_minflt = tsk->min_flt; |
| 64 | stats->ac_majflt = tsk->maj_flt; | 65 | stats->ac_majflt = tsk->maj_flt; |
| 65 | /* Each process gets a minimum of one usec cpu time */ | ||
| 66 | if ((stats->ac_utime == 0) && (stats->ac_stime == 0)) { | ||
| 67 | stats->ac_stime = 1; | ||
| 68 | } | ||
| 69 | 66 | ||
| 70 | strncpy(stats->ac_comm, tsk->comm, sizeof(stats->ac_comm)); | 67 | strncpy(stats->ac_comm, tsk->comm, sizeof(stats->ac_comm)); |
| 71 | } | 68 | } |
| 72 | 69 | ||
| 73 | 70 | ||
| 74 | #ifdef CONFIG_TASK_XACCT | 71 | #ifdef CONFIG_TASK_XACCT |
| 72 | |||
| 73 | #define KB 1024 | ||
| 74 | #define MB (1024*KB) | ||
| 75 | /* | 75 | /* |
| 76 | * fill in extended accounting fields | 76 | * fill in extended accounting fields |
| 77 | */ | 77 | */ |
| 78 | void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) | 78 | void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) |
| 79 | { | 79 | { |
| 80 | stats->acct_rss_mem1 = p->acct_rss_mem1; | 80 | /* convert pages-jiffies to Mbyte-usec */ |
| 81 | stats->acct_vm_mem1 = p->acct_vm_mem1; | 81 | stats->coremem = jiffies_to_usecs(p->acct_rss_mem1) * PAGE_SIZE / MB; |
| 82 | stats->virtmem = jiffies_to_usecs(p->acct_vm_mem1) * PAGE_SIZE / MB; | ||
| 82 | if (p->mm) { | 83 | if (p->mm) { |
| 83 | stats->hiwater_rss = p->mm->hiwater_rss; | 84 | /* adjust to KB unit */ |
| 84 | stats->hiwater_vm = p->mm->hiwater_vm; | 85 | stats->hiwater_rss = p->mm->hiwater_rss * PAGE_SIZE / KB; |
| 86 | stats->hiwater_vm = p->mm->hiwater_vm * PAGE_SIZE / KB; | ||
| 85 | } | 87 | } |
| 86 | stats->read_char = p->rchar; | 88 | stats->read_char = p->rchar; |
| 87 | stats->write_char = p->wchar; | 89 | stats->write_char = p->wchar; |
| 88 | stats->read_syscalls = p->syscr; | 90 | stats->read_syscalls = p->syscr; |
| 89 | stats->write_syscalls = p->syscw; | 91 | stats->write_syscalls = p->syscw; |
| 90 | } | 92 | } |
| 91 | 93 | #undef KB | |
| 94 | #undef MB | ||
| 92 | 95 | ||
| 93 | /** | 96 | /** |
| 94 | * acct_update_integrals - update mm integral fields in task_struct | 97 | * acct_update_integrals - update mm integral fields in task_struct |
| @@ -97,8 +100,8 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) | |||
| 97 | void acct_update_integrals(struct task_struct *tsk) | 100 | void acct_update_integrals(struct task_struct *tsk) |
| 98 | { | 101 | { |
| 99 | if (likely(tsk->mm)) { | 102 | if (likely(tsk->mm)) { |
| 100 | long delta = | 103 | long delta = cputime_to_jiffies( |
| 101 | cputime_to_jiffies(tsk->stime) - tsk->acct_stimexpd; | 104 | cputime_sub(tsk->stime, tsk->acct_stimexpd)); |
| 102 | 105 | ||
| 103 | if (delta == 0) | 106 | if (delta == 0) |
| 104 | return; | 107 | return; |
