diff options
author | KaiGai Kohei <kaigai@ak.jp.nec.com> | 2006-03-31 05:30:45 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-03-31 15:18:54 -0500 |
commit | bb231fe3a53b2d34c1aef119613816fcb18864b1 (patch) | |
tree | 55e686f0118aebedec1a6ddf52746a8a722b51cc /kernel | |
parent | f79e2abb9bd452d97295f34376dedbec9686b986 (diff) |
[PATCH] Fix pacct bug in multithreading case.
I noticed a bug on the process accounting facility. In multi-threading
process, some data would be recorded incorrectly when the group_leader dies
earlier than one or more threads. The attached patch fixes this problem.
See below. 'bugacct' is a test program that create a worker thread after 4
seconds sleeping, then the group_leader dies soon. The worker thread
consume CPU/Memory for 6 seconds, then exit. We can estimate 10 seconds as
etime and 6 seconds as stime + utime. This is a sample program which the
group_leader dies earlier than other threads.
The results of same binary execution on different kernel are below.
-- accounted records --------------------
| btime | utime | stime | etime | minflt | majflt | comm |
original | 13:16:40 | 0.00 | 0.00 | 6.10 | 171 | 0 | bugacct |
patched | 13:20:21 | 5.83 | 0.18 | 10.03 | 32776 | 0 | bugacct |
(*) bugacct allocates 128MB memory, thus 128MB / 4KB = 32768 of minflt is
appropriate.
-- Test results in original kernel ------
$ date; time -p ./bugacct
Tue Mar 28 13:16:36 JST 2006 <- But pacct said btime is 13:16:40
real 10.11 <- But pacct said etime is 6.10
user 5.96 <- But pacct said utime is 0.00
sys 0.14 <- But pacct said stime is 0.00
$
-- Test results in patched kernel -------
$ date; time -p ./bugacct
Tue Mar 28 13:20:21 JST 2006
real 10.04
user 5.83
sys 0.19
$
In the original 2.6.16 kernel, pacct records btime, utime, stime, etime and
minflt incorrectly. In my opinion, this problem is caused by an assumption
that group_leader dies last.
The following section calculates process running time for etime and btime.
But it means running time of the thread that dies last, not process. The
start_time of the first thread in the process (group_leader) should be
reduced from uptime to calculate etime and btime correctly.
---- do_acct_process() in kernel/acct.c:
/* calculate run_time in nsec*/
do_posix_clock_monotonic_gettime(&uptime);
run_time = (u64)uptime.tv_sec*NSEC_PER_SEC + uptime.tv_nsec;
run_time -= (u64)current->start_time.tv_sec*NSEC_PER_SEC
+ current->start_time.tv_nsec;
----
The following section calculates stime and utime of the process.
But it might count the utime and stime of the group_leader duplicatly
and ignore the utime and stime of the thread dies last, when one or
more threads remain after group_leader dead.
The ac_utime should be calculated as the sum of the signal->utime
and utime of the thread dies last. The ac_stime should be done also.
---- do_acct_process() in kernel/acct.c:
jiffies = cputime_to_jiffies(cputime_add(current->group_leader->utime,
current->signal->utime));
ac.ac_utime = encode_comp_t(jiffies_to_AHZ(jiffies));
jiffies = cputime_to_jiffies(cputime_add(current->group_leader->stime,
current->signal->stime));
ac.ac_stime = encode_comp_t(jiffies_to_AHZ(jiffies));
----
The part of the minflt/majflt calculation has same problem.
This patch solves those problems, I think.
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/acct.c | 12 |
1 files changed, 6 insertions, 6 deletions
diff --git a/kernel/acct.c b/kernel/acct.c index 065d8b4e51ef..b327f4d20104 100644 --- a/kernel/acct.c +++ b/kernel/acct.c | |||
@@ -449,8 +449,8 @@ static void do_acct_process(long exitcode, struct file *file) | |||
449 | /* calculate run_time in nsec*/ | 449 | /* calculate run_time in nsec*/ |
450 | do_posix_clock_monotonic_gettime(&uptime); | 450 | do_posix_clock_monotonic_gettime(&uptime); |
451 | run_time = (u64)uptime.tv_sec*NSEC_PER_SEC + uptime.tv_nsec; | 451 | run_time = (u64)uptime.tv_sec*NSEC_PER_SEC + uptime.tv_nsec; |
452 | run_time -= (u64)current->start_time.tv_sec*NSEC_PER_SEC | 452 | run_time -= (u64)current->group_leader->start_time.tv_sec * NSEC_PER_SEC |
453 | + current->start_time.tv_nsec; | 453 | + current->group_leader->start_time.tv_nsec; |
454 | /* convert nsec -> AHZ */ | 454 | /* convert nsec -> AHZ */ |
455 | elapsed = nsec_to_AHZ(run_time); | 455 | elapsed = nsec_to_AHZ(run_time); |
456 | #if ACCT_VERSION==3 | 456 | #if ACCT_VERSION==3 |
@@ -469,10 +469,10 @@ static void do_acct_process(long exitcode, struct file *file) | |||
469 | #endif | 469 | #endif |
470 | do_div(elapsed, AHZ); | 470 | do_div(elapsed, AHZ); |
471 | ac.ac_btime = xtime.tv_sec - elapsed; | 471 | ac.ac_btime = xtime.tv_sec - elapsed; |
472 | jiffies = cputime_to_jiffies(cputime_add(current->group_leader->utime, | 472 | jiffies = cputime_to_jiffies(cputime_add(current->utime, |
473 | current->signal->utime)); | 473 | current->signal->utime)); |
474 | ac.ac_utime = encode_comp_t(jiffies_to_AHZ(jiffies)); | 474 | ac.ac_utime = encode_comp_t(jiffies_to_AHZ(jiffies)); |
475 | jiffies = cputime_to_jiffies(cputime_add(current->group_leader->stime, | 475 | jiffies = cputime_to_jiffies(cputime_add(current->stime, |
476 | current->signal->stime)); | 476 | current->signal->stime)); |
477 | ac.ac_stime = encode_comp_t(jiffies_to_AHZ(jiffies)); | 477 | ac.ac_stime = encode_comp_t(jiffies_to_AHZ(jiffies)); |
478 | /* we really need to bite the bullet and change layout */ | 478 | /* we really need to bite the bullet and change layout */ |
@@ -522,9 +522,9 @@ static void do_acct_process(long exitcode, struct file *file) | |||
522 | ac.ac_io = encode_comp_t(0 /* current->io_usage */); /* %% */ | 522 | ac.ac_io = encode_comp_t(0 /* current->io_usage */); /* %% */ |
523 | ac.ac_rw = encode_comp_t(ac.ac_io / 1024); | 523 | ac.ac_rw = encode_comp_t(ac.ac_io / 1024); |
524 | ac.ac_minflt = encode_comp_t(current->signal->min_flt + | 524 | ac.ac_minflt = encode_comp_t(current->signal->min_flt + |
525 | current->group_leader->min_flt); | 525 | current->min_flt); |
526 | ac.ac_majflt = encode_comp_t(current->signal->maj_flt + | 526 | ac.ac_majflt = encode_comp_t(current->signal->maj_flt + |
527 | current->group_leader->maj_flt); | 527 | current->maj_flt); |
528 | ac.ac_swaps = encode_comp_t(0); | 528 | ac.ac_swaps = encode_comp_t(0); |
529 | ac.ac_exitcode = exitcode; | 529 | ac.ac_exitcode = exitcode; |
530 | 530 | ||