aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorKaiGai Kohei <kaigai@ak.jp.nec.com>2006-03-31 05:30:45 -0500
committerLinus Torvalds <torvalds@g5.osdl.org>2006-03-31 15:18:54 -0500
commitbb231fe3a53b2d34c1aef119613816fcb18864b1 (patch)
tree55e686f0118aebedec1a6ddf52746a8a722b51cc /kernel
parentf79e2abb9bd452d97295f34376dedbec9686b986 (diff)
[PATCH] Fix pacct bug in multithreading case.
I noticed a bug on the process accounting facility. In multi-threading process, some data would be recorded incorrectly when the group_leader dies earlier than one or more threads. The attached patch fixes this problem. See below. 'bugacct' is a test program that create a worker thread after 4 seconds sleeping, then the group_leader dies soon. The worker thread consume CPU/Memory for 6 seconds, then exit. We can estimate 10 seconds as etime and 6 seconds as stime + utime. This is a sample program which the group_leader dies earlier than other threads. The results of same binary execution on different kernel are below. -- accounted records -------------------- | btime | utime | stime | etime | minflt | majflt | comm | original | 13:16:40 | 0.00 | 0.00 | 6.10 | 171 | 0 | bugacct | patched | 13:20:21 | 5.83 | 0.18 | 10.03 | 32776 | 0 | bugacct | (*) bugacct allocates 128MB memory, thus 128MB / 4KB = 32768 of minflt is appropriate. -- Test results in original kernel ------ $ date; time -p ./bugacct Tue Mar 28 13:16:36 JST 2006 <- But pacct said btime is 13:16:40 real 10.11 <- But pacct said etime is 6.10 user 5.96 <- But pacct said utime is 0.00 sys 0.14 <- But pacct said stime is 0.00 $ -- Test results in patched kernel ------- $ date; time -p ./bugacct Tue Mar 28 13:20:21 JST 2006 real 10.04 user 5.83 sys 0.19 $ In the original 2.6.16 kernel, pacct records btime, utime, stime, etime and minflt incorrectly. In my opinion, this problem is caused by an assumption that group_leader dies last. The following section calculates process running time for etime and btime. But it means running time of the thread that dies last, not process. The start_time of the first thread in the process (group_leader) should be reduced from uptime to calculate etime and btime correctly. ---- do_acct_process() in kernel/acct.c: /* calculate run_time in nsec*/ do_posix_clock_monotonic_gettime(&uptime); run_time = (u64)uptime.tv_sec*NSEC_PER_SEC + uptime.tv_nsec; run_time -= (u64)current->start_time.tv_sec*NSEC_PER_SEC + current->start_time.tv_nsec; ---- The following section calculates stime and utime of the process. But it might count the utime and stime of the group_leader duplicatly and ignore the utime and stime of the thread dies last, when one or more threads remain after group_leader dead. The ac_utime should be calculated as the sum of the signal->utime and utime of the thread dies last. The ac_stime should be done also. ---- do_acct_process() in kernel/acct.c: jiffies = cputime_to_jiffies(cputime_add(current->group_leader->utime, current->signal->utime)); ac.ac_utime = encode_comp_t(jiffies_to_AHZ(jiffies)); jiffies = cputime_to_jiffies(cputime_add(current->group_leader->stime, current->signal->stime)); ac.ac_stime = encode_comp_t(jiffies_to_AHZ(jiffies)); ---- The part of the minflt/majflt calculation has same problem. This patch solves those problems, I think. Cc: Ingo Molnar <mingo@elte.hu> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/acct.c12
1 files changed, 6 insertions, 6 deletions
diff --git a/kernel/acct.c b/kernel/acct.c
index 065d8b4e51ef..b327f4d20104 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -449,8 +449,8 @@ static void do_acct_process(long exitcode, struct file *file)
449 /* calculate run_time in nsec*/ 449 /* calculate run_time in nsec*/
450 do_posix_clock_monotonic_gettime(&uptime); 450 do_posix_clock_monotonic_gettime(&uptime);
451 run_time = (u64)uptime.tv_sec*NSEC_PER_SEC + uptime.tv_nsec; 451 run_time = (u64)uptime.tv_sec*NSEC_PER_SEC + uptime.tv_nsec;
452 run_time -= (u64)current->start_time.tv_sec*NSEC_PER_SEC 452 run_time -= (u64)current->group_leader->start_time.tv_sec * NSEC_PER_SEC
453 + current->start_time.tv_nsec; 453 + current->group_leader->start_time.tv_nsec;
454 /* convert nsec -> AHZ */ 454 /* convert nsec -> AHZ */
455 elapsed = nsec_to_AHZ(run_time); 455 elapsed = nsec_to_AHZ(run_time);
456#if ACCT_VERSION==3 456#if ACCT_VERSION==3
@@ -469,10 +469,10 @@ static void do_acct_process(long exitcode, struct file *file)
469#endif 469#endif
470 do_div(elapsed, AHZ); 470 do_div(elapsed, AHZ);
471 ac.ac_btime = xtime.tv_sec - elapsed; 471 ac.ac_btime = xtime.tv_sec - elapsed;
472 jiffies = cputime_to_jiffies(cputime_add(current->group_leader->utime, 472 jiffies = cputime_to_jiffies(cputime_add(current->utime,
473 current->signal->utime)); 473 current->signal->utime));
474 ac.ac_utime = encode_comp_t(jiffies_to_AHZ(jiffies)); 474 ac.ac_utime = encode_comp_t(jiffies_to_AHZ(jiffies));
475 jiffies = cputime_to_jiffies(cputime_add(current->group_leader->stime, 475 jiffies = cputime_to_jiffies(cputime_add(current->stime,
476 current->signal->stime)); 476 current->signal->stime));
477 ac.ac_stime = encode_comp_t(jiffies_to_AHZ(jiffies)); 477 ac.ac_stime = encode_comp_t(jiffies_to_AHZ(jiffies));
478 /* we really need to bite the bullet and change layout */ 478 /* we really need to bite the bullet and change layout */
@@ -522,9 +522,9 @@ static void do_acct_process(long exitcode, struct file *file)
522 ac.ac_io = encode_comp_t(0 /* current->io_usage */); /* %% */ 522 ac.ac_io = encode_comp_t(0 /* current->io_usage */); /* %% */
523 ac.ac_rw = encode_comp_t(ac.ac_io / 1024); 523 ac.ac_rw = encode_comp_t(ac.ac_io / 1024);
524 ac.ac_minflt = encode_comp_t(current->signal->min_flt + 524 ac.ac_minflt = encode_comp_t(current->signal->min_flt +
525 current->group_leader->min_flt); 525 current->min_flt);
526 ac.ac_majflt = encode_comp_t(current->signal->maj_flt + 526 ac.ac_majflt = encode_comp_t(current->signal->maj_flt +
527 current->group_leader->maj_flt); 527 current->maj_flt);
528 ac.ac_swaps = encode_comp_t(0); 528 ac.ac_swaps = encode_comp_t(0);
529 ac.ac_exitcode = exitcode; 529 ac.ac_exitcode = exitcode;
530 530