aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Andrea Righi <righi.andrea@gmail.com> 2008-07-25 04:48:49 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2008-07-25 13:53:47 -0400
commit: 297c5d92634c809cef23d73e7b2556f2528ff7e2 (patch)
tree: d006fa29daa24242c64cff3b66dd75fbb0003b0f
parent: 0c18d7a5df82524e634637c3aec24d4cba096442 (diff)
task IO accounting: provide distinct tgid/tid I/O statistics
Report per-thread I/O statistics in /proc/pid/task/tid/io and aggregate parent I/O statistics in /proc/pid/io. This approach follows the same model used to account per-process and per-thread CPU times. As a practical application, this makes it possible, for example, to quickly find the top I/O consumer when a process spawns many child threads that perform the actual I/O work, because the aggregated I/O statistics can always be found in /proc/pid/io. [ Oleg Nesterov points out that we should check that the task is still alive before we iterate over the threads, but also says that we can do that fixup on top of this later. - Linus ] Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com> Signed-off-by: Andrea Righi <righi.andrea@gmail.com> Cc: Matt Heaton <matt@hostmonster.com> Cc: Shailabh Nagar <nagar@watson.ibm.com> Acked-by-with-comments: Oleg Nesterov <oleg@tv-sign.ru> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- fs/proc/base.c 86
-rw-r--r-- include/linux/sched.h 4
-rw-r--r-- kernel/exit.c 27
-rw-r--r-- kernel/fork.c 6
4 files changed, 108 insertions, 15 deletions
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 58c3e6a8e15e..a891fe4cb43b 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2376,29 +2376,82 @@ static int proc_base_fill_cache(struct file *filp, void *dirent,
2376} 2376}
2377 2377
2378#ifdef CONFIG_TASK_IO_ACCOUNTING 2378#ifdef CONFIG_TASK_IO_ACCOUNTING
2379static int proc_pid_io_accounting(struct task_struct *task, char *buffer) 2379static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
2380{ 2380{
2381 u64 rchar, wchar, syscr, syscw;
2382 struct task_io_accounting ioac;
2383
2384 if (!whole) {
2385 rchar = task->rchar;
2386 wchar = task->wchar;
2387 syscr = task->syscr;
2388 syscw = task->syscw;
2389 memcpy(&ioac, &task->ioac, sizeof(ioac));
2390 } else {
2391 unsigned long flags;
2392 struct task_struct *t = task;
2393 rchar = wchar = syscr = syscw = 0;
2394 memset(&ioac, 0, sizeof(ioac));
2395
2396 rcu_read_lock();
2397 do {
2398 rchar += t->rchar;
2399 wchar += t->wchar;
2400 syscr += t->syscr;
2401 syscw += t->syscw;
2402
2403 ioac.read_bytes += t->ioac.read_bytes;
2404 ioac.write_bytes += t->ioac.write_bytes;
2405 ioac.cancelled_write_bytes +=
2406 t->ioac.cancelled_write_bytes;
2407 t = next_thread(t);
2408 } while (t != task);
2409 rcu_read_unlock();
2410
2411 if (lock_task_sighand(task, &flags)) {
2412 struct signal_struct *sig = task->signal;
2413
2414 rchar += sig->rchar;
2415 wchar += sig->wchar;
2416 syscr += sig->syscr;
2417 syscw += sig->syscw;
2418
2419 ioac.read_bytes += sig->ioac.read_bytes;
2420 ioac.write_bytes += sig->ioac.write_bytes;
2421 ioac.cancelled_write_bytes +=
2422 sig->ioac.cancelled_write_bytes;
2423
2424 unlock_task_sighand(task, &flags);
2425 }
2426 }
2427
2381 return sprintf(buffer, 2428 return sprintf(buffer,
2382#ifdef CONFIG_TASK_XACCT
2383 "rchar: %llu\n" 2429 "rchar: %llu\n"
2384 "wchar: %llu\n" 2430 "wchar: %llu\n"
2385 "syscr: %llu\n" 2431 "syscr: %llu\n"
2386 "syscw: %llu\n" 2432 "syscw: %llu\n"
2387#endif
2388 "read_bytes: %llu\n" 2433 "read_bytes: %llu\n"
2389 "write_bytes: %llu\n" 2434 "write_bytes: %llu\n"
2390 "cancelled_write_bytes: %llu\n", 2435 "cancelled_write_bytes: %llu\n",
2391#ifdef CONFIG_TASK_XACCT 2436 (unsigned long long)rchar,
2392 (unsigned long long)task->rchar, 2437 (unsigned long long)wchar,
2393 (unsigned long long)task->wchar, 2438 (unsigned long long)syscr,
2394 (unsigned long long)task->syscr, 2439 (unsigned long long)syscw,
2395 (unsigned long long)task->syscw, 2440 (unsigned long long)ioac.read_bytes,
2396#endif 2441 (unsigned long long)ioac.write_bytes,
2397 (unsigned long long)task->ioac.read_bytes, 2442 (unsigned long long)ioac.cancelled_write_bytes);
2398 (unsigned long long)task->ioac.write_bytes, 2443}
2399 (unsigned long long)task->ioac.cancelled_write_bytes); 2444
2445static int proc_tid_io_accounting(struct task_struct *task, char *buffer)
2446{
2447 return do_io_accounting(task, buffer, 0);
2400} 2448}
2401#endif 2449
2450static int proc_tgid_io_accounting(struct task_struct *task, char *buffer)
2451{
2452 return do_io_accounting(task, buffer, 1);
2453}
2454#endif /* CONFIG_TASK_IO_ACCOUNTING */
2402 2455
2403/* 2456/*
2404 * Thread groups 2457 * Thread groups
@@ -2470,7 +2523,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2470 REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter), 2523 REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter),
2471#endif 2524#endif
2472#ifdef CONFIG_TASK_IO_ACCOUNTING 2525#ifdef CONFIG_TASK_IO_ACCOUNTING
2473 INF("io", S_IRUGO, pid_io_accounting), 2526 INF("io", S_IRUGO, tgid_io_accounting),
2474#endif 2527#endif
2475}; 2528};
2476 2529
@@ -2797,6 +2850,9 @@ static const struct pid_entry tid_base_stuff[] = {
2797#ifdef CONFIG_FAULT_INJECTION 2850#ifdef CONFIG_FAULT_INJECTION
2798 REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject), 2851 REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject),
2799#endif 2852#endif
2853#ifdef CONFIG_TASK_IO_ACCOUNTING
2854 INF("io", S_IRUGO, tid_io_accounting),
2855#endif
2800}; 2856};
2801 2857
2802static int proc_tid_base_readdir(struct file * filp, 2858static int proc_tid_base_readdir(struct file * filp,
diff --git a/include/linux/sched.h b/include/linux/sched.h
index af780f299c7c..d22ffe06d0eb 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -506,6 +506,10 @@ struct signal_struct {
506 unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; 506 unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
507 unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; 507 unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
508 unsigned long inblock, oublock, cinblock, coublock; 508 unsigned long inblock, oublock, cinblock, coublock;
509#ifdef CONFIG_TASK_XACCT
510 u64 rchar, wchar, syscr, syscw;
511#endif
512 struct task_io_accounting ioac;
509 513
510 /* 514 /*
511 * Cumulative ns of scheduled CPU time for dead threads in the 515 * Cumulative ns of scheduled CPU time for dead threads in the
diff --git a/kernel/exit.c b/kernel/exit.c
index 8a4d4d12e294..ad933bb29ec7 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -120,6 +120,18 @@ static void __exit_signal(struct task_struct *tsk)
120 sig->nivcsw += tsk->nivcsw; 120 sig->nivcsw += tsk->nivcsw;
121 sig->inblock += task_io_get_inblock(tsk); 121 sig->inblock += task_io_get_inblock(tsk);
122 sig->oublock += task_io_get_oublock(tsk); 122 sig->oublock += task_io_get_oublock(tsk);
123#ifdef CONFIG_TASK_XACCT
124 sig->rchar += tsk->rchar;
125 sig->wchar += tsk->wchar;
126 sig->syscr += tsk->syscr;
127 sig->syscw += tsk->syscw;
128#endif /* CONFIG_TASK_XACCT */
129#ifdef CONFIG_TASK_IO_ACCOUNTING
130 sig->ioac.read_bytes += tsk->ioac.read_bytes;
131 sig->ioac.write_bytes += tsk->ioac.write_bytes;
132 sig->ioac.cancelled_write_bytes +=
133 tsk->ioac.cancelled_write_bytes;
134#endif /* CONFIG_TASK_IO_ACCOUNTING */
123 sig->sum_sched_runtime += tsk->se.sum_exec_runtime; 135 sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
124 sig = NULL; /* Marker for below. */ 136 sig = NULL; /* Marker for below. */
125 } 137 }
@@ -1366,6 +1378,21 @@ static int wait_task_zombie(struct task_struct *p, int options,
1366 psig->coublock += 1378 psig->coublock +=
1367 task_io_get_oublock(p) + 1379 task_io_get_oublock(p) +
1368 sig->oublock + sig->coublock; 1380 sig->oublock + sig->coublock;
1381#ifdef CONFIG_TASK_XACCT
1382 psig->rchar += p->rchar + sig->rchar;
1383 psig->wchar += p->wchar + sig->wchar;
1384 psig->syscr += p->syscr + sig->syscr;
1385 psig->syscw += p->syscw + sig->syscw;
1386#endif /* CONFIG_TASK_XACCT */
1387#ifdef CONFIG_TASK_IO_ACCOUNTING
1388 psig->ioac.read_bytes +=
1389 p->ioac.read_bytes + sig->ioac.read_bytes;
1390 psig->ioac.write_bytes +=
1391 p->ioac.write_bytes + sig->ioac.write_bytes;
1392 psig->ioac.cancelled_write_bytes +=
1393 p->ioac.cancelled_write_bytes +
1394 sig->ioac.cancelled_write_bytes;
1395#endif /* CONFIG_TASK_IO_ACCOUNTING */
1369 spin_unlock_irq(&p->parent->sighand->siglock); 1396 spin_unlock_irq(&p->parent->sighand->siglock);
1370 } 1397 }
1371 1398
diff --git a/kernel/fork.c b/kernel/fork.c
index 813d5c89b9d5..b99d73e971a4 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -812,6 +812,12 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
812 sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; 812 sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
813 sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; 813 sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
814 sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; 814 sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
815#ifdef CONFIG_TASK_XACCT
816 sig->rchar = sig->wchar = sig->syscr = sig->syscw = 0;
817#endif
818#ifdef CONFIG_TASK_IO_ACCOUNTING
819 memset(&sig->ioac, 0, sizeof(sig->ioac));
820#endif
815 sig->sum_sched_runtime = 0; 821 sig->sum_sched_runtime = 0;
816 INIT_LIST_HEAD(&sig->cpu_timers[0]); 822 INIT_LIST_HEAD(&sig->cpu_timers[0]);
817 INIT_LIST_HEAD(&sig->cpu_timers[1]); 823 INIT_LIST_HEAD(&sig->cpu_timers[1]);