aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPeter Zijlstra <a.p.zijlstra@chello.nl>2012-01-30 08:51:37 -0500
committerIngo Molnar <mingo@elte.hu>2012-02-22 06:06:55 -0500
commit8c79a045fd590a26e81e75f5d8d4ec5c7d23e565 (patch)
tree8123849709e37dc2394373ad40cf333b2f19ebb5
parent719741d9986572d64b47c35c09f5e7bb8d389400 (diff)
sched/events: Revert trace_sched_stat_sleeptime()
Commit 1ac9bc69 ("sched/tracing: Add a new tracepoint for sleeptime") added a new sched:sched_stat_sleeptime tracepoint. It's broken: the first sample we get on a task might be bad because of a stale sleep_start value that wasn't reset at the last task switch because the tracepoint was not active. It also breaks the existing schedstat samples due to the side effects of: - se->statistics.sleep_start = 0; ... - se->statistics.block_start = 0; Nor do I see means to fix it without adding overhead to the scheduler fast path, which I'm not willing to for the sake of redundant instrumentation. Most importantly, sleep time information can already be constructed by tracing context switches and wakeups, and taking the timestamp difference between the schedule-out, the wakeup and the schedule-in. Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Andrew Vagin <avagin@openvz.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Link: http://lkml.kernel.org/n/tip-pc4c9qhl8q6vg3bs4j6k0rbd@git.kernel.org Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--include/trace/events/sched.h50
-rw-r--r--kernel/sched/core.c1
-rw-r--r--kernel/sched/fair.c2
3 files changed, 2 insertions, 51 deletions
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 6ba596b07a72..e33ed1bfa113 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -370,56 +370,6 @@ TRACE_EVENT(sched_stat_runtime,
370 (unsigned long long)__entry->vruntime) 370 (unsigned long long)__entry->vruntime)
371); 371);
372 372
373#ifdef CREATE_TRACE_POINTS
374static inline u64 trace_get_sleeptime(struct task_struct *tsk)
375{
376#ifdef CONFIG_SCHEDSTATS
377 u64 block, sleep;
378
379 block = tsk->se.statistics.block_start;
380 sleep = tsk->se.statistics.sleep_start;
381 tsk->se.statistics.block_start = 0;
382 tsk->se.statistics.sleep_start = 0;
383
384 return block ? block : sleep ? sleep : 0;
385#else
386 return 0;
387#endif
388}
389#endif
390
391/*
392 * Tracepoint for accounting sleeptime (time the task is sleeping
393 * or waiting for I/O).
394 */
395TRACE_EVENT(sched_stat_sleeptime,
396
397 TP_PROTO(struct task_struct *tsk, u64 now),
398
399 TP_ARGS(tsk, now),
400
401 TP_STRUCT__entry(
402 __array( char, comm, TASK_COMM_LEN )
403 __field( pid_t, pid )
404 __field( u64, sleeptime )
405 ),
406
407 TP_fast_assign(
408 memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
409 __entry->pid = tsk->pid;
410 __entry->sleeptime = trace_get_sleeptime(tsk);
411 __entry->sleeptime = __entry->sleeptime ?
412 now - __entry->sleeptime : 0;
413 )
414 TP_perf_assign(
415 __perf_count(__entry->sleeptime);
416 ),
417
418 TP_printk("comm=%s pid=%d sleeptime=%Lu [ns]",
419 __entry->comm, __entry->pid,
420 (unsigned long long)__entry->sleeptime)
421);
422
423/* 373/*
424 * Tracepoint for showing priority inheritance modifying a tasks 374 * Tracepoint for showing priority inheritance modifying a tasks
425 * priority. 375 * priority.
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5255c9d2e053..b342f57879e6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1932,7 +1932,6 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
1932 local_irq_enable(); 1932 local_irq_enable();
1933#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ 1933#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
1934 finish_lock_switch(rq, prev); 1934 finish_lock_switch(rq, prev);
1935 trace_sched_stat_sleeptime(current, rq->clock);
1936 1935
1937 fire_sched_in_preempt_notifiers(current); 1936 fire_sched_in_preempt_notifiers(current);
1938 if (mm) 1937 if (mm)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7c6414fc669d..aca16b843b7e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1003,6 +1003,7 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
1003 if (unlikely(delta > se->statistics.sleep_max)) 1003 if (unlikely(delta > se->statistics.sleep_max))
1004 se->statistics.sleep_max = delta; 1004 se->statistics.sleep_max = delta;
1005 1005
1006 se->statistics.sleep_start = 0;
1006 se->statistics.sum_sleep_runtime += delta; 1007 se->statistics.sum_sleep_runtime += delta;
1007 1008
1008 if (tsk) { 1009 if (tsk) {
@@ -1019,6 +1020,7 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
1019 if (unlikely(delta > se->statistics.block_max)) 1020 if (unlikely(delta > se->statistics.block_max))
1020 se->statistics.block_max = delta; 1021 se->statistics.block_max = delta;
1021 1022
1023 se->statistics.block_start = 0;
1022 se->statistics.sum_sleep_runtime += delta; 1024 se->statistics.sum_sleep_runtime += delta;
1023 1025
1024 if (tsk) { 1026 if (tsk) {