aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux/perf_counter.h
diff options
context:
space:
mode:
author: Paul Mackerras <paulus@samba.org> 2009-05-22 00:27:22 -0400
committer: Ingo Molnar <mingo@elte.hu> 2009-05-22 06:18:20 -0400
commit: 564c2b210add41df9a3a5aaa365c1d97cff6110d (patch)
tree: 2fe54df816abfe8c6010d277c79fbe18aa5c4082 /include/linux/perf_counter.h
parent: a63eaf34ae60bdb067a354cc8def2e8f4a01f5f4 (diff)
perf_counter: Optimize context switch between identical inherited contexts
When monitoring a process and its descendants with a set of inherited counters, we can often get the situation in a context switch where both the old (outgoing) and new (incoming) process have the same set of counters, and their values are ultimately going to be added together. In that situation it doesn't matter which set of counters is used to count the activity for the new process, so there is really no need to go through the process of reading the hardware counters and updating the old task's counters and then setting up the PMU for the new task.

This optimizes the context switch in this situation. Instead of scheduling out the perf_counter_context for the old task and scheduling in the new context, we simply transfer the old context to the new task and keep using it without interruption. The new context gets transferred to the old task. This means that both tasks still have a valid perf_counter_context, so no special case is introduced when the old task gets scheduled in again, either on this CPU or another CPU.

The equivalence of contexts is detected by keeping a pointer in each cloned context pointing to the context it was cloned from. To cope with the situation where a context is changed by adding or removing counters after it has been cloned, we also keep a generation number on each context which is incremented every time a context is changed. When a context is cloned we take a copy of the parent's generation number, and two cloned contexts are equivalent only if they have the same parent and the same generation number. In order that the parent context pointer remains valid (and is not reused), we increment the parent context's reference count for each context cloned from it.

Since we don't have individual fds for the counters in a cloned context, the only thing that can make two clones of a given parent different after they have been cloned is enabling or disabling all counters with prctl.
To account for this, we keep a count of the number of enabled counters in each context. Two contexts must have the same number of enabled counters to be considered equivalent.

Here are some measurements of the context switch time as measured with the lat_ctx benchmark from lmbench, comparing the times obtained with and without this patch series:

              -----Unmodified-----      With this patch series
Counters:     none    2 HW    4H+4S     none    2 HW    4H+4S

2 processes:
Average       3.44    6.45    11.24     3.12    3.39    3.60
St dev        0.04    0.04     0.13     0.05    0.17    0.19

8 processes:
Average       6.45    8.79    14.00     5.57    6.23    7.57
St dev        1.27    1.04     0.88     1.42    1.46    1.42

32 processes:
Average       5.56    8.43    13.78     5.28    5.55    7.15
St dev        0.41    0.47     0.53     0.54    0.57    0.81

The numbers are the mean and standard deviation of 20 runs of lat_ctx. The "none" columns are lat_ctx run directly without any counters. The "2 HW" columns are with lat_ctx run under perfstat, counting cycles and instructions. The "4H+4S" columns are lat_ctx run under perfstat with 4 hardware counters and 4 software counters (cycles, instructions, cache references, cache misses, task clock, context switch, cpu migrations, and page faults).

[ Impact: performance optimization of counter context-switches ]

Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <18966.10666.517218.332164@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'include/linux/perf_counter.h')
-rw-r--r-- include/linux/perf_counter.h 12
1 files changed, 11 insertions, 1 deletions
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 071309005468..4cae01a50450 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -513,6 +513,7 @@ struct perf_counter_context {
513 struct list_head event_list; 513 struct list_head event_list;
514 int nr_counters; 514 int nr_counters;
515 int nr_active; 515 int nr_active;
516 int nr_enabled;
516 int is_active; 517 int is_active;
517 atomic_t refcount; 518 atomic_t refcount;
518 struct task_struct *task; 519 struct task_struct *task;
@@ -522,6 +523,14 @@ struct perf_counter_context {
522 */ 523 */
523 u64 time; 524 u64 time;
524 u64 timestamp; 525 u64 timestamp;
526
527 /*
528 * These fields let us detect when two contexts have both
529 * been cloned (inherited) from a common ancestor.
530 */
531 struct perf_counter_context *parent_ctx;
532 u32 parent_gen;
533 u32 generation;
525}; 534};
526 535
527/** 536/**
@@ -552,7 +561,8 @@ extern int perf_max_counters;
552extern const struct pmu *hw_perf_counter_init(struct perf_counter *counter); 561extern const struct pmu *hw_perf_counter_init(struct perf_counter *counter);
553 562
554extern void perf_counter_task_sched_in(struct task_struct *task, int cpu); 563extern void perf_counter_task_sched_in(struct task_struct *task, int cpu);
555extern void perf_counter_task_sched_out(struct task_struct *task, int cpu); 564extern void perf_counter_task_sched_out(struct task_struct *task,
565 struct task_struct *next, int cpu);
556extern void perf_counter_task_tick(struct task_struct *task, int cpu); 566extern void perf_counter_task_tick(struct task_struct *task, int cpu);
557extern void perf_counter_init_task(struct task_struct *child); 567extern void perf_counter_init_task(struct task_struct *child);
558extern void perf_counter_exit_task(struct task_struct *child); 568extern void perf_counter_exit_task(struct task_struct *child);