aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPeter Zijlstra <peterz@infradead.org>2014-09-24 07:48:42 -0400
committerIngo Molnar <mingo@kernel.org>2014-11-16 05:42:04 -0500
commit2565711fb7d7c28e0cd93c8971b520d1b10b857c (patch)
treeace1ace82ecd5f2490b65a20df5b1d781d8a62c3
parent4b6c51773d86883a2e80cffadbe4f178ac1babd8 (diff)
perf: Improve the perf_sample_data struct layout
This patch reorders fields in the perf_sample_data struct in order to minimize the number of cachelines touched in perf_sample_data_init(). It also removes some initializations which are redundant with the code in kernel/events/core.c. Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Link: http://lkml.kernel.org/r/1411559322-16548-7-git-send-email-eranian@google.com Cc: cebbert.lkml@gmail.com Cc: Arnaldo Carvalho de Melo <acme@kernel.org> Cc: jolsa@redhat.com Cc: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--include/linux/perf_event.h34
-rw-r--r--kernel/events/core.c16
2 files changed, 25 insertions, 25 deletions
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 68d46d536e24..486e84ccb1f9 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -580,35 +580,40 @@ extern u64 perf_event_read_value(struct perf_event *event,
580 580
581 581
582struct perf_sample_data { 582struct perf_sample_data {
583 u64 type; 583 /*
584 * Fields set by perf_sample_data_init(), group so as to
585 * minimize the cachelines touched.
586 */
587 u64 addr;
588 struct perf_raw_record *raw;
589 struct perf_branch_stack *br_stack;
590 u64 period;
591 u64 weight;
592 u64 txn;
593 union perf_mem_data_src data_src;
584 594
595 /*
596 * The other fields, optionally {set,used} by
597 * perf_{prepare,output}_sample().
598 */
599 u64 type;
585 u64 ip; 600 u64 ip;
586 struct { 601 struct {
587 u32 pid; 602 u32 pid;
588 u32 tid; 603 u32 tid;
589 } tid_entry; 604 } tid_entry;
590 u64 time; 605 u64 time;
591 u64 addr;
592 u64 id; 606 u64 id;
593 u64 stream_id; 607 u64 stream_id;
594 struct { 608 struct {
595 u32 cpu; 609 u32 cpu;
596 u32 reserved; 610 u32 reserved;
597 } cpu_entry; 611 } cpu_entry;
598 u64 period;
599 union perf_mem_data_src data_src;
600 struct perf_callchain_entry *callchain; 612 struct perf_callchain_entry *callchain;
601 struct perf_raw_record *raw;
602 struct perf_branch_stack *br_stack;
603 struct perf_regs regs_user; 613 struct perf_regs regs_user;
604 struct perf_regs regs_intr; 614 struct perf_regs regs_intr;
605 u64 stack_user_size; 615 u64 stack_user_size;
606 u64 weight; 616} ____cacheline_aligned;
607 /*
608 * Transaction flags for abort events:
609 */
610 u64 txn;
611};
612 617
613/* default value for data source */ 618/* default value for data source */
614#define PERF_MEM_NA (PERF_MEM_S(OP, NA) |\ 619#define PERF_MEM_NA (PERF_MEM_S(OP, NA) |\
@@ -625,14 +630,9 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
625 data->raw = NULL; 630 data->raw = NULL;
626 data->br_stack = NULL; 631 data->br_stack = NULL;
627 data->period = period; 632 data->period = period;
628 data->regs_user.abi = PERF_SAMPLE_REGS_ABI_NONE;
629 data->regs_user.regs = NULL;
630 data->stack_user_size = 0;
631 data->weight = 0; 633 data->weight = 0;
632 data->data_src.val = PERF_MEM_NA; 634 data->data_src.val = PERF_MEM_NA;
633 data->txn = 0; 635 data->txn = 0;
634 data->regs_intr.abi = PERF_SAMPLE_REGS_ABI_NONE;
635 data->regs_intr.regs = NULL;
636} 636}
637 637
638extern void perf_output_sample(struct perf_output_handle *handle, 638extern void perf_output_sample(struct perf_output_handle *handle,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index c2be1597ece7..3e19d3ebc29c 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4471,8 +4471,11 @@ static void perf_sample_regs_user(struct perf_regs *regs_user,
4471 } 4471 }
4472 4472
4473 if (regs) { 4473 if (regs) {
4474 regs_user->regs = regs;
4475 regs_user->abi = perf_reg_abi(current); 4474 regs_user->abi = perf_reg_abi(current);
4475 regs_user->regs = regs;
4476 } else {
4477 regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE;
4478 regs_user->regs = NULL;
4476 } 4479 }
4477} 4480}
4478 4481
@@ -4947,12 +4950,13 @@ void perf_prepare_sample(struct perf_event_header *header,
4947 header->size += size; 4950 header->size += size;
4948 } 4951 }
4949 4952
4953 if (sample_type & (PERF_SAMPLE_REGS_USER | PERF_SAMPLE_STACK_USER))
4954 perf_sample_regs_user(&data->regs_user, regs);
4955
4950 if (sample_type & PERF_SAMPLE_REGS_USER) { 4956 if (sample_type & PERF_SAMPLE_REGS_USER) {
4951 /* regs dump ABI info */ 4957 /* regs dump ABI info */
4952 int size = sizeof(u64); 4958 int size = sizeof(u64);
4953 4959
4954 perf_sample_regs_user(&data->regs_user, regs);
4955
4956 if (data->regs_user.regs) { 4960 if (data->regs_user.regs) {
4957 u64 mask = event->attr.sample_regs_user; 4961 u64 mask = event->attr.sample_regs_user;
4958 size += hweight64(mask) * sizeof(u64); 4962 size += hweight64(mask) * sizeof(u64);
@@ -4968,15 +4972,11 @@ void perf_prepare_sample(struct perf_event_header *header,
4968 * in case new sample type is added, because we could eat 4972 * in case new sample type is added, because we could eat
4969 * up the rest of the sample size. 4973 * up the rest of the sample size.
4970 */ 4974 */
4971 struct perf_regs *uregs = &data->regs_user;
4972 u16 stack_size = event->attr.sample_stack_user; 4975 u16 stack_size = event->attr.sample_stack_user;
4973 u16 size = sizeof(u64); 4976 u16 size = sizeof(u64);
4974 4977
4975 if (!uregs->abi)
4976 perf_sample_regs_user(uregs, regs);
4977
4978 stack_size = perf_sample_ustack_size(stack_size, header->size, 4978 stack_size = perf_sample_ustack_size(stack_size, header->size,
4979 uregs->regs); 4979 data->regs_user.regs);
4980 4980
4981 /* 4981 /*
4982 * If there is something to dump, add space for the dump 4982 * If there is something to dump, add space for the dump