diff options
author | Peter Zijlstra <peterz@infradead.org> | 2014-09-24 07:48:42 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2014-11-16 05:42:04 -0500 |
commit | 2565711fb7d7c28e0cd93c8971b520d1b10b857c (patch) | |
tree | ace1ace82ecd5f2490b65a20df5b1d781d8a62c3 | |
parent | 4b6c51773d86883a2e80cffadbe4f178ac1babd8 (diff) |
perf: Improve the perf_sample_data struct layout
This patch reorders fields in the perf_sample_data struct in order to
minimize the number of cachelines touched in perf_sample_data_init().
It also removes some initializations which are redundant with the code
in kernel/events/core.c.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1411559322-16548-7-git-send-email-eranian@google.com
Cc: cebbert.lkml@gmail.com
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: jolsa@redhat.com
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- | include/linux/perf_event.h | 34 | ||||
-rw-r--r-- | kernel/events/core.c | 16 |
2 files changed, 25 insertions, 25 deletions
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 68d46d536e24..486e84ccb1f9 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h | |||
@@ -580,35 +580,40 @@ extern u64 perf_event_read_value(struct perf_event *event, | |||
580 | 580 | ||
581 | 581 | ||
582 | struct perf_sample_data { | 582 | struct perf_sample_data { |
583 | u64 type; | 583 | /* |
584 | * Fields set by perf_sample_data_init(), group so as to | ||
585 | * minimize the cachelines touched. | ||
586 | */ | ||
587 | u64 addr; | ||
588 | struct perf_raw_record *raw; | ||
589 | struct perf_branch_stack *br_stack; | ||
590 | u64 period; | ||
591 | u64 weight; | ||
592 | u64 txn; | ||
593 | union perf_mem_data_src data_src; | ||
584 | 594 | ||
595 | /* | ||
596 | * The other fields, optionally {set,used} by | ||
597 | * perf_{prepare,output}_sample(). | ||
598 | */ | ||
599 | u64 type; | ||
585 | u64 ip; | 600 | u64 ip; |
586 | struct { | 601 | struct { |
587 | u32 pid; | 602 | u32 pid; |
588 | u32 tid; | 603 | u32 tid; |
589 | } tid_entry; | 604 | } tid_entry; |
590 | u64 time; | 605 | u64 time; |
591 | u64 addr; | ||
592 | u64 id; | 606 | u64 id; |
593 | u64 stream_id; | 607 | u64 stream_id; |
594 | struct { | 608 | struct { |
595 | u32 cpu; | 609 | u32 cpu; |
596 | u32 reserved; | 610 | u32 reserved; |
597 | } cpu_entry; | 611 | } cpu_entry; |
598 | u64 period; | ||
599 | union perf_mem_data_src data_src; | ||
600 | struct perf_callchain_entry *callchain; | 612 | struct perf_callchain_entry *callchain; |
601 | struct perf_raw_record *raw; | ||
602 | struct perf_branch_stack *br_stack; | ||
603 | struct perf_regs regs_user; | 613 | struct perf_regs regs_user; |
604 | struct perf_regs regs_intr; | 614 | struct perf_regs regs_intr; |
605 | u64 stack_user_size; | 615 | u64 stack_user_size; |
606 | u64 weight; | 616 | } ____cacheline_aligned; |
607 | /* | ||
608 | * Transaction flags for abort events: | ||
609 | */ | ||
610 | u64 txn; | ||
611 | }; | ||
612 | 617 | ||
613 | /* default value for data source */ | 618 | /* default value for data source */ |
614 | #define PERF_MEM_NA (PERF_MEM_S(OP, NA) |\ | 619 | #define PERF_MEM_NA (PERF_MEM_S(OP, NA) |\ |
@@ -625,14 +630,9 @@ static inline void perf_sample_data_init(struct perf_sample_data *data, | |||
625 | data->raw = NULL; | 630 | data->raw = NULL; |
626 | data->br_stack = NULL; | 631 | data->br_stack = NULL; |
627 | data->period = period; | 632 | data->period = period; |
628 | data->regs_user.abi = PERF_SAMPLE_REGS_ABI_NONE; | ||
629 | data->regs_user.regs = NULL; | ||
630 | data->stack_user_size = 0; | ||
631 | data->weight = 0; | 633 | data->weight = 0; |
632 | data->data_src.val = PERF_MEM_NA; | 634 | data->data_src.val = PERF_MEM_NA; |
633 | data->txn = 0; | 635 | data->txn = 0; |
634 | data->regs_intr.abi = PERF_SAMPLE_REGS_ABI_NONE; | ||
635 | data->regs_intr.regs = NULL; | ||
636 | } | 636 | } |
637 | 637 | ||
638 | extern void perf_output_sample(struct perf_output_handle *handle, | 638 | extern void perf_output_sample(struct perf_output_handle *handle, |
diff --git a/kernel/events/core.c b/kernel/events/core.c index c2be1597ece7..3e19d3ebc29c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
@@ -4471,8 +4471,11 @@ static void perf_sample_regs_user(struct perf_regs *regs_user, | |||
4471 | } | 4471 | } |
4472 | 4472 | ||
4473 | if (regs) { | 4473 | if (regs) { |
4474 | regs_user->regs = regs; | ||
4475 | regs_user->abi = perf_reg_abi(current); | 4474 | regs_user->abi = perf_reg_abi(current); |
4475 | regs_user->regs = regs; | ||
4476 | } else { | ||
4477 | regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE; | ||
4478 | regs_user->regs = NULL; | ||
4476 | } | 4479 | } |
4477 | } | 4480 | } |
4478 | 4481 | ||
@@ -4947,12 +4950,13 @@ void perf_prepare_sample(struct perf_event_header *header, | |||
4947 | header->size += size; | 4950 | header->size += size; |
4948 | } | 4951 | } |
4949 | 4952 | ||
4953 | if (sample_type & (PERF_SAMPLE_REGS_USER | PERF_SAMPLE_STACK_USER)) | ||
4954 | perf_sample_regs_user(&data->regs_user, regs); | ||
4955 | |||
4950 | if (sample_type & PERF_SAMPLE_REGS_USER) { | 4956 | if (sample_type & PERF_SAMPLE_REGS_USER) { |
4951 | /* regs dump ABI info */ | 4957 | /* regs dump ABI info */ |
4952 | int size = sizeof(u64); | 4958 | int size = sizeof(u64); |
4953 | 4959 | ||
4954 | perf_sample_regs_user(&data->regs_user, regs); | ||
4955 | |||
4956 | if (data->regs_user.regs) { | 4960 | if (data->regs_user.regs) { |
4957 | u64 mask = event->attr.sample_regs_user; | 4961 | u64 mask = event->attr.sample_regs_user; |
4958 | size += hweight64(mask) * sizeof(u64); | 4962 | size += hweight64(mask) * sizeof(u64); |
@@ -4968,15 +4972,11 @@ void perf_prepare_sample(struct perf_event_header *header, | |||
4968 | * in case new sample type is added, because we could eat | 4972 | * in case new sample type is added, because we could eat |
4969 | * up the rest of the sample size. | 4973 | * up the rest of the sample size. |
4970 | */ | 4974 | */ |
4971 | struct perf_regs *uregs = &data->regs_user; | ||
4972 | u16 stack_size = event->attr.sample_stack_user; | 4975 | u16 stack_size = event->attr.sample_stack_user; |
4973 | u16 size = sizeof(u64); | 4976 | u16 size = sizeof(u64); |
4974 | 4977 | ||
4975 | if (!uregs->abi) | ||
4976 | perf_sample_regs_user(uregs, regs); | ||
4977 | |||
4978 | stack_size = perf_sample_ustack_size(stack_size, header->size, | 4978 | stack_size = perf_sample_ustack_size(stack_size, header->size, |
4979 | uregs->regs); | 4979 | data->regs_user.regs); |
4980 | 4980 | ||
4981 | /* | 4981 | /* |
4982 | * If there is something to dump, add space for the dump | 4982 | * If there is something to dump, add space for the dump |