author    Andrew Hunter <ahh@google.com>  2013-05-23 14:07:03 -0400
committer Ingo Molnar <mingo@kernel.org>  2013-06-19 06:50:44 -0400
commit    43b4578071c0e6d87761e113e05d45776cc75437 (patch)
tree      ece20f517fe07b2c58e56f2909e6516813511b12
parent    03d8e80beb7db78a13c192431205b9c83f7e0cd1 (diff)
perf/x86: Reduce stack usage of x86_schedule_events()
x86_schedule_events() caches event constraints on the stack during scheduling. Given the number of possible events, this is 512 bytes of stack; since it can be invoked under schedule() under god-knows-what, this is causing stack blowouts.

Trade some space usage for stack safety: add a place to cache the constraint pointer to struct perf_event. For 8 bytes per event (1% of its size) we can save the giant stack frame.

This shouldn't change any aspect of scheduling whatsoever and while in theory the locality's a tiny bit worse, I doubt we'll see any performance impact either.

Tested: `perf stat whatever` does not blow up and produces results that aren't hugely obviously wrong. I'm not sure how to run particularly good tests of perf code, but this should not produce any functional change whatsoever.

Signed-off-by: Andrew Hunter <ahh@google.com>
Reviewed-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1369332423-4400-1-git-send-email-ahh@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
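[Editor's illustration, not part of the patch: a stand-alone userspace sketch of the arithmetic above, using simplified stand-in structures rather than the real kernel definitions. The 512-byte figure corresponds to 64 constraint pointers (X86_PMC_IDX_MAX) of 8 bytes each on x86-64; the replacement costs one 8-byte pointer per struct perf_event.]

	/* sketch.c -- illustrative only; simplified stand-ins for the kernel types */
	#include <stdio.h>

	#define X86_PMC_IDX_MAX 64		/* scheduler slots implied by 512 / 8 */

	struct event_constraint;		/* opaque here, as in the real header */

	/* Before: one constraint pointer per possible event lived on the stack. */
	struct on_stack_cache {
		struct event_constraint *constraints[X86_PMC_IDX_MAX];
	};

	/* After: each event carries its own cached constraint pointer. */
	struct hw_event_stub {
		struct event_constraint *constraint;	/* +8 bytes per event */
	};

	int main(void)
	{
		/* 64 * 8 = 512 bytes no longer needed in x86_schedule_events() */
		printf("stack bytes saved per call: %zu\n", sizeof(struct on_stack_cache));
		/* 8 bytes added to each event instead */
		printf("bytes added per event:      %zu\n", sizeof(struct hw_event_stub));
		return 0;
	}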
-rw-r--r--	arch/x86/kernel/cpu/perf_event.c	28
-rw-r--r--	arch/x86/kernel/cpu/perf_event.h	2
-rw-r--r--	arch/x86/kernel/cpu/perf_event_intel_uncore.c	10
-rw-r--r--	include/linux/perf_event.h	4
4 files changed, 26 insertions(+), 18 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 1025f3c99d20..e52a9e577783 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -568,7 +568,7 @@ struct sched_state {
 struct perf_sched {
 	int			max_weight;
 	int			max_events;
-	struct event_constraint	**constraints;
+	struct perf_event	**events;
 	struct sched_state	state;
 	int			saved_states;
 	struct sched_state	saved[SCHED_STATES_MAX];
@@ -577,7 +577,7 @@ struct perf_sched {
 /*
  * Initialize interator that runs through all events and counters.
  */
-static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c,
+static void perf_sched_init(struct perf_sched *sched, struct perf_event **events,
 			    int num, int wmin, int wmax)
 {
 	int idx;
@@ -585,10 +585,10 @@ static void perf_sched_init(struct perf_sched *sched, struct event_constraint **
 	memset(sched, 0, sizeof(*sched));
 	sched->max_events	= num;
 	sched->max_weight	= wmax;
-	sched->constraints	= c;
+	sched->events		= events;
 
 	for (idx = 0; idx < num; idx++) {
-		if (c[idx]->weight == wmin)
+		if (events[idx]->hw.constraint->weight == wmin)
 			break;
 	}
 
@@ -635,8 +635,7 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
 	if (sched->state.event >= sched->max_events)
 		return false;
 
-	c = sched->constraints[sched->state.event];
-
+	c = sched->events[sched->state.event]->hw.constraint;
 	/* Prefer fixed purpose counters */
 	if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
 		idx = INTEL_PMC_IDX_FIXED;
@@ -694,7 +693,7 @@ static bool perf_sched_next_event(struct perf_sched *sched)
 			if (sched->state.weight > sched->max_weight)
 				return false;
 		}
-		c = sched->constraints[sched->state.event];
+		c = sched->events[sched->state.event]->hw.constraint;
 	} while (c->weight != sched->state.weight);
 
 	sched->state.counter = 0;	/* start with first counter */
@@ -705,12 +704,12 @@ static bool perf_sched_next_event(struct perf_sched *sched)
 /*
  * Assign a counter for each event.
  */
-int perf_assign_events(struct event_constraint **constraints, int n,
+int perf_assign_events(struct perf_event **events, int n,
 			int wmin, int wmax, int *assign)
 {
 	struct perf_sched sched;
 
-	perf_sched_init(&sched, constraints, n, wmin, wmax);
+	perf_sched_init(&sched, events, n, wmin, wmax);
 
 	do {
 		if (!perf_sched_find_counter(&sched))
@@ -724,7 +723,7 @@ int perf_assign_events(struct event_constraint **constraints, int n,
 
 int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 {
-	struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
+	struct event_constraint *c;
 	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 	int i, wmin, wmax, num = 0;
 	struct hw_perf_event *hwc;
@@ -732,8 +731,10 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
 
 	for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
+		hwc = &cpuc->event_list[i]->hw;
 		c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
-		constraints[i] = c;
+		hwc->constraint = c;
+
 		wmin = min(wmin, c->weight);
 		wmax = max(wmax, c->weight);
 	}
@@ -743,7 +744,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 	 */
 	for (i = 0; i < n; i++) {
 		hwc = &cpuc->event_list[i]->hw;
-		c = constraints[i];
+		c = hwc->constraint;
 
 		/* never assigned */
 		if (hwc->idx == -1)
@@ -764,7 +765,8 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 
 	/* slow path */
 	if (i != n)
-		num = perf_assign_events(constraints, n, wmin, wmax, assign);
+		num = perf_assign_events(cpuc->event_list, n, wmin,
+					 wmax, assign);
 
 	/*
 	 * scheduling failed or is just a simulation,
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index ba9aadfa683b..6a6ca01090f9 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -528,7 +528,7 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
 
 void x86_pmu_enable_all(int added);
 
-int perf_assign_events(struct event_constraint **constraints, int n,
+int perf_assign_events(struct perf_event **events, int n,
 			int wmin, int wmax, int *assign);
 int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index c0e356da7408..adabe6f1bb6e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -2723,15 +2723,16 @@ static void uncore_put_event_constraint(struct intel_uncore_box *box, struct per
 static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
 {
 	unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
-	struct event_constraint *c, *constraints[UNCORE_PMC_IDX_MAX];
+	struct event_constraint *c;
 	int i, wmin, wmax, ret = 0;
 	struct hw_perf_event *hwc;
 
 	bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
 
 	for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
+		hwc = &box->event_list[i]->hw;
 		c = uncore_get_event_constraint(box, box->event_list[i]);
-		constraints[i] = c;
+		hwc->constraint = c;
 		wmin = min(wmin, c->weight);
 		wmax = max(wmax, c->weight);
 	}
@@ -2739,7 +2740,7 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int
 	/* fastpath, try to reuse previous register */
 	for (i = 0; i < n; i++) {
 		hwc = &box->event_list[i]->hw;
-		c = constraints[i];
+		c = hwc->constraint;
 
 		/* never assigned */
 		if (hwc->idx == -1)
@@ -2759,7 +2760,8 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int
 	}
 	/* slow path */
 	if (i != n)
-		ret = perf_assign_events(constraints, n, wmin, wmax, assign);
+		ret = perf_assign_events(box->event_list, n,
+					 wmin, wmax, assign);
 
 	if (!assign || ret) {
 		for (i = 0; i < n; i++)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 4bc57d017fc8..33e8d65836d6 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -113,6 +113,8 @@ struct hw_perf_event_extra {
 	int		idx;	/* index in shared_regs->regs[] */
 };
 
+struct event_constraint;
+
 /**
  * struct hw_perf_event - performance event hardware details:
  */
@@ -131,6 +133,8 @@ struct hw_perf_event {
 
 		struct hw_perf_event_extra extra_reg;
 		struct hw_perf_event_extra branch_reg;
+
+		struct event_constraint *constraint;
 	};
 	struct { /* software */
 		struct hrtimer	hrtimer;