-rw-r--r--   arch/x86/kernel/cpu/perf_event.c   185
-rw-r--r--   include/linux/bitops.h              10
2 files changed, 140 insertions, 55 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 2bda212a001..5a469d3d0c6 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -484,18 +484,145 @@ static inline int is_x86_event(struct perf_event *event)
 	return event->pmu == &pmu;
 }
 
+/*
+ * Event scheduler state:
+ *
+ * Assign events iterating over all events and counters, beginning
+ * with events with least weights first. Keep the current iterator
+ * state in struct sched_state.
+ */
+struct sched_state {
+	int	weight;
+	int	event;		/* event index */
+	int	counter;	/* counter index */
+	int	unassigned;	/* number of events to be assigned left */
+	unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+};
+
+struct perf_sched {
+	int			max_weight;
+	int			max_events;
+	struct event_constraint	**constraints;
+	struct sched_state	state;
+};
+
+/*
+ * Initialize iterator that runs through all events and counters.
+ */
+static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c,
+			    int num, int wmin, int wmax)
+{
+	int idx;
+
+	memset(sched, 0, sizeof(*sched));
+	sched->max_events	= num;
+	sched->max_weight	= wmax;
+	sched->constraints	= c;
+
+	for (idx = 0; idx < num; idx++) {
+		if (c[idx]->weight == wmin)
+			break;
+	}
+
+	sched->state.event	= idx;		/* start with min weight */
+	sched->state.weight	= wmin;
+	sched->state.unassigned	= num;
+}
+
+/*
+ * Select a counter for the current event to schedule. Return true on
+ * success.
+ */
+static bool perf_sched_find_counter(struct perf_sched *sched)
+{
+	struct event_constraint *c;
+	int idx;
+
+	if (!sched->state.unassigned)
+		return false;
+
+	if (sched->state.event >= sched->max_events)
+		return false;
+
+	c = sched->constraints[sched->state.event];
+
+	/* Grab the first unused counter starting with idx */
+	idx = sched->state.counter;
+	for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_MAX) {
+		if (!__test_and_set_bit(idx, sched->state.used))
+			break;
+	}
+	sched->state.counter = idx;
+
+	if (idx >= X86_PMC_IDX_MAX)
+		return false;
+
+	return true;
+}
+
+/*
+ * Go through all unassigned events and find the next one to schedule.
+ * Take events with the least weight first. Return true on success.
+ */
+static bool perf_sched_next_event(struct perf_sched *sched)
+{
+	struct event_constraint *c;
+
+	if (!sched->state.unassigned || !--sched->state.unassigned)
+		return false;
+
+	do {
+		/* next event */
+		sched->state.event++;
+		if (sched->state.event >= sched->max_events) {
+			/* next weight */
+			sched->state.event = 0;
+			sched->state.weight++;
+			if (sched->state.weight > sched->max_weight)
+				return false;
+		}
+		c = sched->constraints[sched->state.event];
+	} while (c->weight != sched->state.weight);
+
+	sched->state.counter = 0;	/* start with first counter */
+
+	return true;
+}
+
+/*
+ * Assign a counter for each event.
+ */
+static int perf_assign_events(struct event_constraint **constraints, int n,
+			      int wmin, int wmax, int *assign)
+{
+	struct perf_sched sched;
+
+	perf_sched_init(&sched, constraints, n, wmin, wmax);
+
+	do {
+		if (!perf_sched_find_counter(&sched))
+			break;	/* failed */
+		if (assign)
+			assign[sched.state.event] = sched.state.counter;
+	} while (perf_sched_next_event(&sched));
+
+	return sched.state.unassigned;
+}
+
 int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 {
 	struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
 	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-	int i, j, w, wmax, num = 0;
+	int i, wmin, wmax, num = 0;
 	struct hw_perf_event *hwc;
 
 	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
 
-	for (i = 0; i < n; i++) {
+	for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
 		c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
 		constraints[i] = c;
+		wmin = min(wmin, c->weight);
+		wmax = max(wmax, c->weight);
 	}
 
 	/*
@@ -521,59 +648,11 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 		if (assign)
 			assign[i] = hwc->idx;
 	}
-	if (i == n)
-		goto done;
 
-	/*
-	 * begin slow path
-	 */
-
-	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
-
-	/*
-	 * weight = number of possible counters
-	 *
-	 * 1 = most constrained, only works on one counter
-	 * wmax = least constrained, works on any counter
-	 *
-	 * assign events to counters starting with most
-	 * constrained events.
-	 */
-	wmax = x86_pmu.num_counters;
-
-	/*
-	 * when fixed event counters are present,
-	 * wmax is incremented by 1 to account
-	 * for one more choice
-	 */
-	if (x86_pmu.num_counters_fixed)
-		wmax++;
-
-	for (w = 1, num = n; num && w <= wmax; w++) {
-		/* for each event */
-		for (i = 0; num && i < n; i++) {
-			c = constraints[i];
-			hwc = &cpuc->event_list[i]->hw;
-
-			if (c->weight != w)
-				continue;
-
-			for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
-				if (!test_bit(j, used_mask))
-					break;
-			}
-
-			if (j == X86_PMC_IDX_MAX)
-				break;
-
-			__set_bit(j, used_mask);
-
-			if (assign)
-				assign[i] = j;
-			num--;
-		}
-	}
-done:
+	/* slow path */
+	if (i != n)
+		num = perf_assign_events(constraints, n, wmin, wmax, assign);
+
 	/*
 	 * scheduling failed or is just a simulation,
 	 * free resources if necessary
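
To make the new fast/slow split easier to follow: perf_assign_events() above takes events in order of increasing constraint weight (weight = number of counters an event is allowed to run on) and gives each one the first free counter permitted by its mask, giving up as soon as any event cannot be placed. The following is only a standalone userspace sketch of that ordering, not the kernel code: NUM_COUNTERS, struct constraint and assign_events() are illustrative names, and the real implementation tracks its position in struct perf_sched rather than with nested loops.

#include <stdio.h>

#define NUM_COUNTERS 4		/* illustrative; not the real counter count */

struct constraint {
	unsigned long idxmsk;	/* bitmask of counters the event may use */
	int weight;		/* number of bits set in idxmsk */
};

/* Returns the number of events left unassigned (0 on full success). */
static int assign_events(struct constraint *c, int n, int *assign)
{
	unsigned long used = 0;
	int unassigned = n;

	/* visit events in order of increasing weight, most constrained first */
	for (int w = 1; w <= NUM_COUNTERS; w++) {
		for (int i = 0; i < n; i++) {
			int idx;

			if (c[i].weight != w)
				continue;

			/* first free counter permitted by this event's mask */
			for (idx = 0; idx < NUM_COUNTERS; idx++)
				if ((c[i].idxmsk & (1UL << idx)) &&
				    !(used & (1UL << idx)))
					break;

			if (idx == NUM_COUNTERS)
				return unassigned;	/* give up, like the "failed" break above */

			used |= 1UL << idx;
			assign[i] = idx;
			unassigned--;
		}
	}
	return unassigned;
}

int main(void)
{
	/* event 0 may only use counter 2; event 1 may use any of the four */
	struct constraint c[] = {
		{ .idxmsk = 1UL << 2, .weight = 1 },
		{ .idxmsk = 0xfUL,    .weight = 4 },
	};
	int assign[2] = { -1, -1 };
	int left = assign_events(c, 2, assign);

	/* prints: unassigned=0 assign[0]=2 assign[1]=0 */
	printf("unassigned=%d assign[0]=%d assign[1]=%d\n",
	       left, assign[0], assign[1]);
	return 0;
}
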
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index a3ef66a2a08..3c1063acb2a 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -22,8 +22,14 @@ extern unsigned long __sw_hweight64(__u64 w);
 #include <asm/bitops.h>
 
 #define for_each_set_bit(bit, addr, size) \
 	for ((bit) = find_first_bit((addr), (size)); \
 	     (bit) < (size); \
+	     (bit) = find_next_bit((addr), (size), (bit) + 1))
+
+/* same as for_each_set_bit() but use bit as value to start with */
+#define for_each_set_bit_cont(bit, addr, size) \
+	for ((bit) = find_next_bit((addr), (size), (bit)); \
+	     (bit) < (size); \
 	     (bit) = find_next_bit((addr), (size), (bit) + 1))
 
 static __inline__ int get_bitmask_order(unsigned int count)
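
The bitops.h half of the patch adds for_each_set_bit_cont(), which differs from for_each_set_bit() only in starting the scan at the current value of 'bit' instead of at bit 0; perf_sched_find_counter() relies on this to resume the counter search from sched->state.counter. Below is a small userspace illustration under the assumption of a single-word bitmap; the find_next_bit() stand-in is a simplification of the kernel helper, not its real implementation.

#include <stdio.h>

#define BITS_PER_LONG (8 * sizeof(unsigned long))

/* Simplified stand-in for the kernel's find_next_bit(); single word only. */
static unsigned long find_next_bit(const unsigned long *addr,
				   unsigned long size, unsigned long offset)
{
	for (unsigned long i = offset; i < size; i++)
		if (addr[0] & (1UL << i))
			return i;
	return size;
}

/* Same shape as the macro added above: resume scanning at 'bit'. */
#define for_each_set_bit_cont(bit, addr, size)			\
	for ((bit) = find_next_bit((addr), (size), (bit));	\
	     (bit) < (size);					\
	     (bit) = find_next_bit((addr), (size), (bit) + 1))

int main(void)
{
	unsigned long mask = 0x2d;	/* bits 0, 2, 3 and 5 set */
	unsigned long bit = 3;		/* resume the scan at bit 3 */

	for_each_set_bit_cont(bit, &mask, BITS_PER_LONG)
		printf("bit %lu is set\n", bit);	/* prints 3, then 5 */

	return 0;
}
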