Diffstat (limited to 'arch/x86/kernel')
 arch/x86/kernel/cpu/Makefile                  |   4
 arch/x86/kernel/cpu/perf_event.c              |  69
 arch/x86/kernel/cpu/perf_event.h              |  24
 arch/x86/kernel/cpu/perf_event_amd.c          |  34
 arch/x86/kernel/cpu/perf_event_amd_iommu.c    | 504
 arch/x86/kernel/cpu/perf_event_amd_iommu.h    |  40
 arch/x86/kernel/cpu/perf_event_intel.c        | 136
 arch/x86/kernel/cpu/perf_event_intel_ds.c     | 178
 arch/x86/kernel/cpu/perf_event_intel_lbr.c    |  69
 arch/x86/kernel/cpu/perf_event_intel_uncore.c |  16
 arch/x86/kernel/cpu/perf_event_intel_uncore.h |   4
 arch/x86/kernel/nmi.c                         |  36
 arch/x86/kernel/signal.c                      |  16
 13 files changed, 1019 insertions(+), 111 deletions(-)
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index b0684e4a73aa..47b56a7e99cb 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -31,11 +31,15 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o
 
 ifdef CONFIG_PERF_EVENTS
 obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o perf_event_amd_uncore.o
+ifdef CONFIG_AMD_IOMMU
+obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd_iommu.o
+endif
 obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o
 obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
 obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o
 endif
 
+
 obj-$(CONFIG_X86_MCE) += mcheck/
 obj-$(CONFIG_MTRR) += mtrr/
 
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 1025f3c99d20..9e581c5cf6d0 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -403,7 +403,8 @@ int x86_pmu_hw_config(struct perf_event *event)
 		 * check that PEBS LBR correction does not conflict with
 		 * whatever the user is asking with attr->branch_sample_type
 		 */
-		if (event->attr.precise_ip > 1) {
+		if (event->attr.precise_ip > 1 &&
+		    x86_pmu.intel_cap.pebs_format < 2) {
 			u64 *br_type = &event->attr.branch_sample_type;
 
 			if (has_branch_stack(event)) {
@@ -568,7 +569,7 @@ struct sched_state {
 struct perf_sched {
 	int			max_weight;
 	int			max_events;
-	struct event_constraint	**constraints;
+	struct perf_event	**events;
 	struct sched_state	state;
 	int			saved_states;
 	struct sched_state	saved[SCHED_STATES_MAX];
@@ -577,7 +578,7 @@ struct perf_sched {
 /*
  * Initialize iterator that runs through all events and counters.
  */
-static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c,
+static void perf_sched_init(struct perf_sched *sched, struct perf_event **events,
 			    int num, int wmin, int wmax)
 {
 	int idx;
@@ -585,10 +586,10 @@ static void perf_sched_init(struct perf_sched *sched, struct event_constraint **
 	memset(sched, 0, sizeof(*sched));
 	sched->max_events	= num;
 	sched->max_weight	= wmax;
-	sched->constraints	= c;
+	sched->events		= events;
 
 	for (idx = 0; idx < num; idx++) {
-		if (c[idx]->weight == wmin)
+		if (events[idx]->hw.constraint->weight == wmin)
 			break;
 	}
 
@@ -635,8 +636,7 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
 	if (sched->state.event >= sched->max_events)
 		return false;
 
-	c = sched->constraints[sched->state.event];
-
+	c = sched->events[sched->state.event]->hw.constraint;
 	/* Prefer fixed purpose counters */
 	if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
 		idx = INTEL_PMC_IDX_FIXED;
@@ -694,7 +694,7 @@ static bool perf_sched_next_event(struct perf_sched *sched)
 			if (sched->state.weight > sched->max_weight)
 				return false;
 		}
-		c = sched->constraints[sched->state.event];
+		c = sched->events[sched->state.event]->hw.constraint;
 	} while (c->weight != sched->state.weight);
 
 	sched->state.counter = 0;	/* start with first counter */
@@ -705,12 +705,12 @@ static bool perf_sched_next_event(struct perf_sched *sched)
 /*
  * Assign a counter for each event.
  */
-int perf_assign_events(struct event_constraint **constraints, int n,
+int perf_assign_events(struct perf_event **events, int n,
 			int wmin, int wmax, int *assign)
 {
 	struct perf_sched sched;
 
-	perf_sched_init(&sched, constraints, n, wmin, wmax);
+	perf_sched_init(&sched, events, n, wmin, wmax);
 
 	do {
 		if (!perf_sched_find_counter(&sched))
@@ -724,16 +724,19 @@ int perf_assign_events(struct event_constraint **constraints, int n,
 
 int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 {
-	struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
+	struct event_constraint *c;
 	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	struct perf_event *e;
 	int i, wmin, wmax, num = 0;
 	struct hw_perf_event *hwc;
 
 	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
 
 	for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
+		hwc = &cpuc->event_list[i]->hw;
 		c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
-		constraints[i] = c;
+		hwc->constraint = c;
+
 		wmin = min(wmin, c->weight);
 		wmax = max(wmax, c->weight);
 	}
@@ -743,7 +746,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 	 */
 	for (i = 0; i < n; i++) {
 		hwc = &cpuc->event_list[i]->hw;
-		c = constraints[i];
+		c = hwc->constraint;
 
 		/* never assigned */
 		if (hwc->idx == -1)
@@ -764,16 +767,35 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 
 	/* slow path */
 	if (i != n)
-		num = perf_assign_events(constraints, n, wmin, wmax, assign);
+		num = perf_assign_events(cpuc->event_list, n, wmin,
+				wmax, assign);
 
 	/*
+	 * Mark the event as committed, so we do not put_constraint()
+	 * in case new events are added and fail scheduling.
+	 */
+	if (!num && assign) {
+		for (i = 0; i < n; i++) {
+			e = cpuc->event_list[i];
+			e->hw.flags |= PERF_X86_EVENT_COMMITTED;
+		}
+	}
+	/*
 	 * scheduling failed or is just a simulation,
 	 * free resources if necessary
 	 */
 	if (!assign || num) {
 		for (i = 0; i < n; i++) {
+			e = cpuc->event_list[i];
+			/*
+			 * do not put_constraint() on committed events,
+			 * because they are good to go
+			 */
+			if ((e->hw.flags & PERF_X86_EVENT_COMMITTED))
+				continue;
+
 			if (x86_pmu.put_event_constraints)
-				x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]);
+				x86_pmu.put_event_constraints(cpuc, e);
 		}
 	}
 	return num ? -EINVAL : 0;
@@ -1153,6 +1175,11 @@ static void x86_pmu_del(struct perf_event *event, int flags)
 	int i;
 
 	/*
+	 * event is descheduled
+	 */
+	event->hw.flags &= ~PERF_X86_EVENT_COMMITTED;
+
+	/*
 	 * If we're called during a txn, we don't need to do anything.
 	 * The events never got scheduled and ->cancel_txn will truncate
 	 * the event_list.
@@ -1249,10 +1276,20 @@ void perf_events_lapic_init(void)
 static int __kprobes
 perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
 {
+	int ret;
+	u64 start_clock;
+	u64 finish_clock;
+
 	if (!atomic_read(&active_events))
 		return NMI_DONE;
 
-	return x86_pmu.handle_irq(regs);
+	start_clock = local_clock();
+	ret = x86_pmu.handle_irq(regs);
+	finish_clock = local_clock();
+
+	perf_sample_event_took(finish_clock - start_clock);
+
+	return ret;
 }
 
 struct event_constraint emptyconstraint;
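Note on the perf_event_nmi_handler() hunk above: each x86_pmu.handle_irq() call is now timed with local_clock() and the cost reported to perf_sample_event_took(), which lets the perf core back off the sampling rate when NMI handling eats too much CPU time. A minimal sketch of such a feedback loop, not the kernel's actual implementation (all demo_* names are invented here for illustration):

/* Sketch only: sample-rate throttling driven by measured handler cost. */
#define DEMO_NSEC_PER_SEC	1000000000ULL

static u64 demo_avg_ns;				/* smoothed handler cost */
static unsigned int demo_sample_rate = 100000;	/* samples per second */

static void demo_sample_event_took(u64 took_ns)
{
	/* allow the handler ~25% of the interval between two samples */
	u64 budget_ns = DEMO_NSEC_PER_SEC / demo_sample_rate / 4;

	/* exponential moving average absorbs one-off slow NMIs */
	demo_avg_ns = (3 * demo_avg_ns + took_ns) / 4;

	/* persistently over budget: halve the sampling rate */
	if (demo_avg_ns > budget_ns && demo_sample_rate > 1000)
		demo_sample_rate /= 2;
}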
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index ba9aadfa683b..97e557bc4c91 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -63,10 +63,12 @@ struct event_constraint {
 	int	flags;
 };
 /*
- * struct event_constraint flags
+ * struct hw_perf_event.flags flags
  */
 #define PERF_X86_EVENT_PEBS_LDLAT	0x1 /* ld+ldlat data address sampling */
 #define PERF_X86_EVENT_PEBS_ST		0x2 /* st data address sampling */
+#define PERF_X86_EVENT_PEBS_ST_HSW	0x4 /* haswell style st data sampling */
+#define PERF_X86_EVENT_COMMITTED	0x8 /* event passed commit_txn */
 
 struct amd_nb {
 	int nb_id;  /* NorthBridge id */
@@ -227,11 +229,14 @@ struct cpu_hw_events {
  *  - inv
  *  - edge
  *  - cnt-mask
+ *  - in_tx
+ *  - in_tx_checkpointed
  *  The other filters are supported by fixed counters.
  *  The any-thread option is supported starting with v3.
  */
+#define FIXED_EVENT_FLAGS (X86_RAW_EVENT_MASK|HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)
 #define FIXED_EVENT_CONSTRAINT(c, n)	\
-	EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK)
+	EVENT_CONSTRAINT(c, (1ULL << (32+n)), FIXED_EVENT_FLAGS)
 
 /*
  * Constraint on the Event code + UMask
@@ -247,6 +252,11 @@ struct cpu_hw_events {
 	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
 			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
 
+/* DataLA version of store sampling without extra enable bit. */
+#define INTEL_PST_HSW_CONSTRAINT(c, n)	\
+	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
+
 #define EVENT_CONSTRAINT_END		\
 	EVENT_CONSTRAINT(0, 0, 0)
 
@@ -301,6 +311,11 @@ union perf_capabilities {
 		u64	pebs_arch_reg:1;
 		u64	pebs_format:4;
 		u64	smm_freeze:1;
+		/*
+		 * PMU supports separate counter range for writing
+		 * values > 32bit.
+		 */
+		u64	full_width_write:1;
 	};
 	u64	capabilities;
 };
@@ -375,6 +390,7 @@ struct x86_pmu {
 	struct event_constraint *event_constraints;
 	struct x86_pmu_quirk *quirks;
 	int		perfctr_second_write;
+	bool		late_ack;
 
 	/*
 	 * sysfs attrs
@@ -528,7 +544,7 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
 
 void x86_pmu_enable_all(int added);
 
-int perf_assign_events(struct event_constraint **constraints, int n,
+int perf_assign_events(struct perf_event **events, int n,
 			int wmin, int wmax, int *assign);
 int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);
 
@@ -633,6 +649,8 @@ extern struct event_constraint intel_snb_pebs_event_constraints[];
 
 extern struct event_constraint intel_ivb_pebs_event_constraints[];
 
+extern struct event_constraint intel_hsw_pebs_event_constraints[];
+
 struct event_constraint *intel_pebs_constraints(struct perf_event *event);
 
 void intel_pmu_pebs_enable(struct perf_event *event);
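A note on FIXED_EVENT_FLAGS above: fixed counter n is addressed through bit 32+n of the constraint's idxmsk64, and widening cmask to include HSW_IN_TX/HSW_IN_TX_CHECKPOINTED means the transaction filter bits now take part in constraint matching, so a transactional event can no longer silently land on a fixed counter that cannot honor those filters. A worked expansion under the assumption (from the macro definitions above) that EVENT_CONSTRAINT stores its second argument as idxmsk64 and its third as cmask:

#include <stdio.h>
#include <stdint.h>

/* FIXED_EVENT_CONSTRAINT(0x00c0, 0): INST_RETIRED.ANY on fixed counter 0.
 * Fixed counter n is exposed as generic counter index 32 + n.
 */
int main(void)
{
	int n = 0;					/* fixed counter 0 */
	uint64_t idxmsk64 = 1ULL << (32 + n);

	/* prints 0x0000000100000000 */
	printf("idxmsk64 = %#018llx\n", (unsigned long long)idxmsk64);
	return 0;
}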
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 7e28d9467bb4..4cbe03287b08 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -648,48 +648,48 @@ static __initconst const struct x86_pmu amd_pmu = {
 	.cpu_dead		= amd_pmu_cpu_dead,
 };
 
-static int setup_event_constraints(void)
+static int __init amd_core_pmu_init(void)
 {
-	if (boot_cpu_data.x86 == 0x15)
+	if (!cpu_has_perfctr_core)
+		return 0;
+
+	switch (boot_cpu_data.x86) {
+	case 0x15:
+		pr_cont("Fam15h ");
 		x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
-	return 0;
-}
+		break;
 
-static int setup_perfctr_core(void)
-{
-	if (!cpu_has_perfctr_core) {
-		WARN(x86_pmu.get_event_constraints == amd_get_event_constraints_f15h,
-		     KERN_ERR "Odd, counter constraints enabled but no core perfctrs detected!");
+	default:
+		pr_err("core perfctr but no constraints; unknown hardware!\n");
 		return -ENODEV;
 	}
 
-	WARN(x86_pmu.get_event_constraints == amd_get_event_constraints,
-	     KERN_ERR "hw perf events core counters need constraints handler!");
-
 	/*
 	 * If core performance counter extensions exists, we must use
 	 * MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also
-	 * x86_pmu_addr_offset().
+	 * amd_pmu_addr_offset().
 	 */
 	x86_pmu.eventsel	= MSR_F15H_PERF_CTL;
 	x86_pmu.perfctr		= MSR_F15H_PERF_CTR;
 	x86_pmu.num_counters	= AMD64_NUM_COUNTERS_CORE;
 
-	printk(KERN_INFO "perf: AMD core performance counters detected\n");
-
+	pr_cont("core perfctr, ");
 	return 0;
 }
 
 __init int amd_pmu_init(void)
 {
+	int ret;
+
 	/* Performance-monitoring supported from K7 and later: */
 	if (boot_cpu_data.x86 < 6)
 		return -ENODEV;
 
 	x86_pmu = amd_pmu;
 
-	setup_event_constraints();
-	setup_perfctr_core();
+	ret = amd_core_pmu_init();
+	if (ret)
+		return ret;
 
 	/* Events are common for all AMDs */
 	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.c b/arch/x86/kernel/cpu/perf_event_amd_iommu.c
new file mode 100644
index 000000000000..0db655ef3918
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_amd_iommu.c
@@ -0,0 +1,504 @@
+/*
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Steven Kinney <Steven.Kinney@amd.com>
+ * Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com>
+ *
+ * Perf: amd_iommu - AMD IOMMU Performance Counter PMU implementation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/module.h>
+#include <linux/cpumask.h>
+#include <linux/slab.h>
+
+#include "perf_event.h"
+#include "perf_event_amd_iommu.h"
+
+#define COUNTER_SHIFT	16
+
+#define _GET_BANK(ev)       ((u8)(ev->hw.extra_reg.reg >> 8))
+#define _GET_CNTR(ev)       ((u8)(ev->hw.extra_reg.reg))
+
+/* iommu pmu config masks */
+#define _GET_CSOURCE(ev)    ((ev->hw.config & 0xFFULL))
+#define _GET_DEVID(ev)      ((ev->hw.config >> 8)  & 0xFFFFULL)
+#define _GET_PASID(ev)      ((ev->hw.config >> 24) & 0xFFFFULL)
+#define _GET_DOMID(ev)      ((ev->hw.config >> 40) & 0xFFFFULL)
+#define _GET_DEVID_MASK(ev) ((ev->hw.extra_reg.config)       & 0xFFFFULL)
+#define _GET_PASID_MASK(ev) ((ev->hw.extra_reg.config >> 16) & 0xFFFFULL)
+#define _GET_DOMID_MASK(ev) ((ev->hw.extra_reg.config >> 32) & 0xFFFFULL)
+
+static struct perf_amd_iommu __perf_iommu;
+
+struct perf_amd_iommu {
+	struct pmu pmu;
+	u8 max_banks;
+	u8 max_counters;
+	u64 cntr_assign_mask;
+	raw_spinlock_t lock;
+	const struct attribute_group *attr_groups[4];
+};
+
+#define format_group	attr_groups[0]
+#define cpumask_group	attr_groups[1]
+#define events_group	attr_groups[2]
+#define null_group	attr_groups[3]
+
+/*---------------------------------------------
+ * sysfs format attributes
+ *---------------------------------------------*/
+PMU_FORMAT_ATTR(csource,    "config:0-7");
+PMU_FORMAT_ATTR(devid,      "config:8-23");
+PMU_FORMAT_ATTR(pasid,      "config:24-39");
+PMU_FORMAT_ATTR(domid,      "config:40-55");
+PMU_FORMAT_ATTR(devid_mask, "config1:0-15");
+PMU_FORMAT_ATTR(pasid_mask, "config1:16-31");
+PMU_FORMAT_ATTR(domid_mask, "config1:32-47");
+
+static struct attribute *iommu_format_attrs[] = {
+	&format_attr_csource.attr,
+	&format_attr_devid.attr,
+	&format_attr_pasid.attr,
+	&format_attr_domid.attr,
+	&format_attr_devid_mask.attr,
+	&format_attr_pasid_mask.attr,
+	&format_attr_domid_mask.attr,
+	NULL,
+};
+
+static struct attribute_group amd_iommu_format_group = {
+	.name = "format",
+	.attrs = iommu_format_attrs,
+};
+
+/*---------------------------------------------
+ * sysfs events attributes
+ *---------------------------------------------*/
+struct amd_iommu_event_desc {
+	struct kobj_attribute attr;
+	const char *event;
+};
+
+static ssize_t _iommu_event_show(struct kobject *kobj,
+				struct kobj_attribute *attr, char *buf)
+{
+	struct amd_iommu_event_desc *event =
+		container_of(attr, struct amd_iommu_event_desc, attr);
+	return sprintf(buf, "%s\n", event->event);
+}
+
+#define AMD_IOMMU_EVENT_DESC(_name, _event)			\
+{								\
+	.attr  = __ATTR(_name, 0444, _iommu_event_show, NULL),	\
+	.event = _event,					\
+}
+
+static struct amd_iommu_event_desc amd_iommu_v2_event_descs[] = {
+	AMD_IOMMU_EVENT_DESC(mem_pass_untrans,        "csource=0x01"),
+	AMD_IOMMU_EVENT_DESC(mem_pass_pretrans,       "csource=0x02"),
+	AMD_IOMMU_EVENT_DESC(mem_pass_excl,           "csource=0x03"),
+	AMD_IOMMU_EVENT_DESC(mem_target_abort,        "csource=0x04"),
+	AMD_IOMMU_EVENT_DESC(mem_trans_total,         "csource=0x05"),
+	AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_hit,   "csource=0x06"),
+	AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_mis,   "csource=0x07"),
+	AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_hit,   "csource=0x08"),
+	AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_mis,   "csource=0x09"),
+	AMD_IOMMU_EVENT_DESC(mem_dte_hit,             "csource=0x0a"),
+	AMD_IOMMU_EVENT_DESC(mem_dte_mis,             "csource=0x0b"),
+	AMD_IOMMU_EVENT_DESC(page_tbl_read_tot,       "csource=0x0c"),
+	AMD_IOMMU_EVENT_DESC(page_tbl_read_nst,       "csource=0x0d"),
+	AMD_IOMMU_EVENT_DESC(page_tbl_read_gst,       "csource=0x0e"),
+	AMD_IOMMU_EVENT_DESC(int_dte_hit,             "csource=0x0f"),
+	AMD_IOMMU_EVENT_DESC(int_dte_mis,             "csource=0x10"),
+	AMD_IOMMU_EVENT_DESC(cmd_processed,           "csource=0x11"),
+	AMD_IOMMU_EVENT_DESC(cmd_processed_inv,       "csource=0x12"),
+	AMD_IOMMU_EVENT_DESC(tlb_inv,                 "csource=0x13"),
+	{ /* end: all zeroes */ },
+};
+
+/*---------------------------------------------
+ * sysfs cpumask attributes
+ *---------------------------------------------*/
+static cpumask_t iommu_cpumask;
+
+static ssize_t _iommu_cpumask_show(struct device *dev,
+				   struct device_attribute *attr,
+				   char *buf)
+{
+	int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &iommu_cpumask);
+	buf[n++] = '\n';
+	buf[n] = '\0';
+	return n;
+}
+static DEVICE_ATTR(cpumask, S_IRUGO, _iommu_cpumask_show, NULL);
+
+static struct attribute *iommu_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+static struct attribute_group amd_iommu_cpumask_group = {
+	.attrs = iommu_cpumask_attrs,
+};
+
+/*---------------------------------------------*/
+
+static int get_next_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu)
+{
+	unsigned long flags;
+	int shift, bank, cntr, retval;
+	int max_banks = perf_iommu->max_banks;
+	int max_cntrs = perf_iommu->max_counters;
+
+	raw_spin_lock_irqsave(&perf_iommu->lock, flags);
+
+	for (bank = 0, shift = 0; bank < max_banks; bank++) {
+		for (cntr = 0; cntr < max_cntrs; cntr++) {
+			shift = bank + (bank*3) + cntr;
+			if (perf_iommu->cntr_assign_mask & (1ULL<<shift)) {
+				continue;
+			} else {
+				perf_iommu->cntr_assign_mask |= (1ULL<<shift);
+				retval = ((u16)((u16)bank<<8) | (u8)(cntr));
+				goto out;
+			}
+		}
+	}
+	retval = -ENOSPC;
+out:
+	raw_spin_unlock_irqrestore(&perf_iommu->lock, flags);
+	return retval;
+}
+
+static int clear_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu,
+					u8 bank, u8 cntr)
+{
+	unsigned long flags;
+	int max_banks, max_cntrs;
+	int shift = 0;
+
+	max_banks = perf_iommu->max_banks;
+	max_cntrs = perf_iommu->max_counters;
+
+	if ((bank > max_banks) || (cntr > max_cntrs))
+		return -EINVAL;
+
+	shift = bank + cntr + (bank*3);
+
+	raw_spin_lock_irqsave(&perf_iommu->lock, flags);
+	perf_iommu->cntr_assign_mask &= ~(1ULL<<shift);
+	raw_spin_unlock_irqrestore(&perf_iommu->lock, flags);
+
+	return 0;
+}
+
+static int perf_iommu_event_init(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct perf_amd_iommu *perf_iommu;
+	u64 config, config1;
+
+	/* test the event attr type check for PMU enumeration */
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/*
+	 * IOMMU counters are shared across all cores.
+	 * Therefore, it does not support per-process mode.
+	 * Also, it does not support event sampling mode.
+	 */
+	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+		return -EINVAL;
+
+	/* IOMMU counters do not have usr/os/guest/host bits */
+	if (event->attr.exclude_user || event->attr.exclude_kernel ||
+	    event->attr.exclude_host || event->attr.exclude_guest)
+		return -EINVAL;
+
+	if (event->cpu < 0)
+		return -EINVAL;
+
+	perf_iommu = &__perf_iommu;
+
+	if (event->pmu != &perf_iommu->pmu)
+		return -ENOENT;
+
+	if (perf_iommu) {
+		config = event->attr.config;
+		config1 = event->attr.config1;
+	} else {
+		return -EINVAL;
+	}
+
+	/* integrate with iommu base devid (0000), assume one iommu */
+	perf_iommu->max_banks =
+		amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID);
+	perf_iommu->max_counters =
+		amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID);
+	if ((perf_iommu->max_banks == 0) || (perf_iommu->max_counters == 0))
+		return -EINVAL;
+
+	/* update the hw_perf_event struct with the iommu config data */
+	hwc->config = config;
+	hwc->extra_reg.config = config1;
+
+	return 0;
+}
+
+static void perf_iommu_enable_event(struct perf_event *ev)
+{
+	u8 csource = _GET_CSOURCE(ev);
+	u16 devid = _GET_DEVID(ev);
+	u64 reg = 0ULL;
+
+	reg = csource;
+	amd_iommu_pc_get_set_reg_val(devid,
+			_GET_BANK(ev), _GET_CNTR(ev),
+			IOMMU_PC_COUNTER_SRC_REG, &reg, true);
+
+	reg = 0ULL | devid | (_GET_DEVID_MASK(ev) << 32);
+	if (reg)
+		reg |= (1UL << 31);
+	amd_iommu_pc_get_set_reg_val(devid,
+			_GET_BANK(ev), _GET_CNTR(ev),
+			IOMMU_PC_DEVID_MATCH_REG, &reg, true);
+
+	reg = 0ULL | _GET_PASID(ev) | (_GET_PASID_MASK(ev) << 32);
+	if (reg)
+		reg |= (1UL << 31);
+	amd_iommu_pc_get_set_reg_val(devid,
+			_GET_BANK(ev), _GET_CNTR(ev),
+			IOMMU_PC_PASID_MATCH_REG, &reg, true);
+
+	reg = 0ULL | _GET_DOMID(ev) | (_GET_DOMID_MASK(ev) << 32);
+	if (reg)
+		reg |= (1UL << 31);
+	amd_iommu_pc_get_set_reg_val(devid,
+			_GET_BANK(ev), _GET_CNTR(ev),
+			IOMMU_PC_DOMID_MATCH_REG, &reg, true);
+}
+
+static void perf_iommu_disable_event(struct perf_event *event)
+{
+	u64 reg = 0ULL;
+
+	amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
+			_GET_BANK(event), _GET_CNTR(event),
+			IOMMU_PC_COUNTER_SRC_REG, &reg, true);
+}
+
+static void perf_iommu_start(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	pr_debug("perf: amd_iommu:perf_iommu_start\n");
+	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+		return;
+
+	WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+	hwc->state = 0;
+
+	if (flags & PERF_EF_RELOAD) {
+		u64 prev_raw_count = local64_read(&hwc->prev_count);
+		amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
+				_GET_BANK(event), _GET_CNTR(event),
+				IOMMU_PC_COUNTER_REG, &prev_raw_count, true);
+	}
+
+	perf_iommu_enable_event(event);
+	perf_event_update_userpage(event);
+
+}
+
+static void perf_iommu_read(struct perf_event *event)
+{
+	u64 count = 0ULL;
+	u64 prev_raw_count = 0ULL;
+	u64 delta = 0ULL;
+	struct hw_perf_event *hwc = &event->hw;
+	pr_debug("perf: amd_iommu:perf_iommu_read\n");
+
+	amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
+				_GET_BANK(event), _GET_CNTR(event),
+				IOMMU_PC_COUNTER_REG, &count, false);
+
+	/* IOMMU pc counter register is only 48 bits */
+	count &= 0xFFFFFFFFFFFFULL;
+
+	prev_raw_count = local64_read(&hwc->prev_count);
+	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+					count) != prev_raw_count)
+		return;
+
+	/* Handling 48-bit counter overflowing */
+	delta = (count << COUNTER_SHIFT) - (prev_raw_count << COUNTER_SHIFT);
+	delta >>= COUNTER_SHIFT;
+	local64_add(delta, &event->count);
+
+}
+
+static void perf_iommu_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	u64 config;
+
+	pr_debug("perf: amd_iommu:perf_iommu_stop\n");
+
+	if (hwc->state & PERF_HES_UPTODATE)
+		return;
+
+	perf_iommu_disable_event(event);
+	WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+	hwc->state |= PERF_HES_STOPPED;
+
+	if (hwc->state & PERF_HES_UPTODATE)
+		return;
+
+	config = hwc->config;
+	perf_iommu_read(event);
+	hwc->state |= PERF_HES_UPTODATE;
+}
+
+static int perf_iommu_add(struct perf_event *event, int flags)
+{
+	int retval;
+	struct perf_amd_iommu *perf_iommu =
+			container_of(event->pmu, struct perf_amd_iommu, pmu);
+
+	pr_debug("perf: amd_iommu:perf_iommu_add\n");
+	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+	/* request an iommu bank/counter */
+	retval = get_next_avail_iommu_bnk_cntr(perf_iommu);
+	if (retval != -ENOSPC)
+		event->hw.extra_reg.reg = (u16)retval;
+	else
+		return retval;
+
+	if (flags & PERF_EF_START)
+		perf_iommu_start(event, PERF_EF_RELOAD);
+
+	return 0;
+}
+
+static void perf_iommu_del(struct perf_event *event, int flags)
+{
+	struct perf_amd_iommu *perf_iommu =
+			container_of(event->pmu, struct perf_amd_iommu, pmu);
+
+	pr_debug("perf: amd_iommu:perf_iommu_del\n");
+	perf_iommu_stop(event, PERF_EF_UPDATE);
+
+	/* clear the assigned iommu bank/counter */
+	clear_avail_iommu_bnk_cntr(perf_iommu,
+				     _GET_BANK(event),
+				     _GET_CNTR(event));
+
+	perf_event_update_userpage(event);
+}
+
+static __init int _init_events_attrs(struct perf_amd_iommu *perf_iommu)
+{
+	struct attribute **attrs;
+	struct attribute_group *attr_group;
+	int i = 0, j;
+
+	while (amd_iommu_v2_event_descs[i].attr.attr.name)
+		i++;
+
+	attr_group = kzalloc(sizeof(struct attribute *)
+		* (i + 1) + sizeof(*attr_group), GFP_KERNEL);
+	if (!attr_group)
+		return -ENOMEM;
+
+	attrs = (struct attribute **)(attr_group + 1);
+	for (j = 0; j < i; j++)
+		attrs[j] = &amd_iommu_v2_event_descs[j].attr.attr;
+
+	attr_group->name = "events";
+	attr_group->attrs = attrs;
+	perf_iommu->events_group = attr_group;
+
+	return 0;
+}
+
+static __init void amd_iommu_pc_exit(void)
+{
+	if (__perf_iommu.events_group != NULL) {
+		kfree(__perf_iommu.events_group);
+		__perf_iommu.events_group = NULL;
+	}
+}
+
+static __init int _init_perf_amd_iommu(
+	struct perf_amd_iommu *perf_iommu, char *name)
+{
+	int ret;
+
+	raw_spin_lock_init(&perf_iommu->lock);
+
+	/* Init format attributes */
+	perf_iommu->format_group = &amd_iommu_format_group;
+
+	/* Init cpumask attributes to only core 0 */
+	cpumask_set_cpu(0, &iommu_cpumask);
+	perf_iommu->cpumask_group = &amd_iommu_cpumask_group;
+
+	/* Init events attributes */
+	if (_init_events_attrs(perf_iommu) != 0)
+		pr_err("perf: amd_iommu: Only support raw events.\n");
+
+	/* Init null attributes */
+	perf_iommu->null_group = NULL;
+	perf_iommu->pmu.attr_groups = perf_iommu->attr_groups;
+
+	ret = perf_pmu_register(&perf_iommu->pmu, name, -1);
+	if (ret) {
+		pr_err("perf: amd_iommu: Failed to initialize.\n");
+		amd_iommu_pc_exit();
+	} else {
+		pr_info("perf: amd_iommu: Detected. (%d banks, %d counters/bank)\n",
+			amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID),
+			amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID));
+	}
+
+	return ret;
+}
+
+static struct perf_amd_iommu __perf_iommu = {
+	.pmu = {
+		.event_init	= perf_iommu_event_init,
+		.add		= perf_iommu_add,
+		.del		= perf_iommu_del,
+		.start		= perf_iommu_start,
+		.stop		= perf_iommu_stop,
+		.read		= perf_iommu_read,
+	},
+	.max_banks		= 0x00,
+	.max_counters		= 0x00,
+	.cntr_assign_mask	= 0ULL,
+	.format_group		= NULL,
+	.cpumask_group		= NULL,
+	.events_group		= NULL,
+	.null_group		= NULL,
+};
+
+static __init int amd_iommu_pc_init(void)
+{
+	/* Make sure the IOMMU PC resource is available */
+	if (!amd_iommu_pc_supported()) {
+		pr_err("perf: amd_iommu PMU not installed. No support!\n");
+		return -ENODEV;
+	}
+
+	_init_perf_amd_iommu(&__perf_iommu, "amd_iommu");
+
+	return 0;
+}
+
+device_initcall(amd_iommu_pc_init);
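The 48-bit overflow handling in perf_iommu_read() above relies on a shift trick: moving both samples into the top 48 bits of a u64 lets the subtraction wrap naturally, and shifting back yields the delta modulo 2^48. A standalone demonstration (userspace C, not kernel code):

#include <stdint.h>
#include <assert.h>

#define COUNTER_SHIFT 16

int main(void)
{
	uint64_t prev  = 0xFFFFFFFFFFF0ULL;	/* close to the 48-bit limit */
	uint64_t count = 0x000000000010ULL;	/* counter wrapped around */

	/* same arithmetic as perf_iommu_read(): */
	uint64_t delta = (count << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
	delta >>= COUNTER_SHIFT;

	assert(delta == 0x20);	/* 0x10 up to the wrap + 0x10 after it */
	return 0;
}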
diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.h b/arch/x86/kernel/cpu/perf_event_amd_iommu.h
new file mode 100644
index 000000000000..845d173278e3
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_amd_iommu.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Steven Kinney <Steven.Kinney@amd.com>
+ * Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _PERF_EVENT_AMD_IOMMU_H_
+#define _PERF_EVENT_AMD_IOMMU_H_
+
+/* iommu pc mmio region register indexes */
+#define IOMMU_PC_COUNTER_REG			0x00
+#define IOMMU_PC_COUNTER_SRC_REG		0x08
+#define IOMMU_PC_PASID_MATCH_REG		0x10
+#define IOMMU_PC_DOMID_MATCH_REG		0x18
+#define IOMMU_PC_DEVID_MATCH_REG		0x20
+#define IOMMU_PC_COUNTER_REPORT_REG		0x28
+
+/* maximum specified bank/counters */
+#define PC_MAX_SPEC_BNKS			64
+#define PC_MAX_SPEC_CNTRS			16
+
+/* iommu pc reg masks */
+#define IOMMU_BASE_DEVID			0x0000
+
+/* amd_iommu_init.c external support functions */
+extern bool amd_iommu_pc_supported(void);
+
+extern u8 amd_iommu_pc_get_max_banks(u16 devid);
+
+extern u8 amd_iommu_pc_get_max_counters(u16 devid);
+
+extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr,
+			u8 fxn, u64 *value, bool is_write);
+
+#endif /*_PERF_EVENT_AMD_IOMMU_H_*/
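A sketch of the (bank, cntr) bookkeeping used by the allocator in perf_event_amd_iommu.c, worked out from the macros above (the demo program itself is illustrative, not kernel code). shift = bank + bank*3 + cntr is just bank*4 + cntr, so each bank owns four consecutive bits of cntr_assign_mask; note this quietly assumes at most four counters per bank, even though PC_MAX_SPEC_CNTRS allows 16:

#include <stdint.h>
#include <assert.h>

int main(void)
{
	int bank = 2, cntr = 1;
	int shift = bank + (bank * 3) + cntr;		/* bank*4 + cntr = bit 9 */

	/* the allocator's return value packs the pair as (bank << 8) | cntr,
	 * which _GET_BANK()/_GET_CNTR() later unpack from hw.extra_reg.reg */
	uint16_t packed = (uint16_t)(bank << 8) | (uint8_t)cntr;

	assert(shift == 9);
	assert((packed >> 8) == 2 && (packed & 0xff) == 1);
	return 0;
}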
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index a9e22073bd56..fbc9210b45bc 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -13,6 +13,7 @@
 #include <linux/slab.h>
 #include <linux/export.h>
 
+#include <asm/cpufeature.h>
 #include <asm/hardirq.h>
 #include <asm/apic.h>
 
@@ -190,6 +191,22 @@ struct attribute *snb_events_attrs[] = {
 	NULL,
 };
 
+static struct event_constraint intel_hsw_event_constraints[] = {
+	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+	INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.* */
+	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
+	INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
+	/* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
+	INTEL_EVENT_CONSTRAINT(0x08a3, 0x4),
+	/* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
+	INTEL_EVENT_CONSTRAINT(0x0ca3, 0x4),
+	/* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
+	INTEL_EVENT_CONSTRAINT(0x04a3, 0xf),
+	EVENT_CONSTRAINT_END
+};
+
 static u64 intel_pmu_event_map(int hw_event)
 {
 	return intel_perfmon_event_map[hw_event];
@@ -872,7 +889,8 @@ static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
 		return true;
 
 	/* implicit branch sampling to correct PEBS skid */
-	if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
+	if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1 &&
+	    x86_pmu.intel_cap.pebs_format < 2)
 		return true;
 
 	return false;
@@ -1167,15 +1185,11 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
 	/*
-	 * Some chipsets need to unmask the LVTPC in a particular spot
-	 * inside the nmi handler.  As a result, the unmasking was pushed
-	 * into all the nmi handlers.
-	 *
-	 * This handler doesn't seem to have any issues with the unmasking
-	 * so it was left at the top.
+	 * No known reason to not always do late ACK,
+	 * but just in case do it opt-in.
 	 */
-	apic_write(APIC_LVTPC, APIC_DM_NMI);
-
+	if (!x86_pmu.late_ack)
+		apic_write(APIC_LVTPC, APIC_DM_NMI);
 	intel_pmu_disable_all();
 	handled = intel_pmu_drain_bts_buffer();
 	status = intel_pmu_get_status();
@@ -1188,8 +1202,12 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 again:
 	intel_pmu_ack_status(status);
 	if (++loops > 100) {
-		WARN_ONCE(1, "perfevents: irq loop stuck!\n");
-		perf_event_print_debug();
+		static bool warned = false;
+		if (!warned) {
+			WARN(1, "perfevents: irq loop stuck!\n");
+			perf_event_print_debug();
+			warned = true;
+		}
 		intel_pmu_reset();
 		goto done;
 	}
@@ -1235,6 +1253,13 @@ again:
 
 done:
 	intel_pmu_enable_all(0);
+	/*
+	 * Only unmask the NMI after the overflow counters
+	 * have been reset. This avoids spurious NMIs on
+	 * Haswell CPUs.
+	 */
+	if (x86_pmu.late_ack)
+		apic_write(APIC_LVTPC, APIC_DM_NMI);
 	return handled;
 }
 
@@ -1425,7 +1450,6 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
 	if (x86_pmu.event_constraints) {
 		for_each_event_constraint(c, x86_pmu.event_constraints) {
 			if ((event->hw.config & c->cmask) == c->code) {
-				/* hw.flags zeroed at initialization */
 				event->hw.flags |= c->flags;
 				return c;
 			}
@@ -1473,7 +1497,6 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
 static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
 					struct perf_event *event)
 {
-	event->hw.flags = 0;
 	intel_put_shared_regs_event_constraints(cpuc, event);
 }
 
@@ -1646,6 +1669,47 @@ static void core_pmu_enable_all(int added)
 	}
 }
 
+static int hsw_hw_config(struct perf_event *event)
+{
+	int ret = intel_pmu_hw_config(event);
+
+	if (ret)
+		return ret;
+	if (!boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has(X86_FEATURE_HLE))
+		return 0;
+	event->hw.config |= event->attr.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED);
+
+	/*
+	 * IN_TX/IN_TX-CP filters are not supported by the Haswell PMU with
+	 * PEBS or in ANY thread mode. Since the results are non-sensical forbid
+	 * this combination.
+	 */
+	if ((event->hw.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)) &&
+	     ((event->hw.config & ARCH_PERFMON_EVENTSEL_ANY) ||
+	      event->attr.precise_ip > 0))
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+static struct event_constraint counter2_constraint =
+			EVENT_CONSTRAINT(0, 0x4, 0);
+
+static struct event_constraint *
+hsw_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+	struct event_constraint *c = intel_get_event_constraints(cpuc, event);
+
+	/* Handle special quirk on in_tx_checkpointed only in counter 2 */
+	if (event->hw.config & HSW_IN_TX_CHECKPOINTED) {
+		if (c->idxmsk64 & (1U << 2))
+			return &counter2_constraint;
+		return &emptyconstraint;
+	}
+
+	return c;
+}
+
 PMU_FORMAT_ATTR(event,	"config:0-7"	);
 PMU_FORMAT_ATTR(umask,	"config:8-15"	);
 PMU_FORMAT_ATTR(edge,	"config:18"	);
@@ -1653,6 +1717,8 @@ PMU_FORMAT_ATTR(pc, "config:19" );
 PMU_FORMAT_ATTR(any,	"config:21"	); /* v3 + */
 PMU_FORMAT_ATTR(inv,	"config:23"	);
 PMU_FORMAT_ATTR(cmask,	"config:24-31"	);
+PMU_FORMAT_ATTR(in_tx,  "config:32");
+PMU_FORMAT_ATTR(in_tx_cp, "config:33");
 
 static struct attribute *intel_arch_formats_attr[] = {
 	&format_attr_event.attr,
@@ -1807,6 +1873,8 @@ static struct attribute *intel_arch3_formats_attr[] = {
 	&format_attr_any.attr,
 	&format_attr_inv.attr,
 	&format_attr_cmask.attr,
+	&format_attr_in_tx.attr,
+	&format_attr_in_tx_cp.attr,
 
 	&format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */
 	&format_attr_ldlat.attr, /* PEBS load latency */
@@ -1966,6 +2034,15 @@ static __init void intel_nehalem_quirk(void)
 	}
 }
 
+EVENT_ATTR_STR(mem-loads,	mem_ld_hsw,	"event=0xcd,umask=0x1,ldlat=3");
+EVENT_ATTR_STR(mem-stores,	mem_st_hsw,	"event=0xd0,umask=0x82")
+
+static struct attribute *hsw_events_attrs[] = {
+	EVENT_PTR(mem_ld_hsw),
+	EVENT_PTR(mem_st_hsw),
+	NULL
+};
+
 __init int intel_pmu_init(void)
 {
 	union cpuid10_edx edx;
@@ -2189,6 +2266,30 @@ __init int intel_pmu_init(void)
 		break;
 
 
+	case 60: /* Haswell Client */
+	case 70:
+	case 71:
+	case 63:
+		x86_pmu.late_ack = true;
+		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids));
+		memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
+
+		intel_pmu_lbr_init_snb();
+
+		x86_pmu.event_constraints = intel_hsw_event_constraints;
+		x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
+		x86_pmu.extra_regs = intel_snb_extra_regs;
+		x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
+		/* all extra regs are per-cpu when HT is on */
+		x86_pmu.er_flags |= ERF_HAS_RSP_1;
+		x86_pmu.er_flags |= ERF_NO_HT_SHARING;
+
+		x86_pmu.hw_config = hsw_hw_config;
+		x86_pmu.get_event_constraints = hsw_get_event_constraints;
+		x86_pmu.cpu_events = hsw_events_attrs;
+		pr_cont("Haswell events, ");
+		break;
+
 	default:
 		switch (x86_pmu.version) {
 		case 1:
@@ -2227,7 +2328,7 @@ __init int intel_pmu_init(void)
 	 * counter, so do not extend mask to generic counters
 	 */
 	for_each_event_constraint(c, x86_pmu.event_constraints) {
-		if (c->cmask != X86_RAW_EVENT_MASK
+		if (c->cmask != FIXED_EVENT_FLAGS
 		    || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) {
 			continue;
 		}
@@ -2237,5 +2338,12 @@ __init int intel_pmu_init(void)
 		}
 	}
 
+	/* Support full width counters using alternative MSR range */
+	if (x86_pmu.intel_cap.full_width_write) {
+		x86_pmu.max_period = x86_pmu.cntval_mask;
+		x86_pmu.perfctr = MSR_IA32_PMC0;
+		pr_cont("full-width counters, ");
+	}
+
 	return 0;
 }
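On the full-width counter hunk above: writes to the legacy perfctr MSRs are sign-extended from bit 31, which caps the largest programmable period at roughly 2^31 even on 48-bit counters; the alternative MSR_IA32_PMC0 range accepts the counter's full width, so max_period can be raised to cntval_mask. A back-of-the-envelope comparison, assuming 48-bit counters (cntval_bits = 48 is typical here, but an assumption of this sketch):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t cntval_mask = (1ULL << 48) - 1;	/* 48-bit counter */
	uint64_t legacy_max  = (1ULL << 31) - 1;	/* sign-extended wrmsr */

	printf("legacy max_period:     %llu\n", (unsigned long long)legacy_max);
	printf("full-width max_period: %llu\n", (unsigned long long)cntval_mask);
	/* ~131072x longer periods, hence far fewer overflow interrupts
	 * for slow events counted to the same precision */
	return 0;
}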
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 60250f687052..3065c57a63c1 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -107,6 +107,19 @@ static u64 precise_store_data(u64 status)
 	return val;
 }
 
+static u64 precise_store_data_hsw(u64 status)
+{
+	union perf_mem_data_src dse;
+
+	dse.val = 0;
+	dse.mem_op = PERF_MEM_OP_STORE;
+	dse.mem_lvl = PERF_MEM_LVL_NA;
+	if (status & 1)
+		dse.mem_lvl = PERF_MEM_LVL_L1;
+	/* Nothing else supported. Sorry. */
+	return dse.val;
+}
+
 static u64 load_latency_data(u64 status)
 {
 	union intel_x86_pebs_dse dse;
@@ -165,6 +178,22 @@ struct pebs_record_nhm {
 	u64 status, dla, dse, lat;
 };
 
+/*
+ * Same as pebs_record_nhm, with two additional fields.
+ */
+struct pebs_record_hsw {
+	struct pebs_record_nhm nhm;
+	/*
+	 * Real IP of the event. In the Intel documentation this
+	 * is called eventingrip.
+	 */
+	u64 real_ip;
+	/*
+	 * TSX tuning information field: abort cycles and abort flags.
+	 */
+	u64 tsx_tuning;
+};
+
 void init_debug_store_on_cpu(int cpu)
 {
 	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
@@ -548,6 +577,42 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = {
 	EVENT_CONSTRAINT_END
 };
 
+struct event_constraint intel_hsw_pebs_event_constraints[] = {
+	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
+	INTEL_PST_HSW_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
+	INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
+	INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
+	INTEL_UEVENT_CONSTRAINT(0x01c5, 0xf), /* BR_MISP_RETIRED.CONDITIONAL */
+	INTEL_UEVENT_CONSTRAINT(0x04c5, 0xf), /* BR_MISP_RETIRED.ALL_BRANCHES */
+	INTEL_UEVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.NEAR_TAKEN */
+	INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.* */
+	/* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
+	INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf),
+	/* MEM_UOPS_RETIRED.STLB_MISS_STORES */
+	INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf),
+	INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
+	INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
+	/* MEM_UOPS_RETIRED.SPLIT_STORES */
+	INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf),
+	INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
+	INTEL_PST_HSW_CONSTRAINT(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
+	INTEL_UEVENT_CONSTRAINT(0x01d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L1_HIT */
+	INTEL_UEVENT_CONSTRAINT(0x02d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L2_HIT */
+	INTEL_UEVENT_CONSTRAINT(0x04d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L3_HIT */
+	/* MEM_LOAD_UOPS_RETIRED.HIT_LFB */
+	INTEL_UEVENT_CONSTRAINT(0x40d1, 0xf),
+	/* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS */
+	INTEL_UEVENT_CONSTRAINT(0x01d2, 0xf),
+	/* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT */
+	INTEL_UEVENT_CONSTRAINT(0x02d2, 0xf),
+	/* MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM */
+	INTEL_UEVENT_CONSTRAINT(0x01d3, 0xf),
+	INTEL_UEVENT_CONSTRAINT(0x04c8, 0xf), /* HLE_RETIRED.Abort */
+	INTEL_UEVENT_CONSTRAINT(0x04c9, 0xf), /* RTM_RETIRED.Abort */
+
+	EVENT_CONSTRAINT_END
+};
+
 struct event_constraint *intel_pebs_constraints(struct perf_event *event)
 {
 	struct event_constraint *c;
@@ -588,6 +653,12 @@ void intel_pmu_pebs_disable(struct perf_event *event)
 	struct hw_perf_event *hwc = &event->hw;
 
 	cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
+
+	if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_LDLAT)
+		cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
+	else if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_ST)
+		cpuc->pebs_enabled &= ~(1ULL << 63);
+
 	if (cpuc->enabled)
 		wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
 
@@ -697,6 +768,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 	 */
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct pebs_record_nhm *pebs = __pebs;
+	struct pebs_record_hsw *pebs_hsw = __pebs;
 	struct perf_sample_data data;
 	struct pt_regs regs;
 	u64 sample_type;
@@ -706,7 +778,8 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 		return;
 
 	fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;
-	fst = event->hw.flags & PERF_X86_EVENT_PEBS_ST;
+	fst = event->hw.flags & (PERF_X86_EVENT_PEBS_ST |
+				 PERF_X86_EVENT_PEBS_ST_HSW);
 
 	perf_sample_data_init(&data, 0, event->hw.last_period);
 
@@ -717,9 +790,6 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 	 * if PEBS-LL or PreciseStore
 	 */
 	if (fll || fst) {
-		if (sample_type & PERF_SAMPLE_ADDR)
-			data.addr = pebs->dla;
-
 		/*
 		 * Use latency for weight (only avail with PEBS-LL)
 		 */
@@ -732,6 +802,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 		if (sample_type & PERF_SAMPLE_DATA_SRC) {
 			if (fll)
 				data.data_src.val = load_latency_data(pebs->dse);
+			else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
+				data.data_src.val =
+					precise_store_data_hsw(pebs->dse);
 			else
 				data.data_src.val = precise_store_data(pebs->dse);
 		}
@@ -753,11 +826,18 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 	regs.bp = pebs->bp;
 	regs.sp = pebs->sp;
 
-	if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
+	if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
+		regs.ip = pebs_hsw->real_ip;
+		regs.flags |= PERF_EFLAGS_EXACT;
+	} else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
 		regs.flags |= PERF_EFLAGS_EXACT;
 	else
 		regs.flags &= ~PERF_EFLAGS_EXACT;
 
+	if ((event->attr.sample_type & PERF_SAMPLE_ADDR) &&
+	    x86_pmu.intel_cap.pebs_format >= 1)
+		data.addr = pebs->dla;
+
 	if (has_branch_stack(event))
 		data.br_stack = &cpuc->lbr_stack;
 
@@ -806,35 +886,22 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
 	__intel_pmu_pebs_event(event, iregs, at);
 }
 
-static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
+static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at,
+					void *top)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct debug_store *ds = cpuc->ds;
-	struct pebs_record_nhm *at, *top;
 	struct perf_event *event = NULL;
 	u64 status = 0;
-	int bit, n;
-
-	if (!x86_pmu.pebs_active)
-		return;
-
-	at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
-	top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
+	int bit;
 
 	ds->pebs_index = ds->pebs_buffer_base;
 
-	n = top - at;
-	if (n <= 0)
-		return;
-
-	/*
-	 * Should not happen, we program the threshold at 1 and do not
-	 * set a reset value.
-	 */
-	WARN_ONCE(n > x86_pmu.max_pebs_events, "Unexpected number of pebs records %d\n", n);
+	for (; at < top; at += x86_pmu.pebs_record_size) {
+		struct pebs_record_nhm *p = at;
 
-	for ( ; at < top; at++) {
-		for_each_set_bit(bit, (unsigned long *)&at->status, x86_pmu.max_pebs_events) {
+		for_each_set_bit(bit, (unsigned long *)&p->status,
+				 x86_pmu.max_pebs_events) {
 			event = cpuc->events[bit];
 			if (!test_bit(bit, cpuc->active_mask))
 				continue;
@@ -857,6 +924,61 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 	}
 }
 
+static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct debug_store *ds = cpuc->ds;
+	struct pebs_record_nhm *at, *top;
+	int n;
+
+	if (!x86_pmu.pebs_active)
+		return;
+
+	at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
+	top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
+
+	ds->pebs_index = ds->pebs_buffer_base;
+
+	n = top - at;
+	if (n <= 0)
+		return;
+
+	/*
+	 * Should not happen, we program the threshold at 1 and do not
+	 * set a reset value.
+	 */
+	WARN_ONCE(n > x86_pmu.max_pebs_events,
+		  "Unexpected number of pebs records %d\n", n);
+
+	return __intel_pmu_drain_pebs_nhm(iregs, at, top);
+}
+
+static void intel_pmu_drain_pebs_hsw(struct pt_regs *iregs)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct debug_store *ds = cpuc->ds;
+	struct pebs_record_hsw *at, *top;
+	int n;
+
+	if (!x86_pmu.pebs_active)
+		return;
+
+	at  = (struct pebs_record_hsw *)(unsigned long)ds->pebs_buffer_base;
+	top = (struct pebs_record_hsw *)(unsigned long)ds->pebs_index;
+
+	n = top - at;
+	if (n <= 0)
+		return;
+	/*
+	 * Should not happen, we program the threshold at 1 and do not
+	 * set a reset value.
+	 */
+	WARN_ONCE(n > x86_pmu.max_pebs_events,
+		  "Unexpected number of pebs records %d\n", n);
+
+	return __intel_pmu_drain_pebs_nhm(iregs, at, top);
+}
+
 /*
  * BTS, PEBS probe and setup
  */
@@ -888,6 +1010,12 @@ void intel_ds_init(void)
 			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
 			break;
 
+		case 2:
+			pr_cont("PEBS fmt2%c, ", pebs_type);
+			x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
+			x86_pmu.drain_pebs = intel_pmu_drain_pebs_hsw;
+			break;
+
 		default:
 			printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
 			x86_pmu.pebs = 0;
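A note on the PEBS drain rework above: the shared loop advances by x86_pmu.pebs_record_size rather than a typed pointer increment because the record layout grew between formats; only the leading pebs_record_nhm portion is dereferenced generically, and fmt2-only fields (real_ip, tsx_tuning) are reached through an explicit cast in __intel_pmu_pebs_event(). A sketch mirroring the two layouts to show the stride difference (the demo_* structs are stand-ins, not the kernel's types):

#include <stdio.h>
#include <stdint.h>

struct demo_pebs_nhm {	/* fmt0/fmt1: flags, ip, 16 GPRs, status, dla, dse, lat */
	uint64_t flags, ip;
	uint64_t ax, bx, cx, dx, si, di, bp, sp;
	uint64_t r8, r9, r10, r11, r12, r13, r14, r15;
	uint64_t status, dla, dse, lat;
};

struct demo_pebs_hsw {	/* fmt2 appends two fields */
	struct demo_pebs_nhm nhm;
	uint64_t real_ip;	/* "eventingrip": no LBR-based fixup needed */
	uint64_t tsx_tuning;
};

int main(void)
{
	printf("fmt0/1 stride: %zu\n", sizeof(struct demo_pebs_nhm)); /* 176 */
	printf("fmt2 stride:   %zu\n", sizeof(struct demo_pebs_hsw)); /* 192 */
	return 0;
}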
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index d978353c939b..d5be06a5005e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -12,6 +12,16 @@ enum {
 	LBR_FORMAT_LIP		= 0x01,
 	LBR_FORMAT_EIP		= 0x02,
 	LBR_FORMAT_EIP_FLAGS	= 0x03,
+	LBR_FORMAT_EIP_FLAGS2	= 0x04,
+	LBR_FORMAT_MAX_KNOWN	= LBR_FORMAT_EIP_FLAGS2,
+};
+
+static enum {
+	LBR_EIP_FLAGS		= 1,
+	LBR_TSX			= 2,
+} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
+	[LBR_FORMAT_EIP_FLAGS]  = LBR_EIP_FLAGS,
+	[LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX,
 };
 
 /*
@@ -56,6 +66,8 @@ enum {
56 LBR_FAR) 66 LBR_FAR)
57 67
58#define LBR_FROM_FLAG_MISPRED (1ULL << 63) 68#define LBR_FROM_FLAG_MISPRED (1ULL << 63)
69#define LBR_FROM_FLAG_IN_TX (1ULL << 62)
70#define LBR_FROM_FLAG_ABORT (1ULL << 61)
59 71
60#define for_each_branch_sample_type(x) \ 72#define for_each_branch_sample_type(x) \
61 for ((x) = PERF_SAMPLE_BRANCH_USER; \ 73 for ((x) = PERF_SAMPLE_BRANCH_USER; \
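
With the new LBR_FORMAT_EIP_FLAGS2 format, the top three bits of each LBR_FROM entry carry the mispredict, in-transaction and abort flags defined above. A small stand-alone decoder using the same bit definitions (the sample value is made up):

#include <stdint.h>
#include <stdio.h>

#define LBR_FROM_FLAG_MISPRED	(1ULL << 63)
#define LBR_FROM_FLAG_IN_TX	(1ULL << 62)
#define LBR_FROM_FLAG_ABORT	(1ULL << 61)

int main(void)
{
	/* Made-up raw LBR_FROM value: a mispredicted branch inside a
	 * transaction, source address 0x4005d0 */
	uint64_t from = LBR_FROM_FLAG_MISPRED | LBR_FROM_FLAG_IN_TX |
			0x4005d0ULL;

	printf("mispred=%d in_tx=%d abort=%d\n",
	       !!(from & LBR_FROM_FLAG_MISPRED),
	       !!(from & LBR_FROM_FLAG_IN_TX),
	       !!(from & LBR_FROM_FLAG_ABORT));
	return 0;
}
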
@@ -81,9 +93,13 @@ enum {
81 X86_BR_JMP = 1 << 9, /* jump */ 93 X86_BR_JMP = 1 << 9, /* jump */
82 X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */ 94 X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */
83 X86_BR_IND_CALL = 1 << 11,/* indirect calls */ 95 X86_BR_IND_CALL = 1 << 11,/* indirect calls */
96 X86_BR_ABORT = 1 << 12,/* transaction abort */
97 X86_BR_IN_TX = 1 << 13,/* in transaction */
98 X86_BR_NO_TX = 1 << 14,/* not in transaction */
84}; 99};
85 100
86#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL) 101#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
102#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX)
87 103
88#define X86_BR_ANY \ 104#define X86_BR_ANY \
89 (X86_BR_CALL |\ 105 (X86_BR_CALL |\
@@ -95,6 +111,7 @@ enum {
95 X86_BR_JCC |\ 111 X86_BR_JCC |\
96 X86_BR_JMP |\ 112 X86_BR_JMP |\
97 X86_BR_IRQ |\ 113 X86_BR_IRQ |\
114 X86_BR_ABORT |\
98 X86_BR_IND_CALL) 115 X86_BR_IND_CALL)
99 116
100#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY) 117#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
@@ -270,21 +287,31 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
270 287
271 for (i = 0; i < x86_pmu.lbr_nr; i++) { 288 for (i = 0; i < x86_pmu.lbr_nr; i++) {
272 unsigned long lbr_idx = (tos - i) & mask; 289 unsigned long lbr_idx = (tos - i) & mask;
273 u64 from, to, mis = 0, pred = 0; 290 u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
291 int skip = 0;
292 int lbr_flags = lbr_desc[lbr_format];
274 293
275 rdmsrl(x86_pmu.lbr_from + lbr_idx, from); 294 rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
276 rdmsrl(x86_pmu.lbr_to + lbr_idx, to); 295 rdmsrl(x86_pmu.lbr_to + lbr_idx, to);
277 296
278 if (lbr_format == LBR_FORMAT_EIP_FLAGS) { 297 if (lbr_flags & LBR_EIP_FLAGS) {
279 mis = !!(from & LBR_FROM_FLAG_MISPRED); 298 mis = !!(from & LBR_FROM_FLAG_MISPRED);
280 pred = !mis; 299 pred = !mis;
281 from = (u64)((((s64)from) << 1) >> 1); 300 skip = 1;
301 }
302 if (lbr_flags & LBR_TSX) {
303 in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
304 abort = !!(from & LBR_FROM_FLAG_ABORT);
305 skip = 3;
282 } 306 }
307 from = (u64)((((s64)from) << skip) >> skip);
283 308
284 cpuc->lbr_entries[i].from = from; 309 cpuc->lbr_entries[i].from = from;
285 cpuc->lbr_entries[i].to = to; 310 cpuc->lbr_entries[i].to = to;
286 cpuc->lbr_entries[i].mispred = mis; 311 cpuc->lbr_entries[i].mispred = mis;
287 cpuc->lbr_entries[i].predicted = pred; 312 cpuc->lbr_entries[i].predicted = pred;
313 cpuc->lbr_entries[i].in_tx = in_tx;
314 cpuc->lbr_entries[i].abort = abort;
288 cpuc->lbr_entries[i].reserved = 0; 315 cpuc->lbr_entries[i].reserved = 0;
289 } 316 }
290 cpuc->lbr_stack.nr = i; 317 cpuc->lbr_stack.nr = i;
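
The "from = (u64)((((s64)from) << skip) >> skip)" step strips the skip flag bits off the top of LBR_FROM and sign-extends from the new top bit, so kernel addresses come back canonical once the flags are removed. A stand-alone sketch of the trick (strip_flags is an illustrative name; like the kernel code it assumes >> on a signed value is an arithmetic shift):

#include <stdint.h>
#include <stdio.h>

static uint64_t strip_flags(uint64_t from, int skip)
{
	/* Shift the flag bits out of the top, then shift back with a
	 * signed type so the new top bit sign-extends. */
	return (uint64_t)(((int64_t)(from << skip)) >> skip);
}

int main(void)
{
	/* user-space source address with the mispred (bit 63) and
	 * abort (bit 61) flags set on top */
	uint64_t raw = (5ULL << 61) | 0x00007f00deadbeefULL;

	printf("%#llx -> %#llx\n",
	       (unsigned long long)raw,
	       (unsigned long long)strip_flags(raw, 3)); /* TSX format: skip 3 */
	return 0;
}
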
@@ -310,7 +337,7 @@ void intel_pmu_lbr_read(void)
310 * - in case there is no HW filter 337 * - in case there is no HW filter
311 * - in case the HW filter has errata or limitations 338 * - in case the HW filter has errata or limitations
312 */ 339 */
313static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event) 340static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
314{ 341{
315 u64 br_type = event->attr.branch_sample_type; 342 u64 br_type = event->attr.branch_sample_type;
316 int mask = 0; 343 int mask = 0;
@@ -318,11 +345,8 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
318 if (br_type & PERF_SAMPLE_BRANCH_USER) 345 if (br_type & PERF_SAMPLE_BRANCH_USER)
319 mask |= X86_BR_USER; 346 mask |= X86_BR_USER;
320 347
321 if (br_type & PERF_SAMPLE_BRANCH_KERNEL) { 348 if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
322 if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
323 return -EACCES;
324 mask |= X86_BR_KERNEL; 349 mask |= X86_BR_KERNEL;
325 }
326 350
327 /* we ignore BRANCH_HV here */ 351 /* we ignore BRANCH_HV here */
328 352
@@ -337,13 +361,21 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
337 361
338 if (br_type & PERF_SAMPLE_BRANCH_IND_CALL) 362 if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
339 mask |= X86_BR_IND_CALL; 363 mask |= X86_BR_IND_CALL;
364
365 if (br_type & PERF_SAMPLE_BRANCH_ABORT_TX)
366 mask |= X86_BR_ABORT;
367
368 if (br_type & PERF_SAMPLE_BRANCH_IN_TX)
369 mask |= X86_BR_IN_TX;
370
371 if (br_type & PERF_SAMPLE_BRANCH_NO_TX)
372 mask |= X86_BR_NO_TX;
373
340 /* 374 /*
341 * stash actual user request into reg, it may 375 * stash actual user request into reg, it may
342 * be used by fixup code for some CPU 376 * be used by fixup code for some CPU
343 */ 377 */
344 event->hw.branch_reg.reg = mask; 378 event->hw.branch_reg.reg = mask;
345
346 return 0;
347} 379}
348 380
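
The filter setup above is a straight translation from PERF_SAMPLE_BRANCH_* request bits to the internal X86_BR_* mask stashed in hw.branch_reg.reg. A reduced sketch of the shape of that translation (the SAMPLE_BRANCH_*/BR_* constants here are abbreviated stand-ins, not the real perf ABI values):

#include <stdio.h>

#define SAMPLE_BRANCH_USER	(1U << 0)
#define SAMPLE_BRANCH_ANY_CALL	(1U << 1)
#define SAMPLE_BRANCH_IN_TX	(1U << 2)

#define BR_USER		(1U << 0)
#define BR_CALL		(1U << 1)
#define BR_IN_TX	(1U << 2)

static unsigned int to_br_mask(unsigned int br_type)
{
	unsigned int mask = 0;

	if (br_type & SAMPLE_BRANCH_USER)
		mask |= BR_USER;
	if (br_type & SAMPLE_BRANCH_ANY_CALL)
		mask |= BR_CALL;
	if (br_type & SAMPLE_BRANCH_IN_TX)
		mask |= BR_IN_TX;
	return mask;
}

int main(void)
{
	/* user-level branches inside transactions -> 0x5 */
	printf("%#x\n", to_br_mask(SAMPLE_BRANCH_USER | SAMPLE_BRANCH_IN_TX));
	return 0;
}
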
349/* 381/*
@@ -391,9 +423,7 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event)
391 /* 423 /*
392 * setup SW LBR filter 424 * setup SW LBR filter
393 */ 425 */
394 ret = intel_pmu_setup_sw_lbr_filter(event); 426 intel_pmu_setup_sw_lbr_filter(event);
395 if (ret)
396 return ret;
397 427
398 /* 428 /*
399 * setup HW LBR filter, if any 429 * setup HW LBR filter, if any
@@ -415,7 +445,7 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event)
415 * decoded (e.g., text page not present), then X86_BR_NONE is 445 * decoded (e.g., text page not present), then X86_BR_NONE is
416 * returned. 446 * returned.
417 */ 447 */
418static int branch_type(unsigned long from, unsigned long to) 448static int branch_type(unsigned long from, unsigned long to, int abort)
419{ 449{
420 struct insn insn; 450 struct insn insn;
421 void *addr; 451 void *addr;
@@ -435,6 +465,9 @@ static int branch_type(unsigned long from, unsigned long to)
435 if (from == 0 || to == 0) 465 if (from == 0 || to == 0)
436 return X86_BR_NONE; 466 return X86_BR_NONE;
437 467
468 if (abort)
469 return X86_BR_ABORT | to_plm;
470
438 if (from_plm == X86_BR_USER) { 471 if (from_plm == X86_BR_USER) {
439 /* 472 /*
440 * can happen if measuring at the user level only 473 * can happen if measuring at the user level only
@@ -581,7 +614,13 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
581 from = cpuc->lbr_entries[i].from; 614 from = cpuc->lbr_entries[i].from;
582 to = cpuc->lbr_entries[i].to; 615 to = cpuc->lbr_entries[i].to;
583 616
584 type = branch_type(from, to); 617 type = branch_type(from, to, cpuc->lbr_entries[i].abort);
618 if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) {
619 if (cpuc->lbr_entries[i].in_tx)
620 type |= X86_BR_IN_TX;
621 else
622 type |= X86_BR_NO_TX;
623 }
585 624
586 /* if type does not correspond, then discard */ 625 /* if type does not correspond, then discard */
587 if (type == X86_BR_NONE || (br_sel & type) != type) { 626 if (type == X86_BR_NONE || (br_sel & type) != type) {
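
An LBR entry survives the software filter only when every bit of its classified type is contained in the requested mask, which is how the new IN_TX/NO_TX tagging interacts with selection: a branch tagged NO_TX is dropped when only IN_TX branches were asked for. A worked stand-alone example (bit positions are illustrative):

#include <stdio.h>

#define BR_USER		(1 << 0)
#define BR_JCC		(1 << 8)
#define BR_IN_TX	(1 << 13)
#define BR_NO_TX	(1 << 14)

/* An entry is kept only if every bit of its type is also present in
 * the user's selection mask (and it was classifiable at all). */
static int keep(int br_sel, int type)
{
	return type != 0 && (br_sel & type) == type;
}

int main(void)
{
	int br_sel = BR_USER | BR_JCC | BR_IN_TX;

	/* user-level Jcc inside a transaction: kept (prints 1) */
	printf("%d\n", keep(br_sel, BR_USER | BR_JCC | BR_IN_TX));
	/* same branch outside a transaction: dropped, NO_TX not requested */
	printf("%d\n", keep(br_sel, BR_USER | BR_JCC | BR_NO_TX));
	return 0;
}
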
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 52441a2af538..9dd99751ccf9 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -536,7 +536,7 @@ __snbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *eve
536 if (!uncore_box_is_fake(box)) 536 if (!uncore_box_is_fake(box))
537 reg1->alloc |= alloc; 537 reg1->alloc |= alloc;
538 538
539 return 0; 539 return NULL;
540fail: 540fail:
541 for (; i >= 0; i--) { 541 for (; i >= 0; i--) {
542 if (alloc & (0x1 << i)) 542 if (alloc & (0x1 << i))
@@ -644,7 +644,7 @@ snbep_pcu_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
644 (!uncore_box_is_fake(box) && reg1->alloc)) 644 (!uncore_box_is_fake(box) && reg1->alloc))
645 return NULL; 645 return NULL;
646again: 646again:
647 mask = 0xff << (idx * 8); 647 mask = 0xffULL << (idx * 8);
648 raw_spin_lock_irqsave(&er->lock, flags); 648 raw_spin_lock_irqsave(&er->lock, flags);
649 if (!__BITS_VALUE(atomic_read(&er->ref), idx, 8) || 649 if (!__BITS_VALUE(atomic_read(&er->ref), idx, 8) ||
650 !((config1 ^ er->config) & mask)) { 650 !((config1 ^ er->config) & mask)) {
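
The 0xff -> 0xffULL change matters because 0xff << (idx * 8) is evaluated in 32-bit int: for idx >= 4 the shift count reaches the operand width, which is undefined behaviour in C, and on x86 the count typically wraps so the mask lands on the wrong byte lane. A sketch (the undefined line is left commented out; GCC warns "left shift count >= width of type" on it):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	int idx = 4;	/* fifth 8-bit field of a 64-bit config word */

	/* With a plain int constant the expression is only 32 bits
	 * wide, so this is undefined for idx >= 4:
	 *
	 *	uint64_t bad = 0xff << (idx * 8);
	 */
	uint64_t good = 0xffULL << (idx * 8);

	printf("good mask: %#llx\n", (unsigned long long)good);
	return 0;
}
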
@@ -1923,7 +1923,7 @@ static u64 nhmex_mbox_alter_er(struct perf_event *event, int new_idx, bool modif
1923{ 1923{
1924 struct hw_perf_event *hwc = &event->hw; 1924 struct hw_perf_event *hwc = &event->hw;
1925 struct hw_perf_event_extra *reg1 = &hwc->extra_reg; 1925 struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
1926 int idx, orig_idx = __BITS_VALUE(reg1->idx, 0, 8); 1926 u64 idx, orig_idx = __BITS_VALUE(reg1->idx, 0, 8);
1927 u64 config = reg1->config; 1927 u64 config = reg1->config;
1928 1928
1929 /* get the non-shared control bits and shift them */ 1929 /* get the non-shared control bits and shift them */
@@ -2723,15 +2723,16 @@ static void uncore_put_event_constraint(struct intel_uncore_box *box, struct per
2723static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n) 2723static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
2724{ 2724{
2725 unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)]; 2725 unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
2726 struct event_constraint *c, *constraints[UNCORE_PMC_IDX_MAX]; 2726 struct event_constraint *c;
2727 int i, wmin, wmax, ret = 0; 2727 int i, wmin, wmax, ret = 0;
2728 struct hw_perf_event *hwc; 2728 struct hw_perf_event *hwc;
2729 2729
2730 bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX); 2730 bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
2731 2731
2732 for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) { 2732 for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
2733 hwc = &box->event_list[i]->hw;
2733 c = uncore_get_event_constraint(box, box->event_list[i]); 2734 c = uncore_get_event_constraint(box, box->event_list[i]);
2734 constraints[i] = c; 2735 hwc->constraint = c;
2735 wmin = min(wmin, c->weight); 2736 wmin = min(wmin, c->weight);
2736 wmax = max(wmax, c->weight); 2737 wmax = max(wmax, c->weight);
2737 } 2738 }
@@ -2739,7 +2740,7 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int
2739 /* fastpath, try to reuse previous register */ 2740 /* fastpath, try to reuse previous register */
2740 for (i = 0; i < n; i++) { 2741 for (i = 0; i < n; i++) {
2741 hwc = &box->event_list[i]->hw; 2742 hwc = &box->event_list[i]->hw;
2742 c = constraints[i]; 2743 c = hwc->constraint;
2743 2744
2744 /* never assigned */ 2745 /* never assigned */
2745 if (hwc->idx == -1) 2746 if (hwc->idx == -1)
@@ -2759,7 +2760,8 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int
2759 } 2760 }
2760 /* slow path */ 2761 /* slow path */
2761 if (i != n) 2762 if (i != n)
2762 ret = perf_assign_events(constraints, n, wmin, wmax, assign); 2763 ret = perf_assign_events(box->event_list, n,
2764 wmin, wmax, assign);
2763 2765
2764 if (!assign || ret) { 2766 if (!assign || ret) {
2765 for (i = 0; i < n; i++) 2767 for (i = 0; i < n; i++)
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index f9528917f6e8..47b3d00c9d89 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -337,10 +337,10 @@
337 NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK) 337 NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK)
338 338
339#define NHMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 11) - 1) | (1 << 23)) 339#define NHMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 11) - 1) | (1 << 23))
340#define NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7 << (11 + 3 * (n))) 340#define NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7ULL << (11 + 3 * (n)))
341 341
342#define WSMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 12) - 1) | (1 << 24)) 342#define WSMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 12) - 1) | (1 << 24))
343#define WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7 << (12 + 3 * (n))) 343#define WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7ULL << (12 + 3 * (n)))
344 344
345/* 345/*
346 * use bits 9~13 to select the event; if the 7th bit is not set, 346 * use bits 9~13 to select the event; if the 7th bit is not set,
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 60308053fdb2..0920212e6159 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -14,6 +14,7 @@
14#include <linux/kprobes.h> 14#include <linux/kprobes.h>
15#include <linux/kdebug.h> 15#include <linux/kdebug.h>
16#include <linux/nmi.h> 16#include <linux/nmi.h>
17#include <linux/debugfs.h>
17#include <linux/delay.h> 18#include <linux/delay.h>
18#include <linux/hardirq.h> 19#include <linux/hardirq.h>
19#include <linux/slab.h> 20#include <linux/slab.h>
@@ -29,6 +30,9 @@
29#include <asm/nmi.h> 30#include <asm/nmi.h>
30#include <asm/x86_init.h> 31#include <asm/x86_init.h>
31 32
33#define CREATE_TRACE_POINTS
34#include <trace/events/nmi.h>
35
32struct nmi_desc { 36struct nmi_desc {
33 spinlock_t lock; 37 spinlock_t lock;
34 struct list_head head; 38 struct list_head head;
@@ -82,6 +86,15 @@ __setup("unknown_nmi_panic", setup_unknown_nmi_panic);
82 86
83#define nmi_to_desc(type) (&nmi_desc[type]) 87#define nmi_to_desc(type) (&nmi_desc[type])
84 88
89static u64 nmi_longest_ns = 1 * NSEC_PER_MSEC;
90static int __init nmi_warning_debugfs(void)
91{
92 debugfs_create_u64("nmi_longest_ns", 0644,
93 arch_debugfs_dir, &nmi_longest_ns);
94 return 0;
95}
96fs_initcall(nmi_warning_debugfs);
97
85static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) 98static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
86{ 99{
87 struct nmi_desc *desc = nmi_to_desc(type); 100 struct nmi_desc *desc = nmi_to_desc(type);
@@ -96,8 +109,27 @@ static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2
96 * can be latched at any given time. Walk the whole list 109 * can be latched at any given time. Walk the whole list
97 * to handle those situations. 110 * to handle those situations.
98 */ 111 */
99 list_for_each_entry_rcu(a, &desc->head, list) 112 list_for_each_entry_rcu(a, &desc->head, list) {
100 handled += a->handler(type, regs); 113 u64 before, delta, whole_msecs;
114 int decimal_msecs, thishandled;
115
116 before = local_clock();
117 thishandled = a->handler(type, regs);
118 handled += thishandled;
119 delta = local_clock() - before;
120 trace_nmi_handler(a->handler, (int)delta, thishandled);
121
122 if (delta < nmi_longest_ns)
123 continue;
124
125 nmi_longest_ns = delta;
 126		whole_msecs = delta;
 127		decimal_msecs = do_div(whole_msecs, (1000 * 1000)) / 1000;
128 printk_ratelimited(KERN_INFO
129 "INFO: NMI handler (%ps) took too long to run: "
130 "%lld.%03d msecs\n", a->handler, whole_msecs,
131 decimal_msecs);
132 }
101 133
102 rcu_read_unlock(); 134 rcu_read_unlock();
103 135
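
The duration report formats a nanosecond delta as milliseconds with three decimal digits; do_div() divides its first argument in place and returns the remainder, hence the quotient/remainder split above. The nmi_longest_ns threshold created earlier is adjustable at run time via the arch debugfs directory (typically /sys/kernel/debug/x86/nmi_longest_ns). A user-space rendition of the same math with plain division:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* e.g. the difference of two local_clock() readings, in ns */
	uint64_t delta = 1234567;

	uint64_t whole_msecs   = delta / (1000 * 1000);
	unsigned int dec_msecs = (delta % (1000 * 1000)) / 1000;

	/* prints "took 1.234 msecs" */
	printf("took %llu.%03u msecs\n",
	       (unsigned long long)whole_msecs, dec_msecs);
	return 0;
}
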
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 69562992e457..cf913587d4dd 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -43,12 +43,6 @@
43 43
44#include <asm/sigframe.h> 44#include <asm/sigframe.h>
45 45
46#ifdef CONFIG_X86_32
47# define FIX_EFLAGS (__FIX_EFLAGS | X86_EFLAGS_RF)
48#else
49# define FIX_EFLAGS __FIX_EFLAGS
50#endif
51
52#define COPY(x) do { \ 46#define COPY(x) do { \
53 get_user_ex(regs->x, &sc->x); \ 47 get_user_ex(regs->x, &sc->x); \
54} while (0) 48} while (0)
@@ -668,15 +662,17 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
668 if (!failed) { 662 if (!failed) {
669 /* 663 /*
670 * Clear the direction flag as per the ABI for function entry. 664 * Clear the direction flag as per the ABI for function entry.
671 */ 665 *
672 regs->flags &= ~X86_EFLAGS_DF; 666 * Clear RF when entering the signal handler, because
673 /* 667 * leaving it set could suppress a debug exception
668 * inside the signal handler.
669 *
674 * Clear TF when entering the signal handler, but 670 * Clear TF when entering the signal handler, but
675 * notify any tracer that was single-stepping it. 671 * notify any tracer that was single-stepping it.
676 * The tracer may want to single-step inside the 672 * The tracer may want to single-step inside the
677 * handler too. 673 * handler too.
678 */ 674 */
679 regs->flags &= ~X86_EFLAGS_TF; 675 regs->flags &= ~(X86_EFLAGS_DF|X86_EFLAGS_RF|X86_EFLAGS_TF);
680 } 676 }
681 signal_setup_done(failed, ksig, test_thread_flag(TIF_SINGLESTEP)); 677 signal_setup_done(failed, ksig, test_thread_flag(TIF_SINGLESTEP));
682} 678}