diff options
-rw-r--r-- | arch/x86/kernel/cpu/perf_event.h | 18 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel_ds.c | 8 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel_pt.c | 33 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel_rapl.c | 94 | ||||
-rw-r--r-- | include/trace/events/filemap.h | 8 | ||||
-rw-r--r-- | include/trace/events/kmem.h | 42 | ||||
-rw-r--r-- | include/trace/events/vmscan.h | 8 | ||||
-rw-r--r-- | tools/perf/Documentation/perf-kmem.txt | 8 | ||||
-rw-r--r-- | tools/perf/builtin-kmem.c | 500 | ||||
-rw-r--r-- | tools/perf/util/probe-event.c | 60 | ||||
-rw-r--r-- | tools/perf/util/probe-finder.c | 73 | ||||
-rw-r--r-- | tools/perf/util/probe-finder.h | 4 |
13 files changed, 702 insertions, 156 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 329f0356ad4a..6ac5cb7a9e14 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h | |||
@@ -65,15 +65,15 @@ struct event_constraint { | |||
65 | /* | 65 | /* |
66 | * struct hw_perf_event.flags flags | 66 | * struct hw_perf_event.flags flags |
67 | */ | 67 | */ |
68 | #define PERF_X86_EVENT_PEBS_LDLAT 0x1 /* ld+ldlat data address sampling */ | 68 | #define PERF_X86_EVENT_PEBS_LDLAT 0x0001 /* ld+ldlat data address sampling */ |
69 | #define PERF_X86_EVENT_PEBS_ST 0x2 /* st data address sampling */ | 69 | #define PERF_X86_EVENT_PEBS_ST 0x0002 /* st data address sampling */ |
70 | #define PERF_X86_EVENT_PEBS_ST_HSW 0x4 /* haswell style datala, store */ | 70 | #define PERF_X86_EVENT_PEBS_ST_HSW 0x0004 /* haswell style datala, store */ |
71 | #define PERF_X86_EVENT_COMMITTED 0x8 /* event passed commit_txn */ | 71 | #define PERF_X86_EVENT_COMMITTED 0x0008 /* event passed commit_txn */ |
72 | #define PERF_X86_EVENT_PEBS_LD_HSW 0x10 /* haswell style datala, load */ | 72 | #define PERF_X86_EVENT_PEBS_LD_HSW 0x0010 /* haswell style datala, load */ |
73 | #define PERF_X86_EVENT_PEBS_NA_HSW 0x20 /* haswell style datala, unknown */ | 73 | #define PERF_X86_EVENT_PEBS_NA_HSW 0x0020 /* haswell style datala, unknown */ |
74 | #define PERF_X86_EVENT_EXCL 0x40 /* HT exclusivity on counter */ | 74 | #define PERF_X86_EVENT_EXCL 0x0040 /* HT exclusivity on counter */ |
75 | #define PERF_X86_EVENT_DYNAMIC 0x80 /* dynamic alloc'd constraint */ | 75 | #define PERF_X86_EVENT_DYNAMIC 0x0080 /* dynamic alloc'd constraint */ |
76 | #define PERF_X86_EVENT_RDPMC_ALLOWED 0x40 /* grant rdpmc permission */ | 76 | #define PERF_X86_EVENT_RDPMC_ALLOWED 0x0100 /* grant rdpmc permission */ |
77 | 77 | ||
78 | 78 | ||
79 | struct amd_nb { | 79 | struct amd_nb { |
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 9da2400c2ec3..219d3fb423a1 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -3275,7 +3275,7 @@ __init int intel_pmu_init(void) | |||
3275 | hw_cache_extra_regs[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = HSW_DEMAND_WRITE| | 3275 | hw_cache_extra_regs[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = HSW_DEMAND_WRITE| |
3276 | BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM; | 3276 | BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM; |
3277 | 3277 | ||
3278 | intel_pmu_lbr_init_snb(); | 3278 | intel_pmu_lbr_init_hsw(); |
3279 | 3279 | ||
3280 | x86_pmu.event_constraints = intel_bdw_event_constraints; | 3280 | x86_pmu.event_constraints = intel_bdw_event_constraints; |
3281 | x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints; | 3281 | x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints; |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index ca69ea56c712..813f75d71175 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c | |||
@@ -558,6 +558,8 @@ struct event_constraint intel_core2_pebs_event_constraints[] = { | |||
558 | INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */ | 558 | INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */ |
559 | INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */ | 559 | INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */ |
560 | INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */ | 560 | INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */ |
561 | /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */ | ||
562 | INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01), | ||
561 | EVENT_CONSTRAINT_END | 563 | EVENT_CONSTRAINT_END |
562 | }; | 564 | }; |
563 | 565 | ||
@@ -565,6 +567,8 @@ struct event_constraint intel_atom_pebs_event_constraints[] = { | |||
565 | INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */ | 567 | INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */ |
566 | INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */ | 568 | INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */ |
567 | INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */ | 569 | INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */ |
570 | /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */ | ||
571 | INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01), | ||
568 | EVENT_CONSTRAINT_END | 572 | EVENT_CONSTRAINT_END |
569 | }; | 573 | }; |
570 | 574 | ||
@@ -588,6 +592,8 @@ struct event_constraint intel_nehalem_pebs_event_constraints[] = { | |||
588 | INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */ | 592 | INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */ |
589 | INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */ | 593 | INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */ |
590 | INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */ | 594 | INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */ |
595 | /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */ | ||
596 | INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f), | ||
591 | EVENT_CONSTRAINT_END | 597 | EVENT_CONSTRAINT_END |
592 | }; | 598 | }; |
593 | 599 | ||
@@ -603,6 +609,8 @@ struct event_constraint intel_westmere_pebs_event_constraints[] = { | |||
603 | INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */ | 609 | INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */ |
604 | INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */ | 610 | INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */ |
605 | INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */ | 611 | INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */ |
612 | /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */ | ||
613 | INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f), | ||
606 | EVENT_CONSTRAINT_END | 614 | EVENT_CONSTRAINT_END |
607 | }; | 615 | }; |
608 | 616 | ||
diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c index f2770641c0fd..ffe666c2c6b5 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_pt.c +++ b/arch/x86/kernel/cpu/perf_event_intel_pt.c | |||
@@ -988,39 +988,36 @@ static int pt_event_add(struct perf_event *event, int mode) | |||
988 | int ret = -EBUSY; | 988 | int ret = -EBUSY; |
989 | 989 | ||
990 | if (pt->handle.event) | 990 | if (pt->handle.event) |
991 | goto out; | 991 | goto fail; |
992 | 992 | ||
993 | buf = perf_aux_output_begin(&pt->handle, event); | 993 | buf = perf_aux_output_begin(&pt->handle, event); |
994 | if (!buf) { | 994 | ret = -EINVAL; |
995 | ret = -EINVAL; | 995 | if (!buf) |
996 | goto out; | 996 | goto fail_stop; |
997 | } | ||
998 | 997 | ||
999 | pt_buffer_reset_offsets(buf, pt->handle.head); | 998 | pt_buffer_reset_offsets(buf, pt->handle.head); |
1000 | if (!buf->snapshot) { | 999 | if (!buf->snapshot) { |
1001 | ret = pt_buffer_reset_markers(buf, &pt->handle); | 1000 | ret = pt_buffer_reset_markers(buf, &pt->handle); |
1002 | if (ret) { | 1001 | if (ret) |
1003 | perf_aux_output_end(&pt->handle, 0, true); | 1002 | goto fail_end_stop; |
1004 | goto out; | ||
1005 | } | ||
1006 | } | 1003 | } |
1007 | 1004 | ||
1008 | if (mode & PERF_EF_START) { | 1005 | if (mode & PERF_EF_START) { |
1009 | pt_event_start(event, 0); | 1006 | pt_event_start(event, 0); |
1010 | if (hwc->state == PERF_HES_STOPPED) { | 1007 | ret = -EBUSY; |
1011 | pt_event_del(event, 0); | 1008 | if (hwc->state == PERF_HES_STOPPED) |
1012 | ret = -EBUSY; | 1009 | goto fail_end_stop; |
1013 | } | ||
1014 | } else { | 1010 | } else { |
1015 | hwc->state = PERF_HES_STOPPED; | 1011 | hwc->state = PERF_HES_STOPPED; |
1016 | } | 1012 | } |
1017 | 1013 | ||
1018 | ret = 0; | 1014 | return 0; |
1019 | out: | ||
1020 | |||
1021 | if (ret) | ||
1022 | hwc->state = PERF_HES_STOPPED; | ||
1023 | 1015 | ||
1016 | fail_end_stop: | ||
1017 | perf_aux_output_end(&pt->handle, 0, true); | ||
1018 | fail_stop: | ||
1019 | hwc->state = PERF_HES_STOPPED; | ||
1020 | fail: | ||
1024 | return ret; | 1021 | return ret; |
1025 | } | 1022 | } |
1026 | 1023 | ||
diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c index c4bb8b8e5017..999289b94025 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c +++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c | |||
@@ -62,6 +62,14 @@ | |||
62 | #define RAPL_IDX_PP1_NRG_STAT 3 /* gpu */ | 62 | #define RAPL_IDX_PP1_NRG_STAT 3 /* gpu */ |
63 | #define INTEL_RAPL_PP1 0x4 /* pseudo-encoding */ | 63 | #define INTEL_RAPL_PP1 0x4 /* pseudo-encoding */ |
64 | 64 | ||
65 | #define NR_RAPL_DOMAINS 0x4 | ||
66 | static const char *rapl_domain_names[NR_RAPL_DOMAINS] __initconst = { | ||
67 | "pp0-core", | ||
68 | "package", | ||
69 | "dram", | ||
70 | "pp1-gpu", | ||
71 | }; | ||
72 | |||
65 | /* Clients have PP0, PKG */ | 73 | /* Clients have PP0, PKG */ |
66 | #define RAPL_IDX_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\ | 74 | #define RAPL_IDX_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\ |
67 | 1<<RAPL_IDX_PKG_NRG_STAT|\ | 75 | 1<<RAPL_IDX_PKG_NRG_STAT|\ |
@@ -112,7 +120,6 @@ static struct perf_pmu_events_attr event_attr_##v = { \ | |||
112 | 120 | ||
113 | struct rapl_pmu { | 121 | struct rapl_pmu { |
114 | spinlock_t lock; | 122 | spinlock_t lock; |
115 | int hw_unit; /* 1/2^hw_unit Joule */ | ||
116 | int n_active; /* number of active events */ | 123 | int n_active; /* number of active events */ |
117 | struct list_head active_list; | 124 | struct list_head active_list; |
118 | struct pmu *pmu; /* pointer to rapl_pmu_class */ | 125 | struct pmu *pmu; /* pointer to rapl_pmu_class */ |
@@ -120,6 +127,7 @@ struct rapl_pmu { | |||
120 | struct hrtimer hrtimer; | 127 | struct hrtimer hrtimer; |
121 | }; | 128 | }; |
122 | 129 | ||
130 | static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly; /* 1/2^hw_unit Joule */ | ||
123 | static struct pmu rapl_pmu_class; | 131 | static struct pmu rapl_pmu_class; |
124 | static cpumask_t rapl_cpu_mask; | 132 | static cpumask_t rapl_cpu_mask; |
125 | static int rapl_cntr_mask; | 133 | static int rapl_cntr_mask; |
@@ -127,6 +135,7 @@ static int rapl_cntr_mask; | |||
127 | static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu); | 135 | static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu); |
128 | static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu_to_free); | 136 | static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu_to_free); |
129 | 137 | ||
138 | static struct x86_pmu_quirk *rapl_quirks; | ||
130 | static inline u64 rapl_read_counter(struct perf_event *event) | 139 | static inline u64 rapl_read_counter(struct perf_event *event) |
131 | { | 140 | { |
132 | u64 raw; | 141 | u64 raw; |
@@ -134,15 +143,28 @@ static inline u64 rapl_read_counter(struct perf_event *event) | |||
134 | return raw; | 143 | return raw; |
135 | } | 144 | } |
136 | 145 | ||
137 | static inline u64 rapl_scale(u64 v) | 146 | #define rapl_add_quirk(func_) \ |
147 | do { \ | ||
148 | static struct x86_pmu_quirk __quirk __initdata = { \ | ||
149 | .func = func_, \ | ||
150 | }; \ | ||
151 | __quirk.next = rapl_quirks; \ | ||
152 | rapl_quirks = &__quirk; \ | ||
153 | } while (0) | ||
154 | |||
155 | static inline u64 rapl_scale(u64 v, int cfg) | ||
138 | { | 156 | { |
157 | if (cfg > NR_RAPL_DOMAINS) { | ||
158 | pr_warn("invalid domain %d, failed to scale data\n", cfg); | ||
159 | return v; | ||
160 | } | ||
139 | /* | 161 | /* |
140 | * scale delta to smallest unit (1/2^32) | 162 | * scale delta to smallest unit (1/2^32) |
141 | * users must then scale back: count * 1/(1e9*2^32) to get Joules | 163 | * users must then scale back: count * 1/(1e9*2^32) to get Joules |
142 | * or use ldexp(count, -32). | 164 | * or use ldexp(count, -32). |
143 | * Watts = Joules/Time delta | 165 | * Watts = Joules/Time delta |
144 | */ | 166 | */ |
145 | return v << (32 - __this_cpu_read(rapl_pmu)->hw_unit); | 167 | return v << (32 - rapl_hw_unit[cfg - 1]); |
146 | } | 168 | } |
147 | 169 | ||
148 | static u64 rapl_event_update(struct perf_event *event) | 170 | static u64 rapl_event_update(struct perf_event *event) |
@@ -173,7 +195,7 @@ again: | |||
173 | delta = (new_raw_count << shift) - (prev_raw_count << shift); | 195 | delta = (new_raw_count << shift) - (prev_raw_count << shift); |
174 | delta >>= shift; | 196 | delta >>= shift; |
175 | 197 | ||
176 | sdelta = rapl_scale(delta); | 198 | sdelta = rapl_scale(delta, event->hw.config); |
177 | 199 | ||
178 | local64_add(sdelta, &event->count); | 200 | local64_add(sdelta, &event->count); |
179 | 201 | ||
@@ -546,12 +568,22 @@ static void rapl_cpu_init(int cpu) | |||
546 | cpumask_set_cpu(cpu, &rapl_cpu_mask); | 568 | cpumask_set_cpu(cpu, &rapl_cpu_mask); |
547 | } | 569 | } |
548 | 570 | ||
571 | static __init void rapl_hsw_server_quirk(void) | ||
572 | { | ||
573 | /* | ||
574 | * DRAM domain on HSW server has fixed energy unit which can be | ||
575 | * different than the unit from power unit MSR. | ||
576 | * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, V2 | ||
577 | * of 2. Datasheet, September 2014, Reference Number: 330784-001 " | ||
578 | */ | ||
579 | rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16; | ||
580 | } | ||
581 | |||
549 | static int rapl_cpu_prepare(int cpu) | 582 | static int rapl_cpu_prepare(int cpu) |
550 | { | 583 | { |
551 | struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu); | 584 | struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu); |
552 | int phys_id = topology_physical_package_id(cpu); | 585 | int phys_id = topology_physical_package_id(cpu); |
553 | u64 ms; | 586 | u64 ms; |
554 | u64 msr_rapl_power_unit_bits; | ||
555 | 587 | ||
556 | if (pmu) | 588 | if (pmu) |
557 | return 0; | 589 | return 0; |
@@ -559,24 +591,13 @@ static int rapl_cpu_prepare(int cpu) | |||
559 | if (phys_id < 0) | 591 | if (phys_id < 0) |
560 | return -1; | 592 | return -1; |
561 | 593 | ||
562 | /* protect rdmsrl() to handle virtualization */ | ||
563 | if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits)) | ||
564 | return -1; | ||
565 | |||
566 | pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu)); | 594 | pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu)); |
567 | if (!pmu) | 595 | if (!pmu) |
568 | return -1; | 596 | return -1; |
569 | |||
570 | spin_lock_init(&pmu->lock); | 597 | spin_lock_init(&pmu->lock); |
571 | 598 | ||
572 | INIT_LIST_HEAD(&pmu->active_list); | 599 | INIT_LIST_HEAD(&pmu->active_list); |
573 | 600 | ||
574 | /* | ||
575 | * grab power unit as: 1/2^unit Joules | ||
576 | * | ||
577 | * we cache in local PMU instance | ||
578 | */ | ||
579 | pmu->hw_unit = (msr_rapl_power_unit_bits >> 8) & 0x1FULL; | ||
580 | pmu->pmu = &rapl_pmu_class; | 601 | pmu->pmu = &rapl_pmu_class; |
581 | 602 | ||
582 | /* | 603 | /* |
@@ -586,8 +607,8 @@ static int rapl_cpu_prepare(int cpu) | |||
586 | * divide interval by 2 to avoid lockstep (2 * 100) | 607 | * divide interval by 2 to avoid lockstep (2 * 100) |
587 | * if hw unit is 32, then we use 2 ms 1/200/2 | 608 | * if hw unit is 32, then we use 2 ms 1/200/2 |
588 | */ | 609 | */ |
589 | if (pmu->hw_unit < 32) | 610 | if (rapl_hw_unit[0] < 32) |
590 | ms = (1000 / (2 * 100)) * (1ULL << (32 - pmu->hw_unit - 1)); | 611 | ms = (1000 / (2 * 100)) * (1ULL << (32 - rapl_hw_unit[0] - 1)); |
591 | else | 612 | else |
592 | ms = 2; | 613 | ms = 2; |
593 | 614 | ||
@@ -655,6 +676,20 @@ static int rapl_cpu_notifier(struct notifier_block *self, | |||
655 | return NOTIFY_OK; | 676 | return NOTIFY_OK; |
656 | } | 677 | } |
657 | 678 | ||
679 | static int rapl_check_hw_unit(void) | ||
680 | { | ||
681 | u64 msr_rapl_power_unit_bits; | ||
682 | int i; | ||
683 | |||
684 | /* protect rdmsrl() to handle virtualization */ | ||
685 | if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits)) | ||
686 | return -1; | ||
687 | for (i = 0; i < NR_RAPL_DOMAINS; i++) | ||
688 | rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL; | ||
689 | |||
690 | return 0; | ||
691 | } | ||
692 | |||
658 | static const struct x86_cpu_id rapl_cpu_match[] = { | 693 | static const struct x86_cpu_id rapl_cpu_match[] = { |
659 | [0] = { .vendor = X86_VENDOR_INTEL, .family = 6 }, | 694 | [0] = { .vendor = X86_VENDOR_INTEL, .family = 6 }, |
660 | [1] = {}, | 695 | [1] = {}, |
@@ -664,6 +699,8 @@ static int __init rapl_pmu_init(void) | |||
664 | { | 699 | { |
665 | struct rapl_pmu *pmu; | 700 | struct rapl_pmu *pmu; |
666 | int cpu, ret; | 701 | int cpu, ret; |
702 | struct x86_pmu_quirk *quirk; | ||
703 | int i; | ||
667 | 704 | ||
668 | /* | 705 | /* |
669 | * check for Intel processor family 6 | 706 | * check for Intel processor family 6 |
@@ -678,6 +715,11 @@ static int __init rapl_pmu_init(void) | |||
678 | rapl_cntr_mask = RAPL_IDX_CLN; | 715 | rapl_cntr_mask = RAPL_IDX_CLN; |
679 | rapl_pmu_events_group.attrs = rapl_events_cln_attr; | 716 | rapl_pmu_events_group.attrs = rapl_events_cln_attr; |
680 | break; | 717 | break; |
718 | case 63: /* Haswell-Server */ | ||
719 | rapl_add_quirk(rapl_hsw_server_quirk); | ||
720 | rapl_cntr_mask = RAPL_IDX_SRV; | ||
721 | rapl_pmu_events_group.attrs = rapl_events_srv_attr; | ||
722 | break; | ||
681 | case 60: /* Haswell */ | 723 | case 60: /* Haswell */ |
682 | case 69: /* Haswell-Celeron */ | 724 | case 69: /* Haswell-Celeron */ |
683 | rapl_cntr_mask = RAPL_IDX_HSW; | 725 | rapl_cntr_mask = RAPL_IDX_HSW; |
@@ -693,7 +735,13 @@ static int __init rapl_pmu_init(void) | |||
693 | /* unsupported */ | 735 | /* unsupported */ |
694 | return 0; | 736 | return 0; |
695 | } | 737 | } |
738 | ret = rapl_check_hw_unit(); | ||
739 | if (ret) | ||
740 | return ret; | ||
696 | 741 | ||
742 | /* run cpu model quirks */ | ||
743 | for (quirk = rapl_quirks; quirk; quirk = quirk->next) | ||
744 | quirk->func(); | ||
697 | cpu_notifier_register_begin(); | 745 | cpu_notifier_register_begin(); |
698 | 746 | ||
699 | for_each_online_cpu(cpu) { | 747 | for_each_online_cpu(cpu) { |
@@ -714,14 +762,18 @@ static int __init rapl_pmu_init(void) | |||
714 | 762 | ||
715 | pmu = __this_cpu_read(rapl_pmu); | 763 | pmu = __this_cpu_read(rapl_pmu); |
716 | 764 | ||
717 | pr_info("RAPL PMU detected, hw unit 2^-%d Joules," | 765 | pr_info("RAPL PMU detected," |
718 | " API unit is 2^-32 Joules," | 766 | " API unit is 2^-32 Joules," |
719 | " %d fixed counters" | 767 | " %d fixed counters" |
720 | " %llu ms ovfl timer\n", | 768 | " %llu ms ovfl timer\n", |
721 | pmu->hw_unit, | ||
722 | hweight32(rapl_cntr_mask), | 769 | hweight32(rapl_cntr_mask), |
723 | ktime_to_ms(pmu->timer_interval)); | 770 | ktime_to_ms(pmu->timer_interval)); |
724 | 771 | for (i = 0; i < NR_RAPL_DOMAINS; i++) { | |
772 | if (rapl_cntr_mask & (1 << i)) { | ||
773 | pr_info("hw unit of domain %s 2^-%d Joules\n", | ||
774 | rapl_domain_names[i], rapl_hw_unit[i]); | ||
775 | } | ||
776 | } | ||
725 | out: | 777 | out: |
726 | cpu_notifier_register_done(); | 778 | cpu_notifier_register_done(); |
727 | 779 | ||
diff --git a/include/trace/events/filemap.h b/include/trace/events/filemap.h index 0421f49a20f7..42febb6bc1d5 100644 --- a/include/trace/events/filemap.h +++ b/include/trace/events/filemap.h | |||
@@ -18,14 +18,14 @@ DECLARE_EVENT_CLASS(mm_filemap_op_page_cache, | |||
18 | TP_ARGS(page), | 18 | TP_ARGS(page), |
19 | 19 | ||
20 | TP_STRUCT__entry( | 20 | TP_STRUCT__entry( |
21 | __field(struct page *, page) | 21 | __field(unsigned long, pfn) |
22 | __field(unsigned long, i_ino) | 22 | __field(unsigned long, i_ino) |
23 | __field(unsigned long, index) | 23 | __field(unsigned long, index) |
24 | __field(dev_t, s_dev) | 24 | __field(dev_t, s_dev) |
25 | ), | 25 | ), |
26 | 26 | ||
27 | TP_fast_assign( | 27 | TP_fast_assign( |
28 | __entry->page = page; | 28 | __entry->pfn = page_to_pfn(page); |
29 | __entry->i_ino = page->mapping->host->i_ino; | 29 | __entry->i_ino = page->mapping->host->i_ino; |
30 | __entry->index = page->index; | 30 | __entry->index = page->index; |
31 | if (page->mapping->host->i_sb) | 31 | if (page->mapping->host->i_sb) |
@@ -37,8 +37,8 @@ DECLARE_EVENT_CLASS(mm_filemap_op_page_cache, | |||
37 | TP_printk("dev %d:%d ino %lx page=%p pfn=%lu ofs=%lu", | 37 | TP_printk("dev %d:%d ino %lx page=%p pfn=%lu ofs=%lu", |
38 | MAJOR(__entry->s_dev), MINOR(__entry->s_dev), | 38 | MAJOR(__entry->s_dev), MINOR(__entry->s_dev), |
39 | __entry->i_ino, | 39 | __entry->i_ino, |
40 | __entry->page, | 40 | pfn_to_page(__entry->pfn), |
41 | page_to_pfn(__entry->page), | 41 | __entry->pfn, |
42 | __entry->index << PAGE_SHIFT) | 42 | __entry->index << PAGE_SHIFT) |
43 | ); | 43 | ); |
44 | 44 | ||
diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index 4ad10baecd4d..81ea59812117 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h | |||
@@ -154,18 +154,18 @@ TRACE_EVENT(mm_page_free, | |||
154 | TP_ARGS(page, order), | 154 | TP_ARGS(page, order), |
155 | 155 | ||
156 | TP_STRUCT__entry( | 156 | TP_STRUCT__entry( |
157 | __field( struct page *, page ) | 157 | __field( unsigned long, pfn ) |
158 | __field( unsigned int, order ) | 158 | __field( unsigned int, order ) |
159 | ), | 159 | ), |
160 | 160 | ||
161 | TP_fast_assign( | 161 | TP_fast_assign( |
162 | __entry->page = page; | 162 | __entry->pfn = page_to_pfn(page); |
163 | __entry->order = order; | 163 | __entry->order = order; |
164 | ), | 164 | ), |
165 | 165 | ||
166 | TP_printk("page=%p pfn=%lu order=%d", | 166 | TP_printk("page=%p pfn=%lu order=%d", |
167 | __entry->page, | 167 | pfn_to_page(__entry->pfn), |
168 | page_to_pfn(__entry->page), | 168 | __entry->pfn, |
169 | __entry->order) | 169 | __entry->order) |
170 | ); | 170 | ); |
171 | 171 | ||
@@ -176,18 +176,18 @@ TRACE_EVENT(mm_page_free_batched, | |||
176 | TP_ARGS(page, cold), | 176 | TP_ARGS(page, cold), |
177 | 177 | ||
178 | TP_STRUCT__entry( | 178 | TP_STRUCT__entry( |
179 | __field( struct page *, page ) | 179 | __field( unsigned long, pfn ) |
180 | __field( int, cold ) | 180 | __field( int, cold ) |
181 | ), | 181 | ), |
182 | 182 | ||
183 | TP_fast_assign( | 183 | TP_fast_assign( |
184 | __entry->page = page; | 184 | __entry->pfn = page_to_pfn(page); |
185 | __entry->cold = cold; | 185 | __entry->cold = cold; |
186 | ), | 186 | ), |
187 | 187 | ||
188 | TP_printk("page=%p pfn=%lu order=0 cold=%d", | 188 | TP_printk("page=%p pfn=%lu order=0 cold=%d", |
189 | __entry->page, | 189 | pfn_to_page(__entry->pfn), |
190 | page_to_pfn(__entry->page), | 190 | __entry->pfn, |
191 | __entry->cold) | 191 | __entry->cold) |
192 | ); | 192 | ); |
193 | 193 | ||
@@ -199,22 +199,22 @@ TRACE_EVENT(mm_page_alloc, | |||
199 | TP_ARGS(page, order, gfp_flags, migratetype), | 199 | TP_ARGS(page, order, gfp_flags, migratetype), |
200 | 200 | ||
201 | TP_STRUCT__entry( | 201 | TP_STRUCT__entry( |
202 | __field( struct page *, page ) | 202 | __field( unsigned long, pfn ) |
203 | __field( unsigned int, order ) | 203 | __field( unsigned int, order ) |
204 | __field( gfp_t, gfp_flags ) | 204 | __field( gfp_t, gfp_flags ) |
205 | __field( int, migratetype ) | 205 | __field( int, migratetype ) |
206 | ), | 206 | ), |
207 | 207 | ||
208 | TP_fast_assign( | 208 | TP_fast_assign( |
209 | __entry->page = page; | 209 | __entry->pfn = page ? page_to_pfn(page) : -1UL; |
210 | __entry->order = order; | 210 | __entry->order = order; |
211 | __entry->gfp_flags = gfp_flags; | 211 | __entry->gfp_flags = gfp_flags; |
212 | __entry->migratetype = migratetype; | 212 | __entry->migratetype = migratetype; |
213 | ), | 213 | ), |
214 | 214 | ||
215 | TP_printk("page=%p pfn=%lu order=%d migratetype=%d gfp_flags=%s", | 215 | TP_printk("page=%p pfn=%lu order=%d migratetype=%d gfp_flags=%s", |
216 | __entry->page, | 216 | __entry->pfn != -1UL ? pfn_to_page(__entry->pfn) : NULL, |
217 | __entry->page ? page_to_pfn(__entry->page) : 0, | 217 | __entry->pfn != -1UL ? __entry->pfn : 0, |
218 | __entry->order, | 218 | __entry->order, |
219 | __entry->migratetype, | 219 | __entry->migratetype, |
220 | show_gfp_flags(__entry->gfp_flags)) | 220 | show_gfp_flags(__entry->gfp_flags)) |
@@ -227,20 +227,20 @@ DECLARE_EVENT_CLASS(mm_page, | |||
227 | TP_ARGS(page, order, migratetype), | 227 | TP_ARGS(page, order, migratetype), |
228 | 228 | ||
229 | TP_STRUCT__entry( | 229 | TP_STRUCT__entry( |
230 | __field( struct page *, page ) | 230 | __field( unsigned long, pfn ) |
231 | __field( unsigned int, order ) | 231 | __field( unsigned int, order ) |
232 | __field( int, migratetype ) | 232 | __field( int, migratetype ) |
233 | ), | 233 | ), |
234 | 234 | ||
235 | TP_fast_assign( | 235 | TP_fast_assign( |
236 | __entry->page = page; | 236 | __entry->pfn = page ? page_to_pfn(page) : -1UL; |
237 | __entry->order = order; | 237 | __entry->order = order; |
238 | __entry->migratetype = migratetype; | 238 | __entry->migratetype = migratetype; |
239 | ), | 239 | ), |
240 | 240 | ||
241 | TP_printk("page=%p pfn=%lu order=%u migratetype=%d percpu_refill=%d", | 241 | TP_printk("page=%p pfn=%lu order=%u migratetype=%d percpu_refill=%d", |
242 | __entry->page, | 242 | __entry->pfn != -1UL ? pfn_to_page(__entry->pfn) : NULL, |
243 | __entry->page ? page_to_pfn(__entry->page) : 0, | 243 | __entry->pfn != -1UL ? __entry->pfn : 0, |
244 | __entry->order, | 244 | __entry->order, |
245 | __entry->migratetype, | 245 | __entry->migratetype, |
246 | __entry->order == 0) | 246 | __entry->order == 0) |
@@ -260,7 +260,7 @@ DEFINE_EVENT_PRINT(mm_page, mm_page_pcpu_drain, | |||
260 | TP_ARGS(page, order, migratetype), | 260 | TP_ARGS(page, order, migratetype), |
261 | 261 | ||
262 | TP_printk("page=%p pfn=%lu order=%d migratetype=%d", | 262 | TP_printk("page=%p pfn=%lu order=%d migratetype=%d", |
263 | __entry->page, page_to_pfn(__entry->page), | 263 | pfn_to_page(__entry->pfn), __entry->pfn, |
264 | __entry->order, __entry->migratetype) | 264 | __entry->order, __entry->migratetype) |
265 | ); | 265 | ); |
266 | 266 | ||
@@ -275,7 +275,7 @@ TRACE_EVENT(mm_page_alloc_extfrag, | |||
275 | alloc_migratetype, fallback_migratetype), | 275 | alloc_migratetype, fallback_migratetype), |
276 | 276 | ||
277 | TP_STRUCT__entry( | 277 | TP_STRUCT__entry( |
278 | __field( struct page *, page ) | 278 | __field( unsigned long, pfn ) |
279 | __field( int, alloc_order ) | 279 | __field( int, alloc_order ) |
280 | __field( int, fallback_order ) | 280 | __field( int, fallback_order ) |
281 | __field( int, alloc_migratetype ) | 281 | __field( int, alloc_migratetype ) |
@@ -284,7 +284,7 @@ TRACE_EVENT(mm_page_alloc_extfrag, | |||
284 | ), | 284 | ), |
285 | 285 | ||
286 | TP_fast_assign( | 286 | TP_fast_assign( |
287 | __entry->page = page; | 287 | __entry->pfn = page_to_pfn(page); |
288 | __entry->alloc_order = alloc_order; | 288 | __entry->alloc_order = alloc_order; |
289 | __entry->fallback_order = fallback_order; | 289 | __entry->fallback_order = fallback_order; |
290 | __entry->alloc_migratetype = alloc_migratetype; | 290 | __entry->alloc_migratetype = alloc_migratetype; |
@@ -294,8 +294,8 @@ TRACE_EVENT(mm_page_alloc_extfrag, | |||
294 | ), | 294 | ), |
295 | 295 | ||
296 | TP_printk("page=%p pfn=%lu alloc_order=%d fallback_order=%d pageblock_order=%d alloc_migratetype=%d fallback_migratetype=%d fragmenting=%d change_ownership=%d", | 296 | TP_printk("page=%p pfn=%lu alloc_order=%d fallback_order=%d pageblock_order=%d alloc_migratetype=%d fallback_migratetype=%d fragmenting=%d change_ownership=%d", |
297 | __entry->page, | 297 | pfn_to_page(__entry->pfn), |
298 | page_to_pfn(__entry->page), | 298 | __entry->pfn, |
299 | __entry->alloc_order, | 299 | __entry->alloc_order, |
300 | __entry->fallback_order, | 300 | __entry->fallback_order, |
301 | pageblock_order, | 301 | pageblock_order, |
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index 69590b6ffc09..f66476b96264 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h | |||
@@ -336,18 +336,18 @@ TRACE_EVENT(mm_vmscan_writepage, | |||
336 | TP_ARGS(page, reclaim_flags), | 336 | TP_ARGS(page, reclaim_flags), |
337 | 337 | ||
338 | TP_STRUCT__entry( | 338 | TP_STRUCT__entry( |
339 | __field(struct page *, page) | 339 | __field(unsigned long, pfn) |
340 | __field(int, reclaim_flags) | 340 | __field(int, reclaim_flags) |
341 | ), | 341 | ), |
342 | 342 | ||
343 | TP_fast_assign( | 343 | TP_fast_assign( |
344 | __entry->page = page; | 344 | __entry->pfn = page_to_pfn(page); |
345 | __entry->reclaim_flags = reclaim_flags; | 345 | __entry->reclaim_flags = reclaim_flags; |
346 | ), | 346 | ), |
347 | 347 | ||
348 | TP_printk("page=%p pfn=%lu flags=%s", | 348 | TP_printk("page=%p pfn=%lu flags=%s", |
349 | __entry->page, | 349 | pfn_to_page(__entry->pfn), |
350 | page_to_pfn(__entry->page), | 350 | __entry->pfn, |
351 | show_reclaim_flags(__entry->reclaim_flags)) | 351 | show_reclaim_flags(__entry->reclaim_flags)) |
352 | ); | 352 | ); |
353 | 353 | ||
diff --git a/tools/perf/Documentation/perf-kmem.txt b/tools/perf/Documentation/perf-kmem.txt index 150253cc3c97..23219c65c16f 100644 --- a/tools/perf/Documentation/perf-kmem.txt +++ b/tools/perf/Documentation/perf-kmem.txt | |||
@@ -3,7 +3,7 @@ perf-kmem(1) | |||
3 | 3 | ||
4 | NAME | 4 | NAME |
5 | ---- | 5 | ---- |
6 | perf-kmem - Tool to trace/measure kernel memory(slab) properties | 6 | perf-kmem - Tool to trace/measure kernel memory properties |
7 | 7 | ||
8 | SYNOPSIS | 8 | SYNOPSIS |
9 | -------- | 9 | -------- |
@@ -46,6 +46,12 @@ OPTIONS | |||
46 | --raw-ip:: | 46 | --raw-ip:: |
47 | Print raw ip instead of symbol | 47 | Print raw ip instead of symbol |
48 | 48 | ||
49 | --slab:: | ||
50 | Analyze SLAB allocator events. | ||
51 | |||
52 | --page:: | ||
53 | Analyze page allocator events | ||
54 | |||
49 | SEE ALSO | 55 | SEE ALSO |
50 | -------- | 56 | -------- |
51 | linkperf:perf-record[1] | 57 | linkperf:perf-record[1] |
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 4ebf65c79434..63ea01349b6e 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c | |||
@@ -22,6 +22,11 @@ | |||
22 | #include <linux/string.h> | 22 | #include <linux/string.h> |
23 | #include <locale.h> | 23 | #include <locale.h> |
24 | 24 | ||
25 | static int kmem_slab; | ||
26 | static int kmem_page; | ||
27 | |||
28 | static long kmem_page_size; | ||
29 | |||
25 | struct alloc_stat; | 30 | struct alloc_stat; |
26 | typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *); | 31 | typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *); |
27 | 32 | ||
@@ -226,6 +231,244 @@ static int perf_evsel__process_free_event(struct perf_evsel *evsel, | |||
226 | return 0; | 231 | return 0; |
227 | } | 232 | } |
228 | 233 | ||
234 | static u64 total_page_alloc_bytes; | ||
235 | static u64 total_page_free_bytes; | ||
236 | static u64 total_page_nomatch_bytes; | ||
237 | static u64 total_page_fail_bytes; | ||
238 | static unsigned long nr_page_allocs; | ||
239 | static unsigned long nr_page_frees; | ||
240 | static unsigned long nr_page_fails; | ||
241 | static unsigned long nr_page_nomatch; | ||
242 | |||
243 | static bool use_pfn; | ||
244 | |||
245 | #define MAX_MIGRATE_TYPES 6 | ||
246 | #define MAX_PAGE_ORDER 11 | ||
247 | |||
248 | static int order_stats[MAX_PAGE_ORDER][MAX_MIGRATE_TYPES]; | ||
249 | |||
250 | struct page_stat { | ||
251 | struct rb_node node; | ||
252 | u64 page; | ||
253 | int order; | ||
254 | unsigned gfp_flags; | ||
255 | unsigned migrate_type; | ||
256 | u64 alloc_bytes; | ||
257 | u64 free_bytes; | ||
258 | int nr_alloc; | ||
259 | int nr_free; | ||
260 | }; | ||
261 | |||
262 | static struct rb_root page_tree; | ||
263 | static struct rb_root page_alloc_tree; | ||
264 | static struct rb_root page_alloc_sorted; | ||
265 | |||
266 | static struct page_stat *search_page(unsigned long page, bool create) | ||
267 | { | ||
268 | struct rb_node **node = &page_tree.rb_node; | ||
269 | struct rb_node *parent = NULL; | ||
270 | struct page_stat *data; | ||
271 | |||
272 | while (*node) { | ||
273 | s64 cmp; | ||
274 | |||
275 | parent = *node; | ||
276 | data = rb_entry(*node, struct page_stat, node); | ||
277 | |||
278 | cmp = data->page - page; | ||
279 | if (cmp < 0) | ||
280 | node = &parent->rb_left; | ||
281 | else if (cmp > 0) | ||
282 | node = &parent->rb_right; | ||
283 | else | ||
284 | return data; | ||
285 | } | ||
286 | |||
287 | if (!create) | ||
288 | return NULL; | ||
289 | |||
290 | data = zalloc(sizeof(*data)); | ||
291 | if (data != NULL) { | ||
292 | data->page = page; | ||
293 | |||
294 | rb_link_node(&data->node, parent, node); | ||
295 | rb_insert_color(&data->node, &page_tree); | ||
296 | } | ||
297 | |||
298 | return data; | ||
299 | } | ||
300 | |||
301 | static int page_stat_cmp(struct page_stat *a, struct page_stat *b) | ||
302 | { | ||
303 | if (a->page > b->page) | ||
304 | return -1; | ||
305 | if (a->page < b->page) | ||
306 | return 1; | ||
307 | if (a->order > b->order) | ||
308 | return -1; | ||
309 | if (a->order < b->order) | ||
310 | return 1; | ||
311 | if (a->migrate_type > b->migrate_type) | ||
312 | return -1; | ||
313 | if (a->migrate_type < b->migrate_type) | ||
314 | return 1; | ||
315 | if (a->gfp_flags > b->gfp_flags) | ||
316 | return -1; | ||
317 | if (a->gfp_flags < b->gfp_flags) | ||
318 | return 1; | ||
319 | return 0; | ||
320 | } | ||
321 | |||
322 | static struct page_stat *search_page_alloc_stat(struct page_stat *stat, bool create) | ||
323 | { | ||
324 | struct rb_node **node = &page_alloc_tree.rb_node; | ||
325 | struct rb_node *parent = NULL; | ||
326 | struct page_stat *data; | ||
327 | |||
328 | while (*node) { | ||
329 | s64 cmp; | ||
330 | |||
331 | parent = *node; | ||
332 | data = rb_entry(*node, struct page_stat, node); | ||
333 | |||
334 | cmp = page_stat_cmp(data, stat); | ||
335 | if (cmp < 0) | ||
336 | node = &parent->rb_left; | ||
337 | else if (cmp > 0) | ||
338 | node = &parent->rb_right; | ||
339 | else | ||
340 | return data; | ||
341 | } | ||
342 | |||
343 | if (!create) | ||
344 | return NULL; | ||
345 | |||
346 | data = zalloc(sizeof(*data)); | ||
347 | if (data != NULL) { | ||
348 | data->page = stat->page; | ||
349 | data->order = stat->order; | ||
350 | data->gfp_flags = stat->gfp_flags; | ||
351 | data->migrate_type = stat->migrate_type; | ||
352 | |||
353 | rb_link_node(&data->node, parent, node); | ||
354 | rb_insert_color(&data->node, &page_alloc_tree); | ||
355 | } | ||
356 | |||
357 | return data; | ||
358 | } | ||
359 | |||
360 | static bool valid_page(u64 pfn_or_page) | ||
361 | { | ||
362 | if (use_pfn && pfn_or_page == -1UL) | ||
363 | return false; | ||
364 | if (!use_pfn && pfn_or_page == 0) | ||
365 | return false; | ||
366 | return true; | ||
367 | } | ||
368 | |||
369 | static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel, | ||
370 | struct perf_sample *sample) | ||
371 | { | ||
372 | u64 page; | ||
373 | unsigned int order = perf_evsel__intval(evsel, sample, "order"); | ||
374 | unsigned int gfp_flags = perf_evsel__intval(evsel, sample, "gfp_flags"); | ||
375 | unsigned int migrate_type = perf_evsel__intval(evsel, sample, | ||
376 | "migratetype"); | ||
377 | u64 bytes = kmem_page_size << order; | ||
378 | struct page_stat *stat; | ||
379 | struct page_stat this = { | ||
380 | .order = order, | ||
381 | .gfp_flags = gfp_flags, | ||
382 | .migrate_type = migrate_type, | ||
383 | }; | ||
384 | |||
385 | if (use_pfn) | ||
386 | page = perf_evsel__intval(evsel, sample, "pfn"); | ||
387 | else | ||
388 | page = perf_evsel__intval(evsel, sample, "page"); | ||
389 | |||
390 | nr_page_allocs++; | ||
391 | total_page_alloc_bytes += bytes; | ||
392 | |||
393 | if (!valid_page(page)) { | ||
394 | nr_page_fails++; | ||
395 | total_page_fail_bytes += bytes; | ||
396 | |||
397 | return 0; | ||
398 | } | ||
399 | |||
400 | /* | ||
401 | * This is to find the current page (with correct gfp flags and | ||
402 | * migrate type) at free event. | ||
403 | */ | ||
404 | stat = search_page(page, true); | ||
405 | if (stat == NULL) | ||
406 | return -ENOMEM; | ||
407 | |||
408 | stat->order = order; | ||
409 | stat->gfp_flags = gfp_flags; | ||
410 | stat->migrate_type = migrate_type; | ||
411 | |||
412 | this.page = page; | ||
413 | stat = search_page_alloc_stat(&this, true); | ||
414 | if (stat == NULL) | ||
415 | return -ENOMEM; | ||
416 | |||
417 | stat->nr_alloc++; | ||
418 | stat->alloc_bytes += bytes; | ||
419 | |||
420 | order_stats[order][migrate_type]++; | ||
421 | |||
422 | return 0; | ||
423 | } | ||
424 | |||
425 | static int perf_evsel__process_page_free_event(struct perf_evsel *evsel, | ||
426 | struct perf_sample *sample) | ||
427 | { | ||
428 | u64 page; | ||
429 | unsigned int order = perf_evsel__intval(evsel, sample, "order"); | ||
430 | u64 bytes = kmem_page_size << order; | ||
431 | struct page_stat *stat; | ||
432 | struct page_stat this = { | ||
433 | .order = order, | ||
434 | }; | ||
435 | |||
436 | if (use_pfn) | ||
437 | page = perf_evsel__intval(evsel, sample, "pfn"); | ||
438 | else | ||
439 | page = perf_evsel__intval(evsel, sample, "page"); | ||
440 | |||
441 | nr_page_frees++; | ||
442 | total_page_free_bytes += bytes; | ||
443 | |||
444 | stat = search_page(page, false); | ||
445 | if (stat == NULL) { | ||
446 | pr_debug2("missing free at page %"PRIx64" (order: %d)\n", | ||
447 | page, order); | ||
448 | |||
449 | nr_page_nomatch++; | ||
450 | total_page_nomatch_bytes += bytes; | ||
451 | |||
452 | return 0; | ||
453 | } | ||
454 | |||
455 | this.page = page; | ||
456 | this.gfp_flags = stat->gfp_flags; | ||
457 | this.migrate_type = stat->migrate_type; | ||
458 | |||
459 | rb_erase(&stat->node, &page_tree); | ||
460 | free(stat); | ||
461 | |||
462 | stat = search_page_alloc_stat(&this, false); | ||
463 | if (stat == NULL) | ||
464 | return -ENOENT; | ||
465 | |||
466 | stat->nr_free++; | ||
467 | stat->free_bytes += bytes; | ||
468 | |||
469 | return 0; | ||
470 | } | ||
471 | |||
229 | typedef int (*tracepoint_handler)(struct perf_evsel *evsel, | 472 | typedef int (*tracepoint_handler)(struct perf_evsel *evsel, |
230 | struct perf_sample *sample); | 473 | struct perf_sample *sample); |
231 | 474 | ||
@@ -270,8 +513,9 @@ static double fragmentation(unsigned long n_req, unsigned long n_alloc) | |||
270 | return 100.0 - (100.0 * n_req / n_alloc); | 513 | return 100.0 - (100.0 * n_req / n_alloc); |
271 | } | 514 | } |
272 | 515 | ||
273 | static void __print_result(struct rb_root *root, struct perf_session *session, | 516 | static void __print_slab_result(struct rb_root *root, |
274 | int n_lines, int is_caller) | 517 | struct perf_session *session, |
518 | int n_lines, int is_caller) | ||
275 | { | 519 | { |
276 | struct rb_node *next; | 520 | struct rb_node *next; |
277 | struct machine *machine = &session->machines.host; | 521 | struct machine *machine = &session->machines.host; |
@@ -323,9 +567,56 @@ static void __print_result(struct rb_root *root, struct perf_session *session, | |||
323 | printf("%.105s\n", graph_dotted_line); | 567 | printf("%.105s\n", graph_dotted_line); |
324 | } | 568 | } |
325 | 569 | ||
326 | static void print_summary(void) | 570 | static const char * const migrate_type_str[] = { |
571 | "UNMOVABL", | ||
572 | "RECLAIM", | ||
573 | "MOVABLE", | ||
574 | "RESERVED", | ||
575 | "CMA/ISLT", | ||
576 | "UNKNOWN", | ||
577 | }; | ||
578 | |||
579 | static void __print_page_result(struct rb_root *root, | ||
580 | struct perf_session *session __maybe_unused, | ||
581 | int n_lines) | ||
582 | { | ||
583 | struct rb_node *next = rb_first(root); | ||
584 | const char *format; | ||
585 | |||
586 | printf("\n%.80s\n", graph_dotted_line); | ||
587 | printf(" %-16s | Total alloc (KB) | Hits | Order | Mig.type | GFP flags\n", | ||
588 | use_pfn ? "PFN" : "Page"); | ||
589 | printf("%.80s\n", graph_dotted_line); | ||
590 | |||
591 | if (use_pfn) | ||
592 | format = " %16llu | %'16llu | %'9d | %5d | %8s | %08lx\n"; | ||
593 | else | ||
594 | format = " %016llx | %'16llu | %'9d | %5d | %8s | %08lx\n"; | ||
595 | |||
596 | while (next && n_lines--) { | ||
597 | struct page_stat *data; | ||
598 | |||
599 | data = rb_entry(next, struct page_stat, node); | ||
600 | |||
601 | printf(format, (unsigned long long)data->page, | ||
602 | (unsigned long long)data->alloc_bytes / 1024, | ||
603 | data->nr_alloc, data->order, | ||
604 | migrate_type_str[data->migrate_type], | ||
605 | (unsigned long)data->gfp_flags); | ||
606 | |||
607 | next = rb_next(next); | ||
608 | } | ||
609 | |||
610 | if (n_lines == -1) | ||
611 | printf(" ... | ... | ... | ... | ... | ... \n"); | ||
612 | |||
613 | printf("%.80s\n", graph_dotted_line); | ||
614 | } | ||
615 | |||
616 | static void print_slab_summary(void) | ||
327 | { | 617 | { |
328 | printf("\nSUMMARY\n=======\n"); | 618 | printf("\nSUMMARY (SLAB allocator)"); |
619 | printf("\n========================\n"); | ||
329 | printf("Total bytes requested: %'lu\n", total_requested); | 620 | printf("Total bytes requested: %'lu\n", total_requested); |
330 | printf("Total bytes allocated: %'lu\n", total_allocated); | 621 | printf("Total bytes allocated: %'lu\n", total_allocated); |
331 | printf("Total bytes wasted on internal fragmentation: %'lu\n", | 622 | printf("Total bytes wasted on internal fragmentation: %'lu\n", |
@@ -335,13 +626,73 @@ static void print_summary(void) | |||
335 | printf("Cross CPU allocations: %'lu/%'lu\n", nr_cross_allocs, nr_allocs); | 626 | printf("Cross CPU allocations: %'lu/%'lu\n", nr_cross_allocs, nr_allocs); |
336 | } | 627 | } |
337 | 628 | ||
338 | static void print_result(struct perf_session *session) | 629 | static void print_page_summary(void) |
630 | { | ||
631 | int o, m; | ||
632 | u64 nr_alloc_freed = nr_page_frees - nr_page_nomatch; | ||
633 | u64 total_alloc_freed_bytes = total_page_free_bytes - total_page_nomatch_bytes; | ||
634 | |||
635 | printf("\nSUMMARY (page allocator)"); | ||
636 | printf("\n========================\n"); | ||
637 | printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total allocation requests", | ||
638 | nr_page_allocs, total_page_alloc_bytes / 1024); | ||
639 | printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total free requests", | ||
640 | nr_page_frees, total_page_free_bytes / 1024); | ||
641 | printf("\n"); | ||
642 | |||
643 | printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total alloc+freed requests", | ||
644 | nr_alloc_freed, (total_alloc_freed_bytes) / 1024); | ||
645 | printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total alloc-only requests", | ||
646 | nr_page_allocs - nr_alloc_freed, | ||
647 | (total_page_alloc_bytes - total_alloc_freed_bytes) / 1024); | ||
648 | printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total free-only requests", | ||
649 | nr_page_nomatch, total_page_nomatch_bytes / 1024); | ||
650 | printf("\n"); | ||
651 | |||
652 | printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total allocation failures", | ||
653 | nr_page_fails, total_page_fail_bytes / 1024); | ||
654 | printf("\n"); | ||
655 | |||
656 | printf("%5s %12s %12s %12s %12s %12s\n", "Order", "Unmovable", | ||
657 | "Reclaimable", "Movable", "Reserved", "CMA/Isolated"); | ||
658 | printf("%.5s %.12s %.12s %.12s %.12s %.12s\n", graph_dotted_line, | ||
659 | graph_dotted_line, graph_dotted_line, graph_dotted_line, | ||
660 | graph_dotted_line, graph_dotted_line); | ||
661 | |||
662 | for (o = 0; o < MAX_PAGE_ORDER; o++) { | ||
663 | printf("%5d", o); | ||
664 | for (m = 0; m < MAX_MIGRATE_TYPES - 1; m++) { | ||
665 | if (order_stats[o][m]) | ||
666 | printf(" %'12d", order_stats[o][m]); | ||
667 | else | ||
668 | printf(" %12c", '.'); | ||
669 | } | ||
670 | printf("\n"); | ||
671 | } | ||
672 | } | ||
673 | |||
674 | static void print_slab_result(struct perf_session *session) | ||
339 | { | 675 | { |
340 | if (caller_flag) | 676 | if (caller_flag) |
341 | __print_result(&root_caller_sorted, session, caller_lines, 1); | 677 | __print_slab_result(&root_caller_sorted, session, caller_lines, 1); |
678 | if (alloc_flag) | ||
679 | __print_slab_result(&root_alloc_sorted, session, alloc_lines, 0); | ||
680 | print_slab_summary(); | ||
681 | } | ||
682 | |||
683 | static void print_page_result(struct perf_session *session) | ||
684 | { | ||
342 | if (alloc_flag) | 685 | if (alloc_flag) |
343 | __print_result(&root_alloc_sorted, session, alloc_lines, 0); | 686 | __print_page_result(&page_alloc_sorted, session, alloc_lines); |
344 | print_summary(); | 687 | print_page_summary(); |
688 | } | ||
689 | |||
690 | static void print_result(struct perf_session *session) | ||
691 | { | ||
692 | if (kmem_slab) | ||
693 | print_slab_result(session); | ||
694 | if (kmem_page) | ||
695 | print_page_result(session); | ||
345 | } | 696 | } |
346 | 697 | ||
347 | struct sort_dimension { | 698 | struct sort_dimension { |
@@ -353,8 +704,8 @@ struct sort_dimension { | |||
353 | static LIST_HEAD(caller_sort); | 704 | static LIST_HEAD(caller_sort); |
354 | static LIST_HEAD(alloc_sort); | 705 | static LIST_HEAD(alloc_sort); |
355 | 706 | ||
356 | static void sort_insert(struct rb_root *root, struct alloc_stat *data, | 707 | static void sort_slab_insert(struct rb_root *root, struct alloc_stat *data, |
357 | struct list_head *sort_list) | 708 | struct list_head *sort_list) |
358 | { | 709 | { |
359 | struct rb_node **new = &(root->rb_node); | 710 | struct rb_node **new = &(root->rb_node); |
360 | struct rb_node *parent = NULL; | 711 | struct rb_node *parent = NULL; |
@@ -383,8 +734,8 @@ static void sort_insert(struct rb_root *root, struct alloc_stat *data, | |||
383 | rb_insert_color(&data->node, root); | 734 | rb_insert_color(&data->node, root); |
384 | } | 735 | } |
385 | 736 | ||
386 | static void __sort_result(struct rb_root *root, struct rb_root *root_sorted, | 737 | static void __sort_slab_result(struct rb_root *root, struct rb_root *root_sorted, |
387 | struct list_head *sort_list) | 738 | struct list_head *sort_list) |
388 | { | 739 | { |
389 | struct rb_node *node; | 740 | struct rb_node *node; |
390 | struct alloc_stat *data; | 741 | struct alloc_stat *data; |
@@ -396,26 +747,79 @@ static void __sort_result(struct rb_root *root, struct rb_root *root_sorted, | |||
396 | 747 | ||
397 | rb_erase(node, root); | 748 | rb_erase(node, root); |
398 | data = rb_entry(node, struct alloc_stat, node); | 749 | data = rb_entry(node, struct alloc_stat, node); |
399 | sort_insert(root_sorted, data, sort_list); | 750 | sort_slab_insert(root_sorted, data, sort_list); |
751 | } | ||
752 | } | ||
753 | |||
754 | static void sort_page_insert(struct rb_root *root, struct page_stat *data) | ||
755 | { | ||
756 | struct rb_node **new = &root->rb_node; | ||
757 | struct rb_node *parent = NULL; | ||
758 | |||
759 | while (*new) { | ||
760 | struct page_stat *this; | ||
761 | int cmp = 0; | ||
762 | |||
763 | this = rb_entry(*new, struct page_stat, node); | ||
764 | parent = *new; | ||
765 | |||
766 | /* TODO: support more sort key */ | ||
767 | cmp = data->alloc_bytes - this->alloc_bytes; | ||
768 | |||
769 | if (cmp > 0) | ||
770 | new = &parent->rb_left; | ||
771 | else | ||
772 | new = &parent->rb_right; | ||
773 | } | ||
774 | |||
775 | rb_link_node(&data->node, parent, new); | ||
776 | rb_insert_color(&data->node, root); | ||
777 | } | ||
778 | |||
779 | static void __sort_page_result(struct rb_root *root, struct rb_root *root_sorted) | ||
780 | { | ||
781 | struct rb_node *node; | ||
782 | struct page_stat *data; | ||
783 | |||
784 | for (;;) { | ||
785 | node = rb_first(root); | ||
786 | if (!node) | ||
787 | break; | ||
788 | |||
789 | rb_erase(node, root); | ||
790 | data = rb_entry(node, struct page_stat, node); | ||
791 | sort_page_insert(root_sorted, data); | ||
400 | } | 792 | } |
401 | } | 793 | } |
402 | 794 | ||
403 | static void sort_result(void) | 795 | static void sort_result(void) |
404 | { | 796 | { |
405 | __sort_result(&root_alloc_stat, &root_alloc_sorted, &alloc_sort); | 797 | if (kmem_slab) { |
406 | __sort_result(&root_caller_stat, &root_caller_sorted, &caller_sort); | 798 | __sort_slab_result(&root_alloc_stat, &root_alloc_sorted, |
799 | &alloc_sort); | ||
800 | __sort_slab_result(&root_caller_stat, &root_caller_sorted, | ||
801 | &caller_sort); | ||
802 | } | ||
803 | if (kmem_page) { | ||
804 | __sort_page_result(&page_alloc_tree, &page_alloc_sorted); | ||
805 | } | ||
407 | } | 806 | } |
408 | 807 | ||
409 | static int __cmd_kmem(struct perf_session *session) | 808 | static int __cmd_kmem(struct perf_session *session) |
410 | { | 809 | { |
411 | int err = -EINVAL; | 810 | int err = -EINVAL; |
811 | struct perf_evsel *evsel; | ||
412 | const struct perf_evsel_str_handler kmem_tracepoints[] = { | 812 | const struct perf_evsel_str_handler kmem_tracepoints[] = { |
813 | /* slab allocator */ | ||
413 | { "kmem:kmalloc", perf_evsel__process_alloc_event, }, | 814 | { "kmem:kmalloc", perf_evsel__process_alloc_event, }, |
414 | { "kmem:kmem_cache_alloc", perf_evsel__process_alloc_event, }, | 815 | { "kmem:kmem_cache_alloc", perf_evsel__process_alloc_event, }, |
415 | { "kmem:kmalloc_node", perf_evsel__process_alloc_node_event, }, | 816 | { "kmem:kmalloc_node", perf_evsel__process_alloc_node_event, }, |
416 | { "kmem:kmem_cache_alloc_node", perf_evsel__process_alloc_node_event, }, | 817 | { "kmem:kmem_cache_alloc_node", perf_evsel__process_alloc_node_event, }, |
417 | { "kmem:kfree", perf_evsel__process_free_event, }, | 818 | { "kmem:kfree", perf_evsel__process_free_event, }, |
418 | { "kmem:kmem_cache_free", perf_evsel__process_free_event, }, | 819 | { "kmem:kmem_cache_free", perf_evsel__process_free_event, }, |
820 | /* page allocator */ | ||
821 | { "kmem:mm_page_alloc", perf_evsel__process_page_alloc_event, }, | ||
822 | { "kmem:mm_page_free", perf_evsel__process_page_free_event, }, | ||
419 | }; | 823 | }; |
420 | 824 | ||
421 | if (!perf_session__has_traces(session, "kmem record")) | 825 | if (!perf_session__has_traces(session, "kmem record")) |
@@ -426,10 +830,20 @@ static int __cmd_kmem(struct perf_session *session) | |||
426 | goto out; | 830 | goto out; |
427 | } | 831 | } |
428 | 832 | ||
833 | evlist__for_each(session->evlist, evsel) { | ||
834 | if (!strcmp(perf_evsel__name(evsel), "kmem:mm_page_alloc") && | ||
835 | perf_evsel__field(evsel, "pfn")) { | ||
836 | use_pfn = true; | ||
837 | break; | ||
838 | } | ||
839 | } | ||
840 | |||
429 | setup_pager(); | 841 | setup_pager(); |
430 | err = perf_session__process_events(session); | 842 | err = perf_session__process_events(session); |
431 | if (err != 0) | 843 | if (err != 0) { |
844 | pr_err("error during process events: %d\n", err); | ||
432 | goto out; | 845 | goto out; |
846 | } | ||
433 | sort_result(); | 847 | sort_result(); |
434 | print_result(session); | 848 | print_result(session); |
435 | out: | 849 | out: |
@@ -612,6 +1026,22 @@ static int parse_alloc_opt(const struct option *opt __maybe_unused, | |||
612 | return 0; | 1026 | return 0; |
613 | } | 1027 | } |
614 | 1028 | ||
1029 | static int parse_slab_opt(const struct option *opt __maybe_unused, | ||
1030 | const char *arg __maybe_unused, | ||
1031 | int unset __maybe_unused) | ||
1032 | { | ||
1033 | kmem_slab = (kmem_page + 1); | ||
1034 | return 0; | ||
1035 | } | ||
1036 | |||
1037 | static int parse_page_opt(const struct option *opt __maybe_unused, | ||
1038 | const char *arg __maybe_unused, | ||
1039 | int unset __maybe_unused) | ||
1040 | { | ||
1041 | kmem_page = (kmem_slab + 1); | ||
1042 | return 0; | ||
1043 | } | ||
1044 | |||
615 | static int parse_line_opt(const struct option *opt __maybe_unused, | 1045 | static int parse_line_opt(const struct option *opt __maybe_unused, |
616 | const char *arg, int unset __maybe_unused) | 1046 | const char *arg, int unset __maybe_unused) |
617 | { | 1047 | { |
@@ -634,6 +1064,8 @@ static int __cmd_record(int argc, const char **argv) | |||
634 | { | 1064 | { |
635 | const char * const record_args[] = { | 1065 | const char * const record_args[] = { |
636 | "record", "-a", "-R", "-c", "1", | 1066 | "record", "-a", "-R", "-c", "1", |
1067 | }; | ||
1068 | const char * const slab_events[] = { | ||
637 | "-e", "kmem:kmalloc", | 1069 | "-e", "kmem:kmalloc", |
638 | "-e", "kmem:kmalloc_node", | 1070 | "-e", "kmem:kmalloc_node", |
639 | "-e", "kmem:kfree", | 1071 | "-e", "kmem:kfree", |
@@ -641,10 +1073,19 @@ static int __cmd_record(int argc, const char **argv) | |||
641 | "-e", "kmem:kmem_cache_alloc_node", | 1073 | "-e", "kmem:kmem_cache_alloc_node", |
642 | "-e", "kmem:kmem_cache_free", | 1074 | "-e", "kmem:kmem_cache_free", |
643 | }; | 1075 | }; |
1076 | const char * const page_events[] = { | ||
1077 | "-e", "kmem:mm_page_alloc", | ||
1078 | "-e", "kmem:mm_page_free", | ||
1079 | }; | ||
644 | unsigned int rec_argc, i, j; | 1080 | unsigned int rec_argc, i, j; |
645 | const char **rec_argv; | 1081 | const char **rec_argv; |
646 | 1082 | ||
647 | rec_argc = ARRAY_SIZE(record_args) + argc - 1; | 1083 | rec_argc = ARRAY_SIZE(record_args) + argc - 1; |
1084 | if (kmem_slab) | ||
1085 | rec_argc += ARRAY_SIZE(slab_events); | ||
1086 | if (kmem_page) | ||
1087 | rec_argc += ARRAY_SIZE(page_events); | ||
1088 | |||
648 | rec_argv = calloc(rec_argc + 1, sizeof(char *)); | 1089 | rec_argv = calloc(rec_argc + 1, sizeof(char *)); |
649 | 1090 | ||
650 | if (rec_argv == NULL) | 1091 | if (rec_argv == NULL) |
@@ -653,6 +1094,15 @@ static int __cmd_record(int argc, const char **argv) | |||
653 | for (i = 0; i < ARRAY_SIZE(record_args); i++) | 1094 | for (i = 0; i < ARRAY_SIZE(record_args); i++) |
654 | rec_argv[i] = strdup(record_args[i]); | 1095 | rec_argv[i] = strdup(record_args[i]); |
655 | 1096 | ||
1097 | if (kmem_slab) { | ||
1098 | for (j = 0; j < ARRAY_SIZE(slab_events); j++, i++) | ||
1099 | rec_argv[i] = strdup(slab_events[j]); | ||
1100 | } | ||
1101 | if (kmem_page) { | ||
1102 | for (j = 0; j < ARRAY_SIZE(page_events); j++, i++) | ||
1103 | rec_argv[i] = strdup(page_events[j]); | ||
1104 | } | ||
1105 | |||
656 | for (j = 1; j < (unsigned int)argc; j++, i++) | 1106 | for (j = 1; j < (unsigned int)argc; j++, i++) |
657 | rec_argv[i] = argv[j]; | 1107 | rec_argv[i] = argv[j]; |
658 | 1108 | ||
@@ -679,6 +1129,10 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused) | |||
679 | OPT_CALLBACK('l', "line", NULL, "num", "show n lines", parse_line_opt), | 1129 | OPT_CALLBACK('l', "line", NULL, "num", "show n lines", parse_line_opt), |
680 | OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"), | 1130 | OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"), |
681 | OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"), | 1131 | OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"), |
1132 | OPT_CALLBACK_NOOPT(0, "slab", NULL, NULL, "Analyze slab allocator", | ||
1133 | parse_slab_opt), | ||
1134 | OPT_CALLBACK_NOOPT(0, "page", NULL, NULL, "Analyze page allocator", | ||
1135 | parse_page_opt), | ||
682 | OPT_END() | 1136 | OPT_END() |
683 | }; | 1137 | }; |
684 | const char *const kmem_subcommands[] = { "record", "stat", NULL }; | 1138 | const char *const kmem_subcommands[] = { "record", "stat", NULL }; |
@@ -695,6 +1149,9 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused) | |||
695 | if (!argc) | 1149 | if (!argc) |
696 | usage_with_options(kmem_usage, kmem_options); | 1150 | usage_with_options(kmem_usage, kmem_options); |
697 | 1151 | ||
1152 | if (kmem_slab == 0 && kmem_page == 0) | ||
1153 | kmem_slab = 1; /* for backward compatibility */ | ||
1154 | |||
698 | if (!strncmp(argv[0], "rec", 3)) { | 1155 | if (!strncmp(argv[0], "rec", 3)) { |
699 | symbol__init(NULL); | 1156 | symbol__init(NULL); |
700 | return __cmd_record(argc, argv); | 1157 | return __cmd_record(argc, argv); |
@@ -706,6 +1163,17 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused) | |||
706 | if (session == NULL) | 1163 | if (session == NULL) |
707 | return -1; | 1164 | return -1; |
708 | 1165 | ||
1166 | if (kmem_page) { | ||
1167 | struct perf_evsel *evsel = perf_evlist__first(session->evlist); | ||
1168 | |||
1169 | if (evsel == NULL || evsel->tp_format == NULL) { | ||
1170 | pr_err("invalid event found.. aborting\n"); | ||
1171 | return -1; | ||
1172 | } | ||
1173 | |||
1174 | kmem_page_size = pevent_get_page_size(evsel->tp_format->pevent); | ||
1175 | } | ||
1176 | |||
709 | symbol__init(&session->header.env); | 1177 | symbol__init(&session->header.env); |
710 | 1178 | ||
711 | if (!strcmp(argv[0], "stat")) { | 1179 | if (!strcmp(argv[0], "stat")) { |
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 30545ce2c712..d8bb616ff57c 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c | |||
@@ -332,6 +332,7 @@ static int find_alternative_probe_point(struct debuginfo *dinfo, | |||
332 | else { | 332 | else { |
333 | result->offset += pp->offset; | 333 | result->offset += pp->offset; |
334 | result->line += pp->line; | 334 | result->line += pp->line; |
335 | result->retprobe = pp->retprobe; | ||
335 | ret = 0; | 336 | ret = 0; |
336 | } | 337 | } |
337 | 338 | ||
@@ -654,65 +655,6 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, | |||
654 | return ntevs; | 655 | return ntevs; |
655 | } | 656 | } |
656 | 657 | ||
657 | /* | ||
658 | * Find a src file from a DWARF tag path. Prepend optional source path prefix | ||
659 | * and chop off leading directories that do not exist. Result is passed back as | ||
660 | * a newly allocated path on success. | ||
661 | * Return 0 if file was found and readable, -errno otherwise. | ||
662 | */ | ||
663 | static int get_real_path(const char *raw_path, const char *comp_dir, | ||
664 | char **new_path) | ||
665 | { | ||
666 | const char *prefix = symbol_conf.source_prefix; | ||
667 | |||
668 | if (!prefix) { | ||
669 | if (raw_path[0] != '/' && comp_dir) | ||
670 | /* If not an absolute path, try to use comp_dir */ | ||
671 | prefix = comp_dir; | ||
672 | else { | ||
673 | if (access(raw_path, R_OK) == 0) { | ||
674 | *new_path = strdup(raw_path); | ||
675 | return *new_path ? 0 : -ENOMEM; | ||
676 | } else | ||
677 | return -errno; | ||
678 | } | ||
679 | } | ||
680 | |||
681 | *new_path = malloc((strlen(prefix) + strlen(raw_path) + 2)); | ||
682 | if (!*new_path) | ||
683 | return -ENOMEM; | ||
684 | |||
685 | for (;;) { | ||
686 | sprintf(*new_path, "%s/%s", prefix, raw_path); | ||
687 | |||
688 | if (access(*new_path, R_OK) == 0) | ||
689 | return 0; | ||
690 | |||
691 | if (!symbol_conf.source_prefix) { | ||
692 | /* In case of searching comp_dir, don't retry */ | ||
693 | zfree(new_path); | ||
694 | return -errno; | ||
695 | } | ||
696 | |||
697 | switch (errno) { | ||
698 | case ENAMETOOLONG: | ||
699 | case ENOENT: | ||
700 | case EROFS: | ||
701 | case EFAULT: | ||
702 | raw_path = strchr(++raw_path, '/'); | ||
703 | if (!raw_path) { | ||
704 | zfree(new_path); | ||
705 | return -ENOENT; | ||
706 | } | ||
707 | continue; | ||
708 | |||
709 | default: | ||
710 | zfree(new_path); | ||
711 | return -errno; | ||
712 | } | ||
713 | } | ||
714 | } | ||
715 | |||
716 | #define LINEBUF_SIZE 256 | 658 | #define LINEBUF_SIZE 256 |
717 | #define NR_ADDITIONAL_LINES 2 | 659 | #define NR_ADDITIONAL_LINES 2 |
718 | 660 | ||
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index e3074230f236..b5bf9d5efeaf 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c | |||
@@ -855,11 +855,22 @@ static int probe_point_lazy_walker(const char *fname, int lineno, | |||
855 | static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf) | 855 | static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf) |
856 | { | 856 | { |
857 | int ret = 0; | 857 | int ret = 0; |
858 | char *fpath; | ||
858 | 859 | ||
859 | if (intlist__empty(pf->lcache)) { | 860 | if (intlist__empty(pf->lcache)) { |
861 | const char *comp_dir; | ||
862 | |||
863 | comp_dir = cu_get_comp_dir(&pf->cu_die); | ||
864 | ret = get_real_path(pf->fname, comp_dir, &fpath); | ||
865 | if (ret < 0) { | ||
866 | pr_warning("Failed to find source file path.\n"); | ||
867 | return ret; | ||
868 | } | ||
869 | |||
860 | /* Matching lazy line pattern */ | 870 | /* Matching lazy line pattern */ |
861 | ret = find_lazy_match_lines(pf->lcache, pf->fname, | 871 | ret = find_lazy_match_lines(pf->lcache, fpath, |
862 | pf->pev->point.lazy_line); | 872 | pf->pev->point.lazy_line); |
873 | free(fpath); | ||
863 | if (ret <= 0) | 874 | if (ret <= 0) |
864 | return ret; | 875 | return ret; |
865 | } | 876 | } |
@@ -1055,7 +1066,7 @@ static int debuginfo__find_probes(struct debuginfo *dbg, | |||
1055 | if (pp->function) | 1066 | if (pp->function) |
1056 | ret = find_probe_point_by_func(pf); | 1067 | ret = find_probe_point_by_func(pf); |
1057 | else if (pp->lazy_line) | 1068 | else if (pp->lazy_line) |
1058 | ret = find_probe_point_lazy(NULL, pf); | 1069 | ret = find_probe_point_lazy(&pf->cu_die, pf); |
1059 | else { | 1070 | else { |
1060 | pf->lno = pp->line; | 1071 | pf->lno = pp->line; |
1061 | ret = find_probe_point_by_line(pf); | 1072 | ret = find_probe_point_by_line(pf); |
@@ -1622,3 +1633,61 @@ found: | |||
1622 | return (ret < 0) ? ret : lf.found; | 1633 | return (ret < 0) ? ret : lf.found; |
1623 | } | 1634 | } |
1624 | 1635 | ||
1636 | /* | ||
1637 | * Find a src file from a DWARF tag path. Prepend optional source path prefix | ||
1638 | * and chop off leading directories that do not exist. Result is passed back as | ||
1639 | * a newly allocated path on success. | ||
1640 | * Return 0 if file was found and readable, -errno otherwise. | ||
1641 | */ | ||
1642 | int get_real_path(const char *raw_path, const char *comp_dir, | ||
1643 | char **new_path) | ||
1644 | { | ||
1645 | const char *prefix = symbol_conf.source_prefix; | ||
1646 | |||
1647 | if (!prefix) { | ||
1648 | if (raw_path[0] != '/' && comp_dir) | ||
1649 | /* If not an absolute path, try to use comp_dir */ | ||
1650 | prefix = comp_dir; | ||
1651 | else { | ||
1652 | if (access(raw_path, R_OK) == 0) { | ||
1653 | *new_path = strdup(raw_path); | ||
1654 | return *new_path ? 0 : -ENOMEM; | ||
1655 | } else | ||
1656 | return -errno; | ||
1657 | } | ||
1658 | } | ||
1659 | |||
1660 | *new_path = malloc((strlen(prefix) + strlen(raw_path) + 2)); | ||
1661 | if (!*new_path) | ||
1662 | return -ENOMEM; | ||
1663 | |||
1664 | for (;;) { | ||
1665 | sprintf(*new_path, "%s/%s", prefix, raw_path); | ||
1666 | |||
1667 | if (access(*new_path, R_OK) == 0) | ||
1668 | return 0; | ||
1669 | |||
1670 | if (!symbol_conf.source_prefix) { | ||
1671 | /* In case of searching comp_dir, don't retry */ | ||
1672 | zfree(new_path); | ||
1673 | return -errno; | ||
1674 | } | ||
1675 | |||
1676 | switch (errno) { | ||
1677 | case ENAMETOOLONG: | ||
1678 | case ENOENT: | ||
1679 | case EROFS: | ||
1680 | case EFAULT: | ||
1681 | raw_path = strchr(++raw_path, '/'); | ||
1682 | if (!raw_path) { | ||
1683 | zfree(new_path); | ||
1684 | return -ENOENT; | ||
1685 | } | ||
1686 | continue; | ||
1687 | |||
1688 | default: | ||
1689 | zfree(new_path); | ||
1690 | return -errno; | ||
1691 | } | ||
1692 | } | ||
1693 | } | ||
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index 92590b2c7e1c..ebf8c8c81453 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h | |||
@@ -55,6 +55,10 @@ extern int debuginfo__find_available_vars_at(struct debuginfo *dbg, | |||
55 | struct variable_list **vls, | 55 | struct variable_list **vls, |
56 | int max_points, bool externs); | 56 | int max_points, bool externs); |
57 | 57 | ||
58 | /* Find a src file from a DWARF tag path */ | ||
59 | int get_real_path(const char *raw_path, const char *comp_dir, | ||
60 | char **new_path); | ||
61 | |||
58 | struct probe_finder { | 62 | struct probe_finder { |
59 | struct perf_probe_event *pev; /* Target probe event */ | 63 | struct perf_probe_event *pev; /* Target probe event */ |
60 | 64 | ||