 arch/x86/kernel/cpu/perf_event.h            |  18
 arch/x86/kernel/cpu/perf_event_intel.c      |   2
 arch/x86/kernel/cpu/perf_event_intel_ds.c   |   8
 arch/x86/kernel/cpu/perf_event_intel_pt.c   |  33
 arch/x86/kernel/cpu/perf_event_intel_rapl.c |  94
 include/trace/events/filemap.h              |   8
 include/trace/events/kmem.h                 |  42
 include/trace/events/vmscan.h               |   8
 tools/perf/Documentation/perf-kmem.txt      |   8
 tools/perf/builtin-kmem.c                   | 500
 tools/perf/util/probe-event.c               |  60
 tools/perf/util/probe-finder.c              |  73
 tools/perf/util/probe-finder.h              |   4
 13 files changed, 702 insertions(+), 156 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 329f0356ad4a..6ac5cb7a9e14 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -65,15 +65,15 @@ struct event_constraint {
 /*
  * struct hw_perf_event.flags flags
  */
-#define PERF_X86_EVENT_PEBS_LDLAT	0x1 /* ld+ldlat data address sampling */
-#define PERF_X86_EVENT_PEBS_ST		0x2 /* st data address sampling */
-#define PERF_X86_EVENT_PEBS_ST_HSW	0x4 /* haswell style datala, store */
-#define PERF_X86_EVENT_COMMITTED	0x8 /* event passed commit_txn */
-#define PERF_X86_EVENT_PEBS_LD_HSW	0x10 /* haswell style datala, load */
-#define PERF_X86_EVENT_PEBS_NA_HSW	0x20 /* haswell style datala, unknown */
-#define PERF_X86_EVENT_EXCL		0x40 /* HT exclusivity on counter */
-#define PERF_X86_EVENT_DYNAMIC		0x80 /* dynamic alloc'd constraint */
-#define PERF_X86_EVENT_RDPMC_ALLOWED	0x40 /* grant rdpmc permission */
+#define PERF_X86_EVENT_PEBS_LDLAT	0x0001 /* ld+ldlat data address sampling */
+#define PERF_X86_EVENT_PEBS_ST		0x0002 /* st data address sampling */
+#define PERF_X86_EVENT_PEBS_ST_HSW	0x0004 /* haswell style datala, store */
+#define PERF_X86_EVENT_COMMITTED	0x0008 /* event passed commit_txn */
+#define PERF_X86_EVENT_PEBS_LD_HSW	0x0010 /* haswell style datala, load */
+#define PERF_X86_EVENT_PEBS_NA_HSW	0x0020 /* haswell style datala, unknown */
+#define PERF_X86_EVENT_EXCL		0x0040 /* HT exclusivity on counter */
+#define PERF_X86_EVENT_DYNAMIC		0x0080 /* dynamic alloc'd constraint */
+#define PERF_X86_EVENT_RDPMC_ALLOWED	0x0100 /* grant rdpmc permission */
 
 
 struct amd_nb {
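
Note on the flag widening above: the old values show a real collision on the left-hand side, since PERF_X86_EVENT_RDPMC_ALLOWED had been added as 0x40, the same bit already used by PERF_X86_EVENT_EXCL; the new scheme widens all flags to four hex digits and moves RDPMC_ALLOWED to the unique 0x0100. A minimal compile-time guard against this class of bug could look like the sketch below (C11, not part of the patch; only the two defines are taken from the file above):

    #include <assert.h>

    #define PERF_X86_EVENT_EXCL          0x0040 /* HT exclusivity on counter */
    #define PERF_X86_EVENT_RDPMC_ALLOWED 0x0100 /* grant rdpmc permission */

    /* Overlapping bits would make the two flags indistinguishable. */
    static_assert((PERF_X86_EVENT_EXCL & PERF_X86_EVENT_RDPMC_ALLOWED) == 0,
                  "hw_perf_event.flags bits must be unique");
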
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 9da2400c2ec3..219d3fb423a1 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -3275,7 +3275,7 @@ __init int intel_pmu_init(void)
 		hw_cache_extra_regs[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = HSW_DEMAND_WRITE|
 									      BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM;
 
-		intel_pmu_lbr_init_snb();
+		intel_pmu_lbr_init_hsw();
 
 		x86_pmu.event_constraints = intel_bdw_event_constraints;
 		x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index ca69ea56c712..813f75d71175 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -558,6 +558,8 @@ struct event_constraint intel_core2_pebs_event_constraints[] = {
 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */
 	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
+	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
+	INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
 	EVENT_CONSTRAINT_END
 };
 
@@ -565,6 +567,8 @@ struct event_constraint intel_atom_pebs_event_constraints[] = {
 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
 	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
+	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
+	INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
 	EVENT_CONSTRAINT_END
 };
 
@@ -588,6 +592,8 @@ struct event_constraint intel_nehalem_pebs_event_constraints[] = {
 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
 	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
 	INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
+	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
+	INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
 	EVENT_CONSTRAINT_END
 };
 
@@ -603,6 +609,8 @@ struct event_constraint intel_westmere_pebs_event_constraints[] = {
 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
 	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
 	INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
+	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
+	INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
 	EVENT_CONSTRAINT_END
 };
 
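
Each of the four PEBS constraint tables gains the same raw event, 0x108000c0. In the architectural event-select layout this packs INST_RETIRED.ANY_P (event 0xc0, umask 0) with cmask=16 and inv=1, i.e. the encoding perf uses for cycles:p. A worked decomposition (the field macros below are illustrative, not kernel names):

    #define EVSEL_EVENT(e) ((unsigned long long)(e) & 0xff)          /* bits 0-7 */
    #define EVSEL_UMASK(u) (((unsigned long long)(u) & 0xff) << 8)   /* bits 8-15 */
    #define EVSEL_INV      (1ULL << 23)                              /* invert cmask */
    #define EVSEL_CMASK(c) (((unsigned long long)(c) & 0xff) << 24)  /* bits 24-31 */

    /* 0xc0 | 0x800000 | (16 << 24) == 0x108000c0 */
    static const unsigned long long cycles_p =
            EVSEL_EVENT(0xc0) | EVSEL_UMASK(0) | EVSEL_INV | EVSEL_CMASK(16);
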
diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c
index f2770641c0fd..ffe666c2c6b5 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_pt.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_pt.c
@@ -988,39 +988,36 @@ static int pt_event_add(struct perf_event *event, int mode)
 	int ret = -EBUSY;
 
 	if (pt->handle.event)
-		goto out;
+		goto fail;
 
 	buf = perf_aux_output_begin(&pt->handle, event);
-	if (!buf) {
-		ret = -EINVAL;
-		goto out;
-	}
+	ret = -EINVAL;
+	if (!buf)
+		goto fail_stop;
 
 	pt_buffer_reset_offsets(buf, pt->handle.head);
 	if (!buf->snapshot) {
 		ret = pt_buffer_reset_markers(buf, &pt->handle);
-		if (ret) {
-			perf_aux_output_end(&pt->handle, 0, true);
-			goto out;
-		}
+		if (ret)
+			goto fail_end_stop;
 	}
 
 	if (mode & PERF_EF_START) {
 		pt_event_start(event, 0);
-		if (hwc->state == PERF_HES_STOPPED) {
-			pt_event_del(event, 0);
-			ret = -EBUSY;
-		}
+		ret = -EBUSY;
+		if (hwc->state == PERF_HES_STOPPED)
+			goto fail_end_stop;
 	} else {
 		hwc->state = PERF_HES_STOPPED;
 	}
 
-	ret = 0;
-out:
-
-	if (ret)
-		hwc->state = PERF_HES_STOPPED;
+	return 0;
 
+fail_end_stop:
+	perf_aux_output_end(&pt->handle, 0, true);
+fail_stop:
+	hwc->state = PERF_HES_STOPPED;
+fail:
 	return ret;
 }
 
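
The rewrite replaces the single out: label, which had to inspect ret to decide how much to unwind, with a ladder of labels, so each failure point jumps to exactly the cleanup it needs; the old pt_event_del() call becomes unnecessary because fail_end_stop closes the AUX handle directly. The control-flow shape, reduced to a self-contained sketch (all helpers and error values are stand-ins, not patch code):

    #include <stdbool.h>

    static bool busy, have_buf;      /* stand-ins for pt->handle state */
    static int marker_err, hw_state; /* stand-ins for reset result, hwc->state */

    static int example_add(void)
    {
            int ret = -16;                  /* -EBUSY */

            if (busy)
                    goto fail;              /* nothing set up yet */

            ret = -22;                      /* -EINVAL */
            if (!have_buf)
                    goto fail_stop;         /* mark stopped only */

            ret = marker_err;
            if (ret)
                    goto fail_end_stop;     /* also close the AUX output */

            return 0;

    fail_end_stop:
            have_buf = false;               /* models perf_aux_output_end() */
    fail_stop:
            hw_state = 1;                   /* PERF_HES_STOPPED */
    fail:
            return ret;
    }
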
diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
index c4bb8b8e5017..999289b94025 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
@@ -62,6 +62,14 @@
 #define RAPL_IDX_PP1_NRG_STAT	3	/* gpu */
 #define INTEL_RAPL_PP1		0x4	/* pseudo-encoding */
 
+#define NR_RAPL_DOMAINS		0x4
+static const char *rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
+	"pp0-core",
+	"package",
+	"dram",
+	"pp1-gpu",
+};
+
 /* Clients have PP0, PKG */
 #define RAPL_IDX_CLN	(1<<RAPL_IDX_PP0_NRG_STAT|\
 			 1<<RAPL_IDX_PKG_NRG_STAT|\
@@ -112,7 +120,6 @@ static struct perf_pmu_events_attr event_attr_##v = { \
 
 struct rapl_pmu {
 	spinlock_t	 lock;
-	int		 hw_unit;  /* 1/2^hw_unit Joule */
 	int		 n_active; /* number of active events */
 	struct list_head active_list;
 	struct pmu	 *pmu;	   /* pointer to rapl_pmu_class */
@@ -120,6 +127,7 @@ struct rapl_pmu {
 	struct hrtimer   hrtimer;
 };
 
+static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly;  /* 1/2^hw_unit Joule */
 static struct pmu rapl_pmu_class;
 static cpumask_t rapl_cpu_mask;
 static int rapl_cntr_mask;
@@ -127,6 +135,7 @@ static int rapl_cntr_mask;
 static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu);
 static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu_to_free);
 
+static struct x86_pmu_quirk *rapl_quirks;
 static inline u64 rapl_read_counter(struct perf_event *event)
 {
 	u64 raw;
@@ -134,15 +143,28 @@ static inline u64 rapl_read_counter(struct perf_event *event)
 	return raw;
 }
 
-static inline u64 rapl_scale(u64 v)
+#define rapl_add_quirk(func_)						\
+do {									\
+	static struct x86_pmu_quirk __quirk __initdata = {		\
+		.func = func_,						\
+	};								\
+	__quirk.next = rapl_quirks;					\
+	rapl_quirks = &__quirk;						\
+} while (0)
+
+static inline u64 rapl_scale(u64 v, int cfg)
 {
+	if (cfg > NR_RAPL_DOMAINS) {
+		pr_warn("invalid domain %d, failed to scale data\n", cfg);
+		return v;
+	}
 	/*
 	 * scale delta to smallest unit (1/2^32)
 	 * users must then scale back: count * 1/(1e9*2^32) to get Joules
 	 * or use ldexp(count, -32).
 	 * Watts = Joules/Time delta
 	 */
-	return v << (32 - __this_cpu_read(rapl_pmu)->hw_unit);
+	return v << (32 - rapl_hw_unit[cfg - 1]);
 }
 
 static u64 rapl_event_update(struct perf_event *event)
@@ -173,7 +195,7 @@ again:
 	delta = (new_raw_count << shift) - (prev_raw_count << shift);
 	delta >>= shift;
 
-	sdelta = rapl_scale(delta);
+	sdelta = rapl_scale(delta, event->hw.config);
 
 	local64_add(sdelta, &event->count);
 
@@ -546,12 +568,22 @@ static void rapl_cpu_init(int cpu)
 	cpumask_set_cpu(cpu, &rapl_cpu_mask);
 }
 
+static __init void rapl_hsw_server_quirk(void)
+{
+	/*
+	 * DRAM domain on HSW server has fixed energy unit which can be
+	 * different than the unit from power unit MSR.
+	 * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, V2
+	 * of 2. Datasheet, September 2014, Reference Number: 330784-001 "
+	 */
+	rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16;
+}
+
 static int rapl_cpu_prepare(int cpu)
 {
 	struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
 	int phys_id = topology_physical_package_id(cpu);
 	u64 ms;
-	u64 msr_rapl_power_unit_bits;
 
 	if (pmu)
 		return 0;
@@ -559,24 +591,13 @@ static int rapl_cpu_prepare(int cpu)
 	if (phys_id < 0)
 		return -1;
 
-	/* protect rdmsrl() to handle virtualization */
-	if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits))
-		return -1;
-
 	pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
 	if (!pmu)
 		return -1;
-
 	spin_lock_init(&pmu->lock);
 
 	INIT_LIST_HEAD(&pmu->active_list);
 
-	/*
-	 * grab power unit as: 1/2^unit Joules
-	 *
-	 * we cache in local PMU instance
-	 */
-	pmu->hw_unit = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
 	pmu->pmu = &rapl_pmu_class;
 
 	/*
@@ -586,8 +607,8 @@ static int rapl_cpu_prepare(int cpu)
 	 * divide interval by 2 to avoid lockstep (2 * 100)
 	 * if hw unit is 32, then we use 2 ms 1/200/2
 	 */
-	if (pmu->hw_unit < 32)
-		ms = (1000 / (2 * 100)) * (1ULL << (32 - pmu->hw_unit - 1));
+	if (rapl_hw_unit[0] < 32)
+		ms = (1000 / (2 * 100)) * (1ULL << (32 - rapl_hw_unit[0] - 1));
 	else
 		ms = 2;
 
@@ -655,6 +676,20 @@ static int rapl_cpu_notifier(struct notifier_block *self,
 	return NOTIFY_OK;
 }
 
+static int rapl_check_hw_unit(void)
+{
+	u64 msr_rapl_power_unit_bits;
+	int i;
+
+	/* protect rdmsrl() to handle virtualization */
+	if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits))
+		return -1;
+	for (i = 0; i < NR_RAPL_DOMAINS; i++)
+		rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
+
+	return 0;
+}
+
 static const struct x86_cpu_id rapl_cpu_match[] = {
 	[0] = { .vendor = X86_VENDOR_INTEL, .family = 6 },
 	[1] = {},
@@ -664,6 +699,8 @@ static int __init rapl_pmu_init(void)
 {
 	struct rapl_pmu *pmu;
 	int cpu, ret;
+	struct x86_pmu_quirk *quirk;
+	int i;
 
 	/*
 	 * check for Intel processor family 6
@@ -678,6 +715,11 @@ static int __init rapl_pmu_init(void)
 		rapl_cntr_mask = RAPL_IDX_CLN;
 		rapl_pmu_events_group.attrs = rapl_events_cln_attr;
 		break;
+	case 63: /* Haswell-Server */
+		rapl_add_quirk(rapl_hsw_server_quirk);
+		rapl_cntr_mask = RAPL_IDX_SRV;
+		rapl_pmu_events_group.attrs = rapl_events_srv_attr;
+		break;
 	case 60: /* Haswell */
 	case 69: /* Haswell-Celeron */
 		rapl_cntr_mask = RAPL_IDX_HSW;
@@ -693,7 +735,13 @@ static int __init rapl_pmu_init(void)
 		/* unsupported */
 		return 0;
 	}
+	ret = rapl_check_hw_unit();
+	if (ret)
+		return ret;
 
+	/* run cpu model quirks */
+	for (quirk = rapl_quirks; quirk; quirk = quirk->next)
+		quirk->func();
 	cpu_notifier_register_begin();
 
 	for_each_online_cpu(cpu) {
@@ -714,14 +762,18 @@ static int __init rapl_pmu_init(void)
 
 	pmu = __this_cpu_read(rapl_pmu);
 
-	pr_info("RAPL PMU detected, hw unit 2^-%d Joules,"
+	pr_info("RAPL PMU detected,"
 		" API unit is 2^-32 Joules,"
 		" %d fixed counters"
 		" %llu ms ovfl timer\n",
-		pmu->hw_unit,
 		hweight32(rapl_cntr_mask),
 		ktime_to_ms(pmu->timer_interval));
-
+	for (i = 0; i < NR_RAPL_DOMAINS; i++) {
+		if (rapl_cntr_mask & (1 << i)) {
+			pr_info("hw unit of domain %s 2^-%d Joules\n",
+				rapl_domain_names[i], rapl_hw_unit[i]);
+		}
+	}
 out:
 	cpu_notifier_register_done();
 
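
Nothing changes for userspace here: counters are still exposed in the fixed 2^-32 Joule API unit, and the per-domain rapl_hw_unit[] values (including the 2^-16 DRAM unit the Haswell-Server quirk installs) only alter the in-kernel shift in rapl_scale(). The conversion prescribed by the rapl_scale() comment, as a small standalone sketch (compile with -lm):

    #include <math.h>
    #include <stdio.h>

    int main(void)
    {
            unsigned long long count = 1234567890123ULL; /* example counter delta */
            double seconds = 1.0;                        /* example time delta */

            double joules = ldexp((double)count, -32);   /* count * 2^-32 */
            double watts = joules / seconds;             /* Watts = Joules/Time delta */

            printf("%.3f J over %.1f s = %.3f W\n", joules, seconds, watts);
            return 0;
    }
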
diff --git a/include/trace/events/filemap.h b/include/trace/events/filemap.h
index 0421f49a20f7..42febb6bc1d5 100644
--- a/include/trace/events/filemap.h
+++ b/include/trace/events/filemap.h
@@ -18,14 +18,14 @@ DECLARE_EVENT_CLASS(mm_filemap_op_page_cache,
 	TP_ARGS(page),
 
 	TP_STRUCT__entry(
-		__field(struct page *, page)
+		__field(unsigned long, pfn)
 		__field(unsigned long, i_ino)
 		__field(unsigned long, index)
 		__field(dev_t, s_dev)
 	),
 
 	TP_fast_assign(
-		__entry->page = page;
+		__entry->pfn = page_to_pfn(page);
 		__entry->i_ino = page->mapping->host->i_ino;
 		__entry->index = page->index;
 		if (page->mapping->host->i_sb)
@@ -37,8 +37,8 @@ DECLARE_EVENT_CLASS(mm_filemap_op_page_cache,
 	TP_printk("dev %d:%d ino %lx page=%p pfn=%lu ofs=%lu",
 		MAJOR(__entry->s_dev), MINOR(__entry->s_dev),
 		__entry->i_ino,
-		__entry->page,
-		page_to_pfn(__entry->page),
+		pfn_to_page(__entry->pfn),
+		__entry->pfn,
 		__entry->index << PAGE_SHIFT)
 );
 
diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index 4ad10baecd4d..81ea59812117 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -154,18 +154,18 @@ TRACE_EVENT(mm_page_free,
 	TP_ARGS(page, order),
 
 	TP_STRUCT__entry(
-		__field(	struct page *,	page		)
+		__field(	unsigned long,	pfn		)
 		__field(	unsigned int,	order		)
 	),
 
 	TP_fast_assign(
-		__entry->page		= page;
+		__entry->pfn		= page_to_pfn(page);
 		__entry->order		= order;
 	),
 
 	TP_printk("page=%p pfn=%lu order=%d",
-		__entry->page,
-		page_to_pfn(__entry->page),
+		pfn_to_page(__entry->pfn),
+		__entry->pfn,
 		__entry->order)
 );
 
@@ -176,18 +176,18 @@ TRACE_EVENT(mm_page_free_batched,
 	TP_ARGS(page, cold),
 
 	TP_STRUCT__entry(
-		__field(	struct page *,	page		)
+		__field(	unsigned long,	pfn		)
 		__field(	int,		cold		)
 	),
 
 	TP_fast_assign(
-		__entry->page		= page;
+		__entry->pfn		= page_to_pfn(page);
 		__entry->cold		= cold;
 	),
 
 	TP_printk("page=%p pfn=%lu order=0 cold=%d",
-		__entry->page,
-		page_to_pfn(__entry->page),
+		pfn_to_page(__entry->pfn),
+		__entry->pfn,
 		__entry->cold)
 );
 
@@ -199,22 +199,22 @@ TRACE_EVENT(mm_page_alloc,
 	TP_ARGS(page, order, gfp_flags, migratetype),
 
 	TP_STRUCT__entry(
-		__field(	struct page *,	page		)
+		__field(	unsigned long,	pfn		)
 		__field(	unsigned int,	order		)
 		__field(	gfp_t,		gfp_flags	)
 		__field(	int,		migratetype	)
 	),
 
 	TP_fast_assign(
-		__entry->page		= page;
+		__entry->pfn		= page ? page_to_pfn(page) : -1UL;
 		__entry->order		= order;
 		__entry->gfp_flags	= gfp_flags;
 		__entry->migratetype	= migratetype;
 	),
 
 	TP_printk("page=%p pfn=%lu order=%d migratetype=%d gfp_flags=%s",
-		__entry->page,
-		__entry->page ? page_to_pfn(__entry->page) : 0,
+		__entry->pfn != -1UL ? pfn_to_page(__entry->pfn) : NULL,
+		__entry->pfn != -1UL ? __entry->pfn : 0,
 		__entry->order,
 		__entry->migratetype,
 		show_gfp_flags(__entry->gfp_flags))
@@ -227,20 +227,20 @@ DECLARE_EVENT_CLASS(mm_page,
 	TP_ARGS(page, order, migratetype),
 
 	TP_STRUCT__entry(
-		__field(	struct page *,	page		)
+		__field(	unsigned long,	pfn		)
 		__field(	unsigned int,	order		)
 		__field(	int,		migratetype	)
 	),
 
 	TP_fast_assign(
-		__entry->page		= page;
+		__entry->pfn		= page ? page_to_pfn(page) : -1UL;
 		__entry->order		= order;
 		__entry->migratetype	= migratetype;
 	),
 
 	TP_printk("page=%p pfn=%lu order=%u migratetype=%d percpu_refill=%d",
-		__entry->page,
-		__entry->page ? page_to_pfn(__entry->page) : 0,
+		__entry->pfn != -1UL ? pfn_to_page(__entry->pfn) : NULL,
+		__entry->pfn != -1UL ? __entry->pfn : 0,
 		__entry->order,
 		__entry->migratetype,
 		__entry->order == 0)
@@ -260,7 +260,7 @@ DEFINE_EVENT_PRINT(mm_page, mm_page_pcpu_drain,
 	TP_ARGS(page, order, migratetype),
 
 	TP_printk("page=%p pfn=%lu order=%d migratetype=%d",
-		__entry->page, page_to_pfn(__entry->page),
+		pfn_to_page(__entry->pfn), __entry->pfn,
 		__entry->order, __entry->migratetype)
 );
 
@@ -275,7 +275,7 @@ TRACE_EVENT(mm_page_alloc_extfrag,
 		alloc_migratetype, fallback_migratetype),
 
 	TP_STRUCT__entry(
-		__field(	struct page *,	page			)
+		__field(	unsigned long,	pfn			)
 		__field(	int,		alloc_order		)
 		__field(	int,		fallback_order		)
 		__field(	int,		alloc_migratetype	)
@@ -284,7 +284,7 @@ TRACE_EVENT(mm_page_alloc_extfrag,
 	),
 
 	TP_fast_assign(
-		__entry->page			= page;
+		__entry->pfn			= page_to_pfn(page);
 		__entry->alloc_order		= alloc_order;
 		__entry->fallback_order		= fallback_order;
 		__entry->alloc_migratetype	= alloc_migratetype;
@@ -294,8 +294,8 @@ TRACE_EVENT(mm_page_alloc_extfrag,
 	),
 
 	TP_printk("page=%p pfn=%lu alloc_order=%d fallback_order=%d pageblock_order=%d alloc_migratetype=%d fallback_migratetype=%d fragmenting=%d change_ownership=%d",
-		__entry->page,
-		page_to_pfn(__entry->page),
+		pfn_to_page(__entry->pfn),
+		__entry->pfn,
 		__entry->alloc_order,
 		__entry->fallback_order,
 		pageblock_order,
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index 69590b6ffc09..f66476b96264 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -336,18 +336,18 @@ TRACE_EVENT(mm_vmscan_writepage,
 	TP_ARGS(page, reclaim_flags),
 
 	TP_STRUCT__entry(
-		__field(struct page *, page)
+		__field(unsigned long, pfn)
 		__field(int, reclaim_flags)
 	),
 
 	TP_fast_assign(
-		__entry->page = page;
+		__entry->pfn = page_to_pfn(page);
 		__entry->reclaim_flags = reclaim_flags;
 	),
 
 	TP_printk("page=%p pfn=%lu flags=%s",
-		__entry->page,
-		page_to_pfn(__entry->page),
+		pfn_to_page(__entry->pfn),
+		__entry->pfn,
 		show_reclaim_flags(__entry->reclaim_flags))
 );
 
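
filemap.h, kmem.h and vmscan.h all make the same substitution: the trace ring buffer records the stable pfn instead of a struct page pointer, and the pointer is recomputed only while formatting, so the printed page=%p pfn=%lu output is unchanged while the stored field remains meaningful when the buffer is read long after the page was freed (mm_page_alloc additionally encodes a failed allocation as pfn == -1UL rather than a NULL pointer). A toy model of the store/print split, with fake_* helpers standing in for the kernel's page_to_pfn()/pfn_to_page():

    #include <stdio.h>

    struct page { int unused; };
    static struct page mem[8];              /* toy memory map */

    static unsigned long fake_page_to_pfn(struct page *p)
    {
            return (unsigned long)(p - mem);
    }

    static struct page *fake_pfn_to_page(unsigned long pfn)
    {
            return &mem[pfn];
    }

    int main(void)
    {
            struct page *p = &mem[2];       /* page handed to the tracepoint */

            /* TP_fast_assign: record the pfn, or -1UL on failed allocation */
            unsigned long pfn = p ? fake_page_to_pfn(p) : -1UL;

            /* TP_printk: rebuild the pointer only for display */
            printf("page=%p pfn=%lu\n",
                   (void *)(pfn != -1UL ? fake_pfn_to_page(pfn) : NULL), pfn);
            return 0;
    }
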
diff --git a/tools/perf/Documentation/perf-kmem.txt b/tools/perf/Documentation/perf-kmem.txt
index 150253cc3c97..23219c65c16f 100644
--- a/tools/perf/Documentation/perf-kmem.txt
+++ b/tools/perf/Documentation/perf-kmem.txt
@@ -3,7 +3,7 @@ perf-kmem(1)
 
 NAME
 ----
-perf-kmem - Tool to trace/measure kernel memory(slab) properties
+perf-kmem - Tool to trace/measure kernel memory properties
 
 SYNOPSIS
 --------
@@ -46,6 +46,12 @@ OPTIONS
 --raw-ip::
 	Print raw ip instead of symbol
 
+--slab::
+	Analyze SLAB allocator events.
+
+--page::
+	Analyze page allocator events.
+
 SEE ALSO
 --------
 linkperf:perf-record[1]
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 4ebf65c79434..63ea01349b6e 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -22,6 +22,11 @@
 #include <linux/string.h>
 #include <locale.h>
 
+static int	kmem_slab;
+static int	kmem_page;
+
+static long	kmem_page_size;
+
 struct alloc_stat;
 typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *);
 
@@ -226,6 +231,244 @@ static int perf_evsel__process_free_event(struct perf_evsel *evsel,
 	return 0;
 }
 
+static u64 total_page_alloc_bytes;
+static u64 total_page_free_bytes;
+static u64 total_page_nomatch_bytes;
+static u64 total_page_fail_bytes;
+static unsigned long nr_page_allocs;
+static unsigned long nr_page_frees;
+static unsigned long nr_page_fails;
+static unsigned long nr_page_nomatch;
+
+static bool use_pfn;
+
+#define MAX_MIGRATE_TYPES  6
+#define MAX_PAGE_ORDER     11
+
+static int order_stats[MAX_PAGE_ORDER][MAX_MIGRATE_TYPES];
+
+struct page_stat {
+	struct rb_node	node;
+	u64		page;
+	int		order;
+	unsigned	gfp_flags;
+	unsigned	migrate_type;
+	u64		alloc_bytes;
+	u64		free_bytes;
+	int		nr_alloc;
+	int		nr_free;
+};
+
+static struct rb_root page_tree;
+static struct rb_root page_alloc_tree;
+static struct rb_root page_alloc_sorted;
+
+static struct page_stat *search_page(unsigned long page, bool create)
+{
+	struct rb_node **node = &page_tree.rb_node;
+	struct rb_node *parent = NULL;
+	struct page_stat *data;
+
+	while (*node) {
+		s64 cmp;
+
+		parent = *node;
+		data = rb_entry(*node, struct page_stat, node);
+
+		cmp = data->page - page;
+		if (cmp < 0)
+			node = &parent->rb_left;
+		else if (cmp > 0)
+			node = &parent->rb_right;
+		else
+			return data;
+	}
+
+	if (!create)
+		return NULL;
+
+	data = zalloc(sizeof(*data));
+	if (data != NULL) {
+		data->page = page;
+
+		rb_link_node(&data->node, parent, node);
+		rb_insert_color(&data->node, &page_tree);
+	}
+
+	return data;
+}
+
+static int page_stat_cmp(struct page_stat *a, struct page_stat *b)
+{
+	if (a->page > b->page)
+		return -1;
+	if (a->page < b->page)
+		return 1;
+	if (a->order > b->order)
+		return -1;
+	if (a->order < b->order)
+		return 1;
+	if (a->migrate_type > b->migrate_type)
+		return -1;
+	if (a->migrate_type < b->migrate_type)
+		return 1;
+	if (a->gfp_flags > b->gfp_flags)
+		return -1;
+	if (a->gfp_flags < b->gfp_flags)
+		return 1;
+	return 0;
+}
+
+static struct page_stat *search_page_alloc_stat(struct page_stat *stat, bool create)
+{
+	struct rb_node **node = &page_alloc_tree.rb_node;
+	struct rb_node *parent = NULL;
+	struct page_stat *data;
+
+	while (*node) {
+		s64 cmp;
+
+		parent = *node;
+		data = rb_entry(*node, struct page_stat, node);
+
+		cmp = page_stat_cmp(data, stat);
+		if (cmp < 0)
+			node = &parent->rb_left;
+		else if (cmp > 0)
+			node = &parent->rb_right;
+		else
+			return data;
+	}
+
+	if (!create)
+		return NULL;
+
+	data = zalloc(sizeof(*data));
+	if (data != NULL) {
+		data->page = stat->page;
+		data->order = stat->order;
+		data->gfp_flags = stat->gfp_flags;
+		data->migrate_type = stat->migrate_type;
+
+		rb_link_node(&data->node, parent, node);
+		rb_insert_color(&data->node, &page_alloc_tree);
+	}
+
+	return data;
+}
+
+static bool valid_page(u64 pfn_or_page)
+{
+	if (use_pfn && pfn_or_page == -1UL)
+		return false;
+	if (!use_pfn && pfn_or_page == 0)
+		return false;
+	return true;
+}
+
+static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel,
+						struct perf_sample *sample)
+{
+	u64 page;
+	unsigned int order = perf_evsel__intval(evsel, sample, "order");
+	unsigned int gfp_flags = perf_evsel__intval(evsel, sample, "gfp_flags");
+	unsigned int migrate_type = perf_evsel__intval(evsel, sample,
+						       "migratetype");
+	u64 bytes = kmem_page_size << order;
+	struct page_stat *stat;
+	struct page_stat this = {
+		.order = order,
+		.gfp_flags = gfp_flags,
+		.migrate_type = migrate_type,
+	};
+
+	if (use_pfn)
+		page = perf_evsel__intval(evsel, sample, "pfn");
+	else
+		page = perf_evsel__intval(evsel, sample, "page");
+
+	nr_page_allocs++;
+	total_page_alloc_bytes += bytes;
+
+	if (!valid_page(page)) {
+		nr_page_fails++;
+		total_page_fail_bytes += bytes;
+
+		return 0;
+	}
+
+	/*
+	 * This is to find the current page (with correct gfp flags and
+	 * migrate type) at free event.
+	 */
+	stat = search_page(page, true);
+	if (stat == NULL)
+		return -ENOMEM;
+
+	stat->order = order;
+	stat->gfp_flags = gfp_flags;
+	stat->migrate_type = migrate_type;
+
+	this.page = page;
+	stat = search_page_alloc_stat(&this, true);
+	if (stat == NULL)
+		return -ENOMEM;
+
+	stat->nr_alloc++;
+	stat->alloc_bytes += bytes;
+
+	order_stats[order][migrate_type]++;
+
+	return 0;
+}
+
+static int perf_evsel__process_page_free_event(struct perf_evsel *evsel,
+					       struct perf_sample *sample)
+{
+	u64 page;
+	unsigned int order = perf_evsel__intval(evsel, sample, "order");
+	u64 bytes = kmem_page_size << order;
+	struct page_stat *stat;
+	struct page_stat this = {
+		.order = order,
+	};
+
+	if (use_pfn)
+		page = perf_evsel__intval(evsel, sample, "pfn");
+	else
+		page = perf_evsel__intval(evsel, sample, "page");
+
+	nr_page_frees++;
+	total_page_free_bytes += bytes;
+
+	stat = search_page(page, false);
+	if (stat == NULL) {
+		pr_debug2("missing free at page %"PRIx64" (order: %d)\n",
+			  page, order);
+
+		nr_page_nomatch++;
+		total_page_nomatch_bytes += bytes;
+
+		return 0;
+	}
+
+	this.page = page;
+	this.gfp_flags = stat->gfp_flags;
+	this.migrate_type = stat->migrate_type;
+
+	rb_erase(&stat->node, &page_tree);
+	free(stat);
+
+	stat = search_page_alloc_stat(&this, false);
+	if (stat == NULL)
+		return -ENOENT;
+
+	stat->nr_free++;
+	stat->free_bytes += bytes;
+
+	return 0;
+}
+
 typedef int (*tracepoint_handler)(struct perf_evsel *evsel,
 				  struct perf_sample *sample);
 
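
The new tracking deliberately uses two trees: page_tree, keyed by the page/pfn alone, holds currently-allocated pages so a later free event can recover the gfp flags and migrate type that only the alloc event carries, while page_alloc_tree, ordered by page_stat_cmp() over (page, order, migrate type, gfp flags), accumulates the per-entry counters that later get sorted and printed. The size accounting in both handlers is bytes = kmem_page_size << order; for the common 4 KiB page size that works out as in this small worked example:

    #include <stdio.h>

    int main(void)
    {
            long kmem_page_size = 4096;     /* assumes 4 KiB pages */
            unsigned int order;

            for (order = 0; order < 4; order++)
                    printf("order %u -> %ld KB\n",
                           order, (kmem_page_size << order) / 1024);
            return 0;                       /* prints 4, 8, 16, 32 KB */
    }
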
@@ -270,8 +513,9 @@ static double fragmentation(unsigned long n_req, unsigned long n_alloc)
 	return 100.0 - (100.0 * n_req / n_alloc);
 }
 
-static void __print_result(struct rb_root *root, struct perf_session *session,
-			   int n_lines, int is_caller)
+static void __print_slab_result(struct rb_root *root,
+				struct perf_session *session,
+				int n_lines, int is_caller)
 {
 	struct rb_node *next;
 	struct machine *machine = &session->machines.host;
@@ -323,9 +567,56 @@ static void __print_result(struct rb_root *root, struct perf_session *session,
 	printf("%.105s\n", graph_dotted_line);
 }
 
-static void print_summary(void)
+static const char * const migrate_type_str[] = {
+	"UNMOVABL",
+	"RECLAIM",
+	"MOVABLE",
+	"RESERVED",
+	"CMA/ISLT",
+	"UNKNOWN",
+};
+
+static void __print_page_result(struct rb_root *root,
+				struct perf_session *session __maybe_unused,
+				int n_lines)
+{
+	struct rb_node *next = rb_first(root);
+	const char *format;
+
+	printf("\n%.80s\n", graph_dotted_line);
+	printf(" %-16s | Total alloc (KB) | Hits | Order | Mig.type | GFP flags\n",
+	       use_pfn ? "PFN" : "Page");
+	printf("%.80s\n", graph_dotted_line);
+
+	if (use_pfn)
+		format = " %16llu | %'16llu | %'9d | %5d | %8s | %08lx\n";
+	else
+		format = " %016llx | %'16llu | %'9d | %5d | %8s | %08lx\n";
+
+	while (next && n_lines--) {
+		struct page_stat *data;
+
+		data = rb_entry(next, struct page_stat, node);
+
+		printf(format, (unsigned long long)data->page,
+		       (unsigned long long)data->alloc_bytes / 1024,
+		       data->nr_alloc, data->order,
+		       migrate_type_str[data->migrate_type],
+		       (unsigned long)data->gfp_flags);
+
+		next = rb_next(next);
+	}
+
+	if (n_lines == -1)
+		printf(" ... | ... | ... | ... | ... | ... \n");
+
+	printf("%.80s\n", graph_dotted_line);
+}
+
+static void print_slab_summary(void)
 {
-	printf("\nSUMMARY\n=======\n");
+	printf("\nSUMMARY (SLAB allocator)");
+	printf("\n========================\n");
 	printf("Total bytes requested: %'lu\n", total_requested);
 	printf("Total bytes allocated: %'lu\n", total_allocated);
 	printf("Total bytes wasted on internal fragmentation: %'lu\n",
@@ -335,13 +626,73 @@ static void print_summary(void)
 	printf("Cross CPU allocations: %'lu/%'lu\n", nr_cross_allocs, nr_allocs);
 }
 
-static void print_result(struct perf_session *session)
+static void print_page_summary(void)
+{
+	int o, m;
+	u64 nr_alloc_freed = nr_page_frees - nr_page_nomatch;
+	u64 total_alloc_freed_bytes = total_page_free_bytes - total_page_nomatch_bytes;
+
+	printf("\nSUMMARY (page allocator)");
+	printf("\n========================\n");
+	printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total allocation requests",
+	       nr_page_allocs, total_page_alloc_bytes / 1024);
+	printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total free requests",
+	       nr_page_frees, total_page_free_bytes / 1024);
+	printf("\n");
+
+	printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total alloc+freed requests",
+	       nr_alloc_freed, (total_alloc_freed_bytes) / 1024);
+	printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total alloc-only requests",
+	       nr_page_allocs - nr_alloc_freed,
+	       (total_page_alloc_bytes - total_alloc_freed_bytes) / 1024);
+	printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total free-only requests",
+	       nr_page_nomatch, total_page_nomatch_bytes / 1024);
+	printf("\n");
+
+	printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total allocation failures",
+	       nr_page_fails, total_page_fail_bytes / 1024);
+	printf("\n");
+
+	printf("%5s %12s %12s %12s %12s %12s\n", "Order", "Unmovable",
+	       "Reclaimable", "Movable", "Reserved", "CMA/Isolated");
+	printf("%.5s %.12s %.12s %.12s %.12s %.12s\n", graph_dotted_line,
+	       graph_dotted_line, graph_dotted_line, graph_dotted_line,
+	       graph_dotted_line, graph_dotted_line);
+
+	for (o = 0; o < MAX_PAGE_ORDER; o++) {
+		printf("%5d", o);
+		for (m = 0; m < MAX_MIGRATE_TYPES - 1; m++) {
+			if (order_stats[o][m])
+				printf(" %'12d", order_stats[o][m]);
+			else
+				printf(" %12c", '.');
+		}
+		printf("\n");
+	}
+}
+
+static void print_slab_result(struct perf_session *session)
 {
 	if (caller_flag)
-		__print_result(&root_caller_sorted, session, caller_lines, 1);
+		__print_slab_result(&root_caller_sorted, session, caller_lines, 1);
+	if (alloc_flag)
+		__print_slab_result(&root_alloc_sorted, session, alloc_lines, 0);
+	print_slab_summary();
+}
+
+static void print_page_result(struct perf_session *session)
+{
 	if (alloc_flag)
-		__print_result(&root_alloc_sorted, session, alloc_lines, 0);
-	print_summary();
+		__print_page_result(&page_alloc_sorted, session, alloc_lines);
+	print_page_summary();
+}
+
+static void print_result(struct perf_session *session)
+{
+	if (kmem_slab)
+		print_slab_result(session);
+	if (kmem_page)
+		print_page_result(session);
 }
 
 struct sort_dimension {
@@ -353,8 +704,8 @@ struct sort_dimension {
 static LIST_HEAD(caller_sort);
 static LIST_HEAD(alloc_sort);
 
-static void sort_insert(struct rb_root *root, struct alloc_stat *data,
-			struct list_head *sort_list)
+static void sort_slab_insert(struct rb_root *root, struct alloc_stat *data,
+			     struct list_head *sort_list)
 {
 	struct rb_node **new = &(root->rb_node);
 	struct rb_node *parent = NULL;
@@ -383,8 +734,8 @@ static void sort_insert(struct rb_root *root, struct alloc_stat *data,
 	rb_insert_color(&data->node, root);
 }
 
-static void __sort_result(struct rb_root *root, struct rb_root *root_sorted,
-			  struct list_head *sort_list)
+static void __sort_slab_result(struct rb_root *root, struct rb_root *root_sorted,
+			       struct list_head *sort_list)
 {
 	struct rb_node *node;
 	struct alloc_stat *data;
@@ -396,26 +747,79 @@ static void __sort_result(struct rb_root *root, struct rb_root *root_sorted,
 
 		rb_erase(node, root);
 		data = rb_entry(node, struct alloc_stat, node);
-		sort_insert(root_sorted, data, sort_list);
+		sort_slab_insert(root_sorted, data, sort_list);
+	}
+}
+
+static void sort_page_insert(struct rb_root *root, struct page_stat *data)
+{
+	struct rb_node **new = &root->rb_node;
+	struct rb_node *parent = NULL;
+
+	while (*new) {
+		struct page_stat *this;
+		int cmp = 0;
+
+		this = rb_entry(*new, struct page_stat, node);
+		parent = *new;
+
+		/* TODO: support more sort key */
+		cmp = data->alloc_bytes - this->alloc_bytes;
+
+		if (cmp > 0)
+			new = &parent->rb_left;
+		else
+			new = &parent->rb_right;
+	}
+
+	rb_link_node(&data->node, parent, new);
+	rb_insert_color(&data->node, root);
+}
+
+static void __sort_page_result(struct rb_root *root, struct rb_root *root_sorted)
+{
+	struct rb_node *node;
+	struct page_stat *data;
+
+	for (;;) {
+		node = rb_first(root);
+		if (!node)
+			break;
+
+		rb_erase(node, root);
+		data = rb_entry(node, struct page_stat, node);
+		sort_page_insert(root_sorted, data);
 	}
 }
 
 static void sort_result(void)
 {
-	__sort_result(&root_alloc_stat, &root_alloc_sorted, &alloc_sort);
-	__sort_result(&root_caller_stat, &root_caller_sorted, &caller_sort);
+	if (kmem_slab) {
+		__sort_slab_result(&root_alloc_stat, &root_alloc_sorted,
+				   &alloc_sort);
+		__sort_slab_result(&root_caller_stat, &root_caller_sorted,
+				   &caller_sort);
+	}
+	if (kmem_page) {
+		__sort_page_result(&page_alloc_tree, &page_alloc_sorted);
+	}
 }
 
 static int __cmd_kmem(struct perf_session *session)
 {
 	int err = -EINVAL;
+	struct perf_evsel *evsel;
 	const struct perf_evsel_str_handler kmem_tracepoints[] = {
+		/* slab allocator */
 		{ "kmem:kmalloc",		perf_evsel__process_alloc_event, },
 		{ "kmem:kmem_cache_alloc",	perf_evsel__process_alloc_event, },
 		{ "kmem:kmalloc_node",		perf_evsel__process_alloc_node_event, },
 		{ "kmem:kmem_cache_alloc_node", perf_evsel__process_alloc_node_event, },
 		{ "kmem:kfree",			perf_evsel__process_free_event, },
 		{ "kmem:kmem_cache_free",	perf_evsel__process_free_event, },
+		/* page allocator */
+		{ "kmem:mm_page_alloc", perf_evsel__process_page_alloc_event, },
+		{ "kmem:mm_page_free", perf_evsel__process_page_free_event, },
 	};
 
 	if (!perf_session__has_traces(session, "kmem record"))
@@ -426,10 +830,20 @@ static int __cmd_kmem(struct perf_session *session)
 		goto out;
 	}
 
+	evlist__for_each(session->evlist, evsel) {
+		if (!strcmp(perf_evsel__name(evsel), "kmem:mm_page_alloc") &&
+		    perf_evsel__field(evsel, "pfn")) {
+			use_pfn = true;
+			break;
+		}
+	}
+
 	setup_pager();
 	err = perf_session__process_events(session);
-	if (err != 0)
+	if (err != 0) {
+		pr_err("error during process events: %d\n", err);
 		goto out;
+	}
 	sort_result();
 	print_result(session);
 out:
@@ -612,6 +1026,22 @@ static int parse_alloc_opt(const struct option *opt __maybe_unused,
 	return 0;
 }
 
+static int parse_slab_opt(const struct option *opt __maybe_unused,
+			  const char *arg __maybe_unused,
+			  int unset __maybe_unused)
+{
+	kmem_slab = (kmem_page + 1);
+	return 0;
+}
+
+static int parse_page_opt(const struct option *opt __maybe_unused,
+			  const char *arg __maybe_unused,
+			  int unset __maybe_unused)
+{
+	kmem_page = (kmem_slab + 1);
+	return 0;
+}
+
 static int parse_line_opt(const struct option *opt __maybe_unused,
 			  const char *arg, int unset __maybe_unused)
 {
@@ -634,6 +1064,8 @@ static int __cmd_record(int argc, const char **argv)
 {
 	const char * const record_args[] = {
 	"record", "-a", "-R", "-c", "1",
+	};
+	const char * const slab_events[] = {
 	"-e", "kmem:kmalloc",
 	"-e", "kmem:kmalloc_node",
 	"-e", "kmem:kfree",
@@ -641,10 +1073,19 @@ static int __cmd_record(int argc, const char **argv)
641 "-e", "kmem:kmem_cache_alloc_node", 1073 "-e", "kmem:kmem_cache_alloc_node",
642 "-e", "kmem:kmem_cache_free", 1074 "-e", "kmem:kmem_cache_free",
643 }; 1075 };
1076 const char * const page_events[] = {
1077 "-e", "kmem:mm_page_alloc",
1078 "-e", "kmem:mm_page_free",
1079 };
644 unsigned int rec_argc, i, j; 1080 unsigned int rec_argc, i, j;
645 const char **rec_argv; 1081 const char **rec_argv;
646 1082
647 rec_argc = ARRAY_SIZE(record_args) + argc - 1; 1083 rec_argc = ARRAY_SIZE(record_args) + argc - 1;
1084 if (kmem_slab)
1085 rec_argc += ARRAY_SIZE(slab_events);
1086 if (kmem_page)
1087 rec_argc += ARRAY_SIZE(page_events);
1088
648 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 1089 rec_argv = calloc(rec_argc + 1, sizeof(char *));
649 1090
650 if (rec_argv == NULL) 1091 if (rec_argv == NULL)
@@ -653,6 +1094,15 @@ static int __cmd_record(int argc, const char **argv)
 	for (i = 0; i < ARRAY_SIZE(record_args); i++)
 		rec_argv[i] = strdup(record_args[i]);
 
+	if (kmem_slab) {
+		for (j = 0; j < ARRAY_SIZE(slab_events); j++, i++)
+			rec_argv[i] = strdup(slab_events[j]);
+	}
+	if (kmem_page) {
+		for (j = 0; j < ARRAY_SIZE(page_events); j++, i++)
+			rec_argv[i] = strdup(page_events[j]);
+	}
+
 	for (j = 1; j < (unsigned int)argc; j++, i++)
 		rec_argv[i] = argv[j];
 
@@ -679,6 +1129,10 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
 	OPT_CALLBACK('l', "line", NULL, "num", "show n lines", parse_line_opt),
 	OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"),
 	OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"),
+	OPT_CALLBACK_NOOPT(0, "slab", NULL, NULL, "Analyze slab allocator",
+			   parse_slab_opt),
+	OPT_CALLBACK_NOOPT(0, "page", NULL, NULL, "Analyze page allocator",
+			   parse_page_opt),
 	OPT_END()
 	};
 	const char *const kmem_subcommands[] = { "record", "stat", NULL };
@@ -695,6 +1149,9 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
 	if (!argc)
 		usage_with_options(kmem_usage, kmem_options);
 
+	if (kmem_slab == 0 && kmem_page == 0)
+		kmem_slab = 1;  /* for backward compatibility */
+
 	if (!strncmp(argv[0], "rec", 3)) {
 		symbol__init(NULL);
 		return __cmd_record(argc, argv);
@@ -706,6 +1163,17 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
 	if (session == NULL)
 		return -1;
 
+	if (kmem_page) {
+		struct perf_evsel *evsel = perf_evlist__first(session->evlist);
+
+		if (evsel == NULL || evsel->tp_format == NULL) {
+			pr_err("invalid event found.. aborting\n");
+			return -1;
+		}
+
+		kmem_page_size = pevent_get_page_size(evsel->tp_format->pevent);
+	}
+
 	symbol__init(&session->header.env);
 
 	if (!strcmp(argv[0], "stat")) {
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 30545ce2c712..d8bb616ff57c 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -332,6 +332,7 @@ static int find_alternative_probe_point(struct debuginfo *dinfo,
 	else {
 		result->offset += pp->offset;
 		result->line += pp->line;
+		result->retprobe = pp->retprobe;
 		ret = 0;
 	}
 
@@ -654,65 +655,6 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
 	return ntevs;
 }
 
-/*
- * Find a src file from a DWARF tag path. Prepend optional source path prefix
- * and chop off leading directories that do not exist. Result is passed back as
- * a newly allocated path on success.
- * Return 0 if file was found and readable, -errno otherwise.
- */
-static int get_real_path(const char *raw_path, const char *comp_dir,
-			 char **new_path)
-{
-	const char *prefix = symbol_conf.source_prefix;
-
-	if (!prefix) {
-		if (raw_path[0] != '/' && comp_dir)
-			/* If not an absolute path, try to use comp_dir */
-			prefix = comp_dir;
-		else {
-			if (access(raw_path, R_OK) == 0) {
-				*new_path = strdup(raw_path);
-				return *new_path ? 0 : -ENOMEM;
-			} else
-				return -errno;
-		}
-	}
-
-	*new_path = malloc((strlen(prefix) + strlen(raw_path) + 2));
-	if (!*new_path)
-		return -ENOMEM;
-
-	for (;;) {
-		sprintf(*new_path, "%s/%s", prefix, raw_path);
-
-		if (access(*new_path, R_OK) == 0)
-			return 0;
-
-		if (!symbol_conf.source_prefix) {
-			/* In case of searching comp_dir, don't retry */
-			zfree(new_path);
-			return -errno;
-		}
-
-		switch (errno) {
-		case ENAMETOOLONG:
-		case ENOENT:
-		case EROFS:
-		case EFAULT:
-			raw_path = strchr(++raw_path, '/');
-			if (!raw_path) {
-				zfree(new_path);
-				return -ENOENT;
-			}
-			continue;
-
-		default:
-			zfree(new_path);
-			return -errno;
-		}
-	}
-}
-
 #define LINEBUF_SIZE 256
 #define NR_ADDITIONAL_LINES 2
 
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index e3074230f236..b5bf9d5efeaf 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -855,11 +855,22 @@ static int probe_point_lazy_walker(const char *fname, int lineno,
 static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf)
 {
 	int ret = 0;
+	char *fpath;
 
 	if (intlist__empty(pf->lcache)) {
+		const char *comp_dir;
+
+		comp_dir = cu_get_comp_dir(&pf->cu_die);
+		ret = get_real_path(pf->fname, comp_dir, &fpath);
+		if (ret < 0) {
+			pr_warning("Failed to find source file path.\n");
+			return ret;
+		}
+
 		/* Matching lazy line pattern */
-		ret = find_lazy_match_lines(pf->lcache, pf->fname,
+		ret = find_lazy_match_lines(pf->lcache, fpath,
 					    pf->pev->point.lazy_line);
+		free(fpath);
 		if (ret <= 0)
 			return ret;
 	}
@@ -1055,7 +1066,7 @@ static int debuginfo__find_probes(struct debuginfo *dbg,
 	if (pp->function)
 		ret = find_probe_point_by_func(pf);
 	else if (pp->lazy_line)
-		ret = find_probe_point_lazy(NULL, pf);
+		ret = find_probe_point_lazy(&pf->cu_die, pf);
 	else {
 		pf->lno = pp->line;
 		ret = find_probe_point_by_line(pf);
@@ -1622,3 +1633,61 @@ found:
 	return (ret < 0) ? ret : lf.found;
 }
 
+/*
+ * Find a src file from a DWARF tag path. Prepend optional source path prefix
+ * and chop off leading directories that do not exist. Result is passed back as
+ * a newly allocated path on success.
+ * Return 0 if file was found and readable, -errno otherwise.
+ */
+int get_real_path(const char *raw_path, const char *comp_dir,
+		  char **new_path)
+{
+	const char *prefix = symbol_conf.source_prefix;
+
+	if (!prefix) {
+		if (raw_path[0] != '/' && comp_dir)
+			/* If not an absolute path, try to use comp_dir */
+			prefix = comp_dir;
+		else {
+			if (access(raw_path, R_OK) == 0) {
+				*new_path = strdup(raw_path);
+				return *new_path ? 0 : -ENOMEM;
+			} else
+				return -errno;
+		}
+	}
+
+	*new_path = malloc((strlen(prefix) + strlen(raw_path) + 2));
+	if (!*new_path)
+		return -ENOMEM;
+
+	for (;;) {
+		sprintf(*new_path, "%s/%s", prefix, raw_path);
+
+		if (access(*new_path, R_OK) == 0)
+			return 0;
+
+		if (!symbol_conf.source_prefix) {
+			/* In case of searching comp_dir, don't retry */
+			zfree(new_path);
+			return -errno;
+		}
+
+		switch (errno) {
+		case ENAMETOOLONG:
+		case ENOENT:
+		case EROFS:
+		case EFAULT:
+			raw_path = strchr(++raw_path, '/');
+			if (!raw_path) {
+				zfree(new_path);
+				return -ENOENT;
+			}
+			continue;
+
+		default:
+			zfree(new_path);
+			return -errno;
+		}
+	}
+}
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index 92590b2c7e1c..ebf8c8c81453 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -55,6 +55,10 @@ extern int debuginfo__find_available_vars_at(struct debuginfo *dbg,
 					     struct variable_list **vls,
 					     int max_points, bool externs);
 
+/* Find a src file from a DWARF tag path */
+int get_real_path(const char *raw_path, const char *comp_dir,
+		  char **new_path);
+
 struct probe_finder {
 	struct perf_probe_event	*pev;		/* Target probe event */
 
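
get_real_path() moves verbatim out of probe-event.c and is made public so find_probe_point_lazy() can resolve a lazy-line pattern's source file: it prefers symbol_conf.source_prefix, falls back to the DWARF comp_dir for relative paths, and otherwise tries the raw path, retrying with leading directories stripped one at a time. A hypothetical caller mirroring the probe-finder.c usage above (the surrounding error handling is an assumption, not patch code):

    #include <stdio.h>
    #include <stdlib.h>
    #include "probe-finder.h"       /* declares get_real_path() as of this patch */

    static void show_source(const char *fname, const char *comp_dir)
    {
            char *fpath;

            if (get_real_path(fname, comp_dir, &fpath) < 0) {
                    fprintf(stderr, "Failed to find source file path.\n");
                    return;
            }
            printf("resolved: %s\n", fpath);        /* caller owns fpath */
            free(fpath);
    }
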