aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2009-08-13 15:24:33 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2009-08-13 15:24:33 -0400
commit3493e84de60590d3012139187f631f2dfbf0887f (patch)
treec7a994c2e6f0d06b5044ea0b322e5ff0cc0b4a63
parent919aa96a9cfc5071f037bf58718e05335562a6ac (diff)
parent94d5d1b2d891f1fd5205f978246b7864d998b25c (diff)
Merge branch 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: perf_counter: Report the cloning task as parent on perf_counter_fork() perf_counter: Fix an ipi-deadlock perf: Rework/fix the whole read vs group stuff perf_counter: Fix swcounter context invariance perf report: Don't show unresolved DSOs and symbols when -S/-d is used perf tools: Add a general option to enable raw sample records perf tools: Add a per tracepoint counter attribute to get raw sample perf_counter: Provide hw_perf_counter_setup_online() APIs perf list: Fix large list output by using the pager perf_counter, x86: Fix/improve apic fallback perf record: Add missing -C option support for specifying profile cpu perf tools: Fix dso__new handle() to handle deleted DSOs perf tools: Fix fallback to cplus_demangle() when bfd_demangle() is not available perf report: Show the tid too in -D perf record: Fix .tid and .pid fill-in when synthesizing events perf_counter, x86: Fix generic cache events on P6-mobile CPUs perf_counter, x86: Fix lapic printk message
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/kernel/cpu/perf_counter.c40
-rw-r--r--include/linux/perf_counter.h49
-rw-r--r--kernel/perf_counter.c338
-rw-r--r--tools/perf/Makefile29
-rw-r--r--tools/perf/builtin-list.c3
-rw-r--r--tools/perf/builtin-record.c95
-rw-r--r--tools/perf/builtin-report.c12
-rw-r--r--tools/perf/util/parse-events.c10
-rw-r--r--tools/perf/util/symbol.c17
-rw-r--r--tools/perf/util/symbol.h24
11 files changed, 429 insertions, 190 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 738bdc6b0f8b..13ffa5df37d7 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -24,6 +24,7 @@ config X86
24 select HAVE_UNSTABLE_SCHED_CLOCK 24 select HAVE_UNSTABLE_SCHED_CLOCK
25 select HAVE_IDE 25 select HAVE_IDE
26 select HAVE_OPROFILE 26 select HAVE_OPROFILE
27 select HAVE_PERF_COUNTERS if (!M386 && !M486)
27 select HAVE_IOREMAP_PROT 28 select HAVE_IOREMAP_PROT
28 select HAVE_KPROBES 29 select HAVE_KPROBES
29 select ARCH_WANT_OPTIONAL_GPIOLIB 30 select ARCH_WANT_OPTIONAL_GPIOLIB
@@ -742,7 +743,6 @@ config X86_UP_IOAPIC
742config X86_LOCAL_APIC 743config X86_LOCAL_APIC
743 def_bool y 744 def_bool y
744 depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC 745 depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC
745 select HAVE_PERF_COUNTERS if (!M386 && !M486)
746 746
747config X86_IO_APIC 747config X86_IO_APIC
748 def_bool y 748 def_bool y
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index a7aa8f900954..900332b800f8 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -55,6 +55,7 @@ struct x86_pmu {
55 int num_counters_fixed; 55 int num_counters_fixed;
56 int counter_bits; 56 int counter_bits;
57 u64 counter_mask; 57 u64 counter_mask;
58 int apic;
58 u64 max_period; 59 u64 max_period;
59 u64 intel_ctrl; 60 u64 intel_ctrl;
60}; 61};
@@ -72,8 +73,8 @@ static const u64 p6_perfmon_event_map[] =
72{ 73{
73 [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, 74 [PERF_COUNT_HW_CPU_CYCLES] = 0x0079,
74 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, 75 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
75 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0000, 76 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e,
76 [PERF_COUNT_HW_CACHE_MISSES] = 0x0000, 77 [PERF_COUNT_HW_CACHE_MISSES] = 0x012e,
77 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, 78 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
78 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, 79 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
79 [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, 80 [PERF_COUNT_HW_BUS_CYCLES] = 0x0062,
@@ -613,6 +614,7 @@ static DEFINE_MUTEX(pmc_reserve_mutex);
613 614
614static bool reserve_pmc_hardware(void) 615static bool reserve_pmc_hardware(void)
615{ 616{
617#ifdef CONFIG_X86_LOCAL_APIC
616 int i; 618 int i;
617 619
618 if (nmi_watchdog == NMI_LOCAL_APIC) 620 if (nmi_watchdog == NMI_LOCAL_APIC)
@@ -627,9 +629,11 @@ static bool reserve_pmc_hardware(void)
627 if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) 629 if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
628 goto eventsel_fail; 630 goto eventsel_fail;
629 } 631 }
632#endif
630 633
631 return true; 634 return true;
632 635
636#ifdef CONFIG_X86_LOCAL_APIC
633eventsel_fail: 637eventsel_fail:
634 for (i--; i >= 0; i--) 638 for (i--; i >= 0; i--)
635 release_evntsel_nmi(x86_pmu.eventsel + i); 639 release_evntsel_nmi(x86_pmu.eventsel + i);
@@ -644,10 +648,12 @@ perfctr_fail:
644 enable_lapic_nmi_watchdog(); 648 enable_lapic_nmi_watchdog();
645 649
646 return false; 650 return false;
651#endif
647} 652}
648 653
649static void release_pmc_hardware(void) 654static void release_pmc_hardware(void)
650{ 655{
656#ifdef CONFIG_X86_LOCAL_APIC
651 int i; 657 int i;
652 658
653 for (i = 0; i < x86_pmu.num_counters; i++) { 659 for (i = 0; i < x86_pmu.num_counters; i++) {
@@ -657,6 +663,7 @@ static void release_pmc_hardware(void)
657 663
658 if (nmi_watchdog == NMI_LOCAL_APIC) 664 if (nmi_watchdog == NMI_LOCAL_APIC)
659 enable_lapic_nmi_watchdog(); 665 enable_lapic_nmi_watchdog();
666#endif
660} 667}
661 668
662static void hw_perf_counter_destroy(struct perf_counter *counter) 669static void hw_perf_counter_destroy(struct perf_counter *counter)
@@ -748,6 +755,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
748 hwc->sample_period = x86_pmu.max_period; 755 hwc->sample_period = x86_pmu.max_period;
749 hwc->last_period = hwc->sample_period; 756 hwc->last_period = hwc->sample_period;
750 atomic64_set(&hwc->period_left, hwc->sample_period); 757 atomic64_set(&hwc->period_left, hwc->sample_period);
758 } else {
759 /*
760 * If we have a PMU initialized but no APIC
761 * interrupts, we cannot sample hardware
762 * counters (user-space has to fall back and
763 * sample via a hrtimer based software counter):
764 */
765 if (!x86_pmu.apic)
766 return -EOPNOTSUPP;
751 } 767 }
752 768
753 counter->destroy = hw_perf_counter_destroy; 769 counter->destroy = hw_perf_counter_destroy;
@@ -1449,18 +1465,22 @@ void smp_perf_pending_interrupt(struct pt_regs *regs)
1449 1465
1450void set_perf_counter_pending(void) 1466void set_perf_counter_pending(void)
1451{ 1467{
1468#ifdef CONFIG_X86_LOCAL_APIC
1452 apic->send_IPI_self(LOCAL_PENDING_VECTOR); 1469 apic->send_IPI_self(LOCAL_PENDING_VECTOR);
1470#endif
1453} 1471}
1454 1472
1455void perf_counters_lapic_init(void) 1473void perf_counters_lapic_init(void)
1456{ 1474{
1457 if (!x86_pmu_initialized()) 1475#ifdef CONFIG_X86_LOCAL_APIC
1476 if (!x86_pmu.apic || !x86_pmu_initialized())
1458 return; 1477 return;
1459 1478
1460 /* 1479 /*
1461 * Always use NMI for PMU 1480 * Always use NMI for PMU
1462 */ 1481 */
1463 apic_write(APIC_LVTPC, APIC_DM_NMI); 1482 apic_write(APIC_LVTPC, APIC_DM_NMI);
1483#endif
1464} 1484}
1465 1485
1466static int __kprobes 1486static int __kprobes
@@ -1484,7 +1504,9 @@ perf_counter_nmi_handler(struct notifier_block *self,
1484 1504
1485 regs = args->regs; 1505 regs = args->regs;
1486 1506
1507#ifdef CONFIG_X86_LOCAL_APIC
1487 apic_write(APIC_LVTPC, APIC_DM_NMI); 1508 apic_write(APIC_LVTPC, APIC_DM_NMI);
1509#endif
1488 /* 1510 /*
1489 * Can't rely on the handled return value to say it was our NMI, two 1511 * Can't rely on the handled return value to say it was our NMI, two
1490 * counters could trigger 'simultaneously' raising two back-to-back NMIs. 1512 * counters could trigger 'simultaneously' raising two back-to-back NMIs.
@@ -1515,6 +1537,7 @@ static struct x86_pmu p6_pmu = {
1515 .event_map = p6_pmu_event_map, 1537 .event_map = p6_pmu_event_map,
1516 .raw_event = p6_pmu_raw_event, 1538 .raw_event = p6_pmu_raw_event,
1517 .max_events = ARRAY_SIZE(p6_perfmon_event_map), 1539 .max_events = ARRAY_SIZE(p6_perfmon_event_map),
1540 .apic = 1,
1518 .max_period = (1ULL << 31) - 1, 1541 .max_period = (1ULL << 31) - 1,
1519 .version = 0, 1542 .version = 0,
1520 .num_counters = 2, 1543 .num_counters = 2,
@@ -1541,6 +1564,7 @@ static struct x86_pmu intel_pmu = {
1541 .event_map = intel_pmu_event_map, 1564 .event_map = intel_pmu_event_map,
1542 .raw_event = intel_pmu_raw_event, 1565 .raw_event = intel_pmu_raw_event,
1543 .max_events = ARRAY_SIZE(intel_perfmon_event_map), 1566 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
1567 .apic = 1,
1544 /* 1568 /*
1545 * Intel PMCs cannot be accessed sanely above 32 bit width, 1569 * Intel PMCs cannot be accessed sanely above 32 bit width,
1546 * so we install an artificial 1<<31 period regardless of 1570 * so we install an artificial 1<<31 period regardless of
@@ -1564,6 +1588,7 @@ static struct x86_pmu amd_pmu = {
1564 .num_counters = 4, 1588 .num_counters = 4,
1565 .counter_bits = 48, 1589 .counter_bits = 48,
1566 .counter_mask = (1ULL << 48) - 1, 1590 .counter_mask = (1ULL << 48) - 1,
1591 .apic = 1,
1567 /* use highest bit to detect overflow */ 1592 /* use highest bit to detect overflow */
1568 .max_period = (1ULL << 47) - 1, 1593 .max_period = (1ULL << 47) - 1,
1569}; 1594};
@@ -1589,13 +1614,14 @@ static int p6_pmu_init(void)
1589 return -ENODEV; 1614 return -ENODEV;
1590 } 1615 }
1591 1616
1617 x86_pmu = p6_pmu;
1618
1592 if (!cpu_has_apic) { 1619 if (!cpu_has_apic) {
1593 pr_info("no Local APIC, try rebooting with lapic"); 1620 pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
1594 return -ENODEV; 1621 pr_info("no hardware sampling interrupt available.\n");
1622 x86_pmu.apic = 0;
1595 } 1623 }
1596 1624
1597 x86_pmu = p6_pmu;
1598
1599 return 0; 1625 return 0;
1600} 1626}
1601 1627
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index a9d823a93fe8..b53f7006cc4e 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -115,7 +115,7 @@ enum perf_counter_sample_format {
115 PERF_SAMPLE_TID = 1U << 1, 115 PERF_SAMPLE_TID = 1U << 1,
116 PERF_SAMPLE_TIME = 1U << 2, 116 PERF_SAMPLE_TIME = 1U << 2,
117 PERF_SAMPLE_ADDR = 1U << 3, 117 PERF_SAMPLE_ADDR = 1U << 3,
118 PERF_SAMPLE_GROUP = 1U << 4, 118 PERF_SAMPLE_READ = 1U << 4,
119 PERF_SAMPLE_CALLCHAIN = 1U << 5, 119 PERF_SAMPLE_CALLCHAIN = 1U << 5,
120 PERF_SAMPLE_ID = 1U << 6, 120 PERF_SAMPLE_ID = 1U << 6,
121 PERF_SAMPLE_CPU = 1U << 7, 121 PERF_SAMPLE_CPU = 1U << 7,
@@ -127,16 +127,32 @@ enum perf_counter_sample_format {
127}; 127};
128 128
129/* 129/*
130 * Bits that can be set in attr.read_format to request that 130 * The format of the data returned by read() on a perf counter fd,
131 * reads on the counter should return the indicated quantities, 131 * as specified by attr.read_format:
132 * in increasing order of bit value, after the counter value. 132 *
133 * struct read_format {
134 * { u64 value;
135 * { u64 time_enabled; } && PERF_FORMAT_ENABLED
136 * { u64 time_running; } && PERF_FORMAT_RUNNING
137 * { u64 id; } && PERF_FORMAT_ID
138 * } && !PERF_FORMAT_GROUP
139 *
140 * { u64 nr;
141 * { u64 time_enabled; } && PERF_FORMAT_ENABLED
142 * { u64 time_running; } && PERF_FORMAT_RUNNING
143 * { u64 value;
144 * { u64 id; } && PERF_FORMAT_ID
145 * } cntr[nr];
146 * } && PERF_FORMAT_GROUP
147 * };
133 */ 148 */
134enum perf_counter_read_format { 149enum perf_counter_read_format {
135 PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0, 150 PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0,
136 PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, 151 PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1,
137 PERF_FORMAT_ID = 1U << 2, 152 PERF_FORMAT_ID = 1U << 2,
153 PERF_FORMAT_GROUP = 1U << 3,
138 154
139 PERF_FORMAT_MAX = 1U << 3, /* non-ABI */ 155 PERF_FORMAT_MAX = 1U << 4, /* non-ABI */
140}; 156};
141 157
142#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ 158#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */
@@ -343,10 +359,8 @@ enum perf_event_type {
343 * struct { 359 * struct {
344 * struct perf_event_header header; 360 * struct perf_event_header header;
345 * u32 pid, tid; 361 * u32 pid, tid;
346 * u64 value; 362 *
347 * { u64 time_enabled; } && PERF_FORMAT_ENABLED 363 * struct read_format values;
348 * { u64 time_running; } && PERF_FORMAT_RUNNING
349 * { u64 parent_id; } && PERF_FORMAT_ID
350 * }; 364 * };
351 */ 365 */
352 PERF_EVENT_READ = 8, 366 PERF_EVENT_READ = 8,
@@ -364,11 +378,22 @@ enum perf_event_type {
364 * { u32 cpu, res; } && PERF_SAMPLE_CPU 378 * { u32 cpu, res; } && PERF_SAMPLE_CPU
365 * { u64 period; } && PERF_SAMPLE_PERIOD 379 * { u64 period; } && PERF_SAMPLE_PERIOD
366 * 380 *
367 * { u64 nr; 381 * { struct read_format values; } && PERF_SAMPLE_READ
368 * { u64 id, val; } cnt[nr]; } && PERF_SAMPLE_GROUP
369 * 382 *
370 * { u64 nr, 383 * { u64 nr,
371 * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN 384 * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN
385 *
386 * #
387 * # The RAW record below is opaque data wrt the ABI
388 * #
389 * # That is, the ABI doesn't make any promises wrt to
390 * # the stability of its content, it may vary depending
391 * # on event, hardware, kernel version and phase of
392 * # the moon.
393 * #
394 * # In other words, PERF_SAMPLE_RAW contents are not an ABI.
395 * #
396 *
372 * { u32 size; 397 * { u32 size;
373 * char data[size];}&& PERF_SAMPLE_RAW 398 * char data[size];}&& PERF_SAMPLE_RAW
374 * }; 399 * };
@@ -694,6 +719,8 @@ struct perf_sample_data {
694 719
695extern int perf_counter_overflow(struct perf_counter *counter, int nmi, 720extern int perf_counter_overflow(struct perf_counter *counter, int nmi,
696 struct perf_sample_data *data); 721 struct perf_sample_data *data);
722extern void perf_counter_output(struct perf_counter *counter, int nmi,
723 struct perf_sample_data *data);
697 724
698/* 725/*
699 * Return 1 for a software counter, 0 for a hardware counter 726 * Return 1 for a software counter, 0 for a hardware counter
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index b0b20a07f394..534e20d14d63 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -88,6 +88,7 @@ void __weak hw_perf_disable(void) { barrier(); }
88void __weak hw_perf_enable(void) { barrier(); } 88void __weak hw_perf_enable(void) { barrier(); }
89 89
90void __weak hw_perf_counter_setup(int cpu) { barrier(); } 90void __weak hw_perf_counter_setup(int cpu) { barrier(); }
91void __weak hw_perf_counter_setup_online(int cpu) { barrier(); }
91 92
92int __weak 93int __weak
93hw_perf_group_sched_in(struct perf_counter *group_leader, 94hw_perf_group_sched_in(struct perf_counter *group_leader,
@@ -306,6 +307,10 @@ counter_sched_out(struct perf_counter *counter,
306 return; 307 return;
307 308
308 counter->state = PERF_COUNTER_STATE_INACTIVE; 309 counter->state = PERF_COUNTER_STATE_INACTIVE;
310 if (counter->pending_disable) {
311 counter->pending_disable = 0;
312 counter->state = PERF_COUNTER_STATE_OFF;
313 }
309 counter->tstamp_stopped = ctx->time; 314 counter->tstamp_stopped = ctx->time;
310 counter->pmu->disable(counter); 315 counter->pmu->disable(counter);
311 counter->oncpu = -1; 316 counter->oncpu = -1;
@@ -1691,7 +1696,32 @@ static int perf_release(struct inode *inode, struct file *file)
1691 return 0; 1696 return 0;
1692} 1697}
1693 1698
1694static u64 perf_counter_read_tree(struct perf_counter *counter) 1699static int perf_counter_read_size(struct perf_counter *counter)
1700{
1701 int entry = sizeof(u64); /* value */
1702 int size = 0;
1703 int nr = 1;
1704
1705 if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
1706 size += sizeof(u64);
1707
1708 if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
1709 size += sizeof(u64);
1710
1711 if (counter->attr.read_format & PERF_FORMAT_ID)
1712 entry += sizeof(u64);
1713
1714 if (counter->attr.read_format & PERF_FORMAT_GROUP) {
1715 nr += counter->group_leader->nr_siblings;
1716 size += sizeof(u64);
1717 }
1718
1719 size += entry * nr;
1720
1721 return size;
1722}
1723
1724static u64 perf_counter_read_value(struct perf_counter *counter)
1695{ 1725{
1696 struct perf_counter *child; 1726 struct perf_counter *child;
1697 u64 total = 0; 1727 u64 total = 0;
@@ -1703,14 +1733,96 @@ static u64 perf_counter_read_tree(struct perf_counter *counter)
1703 return total; 1733 return total;
1704} 1734}
1705 1735
1736static int perf_counter_read_entry(struct perf_counter *counter,
1737 u64 read_format, char __user *buf)
1738{
1739 int n = 0, count = 0;
1740 u64 values[2];
1741
1742 values[n++] = perf_counter_read_value(counter);
1743 if (read_format & PERF_FORMAT_ID)
1744 values[n++] = primary_counter_id(counter);
1745
1746 count = n * sizeof(u64);
1747
1748 if (copy_to_user(buf, values, count))
1749 return -EFAULT;
1750
1751 return count;
1752}
1753
1754static int perf_counter_read_group(struct perf_counter *counter,
1755 u64 read_format, char __user *buf)
1756{
1757 struct perf_counter *leader = counter->group_leader, *sub;
1758 int n = 0, size = 0, err = -EFAULT;
1759 u64 values[3];
1760
1761 values[n++] = 1 + leader->nr_siblings;
1762 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
1763 values[n++] = leader->total_time_enabled +
1764 atomic64_read(&leader->child_total_time_enabled);
1765 }
1766 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
1767 values[n++] = leader->total_time_running +
1768 atomic64_read(&leader->child_total_time_running);
1769 }
1770
1771 size = n * sizeof(u64);
1772
1773 if (copy_to_user(buf, values, size))
1774 return -EFAULT;
1775
1776 err = perf_counter_read_entry(leader, read_format, buf + size);
1777 if (err < 0)
1778 return err;
1779
1780 size += err;
1781
1782 list_for_each_entry(sub, &leader->sibling_list, list_entry) {
1783 err = perf_counter_read_entry(counter, read_format,
1784 buf + size);
1785 if (err < 0)
1786 return err;
1787
1788 size += err;
1789 }
1790
1791 return size;
1792}
1793
1794static int perf_counter_read_one(struct perf_counter *counter,
1795 u64 read_format, char __user *buf)
1796{
1797 u64 values[4];
1798 int n = 0;
1799
1800 values[n++] = perf_counter_read_value(counter);
1801 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
1802 values[n++] = counter->total_time_enabled +
1803 atomic64_read(&counter->child_total_time_enabled);
1804 }
1805 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
1806 values[n++] = counter->total_time_running +
1807 atomic64_read(&counter->child_total_time_running);
1808 }
1809 if (read_format & PERF_FORMAT_ID)
1810 values[n++] = primary_counter_id(counter);
1811
1812 if (copy_to_user(buf, values, n * sizeof(u64)))
1813 return -EFAULT;
1814
1815 return n * sizeof(u64);
1816}
1817
1706/* 1818/*
1707 * Read the performance counter - simple non blocking version for now 1819 * Read the performance counter - simple non blocking version for now
1708 */ 1820 */
1709static ssize_t 1821static ssize_t
1710perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) 1822perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
1711{ 1823{
1712 u64 values[4]; 1824 u64 read_format = counter->attr.read_format;
1713 int n; 1825 int ret;
1714 1826
1715 /* 1827 /*
1716 * Return end-of-file for a read on a counter that is in 1828 * Return end-of-file for a read on a counter that is in
@@ -1720,28 +1832,18 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
1720 if (counter->state == PERF_COUNTER_STATE_ERROR) 1832 if (counter->state == PERF_COUNTER_STATE_ERROR)
1721 return 0; 1833 return 0;
1722 1834
1835 if (count < perf_counter_read_size(counter))
1836 return -ENOSPC;
1837
1723 WARN_ON_ONCE(counter->ctx->parent_ctx); 1838 WARN_ON_ONCE(counter->ctx->parent_ctx);
1724 mutex_lock(&counter->child_mutex); 1839 mutex_lock(&counter->child_mutex);
1725 values[0] = perf_counter_read_tree(counter); 1840 if (read_format & PERF_FORMAT_GROUP)
1726 n = 1; 1841 ret = perf_counter_read_group(counter, read_format, buf);
1727 if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) 1842 else
1728 values[n++] = counter->total_time_enabled + 1843 ret = perf_counter_read_one(counter, read_format, buf);
1729 atomic64_read(&counter->child_total_time_enabled);
1730 if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
1731 values[n++] = counter->total_time_running +
1732 atomic64_read(&counter->child_total_time_running);
1733 if (counter->attr.read_format & PERF_FORMAT_ID)
1734 values[n++] = primary_counter_id(counter);
1735 mutex_unlock(&counter->child_mutex); 1844 mutex_unlock(&counter->child_mutex);
1736 1845
1737 if (count < n * sizeof(u64)) 1846 return ret;
1738 return -EINVAL;
1739 count = n * sizeof(u64);
1740
1741 if (copy_to_user(buf, values, count))
1742 return -EFAULT;
1743
1744 return count;
1745} 1847}
1746 1848
1747static ssize_t 1849static ssize_t
@@ -2245,7 +2347,7 @@ static void perf_pending_counter(struct perf_pending_entry *entry)
2245 2347
2246 if (counter->pending_disable) { 2348 if (counter->pending_disable) {
2247 counter->pending_disable = 0; 2349 counter->pending_disable = 0;
2248 perf_counter_disable(counter); 2350 __perf_counter_disable(counter);
2249 } 2351 }
2250 2352
2251 if (counter->pending_wakeup) { 2353 if (counter->pending_wakeup) {
@@ -2630,7 +2732,80 @@ static u32 perf_counter_tid(struct perf_counter *counter, struct task_struct *p)
2630 return task_pid_nr_ns(p, counter->ns); 2732 return task_pid_nr_ns(p, counter->ns);
2631} 2733}
2632 2734
2633static void perf_counter_output(struct perf_counter *counter, int nmi, 2735static void perf_output_read_one(struct perf_output_handle *handle,
2736 struct perf_counter *counter)
2737{
2738 u64 read_format = counter->attr.read_format;
2739 u64 values[4];
2740 int n = 0;
2741
2742 values[n++] = atomic64_read(&counter->count);
2743 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
2744 values[n++] = counter->total_time_enabled +
2745 atomic64_read(&counter->child_total_time_enabled);
2746 }
2747 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
2748 values[n++] = counter->total_time_running +
2749 atomic64_read(&counter->child_total_time_running);
2750 }
2751 if (read_format & PERF_FORMAT_ID)
2752 values[n++] = primary_counter_id(counter);
2753
2754 perf_output_copy(handle, values, n * sizeof(u64));
2755}
2756
2757/*
2758 * XXX PERF_FORMAT_GROUP vs inherited counters seems difficult.
2759 */
2760static void perf_output_read_group(struct perf_output_handle *handle,
2761 struct perf_counter *counter)
2762{
2763 struct perf_counter *leader = counter->group_leader, *sub;
2764 u64 read_format = counter->attr.read_format;
2765 u64 values[5];
2766 int n = 0;
2767
2768 values[n++] = 1 + leader->nr_siblings;
2769
2770 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
2771 values[n++] = leader->total_time_enabled;
2772
2773 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
2774 values[n++] = leader->total_time_running;
2775
2776 if (leader != counter)
2777 leader->pmu->read(leader);
2778
2779 values[n++] = atomic64_read(&leader->count);
2780 if (read_format & PERF_FORMAT_ID)
2781 values[n++] = primary_counter_id(leader);
2782
2783 perf_output_copy(handle, values, n * sizeof(u64));
2784
2785 list_for_each_entry(sub, &leader->sibling_list, list_entry) {
2786 n = 0;
2787
2788 if (sub != counter)
2789 sub->pmu->read(sub);
2790
2791 values[n++] = atomic64_read(&sub->count);
2792 if (read_format & PERF_FORMAT_ID)
2793 values[n++] = primary_counter_id(sub);
2794
2795 perf_output_copy(handle, values, n * sizeof(u64));
2796 }
2797}
2798
2799static void perf_output_read(struct perf_output_handle *handle,
2800 struct perf_counter *counter)
2801{
2802 if (counter->attr.read_format & PERF_FORMAT_GROUP)
2803 perf_output_read_group(handle, counter);
2804 else
2805 perf_output_read_one(handle, counter);
2806}
2807
2808void perf_counter_output(struct perf_counter *counter, int nmi,
2634 struct perf_sample_data *data) 2809 struct perf_sample_data *data)
2635{ 2810{
2636 int ret; 2811 int ret;
@@ -2641,10 +2816,6 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
2641 struct { 2816 struct {
2642 u32 pid, tid; 2817 u32 pid, tid;
2643 } tid_entry; 2818 } tid_entry;
2644 struct {
2645 u64 id;
2646 u64 counter;
2647 } group_entry;
2648 struct perf_callchain_entry *callchain = NULL; 2819 struct perf_callchain_entry *callchain = NULL;
2649 int callchain_size = 0; 2820 int callchain_size = 0;
2650 u64 time; 2821 u64 time;
@@ -2699,10 +2870,8 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
2699 if (sample_type & PERF_SAMPLE_PERIOD) 2870 if (sample_type & PERF_SAMPLE_PERIOD)
2700 header.size += sizeof(u64); 2871 header.size += sizeof(u64);
2701 2872
2702 if (sample_type & PERF_SAMPLE_GROUP) { 2873 if (sample_type & PERF_SAMPLE_READ)
2703 header.size += sizeof(u64) + 2874 header.size += perf_counter_read_size(counter);
2704 counter->nr_siblings * sizeof(group_entry);
2705 }
2706 2875
2707 if (sample_type & PERF_SAMPLE_CALLCHAIN) { 2876 if (sample_type & PERF_SAMPLE_CALLCHAIN) {
2708 callchain = perf_callchain(data->regs); 2877 callchain = perf_callchain(data->regs);
@@ -2759,26 +2928,8 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
2759 if (sample_type & PERF_SAMPLE_PERIOD) 2928 if (sample_type & PERF_SAMPLE_PERIOD)
2760 perf_output_put(&handle, data->period); 2929 perf_output_put(&handle, data->period);
2761 2930
2762 /* 2931 if (sample_type & PERF_SAMPLE_READ)
2763 * XXX PERF_SAMPLE_GROUP vs inherited counters seems difficult. 2932 perf_output_read(&handle, counter);
2764 */
2765 if (sample_type & PERF_SAMPLE_GROUP) {
2766 struct perf_counter *leader, *sub;
2767 u64 nr = counter->nr_siblings;
2768
2769 perf_output_put(&handle, nr);
2770
2771 leader = counter->group_leader;
2772 list_for_each_entry(sub, &leader->sibling_list, list_entry) {
2773 if (sub != counter)
2774 sub->pmu->read(sub);
2775
2776 group_entry.id = primary_counter_id(sub);
2777 group_entry.counter = atomic64_read(&sub->count);
2778
2779 perf_output_put(&handle, group_entry);
2780 }
2781 }
2782 2933
2783 if (sample_type & PERF_SAMPLE_CALLCHAIN) { 2934 if (sample_type & PERF_SAMPLE_CALLCHAIN) {
2784 if (callchain) 2935 if (callchain)
@@ -2817,8 +2968,6 @@ struct perf_read_event {
2817 2968
2818 u32 pid; 2969 u32 pid;
2819 u32 tid; 2970 u32 tid;
2820 u64 value;
2821 u64 format[3];
2822}; 2971};
2823 2972
2824static void 2973static void
@@ -2830,34 +2979,20 @@ perf_counter_read_event(struct perf_counter *counter,
2830 .header = { 2979 .header = {
2831 .type = PERF_EVENT_READ, 2980 .type = PERF_EVENT_READ,
2832 .misc = 0, 2981 .misc = 0,
2833 .size = sizeof(event) - sizeof(event.format), 2982 .size = sizeof(event) + perf_counter_read_size(counter),
2834 }, 2983 },
2835 .pid = perf_counter_pid(counter, task), 2984 .pid = perf_counter_pid(counter, task),
2836 .tid = perf_counter_tid(counter, task), 2985 .tid = perf_counter_tid(counter, task),
2837 .value = atomic64_read(&counter->count),
2838 }; 2986 };
2839 int ret, i = 0; 2987 int ret;
2840
2841 if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
2842 event.header.size += sizeof(u64);
2843 event.format[i++] = counter->total_time_enabled;
2844 }
2845
2846 if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
2847 event.header.size += sizeof(u64);
2848 event.format[i++] = counter->total_time_running;
2849 }
2850
2851 if (counter->attr.read_format & PERF_FORMAT_ID) {
2852 event.header.size += sizeof(u64);
2853 event.format[i++] = primary_counter_id(counter);
2854 }
2855 2988
2856 ret = perf_output_begin(&handle, counter, event.header.size, 0, 0); 2989 ret = perf_output_begin(&handle, counter, event.header.size, 0, 0);
2857 if (ret) 2990 if (ret)
2858 return; 2991 return;
2859 2992
2860 perf_output_copy(&handle, &event, event.header.size); 2993 perf_output_put(&handle, event);
2994 perf_output_read(&handle, counter);
2995
2861 perf_output_end(&handle); 2996 perf_output_end(&handle);
2862} 2997}
2863 2998
@@ -2893,10 +3028,10 @@ static void perf_counter_task_output(struct perf_counter *counter,
2893 return; 3028 return;
2894 3029
2895 task_event->event.pid = perf_counter_pid(counter, task); 3030 task_event->event.pid = perf_counter_pid(counter, task);
2896 task_event->event.ppid = perf_counter_pid(counter, task->real_parent); 3031 task_event->event.ppid = perf_counter_pid(counter, current);
2897 3032
2898 task_event->event.tid = perf_counter_tid(counter, task); 3033 task_event->event.tid = perf_counter_tid(counter, task);
2899 task_event->event.ptid = perf_counter_tid(counter, task->real_parent); 3034 task_event->event.ptid = perf_counter_tid(counter, current);
2900 3035
2901 perf_output_put(&handle, task_event->event); 3036 perf_output_put(&handle, task_event->event);
2902 perf_output_end(&handle); 3037 perf_output_end(&handle);
@@ -3443,40 +3578,32 @@ static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
3443 3578
3444static int perf_swcounter_is_counting(struct perf_counter *counter) 3579static int perf_swcounter_is_counting(struct perf_counter *counter)
3445{ 3580{
3446 struct perf_counter_context *ctx; 3581 /*
3447 unsigned long flags; 3582 * The counter is active, we're good!
3448 int count; 3583 */
3449
3450 if (counter->state == PERF_COUNTER_STATE_ACTIVE) 3584 if (counter->state == PERF_COUNTER_STATE_ACTIVE)
3451 return 1; 3585 return 1;
3452 3586
3587 /*
3588 * The counter is off/error, not counting.
3589 */
3453 if (counter->state != PERF_COUNTER_STATE_INACTIVE) 3590 if (counter->state != PERF_COUNTER_STATE_INACTIVE)
3454 return 0; 3591 return 0;
3455 3592
3456 /* 3593 /*
3457 * If the counter is inactive, it could be just because 3594 * The counter is inactive, if the context is active
3458 * its task is scheduled out, or because it's in a group 3595 * we're part of a group that didn't make it on the 'pmu',
3459 * which could not go on the PMU. We want to count in 3596 * not counting.
3460 * the first case but not the second. If the context is
3461 * currently active then an inactive software counter must
3462 * be the second case. If it's not currently active then
3463 * we need to know whether the counter was active when the
3464 * context was last active, which we can determine by
3465 * comparing counter->tstamp_stopped with ctx->time.
3466 *
3467 * We are within an RCU read-side critical section,
3468 * which protects the existence of *ctx.
3469 */ 3597 */
3470 ctx = counter->ctx; 3598 if (counter->ctx->is_active)
3471 spin_lock_irqsave(&ctx->lock, flags); 3599 return 0;
3472 count = 1; 3600
3473 /* Re-check state now we have the lock */ 3601 /*
3474 if (counter->state < PERF_COUNTER_STATE_INACTIVE || 3602 * We're inactive and the context is too, this means the
3475 counter->ctx->is_active || 3603 * task is scheduled out, we're counting events that happen
3476 counter->tstamp_stopped < ctx->time) 3604 * to us, like migration events.
3477 count = 0; 3605 */
3478 spin_unlock_irqrestore(&ctx->lock, flags); 3606 return 1;
3479 return count;
3480} 3607}
3481 3608
3482static int perf_swcounter_match(struct perf_counter *counter, 3609static int perf_swcounter_match(struct perf_counter *counter,
@@ -3928,9 +4055,9 @@ perf_counter_alloc(struct perf_counter_attr *attr,
3928 atomic64_set(&hwc->period_left, hwc->sample_period); 4055 atomic64_set(&hwc->period_left, hwc->sample_period);
3929 4056
3930 /* 4057 /*
3931 * we currently do not support PERF_SAMPLE_GROUP on inherited counters 4058 * we currently do not support PERF_FORMAT_GROUP on inherited counters
3932 */ 4059 */
3933 if (attr->inherit && (attr->sample_type & PERF_SAMPLE_GROUP)) 4060 if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP))
3934 goto done; 4061 goto done;
3935 4062
3936 switch (attr->type) { 4063 switch (attr->type) {
@@ -4592,6 +4719,11 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
4592 perf_counter_init_cpu(cpu); 4719 perf_counter_init_cpu(cpu);
4593 break; 4720 break;
4594 4721
4722 case CPU_ONLINE:
4723 case CPU_ONLINE_FROZEN:
4724 hw_perf_counter_setup_online(cpu);
4725 break;
4726
4595 case CPU_DOWN_PREPARE: 4727 case CPU_DOWN_PREPARE:
4596 case CPU_DOWN_PREPARE_FROZEN: 4728 case CPU_DOWN_PREPARE_FROZEN:
4597 perf_counter_exit_cpu(cpu); 4729 perf_counter_exit_cpu(cpu);
@@ -4616,6 +4748,8 @@ void __init perf_counter_init(void)
4616{ 4748{
4617 perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE, 4749 perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE,
4618 (void *)(long)smp_processor_id()); 4750 (void *)(long)smp_processor_id());
4751 perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE,
4752 (void *)(long)smp_processor_id());
4619 register_cpu_notifier(&perf_cpu_nb); 4753 register_cpu_notifier(&perf_cpu_nb);
4620} 4754}
4621 4755
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 60411e94113b..c045b4271e57 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -382,22 +382,29 @@ endif
382ifdef NO_DEMANGLE 382ifdef NO_DEMANGLE
383 BASIC_CFLAGS += -DNO_DEMANGLE 383 BASIC_CFLAGS += -DNO_DEMANGLE
384else 384else
385
386 has_bfd := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd > /dev/null 2>&1 && echo y") 385 has_bfd := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd > /dev/null 2>&1 && echo y")
387 386
388 has_bfd_iberty := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty > /dev/null 2>&1 && echo y")
389
390 has_bfd_iberty_z := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty -lz > /dev/null 2>&1 && echo y")
391
392 ifeq ($(has_bfd),y) 387 ifeq ($(has_bfd),y)
393 EXTLIBS += -lbfd 388 EXTLIBS += -lbfd
394 else ifeq ($(has_bfd_iberty),y)
395 EXTLIBS += -lbfd -liberty
396 else ifeq ($(has_bfd_iberty_z),y)
397 EXTLIBS += -lbfd -liberty -lz
398 else 389 else
399 msg := $(warning No bfd.h/libbfd found, install binutils-dev[el] to gain symbol demangling) 390 has_bfd_iberty := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty > /dev/null 2>&1 && echo y")
400 BASIC_CFLAGS += -DNO_DEMANGLE 391 ifeq ($(has_bfd_iberty),y)
392 EXTLIBS += -lbfd -liberty
393 else
394 has_bfd_iberty_z := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty -lz > /dev/null 2>&1 && echo y")
395 ifeq ($(has_bfd_iberty_z),y)
396 EXTLIBS += -lbfd -liberty -lz
397 else
398 has_cplus_demangle := $(shell sh -c "(echo 'extern char *cplus_demangle(const char *, int);'; echo 'int main(void) { cplus_demangle(0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -liberty > /dev/null 2>&1 && echo y")
399 ifeq ($(has_cplus_demangle),y)
400 EXTLIBS += -liberty
401 BASIC_CFLAGS += -DHAVE_CPLUS_DEMANGLE
402 else
403 msg := $(warning No bfd.h/libbfd found, install binutils-dev[el] to gain symbol demangling)
404 BASIC_CFLAGS += -DNO_DEMANGLE
405 endif
406 endif
407 endif
401 endif 408 endif
402endif 409endif
403 410
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index f990fa8a35c9..d88c6961274c 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -10,11 +10,12 @@
10 10
11#include "perf.h" 11#include "perf.h"
12 12
13#include "util/parse-options.h"
14#include "util/parse-events.h" 13#include "util/parse-events.h"
14#include "util/cache.h"
15 15
16int cmd_list(int argc __used, const char **argv __used, const char *prefix __used) 16int cmd_list(int argc __used, const char **argv __used, const char *prefix __used)
17{ 17{
18 setup_pager();
18 print_events(); 19 print_events();
19 return 0; 20 return 0;
20} 21}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 0345aad8eba5..3d051b9cf25f 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -34,7 +34,9 @@ static int output;
34static const char *output_name = "perf.data"; 34static const char *output_name = "perf.data";
35static int group = 0; 35static int group = 0;
36static unsigned int realtime_prio = 0; 36static unsigned int realtime_prio = 0;
37static int raw_samples = 0;
37static int system_wide = 0; 38static int system_wide = 0;
39static int profile_cpu = -1;
38static pid_t target_pid = -1; 40static pid_t target_pid = -1;
39static int inherit = 1; 41static int inherit = 1;
40static int force = 0; 42static int force = 0;
@@ -203,46 +205,48 @@ static void sig_atexit(void)
203 kill(getpid(), signr); 205 kill(getpid(), signr);
204} 206}
205 207
206static void pid_synthesize_comm_event(pid_t pid, int full) 208static pid_t pid_synthesize_comm_event(pid_t pid, int full)
207{ 209{
208 struct comm_event comm_ev; 210 struct comm_event comm_ev;
209 char filename[PATH_MAX]; 211 char filename[PATH_MAX];
210 char bf[BUFSIZ]; 212 char bf[BUFSIZ];
211 int fd; 213 FILE *fp;
212 size_t size; 214 size_t size = 0;
213 char *field, *sep;
214 DIR *tasks; 215 DIR *tasks;
215 struct dirent dirent, *next; 216 struct dirent dirent, *next;
217 pid_t tgid = 0;
216 218
217 snprintf(filename, sizeof(filename), "/proc/%d/stat", pid); 219 snprintf(filename, sizeof(filename), "/proc/%d/status", pid);
218 220
219 fd = open(filename, O_RDONLY); 221 fp = fopen(filename, "r");
220 if (fd < 0) { 222 if (fd == NULL) {
221 /* 223 /*
222 * We raced with a task exiting - just return: 224 * We raced with a task exiting - just return:
223 */ 225 */
224 if (verbose) 226 if (verbose)
225 fprintf(stderr, "couldn't open %s\n", filename); 227 fprintf(stderr, "couldn't open %s\n", filename);
226 return; 228 return 0;
227 }
228 if (read(fd, bf, sizeof(bf)) < 0) {
229 fprintf(stderr, "couldn't read %s\n", filename);
230 exit(EXIT_FAILURE);
231 } 229 }
232 close(fd);
233 230
234 /* 9027 (cat) R 6747 9027 6747 34816 9027 ... */
235 memset(&comm_ev, 0, sizeof(comm_ev)); 231 memset(&comm_ev, 0, sizeof(comm_ev));
236 field = strchr(bf, '('); 232 while (!comm_ev.comm[0] || !comm_ev.pid) {
237 if (field == NULL) 233 if (fgets(bf, sizeof(bf), fp) == NULL)
238 goto out_failure; 234 goto out_failure;
239 sep = strchr(++field, ')'); 235
240 if (sep == NULL) 236 if (memcmp(bf, "Name:", 5) == 0) {
241 goto out_failure; 237 char *name = bf + 5;
242 size = sep - field; 238 while (*name && isspace(*name))
243 memcpy(comm_ev.comm, field, size++); 239 ++name;
244 240 size = strlen(name) - 1;
245 comm_ev.pid = pid; 241 memcpy(comm_ev.comm, name, size++);
242 } else if (memcmp(bf, "Tgid:", 5) == 0) {
243 char *tgids = bf + 5;
244 while (*tgids && isspace(*tgids))
245 ++tgids;
246 tgid = comm_ev.pid = atoi(tgids);
247 }
248 }
249
246 comm_ev.header.type = PERF_EVENT_COMM; 250 comm_ev.header.type = PERF_EVENT_COMM;
247 size = ALIGN(size, sizeof(u64)); 251 size = ALIGN(size, sizeof(u64));
248 comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size); 252 comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size);
@@ -251,7 +255,7 @@ static void pid_synthesize_comm_event(pid_t pid, int full)
251 comm_ev.tid = pid; 255 comm_ev.tid = pid;
252 256
253 write_output(&comm_ev, comm_ev.header.size); 257 write_output(&comm_ev, comm_ev.header.size);
254 return; 258 goto out_fclose;
255 } 259 }
256 260
257 snprintf(filename, sizeof(filename), "/proc/%d/task", pid); 261 snprintf(filename, sizeof(filename), "/proc/%d/task", pid);
@@ -268,7 +272,10 @@ static void pid_synthesize_comm_event(pid_t pid, int full)
268 write_output(&comm_ev, comm_ev.header.size); 272 write_output(&comm_ev, comm_ev.header.size);
269 } 273 }
270 closedir(tasks); 274 closedir(tasks);
271 return; 275
276out_fclose:
277 fclose(fp);
278 return tgid;
272 279
273out_failure: 280out_failure:
274 fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n", 281 fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n",
@@ -276,7 +283,7 @@ out_failure:
276 exit(EXIT_FAILURE); 283 exit(EXIT_FAILURE);
277} 284}
278 285
279static void pid_synthesize_mmap_samples(pid_t pid) 286static void pid_synthesize_mmap_samples(pid_t pid, pid_t tgid)
280{ 287{
281 char filename[PATH_MAX]; 288 char filename[PATH_MAX];
282 FILE *fp; 289 FILE *fp;
@@ -328,7 +335,7 @@ static void pid_synthesize_mmap_samples(pid_t pid)
328 mmap_ev.len -= mmap_ev.start; 335 mmap_ev.len -= mmap_ev.start;
329 mmap_ev.header.size = (sizeof(mmap_ev) - 336 mmap_ev.header.size = (sizeof(mmap_ev) -
330 (sizeof(mmap_ev.filename) - size)); 337 (sizeof(mmap_ev.filename) - size));
331 mmap_ev.pid = pid; 338 mmap_ev.pid = tgid;
332 mmap_ev.tid = pid; 339 mmap_ev.tid = pid;
333 340
334 write_output(&mmap_ev, mmap_ev.header.size); 341 write_output(&mmap_ev, mmap_ev.header.size);
@@ -347,14 +354,14 @@ static void synthesize_all(void)
347 354
348 while (!readdir_r(proc, &dirent, &next) && next) { 355 while (!readdir_r(proc, &dirent, &next) && next) {
349 char *end; 356 char *end;
350 pid_t pid; 357 pid_t pid, tgid;
351 358
352 pid = strtol(dirent.d_name, &end, 10); 359 pid = strtol(dirent.d_name, &end, 10);
353 if (*end) /* only interested in proper numerical dirents */ 360 if (*end) /* only interested in proper numerical dirents */
354 continue; 361 continue;
355 362
356 pid_synthesize_comm_event(pid, 1); 363 tgid = pid_synthesize_comm_event(pid, 1);
357 pid_synthesize_mmap_samples(pid); 364 pid_synthesize_mmap_samples(pid, tgid);
358 } 365 }
359 366
360 closedir(proc); 367 closedir(proc);
@@ -392,7 +399,7 @@ static void create_counter(int counter, int cpu, pid_t pid)
392 PERF_FORMAT_TOTAL_TIME_RUNNING | 399 PERF_FORMAT_TOTAL_TIME_RUNNING |
393 PERF_FORMAT_ID; 400 PERF_FORMAT_ID;
394 401
395 attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; 402 attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
396 403
397 if (freq) { 404 if (freq) {
398 attr->sample_type |= PERF_SAMPLE_PERIOD; 405 attr->sample_type |= PERF_SAMPLE_PERIOD;
@@ -412,6 +419,8 @@ static void create_counter(int counter, int cpu, pid_t pid)
412 if (call_graph) 419 if (call_graph)
413 attr->sample_type |= PERF_SAMPLE_CALLCHAIN; 420 attr->sample_type |= PERF_SAMPLE_CALLCHAIN;
414 421
422 if (raw_samples)
423 attr->sample_type |= PERF_SAMPLE_RAW;
415 424
416 attr->mmap = track; 425 attr->mmap = track;
417 attr->comm = track; 426 attr->comm = track;
@@ -426,6 +435,8 @@ try_again:
426 435
427 if (err == EPERM) 436 if (err == EPERM)
428 die("Permission error - are you root?\n"); 437 die("Permission error - are you root?\n");
438 else if (err == ENODEV && profile_cpu != -1)
439 die("No such device - did you specify an out-of-range profile CPU?\n");
429 440
430 /* 441 /*
431 * If it's cycles then fall back to hrtimer 442 * If it's cycles then fall back to hrtimer
@@ -559,16 +570,22 @@ static int __cmd_record(int argc, const char **argv)
559 if (pid == -1) 570 if (pid == -1)
560 pid = getpid(); 571 pid = getpid();
561 572
562 open_counters(-1, pid); 573 open_counters(profile_cpu, pid);
563 } else for (i = 0; i < nr_cpus; i++) 574 } else {
564 open_counters(i, target_pid); 575 if (profile_cpu != -1) {
576 open_counters(profile_cpu, target_pid);
577 } else {
578 for (i = 0; i < nr_cpus; i++)
579 open_counters(i, target_pid);
580 }
581 }
565 582
566 if (file_new) 583 if (file_new)
567 perf_header__write(header, output); 584 perf_header__write(header, output);
568 585
569 if (!system_wide) { 586 if (!system_wide) {
570 pid_synthesize_comm_event(pid, 0); 587 pid_t tgid = pid_synthesize_comm_event(pid, 0);
571 pid_synthesize_mmap_samples(pid); 588 pid_synthesize_mmap_samples(pid, tgid);
572 } else 589 } else
573 synthesize_all(); 590 synthesize_all();
574 591
@@ -636,10 +653,14 @@ static const struct option options[] = {
636 "record events on existing pid"), 653 "record events on existing pid"),
637 OPT_INTEGER('r', "realtime", &realtime_prio, 654 OPT_INTEGER('r', "realtime", &realtime_prio,
638 "collect data with this RT SCHED_FIFO priority"), 655 "collect data with this RT SCHED_FIFO priority"),
656 OPT_BOOLEAN('R', "raw-samples", &raw_samples,
657 "collect raw sample records from all opened counters"),
639 OPT_BOOLEAN('a', "all-cpus", &system_wide, 658 OPT_BOOLEAN('a', "all-cpus", &system_wide,
640 "system-wide collection from all CPUs"), 659 "system-wide collection from all CPUs"),
641 OPT_BOOLEAN('A', "append", &append_file, 660 OPT_BOOLEAN('A', "append", &append_file,
642 "append to the output file to do incremental profiling"), 661 "append to the output file to do incremental profiling"),
662 OPT_INTEGER('C', "profile_cpu", &profile_cpu,
663 "CPU to profile on"),
643 OPT_BOOLEAN('f', "force", &force, 664 OPT_BOOLEAN('f', "force", &force,
644 "overwrite existing data file"), 665 "overwrite existing data file"),
645 OPT_LONG('c', "count", &default_interval, 666 OPT_LONG('c', "count", &default_interval,
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 99274cec0adb..b53a60fc12de 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -1526,11 +1526,11 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
1526 more_data += sizeof(u64); 1526 more_data += sizeof(u64);
1527 } 1527 }
1528 1528
1529 dprintf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d: %p period: %Ld\n", 1529 dprintf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n",
1530 (void *)(offset + head), 1530 (void *)(offset + head),
1531 (void *)(long)(event->header.size), 1531 (void *)(long)(event->header.size),
1532 event->header.misc, 1532 event->header.misc,
1533 event->ip.pid, 1533 event->ip.pid, event->ip.tid,
1534 (void *)(long)ip, 1534 (void *)(long)ip,
1535 (long long)period); 1535 (long long)period);
1536 1536
@@ -1590,10 +1590,11 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
1590 if (show & show_mask) { 1590 if (show & show_mask) {
1591 struct symbol *sym = resolve_symbol(thread, &map, &dso, &ip); 1591 struct symbol *sym = resolve_symbol(thread, &map, &dso, &ip);
1592 1592
1593 if (dso_list && dso && dso->name && !strlist__has_entry(dso_list, dso->name)) 1593 if (dso_list && (!dso || !dso->name ||
1594 !strlist__has_entry(dso_list, dso->name)))
1594 return 0; 1595 return 0;
1595 1596
1596 if (sym_list && sym && !strlist__has_entry(sym_list, sym->name)) 1597 if (sym_list && (!sym || !strlist__has_entry(sym_list, sym->name)))
1597 return 0; 1598 return 0;
1598 1599
1599 if (hist_entry__add(thread, map, dso, sym, ip, chain, level, period)) { 1600 if (hist_entry__add(thread, map, dso, sym, ip, chain, level, period)) {
@@ -1612,10 +1613,11 @@ process_mmap_event(event_t *event, unsigned long offset, unsigned long head)
1612 struct thread *thread = threads__findnew(event->mmap.pid); 1613 struct thread *thread = threads__findnew(event->mmap.pid);
1613 struct map *map = map__new(&event->mmap); 1614 struct map *map = map__new(&event->mmap);
1614 1615
1615 dprintf("%p [%p]: PERF_EVENT_MMAP %d: [%p(%p) @ %p]: %s\n", 1616 dprintf("%p [%p]: PERF_EVENT_MMAP %d/%d: [%p(%p) @ %p]: %s\n",
1616 (void *)(offset + head), 1617 (void *)(offset + head),
1617 (void *)(long)(event->header.size), 1618 (void *)(long)(event->header.size),
1618 event->mmap.pid, 1619 event->mmap.pid,
1620 event->mmap.tid,
1619 (void *)(long)event->mmap.start, 1621 (void *)(long)event->mmap.start,
1620 (void *)(long)event->mmap.len, 1622 (void *)(long)event->mmap.len,
1621 (void *)(long)event->mmap.pgoff, 1623 (void *)(long)event->mmap.pgoff,
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 4858d83b3b67..044178408783 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -379,6 +379,7 @@ static int parse_tracepoint_event(const char **strp,
379 struct perf_counter_attr *attr) 379 struct perf_counter_attr *attr)
380{ 380{
381 const char *evt_name; 381 const char *evt_name;
382 char *flags;
382 char sys_name[MAX_EVENT_LENGTH]; 383 char sys_name[MAX_EVENT_LENGTH];
383 char id_buf[4]; 384 char id_buf[4];
384 int fd; 385 int fd;
@@ -400,6 +401,15 @@ static int parse_tracepoint_event(const char **strp,
400 strncpy(sys_name, *strp, sys_length); 401 strncpy(sys_name, *strp, sys_length);
401 sys_name[sys_length] = '\0'; 402 sys_name[sys_length] = '\0';
402 evt_name = evt_name + 1; 403 evt_name = evt_name + 1;
404
405 flags = strchr(evt_name, ':');
406 if (flags) {
407 *flags = '\0';
408 flags++;
409 if (!strncmp(flags, "record", strlen(flags)))
410 attr->sample_type |= PERF_SAMPLE_RAW;
411 }
412
403 evt_length = strlen(evt_name); 413 evt_length = strlen(evt_name);
404 if (evt_length >= MAX_EVENT_LENGTH) 414 if (evt_length >= MAX_EVENT_LENGTH)
405 return 0; 415 return 0;
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index f1dcede14307..5c0f42e6b33b 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -7,23 +7,8 @@
7#include <gelf.h> 7#include <gelf.h>
8#include <elf.h> 8#include <elf.h>
9 9
10#ifndef NO_DEMANGLE
11#include <bfd.h>
12#else
13static inline
14char *bfd_demangle(void __used *v, const char __used *c, int __used i)
15{
16 return NULL;
17}
18#endif
19
20const char *sym_hist_filter; 10const char *sym_hist_filter;
21 11
22#ifndef DMGL_PARAMS
23#define DMGL_PARAMS (1 << 0) /* Include function args */
24#define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */
25#endif
26
27enum dso_origin { 12enum dso_origin {
28 DSO__ORIG_KERNEL = 0, 13 DSO__ORIG_KERNEL = 0,
29 DSO__ORIG_JAVA_JIT, 14 DSO__ORIG_JAVA_JIT,
@@ -816,6 +801,8 @@ more:
816 } 801 }
817out: 802out:
818 free(name); 803 free(name);
804 if (ret < 0 && strstr(self->name, " (deleted)") != NULL)
805 return 0;
819 return ret; 806 return ret;
820} 807}
821 808
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 1e003ec2f4b1..b53bf0125c1b 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -7,6 +7,30 @@
7#include <linux/rbtree.h> 7#include <linux/rbtree.h>
8#include "module.h" 8#include "module.h"
9 9
10#ifdef HAVE_CPLUS_DEMANGLE
11extern char *cplus_demangle(const char *, int);
12
13static inline char *bfd_demangle(void __used *v, const char *c, int i)
14{
15 return cplus_demangle(c, i);
16}
17#else
18#ifdef NO_DEMANGLE
19static inline char *bfd_demangle(void __used *v, const char __used *c,
20 int __used i)
21{
22 return NULL;
23}
24#else
25#include <bfd.h>
26#endif
27#endif
28
29#ifndef DMGL_PARAMS
30#define DMGL_PARAMS (1 << 0) /* Include function args */
31#define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */
32#endif
33
10struct symbol { 34struct symbol {
11 struct rb_node rb_node; 35 struct rb_node rb_node;
12 u64 start; 36 u64 start;