author		Peter Zijlstra <a.p.zijlstra@chello.nl>	2009-03-30 13:07:02 -0400
committer	Ingo Molnar <mingo@elte.hu>	2009-04-06 03:30:36 -0400
commit		925d519ab82b6dd7aca9420d809ee83819c08db2 (patch)
tree		aa05bd7eb607915aa691d5434ec74521b487b466 /kernel/perf_counter.c
parent		53cfbf593758916aac41db728f029986a62f1254 (diff)
perf_counter: unify and fix delayed counter wakeup
While going over the wakeup code I noticed that delayed wakeups only work for hardware counters, but basically all software counters rely on them.

This patch unifies and generalizes the delayed wakeup to fix this issue.

Since we're dealing with NMI context bits here, use a cmpxchg()-based singly linked list implementation to track counters that have pending wakeups.

[ This should really be generic code for delayed wakeups, but since we cannot use cmpxchg()/xchg() in generic code, I've let it live in the perf_counter code. -- Eric Dumazet could use it to aggregate the network wakeups. ]

Furthermore, the x86 method of using TIF flags was flawed in that it's quite possible to end up setting the bit on the idle task, losing the wakeup.

The powerpc method uses per-cpu storage and does appear to be sufficient.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Orig-LKML-Reference: <20090330171023.153932974@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
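[ Editor's aside: for readers following along outside the kernel tree, below is a minimal stand-alone sketch of the scheme described above, with C11 atomics standing in for the kernel's cmpxchg()/xchg(). All names here (pending_entry, pending_queue, pending_run, PENDING_TAIL) are illustrative only; the actual kernel code is in the patch below. ]

/*
 * Illustrative user-space sketch of the cmpxchg()-based pending list.
 * C11 atomics stand in for the kernel's cmpxchg()/xchg(); all names
 * are hypothetical, not the kernel's.
 */
#include <stdatomic.h>
#include <stddef.h>

struct pending_entry {
	_Atomic(struct pending_entry *) next;
};

/*
 * Sentinel tail: an empty list points at PENDING_TAIL, so that
 * next == NULL can unambiguously mean "not queued".
 */
#define PENDING_TAIL ((struct pending_entry *)-1UL)

static _Atomic(struct pending_entry *) pending_head = PENDING_TAIL;

/* Queue an entry; never blocks, so it is usable from contexts that
 * cannot take locks. */
static void pending_queue(struct pending_entry *e)
{
	struct pending_entry *expected = NULL;

	/* Claim the entry: a second queue attempt loses this race and
	 * returns, so an entry is queued at most once. */
	if (!atomic_compare_exchange_strong(&e->next, &expected, PENDING_TAIL))
		return;

	/* Lock-free push onto the head of the singly linked list. */
	struct pending_entry *prev = atomic_load(&pending_head);
	do {
		atomic_store(&e->next, prev);
	} while (!atomic_compare_exchange_weak(&pending_head, &prev, e));
}

/* Drain: atomically steal the whole list, then walk it. */
static int pending_run(void (*wakeup)(struct pending_entry *))
{
	struct pending_entry *list = atomic_exchange(&pending_head, PENDING_TAIL);
	int nr = 0;

	while (list != PENDING_TAIL) {
		struct pending_entry *next = atomic_load(&list->next);

		/* Mark "not queued" before issuing the wakeup, mirroring
		 * the smp_wmb() in __perf_pending_run() below. */
		atomic_store(&list->next, NULL);
		wakeup(list);
		nr++;
		list = next;
	}
	return nr;
}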
Diffstat (limited to 'kernel/perf_counter.c')
-rw-r--r--	kernel/perf_counter.c	| 128
1 file changed, 120 insertions(+), 8 deletions(-)
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 3b862a7988c..f70ff80e79d 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1197,8 +1197,12 @@ static void free_counter_rcu(struct rcu_head *head)
 	kfree(counter);
 }
 
+static void perf_pending_sync(struct perf_counter *counter);
+
 static void free_counter(struct perf_counter *counter)
 {
+	perf_pending_sync(counter);
+
 	if (counter->destroy)
 		counter->destroy(counter);
 
@@ -1529,6 +1533,118 @@ static const struct file_operations perf_fops = {
 };
 
 /*
+ * Perf counter wakeup
+ *
+ * If there's data, ensure we set the poll() state and publish everything
+ * to user-space before waking everybody up.
+ */
+
+void perf_counter_wakeup(struct perf_counter *counter)
+{
+	struct perf_mmap_data *data;
+
+	rcu_read_lock();
+	data = rcu_dereference(counter->data);
+	if (data) {
+		(void)atomic_xchg(&data->wakeup, POLL_IN);
+		__perf_counter_update_userpage(counter, data);
+	}
+	rcu_read_unlock();
+
+	wake_up_all(&counter->waitq);
+}
+
+/*
+ * Pending wakeups
+ *
+ * Handle the case where we need to wakeup up from NMI (or rq->lock) context.
+ *
+ * The NMI bit means we cannot possibly take locks. Therefore, maintain a
+ * single linked list and use cmpxchg() to add entries lockless.
+ */
+
+#define PENDING_TAIL ((struct perf_wakeup_entry *)-1UL)
+
+static DEFINE_PER_CPU(struct perf_wakeup_entry *, perf_wakeup_head) = {
+	PENDING_TAIL,
+};
+
+static void perf_pending_queue(struct perf_counter *counter)
+{
+	struct perf_wakeup_entry **head;
+	struct perf_wakeup_entry *prev, *next;
+
+	if (cmpxchg(&counter->wakeup.next, NULL, PENDING_TAIL) != NULL)
+		return;
+
+	head = &get_cpu_var(perf_wakeup_head);
+
+	do {
+		prev = counter->wakeup.next = *head;
+		next = &counter->wakeup;
+	} while (cmpxchg(head, prev, next) != prev);
+
+	set_perf_counter_pending();
+
+	put_cpu_var(perf_wakeup_head);
+}
+
+static int __perf_pending_run(void)
+{
+	struct perf_wakeup_entry *list;
+	int nr = 0;
+
+	list = xchg(&__get_cpu_var(perf_wakeup_head), PENDING_TAIL);
+	while (list != PENDING_TAIL) {
+		struct perf_counter *counter = container_of(list,
+				struct perf_counter, wakeup);
+
+		list = list->next;
+
+		counter->wakeup.next = NULL;
+		/*
+		 * Ensure we observe the unqueue before we issue the wakeup,
+		 * so that we won't be waiting forever.
+		 * -- see perf_not_pending().
+		 */
+		smp_wmb();
+
+		perf_counter_wakeup(counter);
+		nr++;
+	}
+
+	return nr;
+}
+
+static inline int perf_not_pending(struct perf_counter *counter)
+{
+	/*
+	 * If we flush on whatever cpu we run, there is a chance we don't
+	 * need to wait.
+	 */
+	get_cpu();
+	__perf_pending_run();
+	put_cpu();
+
+	/*
+	 * Ensure we see the proper queue state before going to sleep
+	 * so that we do not miss the wakeup. -- see perf_pending_handle()
+	 */
+	smp_rmb();
+	return counter->wakeup.next == NULL;
+}
+
+static void perf_pending_sync(struct perf_counter *counter)
+{
+	wait_event(counter->waitq, perf_not_pending(counter));
+}
+
+void perf_counter_do_pending(void)
+{
+	__perf_pending_run();
+}
+
+/*
  * Output
  */
 
@@ -1611,13 +1727,10 @@ static void perf_output_copy(struct perf_output_handle *handle,
 static void perf_output_end(struct perf_output_handle *handle, int nmi)
 {
 	if (handle->wakeup) {
-		(void)atomic_xchg(&handle->data->wakeup, POLL_IN);
-		__perf_counter_update_userpage(handle->counter, handle->data);
-		if (nmi) {
-			handle->counter->wakeup_pending = 1;
-			set_perf_counter_pending();
-		} else
-			wake_up(&handle->counter->waitq);
+		if (nmi)
+			perf_pending_queue(handle->counter);
+		else
+			perf_counter_wakeup(handle->counter);
 	}
 	rcu_read_unlock();
 }
@@ -2211,7 +2324,6 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 
 	counter->cpu = cpu;
 	counter->hw_event = *hw_event;
-	counter->wakeup_pending = 0;
 	counter->group_leader = group_leader;
 	counter->hw_ops = NULL;
 	counter->ctx = ctx;
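
[ Editor's aside, continuing the stand-alone sketch after the commit message: a small, equally hypothetical usage example demonstrates the two properties the patch relies on -- an entry is queued at most once, and a single drain wakes everything that was pending. ]

#include <stdio.h>

/* demo_wakeup() and main() are hypothetical, reusing pending_queue()
 * and pending_run() from the sketch above. */
static void demo_wakeup(struct pending_entry *e)
{
	printf("wakeup %p\n", (void *)e);
}

int main(void)
{
	struct pending_entry a, b;

	atomic_init(&a.next, NULL);
	atomic_init(&b.next, NULL);

	pending_queue(&a);
	pending_queue(&b);
	pending_queue(&a);	/* no-op: 'a' is already queued */

	printf("drained %d entries\n", pending_run(demo_wakeup));	/* prints 2 */
	return 0;
}

On the wait side, perf_pending_sync() only has to flush the local CPU's list and then sleep until ->next reads NULL again; the smp_wmb()/smp_rmb() pair in the patch guarantees that observing NULL means the corresponding wakeup has already been issued.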