author    Steven Rostedt <srostedt@redhat.com>    2011-09-22 14:01:55 -0400
committer Steven Rostedt <rostedt@goodmis.org>    2012-04-23 21:15:55 -0400
commit    07d777fe8c3985bc83428c2866713c2d1b3d4129 (patch)
tree      f14c6aa59b9719761ad9feebebca22550ebdf0e3 /kernel/trace/trace.c
parent    a385ec4f11bdcf81af094c03e2444ee9b7fad2e5 (diff)
tracing: Add percpu buffers for trace_printk()
Currently, trace_printk() uses a single buffer to write into in order to calculate the size and format needed to save the trace. To do this safely in an SMP environment, a spin_lock() is taken to allow only one writer at a time into the buffer. But this can also affect what is being traced, and add synchronization that would not be there otherwise.

Ideally, per-cpu buffers would be used, but since trace_printk() is only used in development, having per-cpu buffers for something never used is a waste of space. Thus, the trace_bprintk() format section is changed to hold static fmts as well as dynamic ones. Then, at boot up, we can check whether the section that holds the trace_printk formats is non-empty; if it contains something, we know a trace_printk() has been added to the kernel, and the trace_printk per-cpu buffers are allocated at that point. A check is also done at module load time in case a module is added that contains a trace_printk().

Once the buffers are allocated, they are never freed. If you use a trace_printk(), then you should know what you are doing.

A buffer is made for each type of context:

  normal
  softirq
  irq
  nmi

The context is checked and the appropriate buffer is used. This allows for totally lockless usage of trace_printk(); it no longer even disables interrupts.

Requested-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
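For illustration only (not part of this commit's diff): a minimal, hypothetical call site shows what trips the boot-time check added below. Building in any trace_printk() records its constant format string in the trace_printk format section, so the __start___trace_bprintk_fmt/__stop___trace_bprintk_fmt range tested in tracer_alloc_buffers() becomes non-empty and the per-cpu buffers get allocated. The function name example_debug_hook is made up for this sketch.

#include <linux/kernel.h>

/*
 * Hypothetical call site, shown only to illustrate the trigger: the
 * constant format string below ends up in the trace_printk format
 * section at build time, which is what makes the section non-empty
 * and causes trace_printk_init_buffers() to run at boot.
 */
static void example_debug_hook(int cpu, unsigned long nr_events)
{
	trace_printk("cpu %d handled %lu events\n", cpu, nr_events);
}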
Diffstat (limited to 'kernel/trace/trace.c')
-rw-r--r--  kernel/trace/trace.c | 184
1 file changed, 135 insertions, 49 deletions
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ed7b5d1e12f4..1ab8e35d069b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1498,25 +1498,119 @@ static void __trace_userstack(struct trace_array *tr, unsigned long flags)
 
 #endif /* CONFIG_STACKTRACE */
 
+/* created for use with alloc_percpu */
+struct trace_buffer_struct {
+	char buffer[TRACE_BUF_SIZE];
+};
+
+static struct trace_buffer_struct *trace_percpu_buffer;
+static struct trace_buffer_struct *trace_percpu_sirq_buffer;
+static struct trace_buffer_struct *trace_percpu_irq_buffer;
+static struct trace_buffer_struct *trace_percpu_nmi_buffer;
+
+/*
+ * The buffer used is dependent on the context. There is a per cpu
+ * buffer for normal context, softirq context, hard irq context and
+ * for NMI context. This allows for lockless recording.
+ *
+ * Note, if the buffers failed to be allocated, then this returns NULL
+ */
+static char *get_trace_buf(void)
+{
+	struct trace_buffer_struct *percpu_buffer;
+	struct trace_buffer_struct *buffer;
+
+	/*
+	 * If we have allocated per cpu buffers, then we do not
+	 * need to do any locking.
+	 */
+	if (in_nmi())
+		percpu_buffer = trace_percpu_nmi_buffer;
+	else if (in_irq())
+		percpu_buffer = trace_percpu_irq_buffer;
+	else if (in_softirq())
+		percpu_buffer = trace_percpu_sirq_buffer;
+	else
+		percpu_buffer = trace_percpu_buffer;
+
+	if (!percpu_buffer)
+		return NULL;
+
+	buffer = per_cpu_ptr(percpu_buffer, smp_processor_id());
+
+	return buffer->buffer;
+}
+
+static int alloc_percpu_trace_buffer(void)
+{
+	struct trace_buffer_struct *buffers;
+	struct trace_buffer_struct *sirq_buffers;
+	struct trace_buffer_struct *irq_buffers;
+	struct trace_buffer_struct *nmi_buffers;
+
+	buffers = alloc_percpu(struct trace_buffer_struct);
+	if (!buffers)
+		goto err_warn;
+
+	sirq_buffers = alloc_percpu(struct trace_buffer_struct);
+	if (!sirq_buffers)
+		goto err_sirq;
+
+	irq_buffers = alloc_percpu(struct trace_buffer_struct);
+	if (!irq_buffers)
+		goto err_irq;
+
+	nmi_buffers = alloc_percpu(struct trace_buffer_struct);
+	if (!nmi_buffers)
+		goto err_nmi;
+
+	trace_percpu_buffer = buffers;
+	trace_percpu_sirq_buffer = sirq_buffers;
+	trace_percpu_irq_buffer = irq_buffers;
+	trace_percpu_nmi_buffer = nmi_buffers;
+
+	return 0;
+
+ err_nmi:
+	free_percpu(irq_buffers);
+ err_irq:
+	free_percpu(sirq_buffers);
+ err_sirq:
+	free_percpu(buffers);
+ err_warn:
+	WARN(1, "Could not allocate percpu trace_printk buffer");
+	return -ENOMEM;
+}
+
+void trace_printk_init_buffers(void)
+{
+	static int buffers_allocated;
+
+	if (buffers_allocated)
+		return;
+
+	if (alloc_percpu_trace_buffer())
+		return;
+
+	pr_info("ftrace: Allocated trace_printk buffers\n");
+
+	buffers_allocated = 1;
+}
+
 /**
  * trace_vbprintk - write binary msg to tracing buffer
  *
  */
 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
 {
-	static arch_spinlock_t trace_buf_lock =
-		(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
-	static u32 trace_buf[TRACE_BUF_SIZE];
-
 	struct ftrace_event_call *call = &event_bprint;
 	struct ring_buffer_event *event;
 	struct ring_buffer *buffer;
 	struct trace_array *tr = &global_trace;
-	struct trace_array_cpu *data;
 	struct bprint_entry *entry;
 	unsigned long flags;
-	int disable;
-	int cpu, len = 0, size, pc;
+	char *tbuffer;
+	int len = 0, size, pc;
 
 	if (unlikely(tracing_selftest_running || tracing_disabled))
 		return 0;
@@ -1526,43 +1620,36 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
 
 	pc = preempt_count();
 	preempt_disable_notrace();
-	cpu = raw_smp_processor_id();
-	data = tr->data[cpu];
 
-	disable = atomic_inc_return(&data->disabled);
-	if (unlikely(disable != 1))
+	tbuffer = get_trace_buf();
+	if (!tbuffer) {
+		len = 0;
 		goto out;
+	}
 
-	/* Lockdep uses trace_printk for lock tracing */
-	local_irq_save(flags);
-	arch_spin_lock(&trace_buf_lock);
-	len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
+	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
 
-	if (len > TRACE_BUF_SIZE || len < 0)
-		goto out_unlock;
+	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
+		goto out;
 
+	local_save_flags(flags);
 	size = sizeof(*entry) + sizeof(u32) * len;
 	buffer = tr->buffer;
 	event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
					  flags, pc);
 	if (!event)
-		goto out_unlock;
+		goto out;
 	entry = ring_buffer_event_data(event);
 	entry->ip = ip;
 	entry->fmt = fmt;
 
-	memcpy(entry->buf, trace_buf, sizeof(u32) * len);
+	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
 	if (!filter_check_discard(call, entry, buffer, event)) {
 		ring_buffer_unlock_commit(buffer, event);
 		ftrace_trace_stack(buffer, flags, 6, pc);
 	}
 
-out_unlock:
-	arch_spin_unlock(&trace_buf_lock);
-	local_irq_restore(flags);
-
 out:
-	atomic_dec_return(&data->disabled);
 	preempt_enable_notrace();
 	unpause_graph_tracing();
 
@@ -1588,58 +1675,53 @@ int trace_array_printk(struct trace_array *tr,
 int trace_array_vprintk(struct trace_array *tr,
			unsigned long ip, const char *fmt, va_list args)
 {
-	static arch_spinlock_t trace_buf_lock = __ARCH_SPIN_LOCK_UNLOCKED;
-	static char trace_buf[TRACE_BUF_SIZE];
-
 	struct ftrace_event_call *call = &event_print;
 	struct ring_buffer_event *event;
 	struct ring_buffer *buffer;
-	struct trace_array_cpu *data;
-	int cpu, len = 0, size, pc;
+	int len = 0, size, pc;
 	struct print_entry *entry;
-	unsigned long irq_flags;
-	int disable;
+	unsigned long flags;
+	char *tbuffer;
 
 	if (tracing_disabled || tracing_selftest_running)
 		return 0;
 
+	/* Don't pollute graph traces with trace_vprintk internals */
+	pause_graph_tracing();
+
 	pc = preempt_count();
 	preempt_disable_notrace();
-	cpu = raw_smp_processor_id();
-	data = tr->data[cpu];
 
-	disable = atomic_inc_return(&data->disabled);
-	if (unlikely(disable != 1))
+
+	tbuffer = get_trace_buf();
+	if (!tbuffer) {
+		len = 0;
 		goto out;
+	}
 
-	pause_graph_tracing();
-	raw_local_irq_save(irq_flags);
-	arch_spin_lock(&trace_buf_lock);
-	len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
+	len = vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
+	if (len > TRACE_BUF_SIZE)
+		goto out;
 
+	local_save_flags(flags);
 	size = sizeof(*entry) + len + 1;
 	buffer = tr->buffer;
 	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
-					  irq_flags, pc);
+					  flags, pc);
 	if (!event)
-		goto out_unlock;
+		goto out;
 	entry = ring_buffer_event_data(event);
 	entry->ip = ip;
 
-	memcpy(&entry->buf, trace_buf, len);
+	memcpy(&entry->buf, tbuffer, len);
 	entry->buf[len] = '\0';
 	if (!filter_check_discard(call, entry, buffer, event)) {
 		ring_buffer_unlock_commit(buffer, event);
-		ftrace_trace_stack(buffer, irq_flags, 6, pc);
+		ftrace_trace_stack(buffer, flags, 6, pc);
 	}
-
- out_unlock:
-	arch_spin_unlock(&trace_buf_lock);
-	raw_local_irq_restore(irq_flags);
-	unpause_graph_tracing();
  out:
-	atomic_dec_return(&data->disabled);
 	preempt_enable_notrace();
+	unpause_graph_tracing();
 
 	return len;
 }
@@ -4955,6 +5037,10 @@ __init static int tracer_alloc_buffers(void)
 	if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
 		goto out_free_buffer_mask;
 
+	/* Only allocate trace_printk buffers if a trace_printk exists */
+	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
+		trace_printk_init_buffers();
+
 	/* To save memory, keep the ring buffer size to its minimum */
 	if (ring_buffer_expanded)
 		ring_buf_size = trace_buf_size;