author     David Vrabel <david.vrabel@csr.com>      2008-11-19 09:48:07 -0500
committer  David Vrabel <david.vrabel@csr.com>      2008-11-19 09:48:07 -0500
commit     dba0a918722ee0f0ba3442575e4448c3ab622be4 (patch)
tree       fdb466cf09e7916135098d651b18924b2fe9ba5f /kernel/trace/ring_buffer.c
parent     0996e6382482ce9014787693d3884e9468153a5c (diff)
parent     7f0f598a0069d1ab072375965a4b69137233169c (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6 into for-upstream
Diffstat (limited to 'kernel/trace/ring_buffer.c')
-rw-r--r--  kernel/trace/ring_buffer.c  179
1 file changed, 175 insertions, 4 deletions
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 94af1fe56bb4..036456cbb4f7 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -16,14 +16,49 @@
 #include <linux/list.h>
 #include <linux/fs.h>
 
+#include "trace.h"
+
+/* Global flag to disable all recording to ring buffers */
+static int ring_buffers_off __read_mostly;
+
+/**
+ * tracing_on - enable all tracing buffers
+ *
+ * This function enables all tracing buffers that may have been
+ * disabled with tracing_off.
+ */
+void tracing_on(void)
+{
+        ring_buffers_off = 0;
+}
+
+/**
+ * tracing_off - turn off all tracing buffers
+ *
+ * This function stops all tracing buffers from recording data.
+ * It does not disable any overhead the tracers themselves may
+ * be causing. This function simply causes all recording to
+ * the ring buffers to fail.
+ */
+void tracing_off(void)
+{
+        ring_buffers_off = 1;
+}
+
 /* Up this if you want to test the TIME_EXTENTS and normalization */
 #define DEBUG_SHIFT 0
 
 /* FIXME!!! */
 u64 ring_buffer_time_stamp(int cpu)
 {
+        u64 time;
+
+        preempt_disable_notrace();
         /* shift to debug/test normalization and TIME_EXTENTS */
-        return sched_clock() << DEBUG_SHIFT;
+        time = sched_clock() << DEBUG_SHIFT;
+        preempt_enable_notrace();
+
+        return time;
 }
 
 void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
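Taken together, this first hunk adds a single global kill switch: ring_buffers_off is a __read_mostly flag that tracing_on()/tracing_off() toggle, and the write paths patched further down (ring_buffer_lock_reserve() and ring_buffer_write()) bail out early when it is set. The timestamp helper also brackets the sched_clock() read with preempt_disable_notrace()/preempt_enable_notrace(), presumably so the clock is sampled without the task being migrated mid-read. A minimal userspace sketch of the kill-switch pattern, with hypothetical names (buffers_off, buffer_write) that are not part of the kernel API:

```c
#include <stdio.h>

/* Hypothetical stand-in for ring_buffers_off: non-zero disables all writes. */
static int buffers_off;

static void buffers_enable(void)  { buffers_off = 0; }
static void buffers_disable(void) { buffers_off = 1; }

/* Write path checks the flag first and fails fast, like ring_buffer_write(). */
static int buffer_write(const char *data)
{
        if (buffers_off)
                return -1;      /* recording disabled, drop the event */
        printf("recorded: %s\n", data);
        return 0;
}

int main(void)
{
        buffer_write("event 1");        /* recorded */
        buffers_disable();
        buffer_write("event 2");        /* dropped while disabled */
        buffers_enable();
        buffer_write("event 3");        /* recorded again */
        return 0;
}
```

The appeal of the pattern is that the hot path pays only a single flag test, while the enable/disable calls can come from anywhere; in this patch they are wired up to a debugfs file added at the end.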
@@ -130,7 +165,7 @@ struct buffer_page {
 static inline void free_buffer_page(struct buffer_page *bpage)
 {
         if (bpage->page)
-                __free_page(bpage->page);
+                free_page((unsigned long)bpage->page);
         kfree(bpage);
 }
 
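The one-line change in free_buffer_page() swaps a struct-page API for an address-based one: __free_page() expects a struct page *, while free_page() takes the kernel virtual address of the page. The cast to unsigned long suggests bpage->page stores an address obtained from __get_free_page()/get_zeroed_page() elsewhere in this file, so free_page() is the matching release call. An illustrative kernel-style fragment (not from this patch, and not buildable outside a kernel tree) showing the two pairings:

```c
#include <linux/gfp.h>
#include <linux/mm.h>

/*
 * Illustrative only: the address-based and struct-page-based page
 * APIs must be paired consistently.
 */
static void page_api_pairing_demo(void)
{
        unsigned long addr = __get_free_page(GFP_KERNEL);
        struct page *page = alloc_page(GFP_KERNEL);

        if (addr)
                free_page(addr);        /* pairs with __get_free_page() */
        if (page)
                __free_page(page);      /* pairs with alloc_page() */
}
```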
@@ -503,6 +538,12 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
         LIST_HEAD(pages);
         int i, cpu;
 
+        /*
+         * Always succeed at resizing a non-existent buffer:
+         */
+        if (!buffer)
+                return size;
+
         size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
         size *= BUF_PAGE_SIZE;
         buffer_size = buffer->pages * BUF_PAGE_SIZE;
@@ -966,7 +1007,9 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
         if (unlikely(*delta > (1ULL << 59) && !once++)) {
                 printk(KERN_WARNING "Delta way too big! %llu"
                        " ts=%llu write stamp = %llu\n",
-                       *delta, *ts, cpu_buffer->write_stamp);
+                       (unsigned long long)*delta,
+                       (unsigned long long)*ts,
+                       (unsigned long long)cpu_buffer->write_stamp);
                 WARN_ON(1);
         }
 
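The extra casts in the printk() are the usual portability fix for printing a u64 with %llu: depending on the architecture, u64 may be typedef'd to unsigned long rather than unsigned long long, and gcc then warns about the format/argument mismatch. A small userspace analogue using uint64_t in place of u64:

```c
#include <inttypes.h>
#include <stdio.h>

int main(void)
{
        uint64_t delta = 1ULL << 60;

        /* Cast to a known type so %llu always matches ... */
        printf("delta=%llu\n", (unsigned long long)delta);
        /* ... or let <inttypes.h> pick the right format for you. */
        printf("delta=%" PRIu64 "\n", delta);
        return 0;
}
```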
@@ -1020,8 +1063,23 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
         struct ring_buffer_event *event;
         u64 ts, delta;
         int commit = 0;
+        int nr_loops = 0;
 
  again:
+        /*
+         * We allow for interrupts to reenter here and do a trace.
+         * If one does, it will cause this original code to loop
+         * back here. Even with heavy interrupts happening, this
+         * should only happen a few times in a row. If this happens
+         * 1000 times in a row, there must be either an interrupt
+         * storm or we have something buggy.
+         * Bail!
+         */
+        if (unlikely(++nr_loops > 1000)) {
+                RB_WARN_ON(cpu_buffer, 1);
+                return NULL;
+        }
+
         ts = ring_buffer_time_stamp(cpu_buffer->cpu);
 
         /*
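The nr_loops counter bounds what used to be an open-ended retry loop: interrupts that nest and trace can legitimately send the reserve path back to again: a few times, but more than 1000 iterations is treated as a bug (interrupt storm or corruption) and the write is abandoned with a warning. The later hunks apply the same guard with smaller bounds (3 in rb_get_reader_page(), 10 in the peek paths). A standalone sketch of the pattern, with a hypothetical try_operation() standing in for the real work:

```c
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical operation that may ask the caller to retry. */
static bool try_operation(int attempt)
{
        return attempt >= 2;    /* pretend it succeeds on the third try */
}

static int bounded_retry(void)
{
        int nr_loops = 0;

again:
        /*
         * A few retries are normal; an unbounded number means something
         * is broken (the kernel code warns and bails after 1000).
         */
        if (++nr_loops > 1000) {
                fprintf(stderr, "giving up: possible livelock\n");
                return -1;
        }

        if (!try_operation(nr_loops - 1))
                goto again;

        return 0;
}

int main(void)
{
        return bounded_retry() ? 1 : 0;
}
```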
@@ -1043,7 +1101,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 
         /* Did the write stamp get updated already? */
         if (unlikely(ts < cpu_buffer->write_stamp))
-                goto again;
+                delta = 0;
 
         if (test_time_stamp(delta)) {
 
@@ -1116,6 +1174,9 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
         struct ring_buffer_event *event;
         int cpu, resched;
 
+        if (ring_buffers_off)
+                return NULL;
+
         if (atomic_read(&buffer->record_disabled))
                 return NULL;
 
@@ -1232,6 +1293,9 @@ int ring_buffer_write(struct ring_buffer *buffer,
         int ret = -EBUSY;
         int cpu, resched;
 
+        if (ring_buffers_off)
+                return -EBUSY;
+
         if (atomic_read(&buffer->record_disabled))
                 return -EBUSY;
 
@@ -1530,10 +1594,23 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 {
         struct buffer_page *reader = NULL;
         unsigned long flags;
+        int nr_loops = 0;
 
         spin_lock_irqsave(&cpu_buffer->lock, flags);
 
  again:
+        /*
+         * This should normally only loop twice. But because the
+         * start of the reader inserts an empty page, it causes
+         * a case where we will loop three times. There should be no
+         * reason to loop four times (that I know of).
+         */
+        if (unlikely(++nr_loops > 3)) {
+                RB_WARN_ON(cpu_buffer, 1);
+                reader = NULL;
+                goto out;
+        }
+
         reader = cpu_buffer->reader_page;
 
         /* If there's more to read, return this page */
@@ -1663,6 +1740,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
         struct ring_buffer_per_cpu *cpu_buffer;
         struct ring_buffer_event *event;
         struct buffer_page *reader;
+        int nr_loops = 0;
 
         if (!cpu_isset(cpu, buffer->cpumask))
                 return NULL;
@@ -1670,6 +1748,19 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
         cpu_buffer = buffer->buffers[cpu];
 
  again:
+        /*
+         * We repeat when a timestamp is encountered. It is possible
+         * to get multiple timestamps from an interrupt entering just
+         * as one timestamp is about to be written. The max times
+         * that this can happen is the number of nested interrupts we
+         * can have. Nesting 10 deep of interrupts is clearly
+         * an anomaly.
+         */
+        if (unlikely(++nr_loops > 10)) {
+                RB_WARN_ON(cpu_buffer, 1);
+                return NULL;
+        }
+
         reader = rb_get_reader_page(cpu_buffer);
         if (!reader)
                 return NULL;
@@ -1720,6 +1811,7 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
         struct ring_buffer *buffer;
         struct ring_buffer_per_cpu *cpu_buffer;
         struct ring_buffer_event *event;
+        int nr_loops = 0;
 
         if (ring_buffer_iter_empty(iter))
                 return NULL;
@@ -1728,6 +1820,19 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
         buffer = cpu_buffer->buffer;
 
  again:
+        /*
+         * We repeat when a timestamp is encountered. It is possible
+         * to get multiple timestamps from an interrupt entering just
+         * as one timestamp is about to be written. The max times
+         * that this can happen is the number of nested interrupts we
+         * can have. Nesting 10 deep of interrupts is clearly
+         * an anomaly.
+         */
+        if (unlikely(++nr_loops > 10)) {
+                RB_WARN_ON(cpu_buffer, 1);
+                return NULL;
+        }
+
         if (rb_per_cpu_empty(cpu_buffer))
                 return NULL;
 
@@ -2012,3 +2117,69 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
         return 0;
 }
 
+static ssize_t
+rb_simple_read(struct file *filp, char __user *ubuf,
+               size_t cnt, loff_t *ppos)
+{
+        int *p = filp->private_data;
+        char buf[64];
+        int r;
+
+        /* !ring_buffers_off == tracing_on */
+        r = sprintf(buf, "%d\n", !*p);
+
+        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t
+rb_simple_write(struct file *filp, const char __user *ubuf,
+                size_t cnt, loff_t *ppos)
+{
+        int *p = filp->private_data;
+        char buf[64];
+        long val;
+        int ret;
+
+        if (cnt >= sizeof(buf))
+                return -EINVAL;
+
+        if (copy_from_user(&buf, ubuf, cnt))
+                return -EFAULT;
+
+        buf[cnt] = 0;
+
+        ret = strict_strtoul(buf, 10, &val);
+        if (ret < 0)
+                return ret;
+
+        /* !ring_buffers_off == tracing_on */
+        *p = !val;
+
+        (*ppos)++;
+
+        return cnt;
+}
+
+static struct file_operations rb_simple_fops = {
+        .open  = tracing_open_generic,
+        .read  = rb_simple_read,
+        .write = rb_simple_write,
+};
+
+
+static __init int rb_init_debugfs(void)
+{
+        struct dentry *d_tracer;
+        struct dentry *entry;
+
+        d_tracer = tracing_init_dentry();
+
+        entry = debugfs_create_file("tracing_on", 0644, d_tracer,
+                                    &ring_buffers_off, &rb_simple_fops);
+        if (!entry)
+                pr_warning("Could not create debugfs 'tracing_on' entry\n");
+
+        return 0;
+}
+
+fs_initcall(rb_init_debugfs);
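The new debugfs file exposes the flag to user space: rb_simple_read() reports the inverse of ring_buffers_off and rb_simple_write() stores the inverse of the written value, so the file reads and writes as "tracing on?" rather than "buffers off?". A small userspace sketch of driving the knob, assuming debugfs is mounted at /sys/kernel/debug; the path and the helper name are illustrative, not part of the patch:

```c
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* Assumed location: debugfs mounted at /sys/kernel/debug. */
#define TRACING_ON_PATH "/sys/kernel/debug/tracing/tracing_on"

static int set_tracing(int on)
{
        int fd = open(TRACING_ON_PATH, O_WRONLY);

        if (fd < 0) {
                perror("open " TRACING_ON_PATH);
                return -1;
        }
        /* rb_simple_write() parses this buffer with strict_strtoul(). */
        if (write(fd, on ? "1" : "0", 1) != 1)
                perror("write");
        close(fd);
        return 0;
}

int main(void)
{
        set_tracing(0);         /* stop recording into the ring buffers */
        set_tracing(1);         /* turn recording back on */
        return 0;
}
```

A shell redirect such as echoing 0 or 1 into the file behaves the same way, since the handler treats any non-zero value as "on".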