diff options
| author | Ingo Molnar <mingo@elte.hu> | 2008-12-18 15:54:49 -0500 |
|---|---|---|
| committer | Ingo Molnar <mingo@elte.hu> | 2008-12-18 15:54:49 -0500 |
| commit | d110ec3a1e1f522e2e9dfceb9c36d6590c26d2d4 (patch) | |
| tree | 86b2f8f1d22b74b05239525c55bd42e3db6afc03 /kernel/trace/ring_buffer.c | |
| parent | 343e9099c8152daff20e10d6269edec21da44fc0 (diff) | |
| parent | 55dac3a5553b13891f0ae4bbd11920619b5436d4 (diff) | |
Merge branch 'linus' into core/rcu
Diffstat (limited to 'kernel/trace/ring_buffer.c')
| -rw-r--r-- | kernel/trace/ring_buffer.c | 176 |
1 file changed, 173 insertions, 3 deletions
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index cedf4e26828..668bbb5ef2b 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
| @@ -16,14 +16,49 @@ | |||
| 16 | #include <linux/list.h> | 16 | #include <linux/list.h> |
| 17 | #include <linux/fs.h> | 17 | #include <linux/fs.h> |
| 18 | 18 | ||
| 19 | #include "trace.h" | ||
| 20 | |||
| 21 | /* Global flag to disable all recording to ring buffers */ | ||
| 22 | static int ring_buffers_off __read_mostly; | ||
| 23 | |||
| 24 | /** | ||
| 25 | * tracing_on - enable all tracing buffers | ||
| 26 | * | ||
| 27 | * This function enables all tracing buffers that may have been | ||
| 28 | * disabled with tracing_off. | ||
| 29 | */ | ||
| 30 | void tracing_on(void) | ||
| 31 | { | ||
| 32 | ring_buffers_off = 0; | ||
| 33 | } | ||
| 34 | |||
| 35 | /** | ||
| 36 | * tracing_off - turn off all tracing buffers | ||
| 37 | * | ||
| 38 | * This function stops all tracing buffers from recording data. | ||
| 39 | * It does not disable any overhead the tracers themselves may | ||
| 40 | * be causing. This function simply causes all recording to | ||
| 41 | * the ring buffers to fail. | ||
| 42 | */ | ||
| 43 | void tracing_off(void) | ||
| 44 | { | ||
| 45 | ring_buffers_off = 1; | ||
| 46 | } | ||
| 47 | |||
| 19 | /* Up this if you want to test the TIME_EXTENTS and normalization */ | 48 | /* Up this if you want to test the TIME_EXTENTS and normalization */ |
| 20 | #define DEBUG_SHIFT 0 | 49 | #define DEBUG_SHIFT 0 |
| 21 | 50 | ||
| 22 | /* FIXME!!! */ | 51 | /* FIXME!!! */ |
| 23 | u64 ring_buffer_time_stamp(int cpu) | 52 | u64 ring_buffer_time_stamp(int cpu) |
| 24 | { | 53 | { |
| 54 | u64 time; | ||
| 55 | |||
| 56 | preempt_disable_notrace(); | ||
| 25 | /* shift to debug/test normalization and TIME_EXTENTS */ | 57 | /* shift to debug/test normalization and TIME_EXTENTS */ |
| 26 | return sched_clock() << DEBUG_SHIFT; | 58 | time = sched_clock() << DEBUG_SHIFT; |
| 59 | preempt_enable_notrace(); | ||
| 60 | |||
| 61 | return time; | ||
| 27 | } | 62 | } |
| 28 | 63 | ||
| 29 | void ring_buffer_normalize_time_stamp(int cpu, u64 *ts) | 64 | void ring_buffer_normalize_time_stamp(int cpu, u64 *ts) |
| @@ -503,6 +538,12 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) | |||
| 503 | LIST_HEAD(pages); | 538 | LIST_HEAD(pages); |
| 504 | int i, cpu; | 539 | int i, cpu; |
| 505 | 540 | ||
| 541 | /* | ||
| 542 | * Always succeed at resizing a non-existent buffer: | ||
| 543 | */ | ||
| 544 | if (!buffer) | ||
| 545 | return size; | ||
| 546 | |||
| 506 | size = DIV_ROUND_UP(size, BUF_PAGE_SIZE); | 547 | size = DIV_ROUND_UP(size, BUF_PAGE_SIZE); |
| 507 | size *= BUF_PAGE_SIZE; | 548 | size *= BUF_PAGE_SIZE; |
| 508 | buffer_size = buffer->pages * BUF_PAGE_SIZE; | 549 | buffer_size = buffer->pages * BUF_PAGE_SIZE; |
| @@ -576,6 +617,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) | |||
| 576 | list_del_init(&page->list); | 617 | list_del_init(&page->list); |
| 577 | free_buffer_page(page); | 618 | free_buffer_page(page); |
| 578 | } | 619 | } |
| 620 | mutex_unlock(&buffer->mutex); | ||
| 579 | return -ENOMEM; | 621 | return -ENOMEM; |
| 580 | } | 622 | } |
| 581 | 623 | ||
| @@ -1022,8 +1064,23 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, | |||
| 1022 | struct ring_buffer_event *event; | 1064 | struct ring_buffer_event *event; |
| 1023 | u64 ts, delta; | 1065 | u64 ts, delta; |
| 1024 | int commit = 0; | 1066 | int commit = 0; |
| 1067 | int nr_loops = 0; | ||
| 1025 | 1068 | ||
| 1026 | again: | 1069 | again: |
| 1070 | /* | ||
| 1071 | * We allow for interrupts to reenter here and do a trace. | ||
| 1072 | * If one does, it will cause this original code to loop | ||
| 1073 | * back here. Even with heavy interrupts happening, this | ||
| 1074 | * should only happen a few times in a row. If this happens | ||
| 1075 | * 1000 times in a row, there must be either an interrupt | ||
| 1076 | * storm or we have something buggy. | ||
| 1077 | * Bail! | ||
| 1078 | */ | ||
| 1079 | if (unlikely(++nr_loops > 1000)) { | ||
| 1080 | RB_WARN_ON(cpu_buffer, 1); | ||
| 1081 | return NULL; | ||
| 1082 | } | ||
| 1083 | |||
| 1027 | ts = ring_buffer_time_stamp(cpu_buffer->cpu); | 1084 | ts = ring_buffer_time_stamp(cpu_buffer->cpu); |
| 1028 | 1085 | ||
| 1029 | /* | 1086 | /* |
| @@ -1045,7 +1102,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, | |||
| 1045 | 1102 | ||
| 1046 | /* Did the write stamp get updated already? */ | 1103 | /* Did the write stamp get updated already? */ |
| 1047 | if (unlikely(ts < cpu_buffer->write_stamp)) | 1104 | if (unlikely(ts < cpu_buffer->write_stamp)) |
| 1048 | goto again; | 1105 | delta = 0; |
| 1049 | 1106 | ||
| 1050 | if (test_time_stamp(delta)) { | 1107 | if (test_time_stamp(delta)) { |
| 1051 | 1108 | ||
| @@ -1118,6 +1175,9 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, | |||
| 1118 | struct ring_buffer_event *event; | 1175 | struct ring_buffer_event *event; |
| 1119 | int cpu, resched; | 1176 | int cpu, resched; |
| 1120 | 1177 | ||
| 1178 | if (ring_buffers_off) | ||
| 1179 | return NULL; | ||
| 1180 | |||
| 1121 | if (atomic_read(&buffer->record_disabled)) | 1181 | if (atomic_read(&buffer->record_disabled)) |
| 1122 | return NULL; | 1182 | return NULL; |
| 1123 | 1183 | ||
| @@ -1155,7 +1215,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, | |||
| 1155 | 1215 | ||
| 1156 | out: | 1216 | out: |
| 1157 | if (resched) | 1217 | if (resched) |
| 1158 | preempt_enable_notrace(); | 1218 | preempt_enable_no_resched_notrace(); |
| 1159 | else | 1219 | else |
| 1160 | preempt_enable_notrace(); | 1220 | preempt_enable_notrace(); |
| 1161 | return NULL; | 1221 | return NULL; |
| @@ -1234,6 +1294,9 @@ int ring_buffer_write(struct ring_buffer *buffer, | |||
| 1234 | int ret = -EBUSY; | 1294 | int ret = -EBUSY; |
| 1235 | int cpu, resched; | 1295 | int cpu, resched; |
| 1236 | 1296 | ||
| 1297 | if (ring_buffers_off) | ||
| 1298 | return -EBUSY; | ||
| 1299 | |||
| 1237 | if (atomic_read(&buffer->record_disabled)) | 1300 | if (atomic_read(&buffer->record_disabled)) |
| 1238 | return -EBUSY; | 1301 | return -EBUSY; |
| 1239 | 1302 | ||
| @@ -1532,10 +1595,23 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) | |||
| 1532 | { | 1595 | { |
| 1533 | struct buffer_page *reader = NULL; | 1596 | struct buffer_page *reader = NULL; |
| 1534 | unsigned long flags; | 1597 | unsigned long flags; |
| 1598 | int nr_loops = 0; | ||
| 1535 | 1599 | ||
| 1536 | spin_lock_irqsave(&cpu_buffer->lock, flags); | 1600 | spin_lock_irqsave(&cpu_buffer->lock, flags); |
| 1537 | 1601 | ||
| 1538 | again: | 1602 | again: |
| 1603 | /* | ||
| 1604 | * This should normally only loop twice. But because the | ||
| 1605 | * start of the reader inserts an empty page, it causes | ||
| 1606 | * a case where we will loop three times. There should be no | ||
| 1607 | * reason to loop four times (that I know of). | ||
| 1608 | */ | ||
| 1609 | if (unlikely(++nr_loops > 3)) { | ||
| 1610 | RB_WARN_ON(cpu_buffer, 1); | ||
| 1611 | reader = NULL; | ||
| 1612 | goto out; | ||
| 1613 | } | ||
| 1614 | |||
| 1539 | reader = cpu_buffer->reader_page; | 1615 | reader = cpu_buffer->reader_page; |
| 1540 | 1616 | ||
| 1541 | /* If there's more to read, return this page */ | 1617 | /* If there's more to read, return this page */ |
| @@ -1665,6 +1741,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
| 1665 | struct ring_buffer_per_cpu *cpu_buffer; | 1741 | struct ring_buffer_per_cpu *cpu_buffer; |
| 1666 | struct ring_buffer_event *event; | 1742 | struct ring_buffer_event *event; |
| 1667 | struct buffer_page *reader; | 1743 | struct buffer_page *reader; |
| 1744 | int nr_loops = 0; | ||
| 1668 | 1745 | ||
| 1669 | if (!cpu_isset(cpu, buffer->cpumask)) | 1746 | if (!cpu_isset(cpu, buffer->cpumask)) |
| 1670 | return NULL; | 1747 | return NULL; |
| @@ -1672,6 +1749,19 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
| 1672 | cpu_buffer = buffer->buffers[cpu]; | 1749 | cpu_buffer = buffer->buffers[cpu]; |
| 1673 | 1750 | ||
| 1674 | again: | 1751 | again: |
| 1752 | /* | ||
| 1753 | * We repeat when a timestamp is encountered. It is possible | ||
| 1754 | * to get multiple timestamps from an interrupt entering just | ||
| 1755 | * as one timestamp is about to be written. The max times | ||
| 1756 | * that this can happen is the number of nested interrupts we | ||
| 1757 | * can have. Nesting 10 deep of interrupts is clearly | ||
| 1758 | * an anomaly. | ||
| 1759 | */ | ||
| 1760 | if (unlikely(++nr_loops > 10)) { | ||
| 1761 | RB_WARN_ON(cpu_buffer, 1); | ||
| 1762 | return NULL; | ||
| 1763 | } | ||
| 1764 | |||
| 1675 | reader = rb_get_reader_page(cpu_buffer); | 1765 | reader = rb_get_reader_page(cpu_buffer); |
| 1676 | if (!reader) | 1766 | if (!reader) |
| 1677 | return NULL; | 1767 | return NULL; |
| @@ -1722,6 +1812,7 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
| 1722 | struct ring_buffer *buffer; | 1812 | struct ring_buffer *buffer; |
| 1723 | struct ring_buffer_per_cpu *cpu_buffer; | 1813 | struct ring_buffer_per_cpu *cpu_buffer; |
| 1724 | struct ring_buffer_event *event; | 1814 | struct ring_buffer_event *event; |
| 1815 | int nr_loops = 0; | ||
| 1725 | 1816 | ||
| 1726 | if (ring_buffer_iter_empty(iter)) | 1817 | if (ring_buffer_iter_empty(iter)) |
| 1727 | return NULL; | 1818 | return NULL; |
| @@ -1730,6 +1821,19 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
| 1730 | buffer = cpu_buffer->buffer; | 1821 | buffer = cpu_buffer->buffer; |
| 1731 | 1822 | ||
| 1732 | again: | 1823 | again: |
| 1824 | /* | ||
| 1825 | * We repeat when a timestamp is encountered. It is possible | ||
| 1826 | * to get multiple timestamps from an interrupt entering just | ||
| 1827 | * as one timestamp is about to be written. The max times | ||
| 1828 | * that this can happen is the number of nested interrupts we | ||
| 1829 | * can have. Nesting 10 deep of interrupts is clearly | ||
| 1830 | * an anomaly. | ||
| 1831 | */ | ||
| 1832 | if (unlikely(++nr_loops > 10)) { | ||
| 1833 | RB_WARN_ON(cpu_buffer, 1); | ||
| 1834 | return NULL; | ||
| 1835 | } | ||
| 1836 | |||
| 1733 | if (rb_per_cpu_empty(cpu_buffer)) | 1837 | if (rb_per_cpu_empty(cpu_buffer)) |
| 1734 | return NULL; | 1838 | return NULL; |
| 1735 | 1839 | ||
| @@ -2014,3 +2118,69 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, | |||
| 2014 | return 0; | 2118 | return 0; |
| 2015 | } | 2119 | } |
| 2016 | 2120 | ||
| 2121 | static ssize_t | ||
| 2122 | rb_simple_read(struct file *filp, char __user *ubuf, | ||
| 2123 | size_t cnt, loff_t *ppos) | ||
| 2124 | { | ||
| 2125 | int *p = filp->private_data; | ||
| 2126 | char buf[64]; | ||
| 2127 | int r; | ||
| 2128 | |||
| 2129 | /* !ring_buffers_off == tracing_on */ | ||
| 2130 | r = sprintf(buf, "%d\n", !*p); | ||
| 2131 | |||
| 2132 | return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); | ||
| 2133 | } | ||
| 2134 | |||
| 2135 | static ssize_t | ||
| 2136 | rb_simple_write(struct file *filp, const char __user *ubuf, | ||
| 2137 | size_t cnt, loff_t *ppos) | ||
| 2138 | { | ||
| 2139 | int *p = filp->private_data; | ||
| 2140 | char buf[64]; | ||
| 2141 | long val; | ||
| 2142 | int ret; | ||
| 2143 | |||
| 2144 | if (cnt >= sizeof(buf)) | ||
| 2145 | return -EINVAL; | ||
| 2146 | |||
| 2147 | if (copy_from_user(&buf, ubuf, cnt)) | ||
| 2148 | return -EFAULT; | ||
| 2149 | |||
| 2150 | buf[cnt] = 0; | ||
| 2151 | |||
| 2152 | ret = strict_strtoul(buf, 10, &val); | ||
| 2153 | if (ret < 0) | ||
| 2154 | return ret; | ||
| 2155 | |||
| 2156 | /* !ring_buffers_off == tracing_on */ | ||
| 2157 | *p = !val; | ||
| 2158 | |||
| 2159 | (*ppos)++; | ||
| 2160 | |||
| 2161 | return cnt; | ||
| 2162 | } | ||
| 2163 | |||
| 2164 | static struct file_operations rb_simple_fops = { | ||
| 2165 | .open = tracing_open_generic, | ||
| 2166 | .read = rb_simple_read, | ||
| 2167 | .write = rb_simple_write, | ||
| 2168 | }; | ||
| 2169 | |||
| 2170 | |||
| 2171 | static __init int rb_init_debugfs(void) | ||
| 2172 | { | ||
| 2173 | struct dentry *d_tracer; | ||
| 2174 | struct dentry *entry; | ||
| 2175 | |||
| 2176 | d_tracer = tracing_init_dentry(); | ||
| 2177 | |||
| 2178 | entry = debugfs_create_file("tracing_on", 0644, d_tracer, | ||
| 2179 | &ring_buffers_off, &rb_simple_fops); | ||
| 2180 | if (!entry) | ||
| 2181 | pr_warning("Could not create debugfs 'tracing_on' entry\n"); | ||
| 2182 | |||
| 2183 | return 0; | ||
| 2184 | } | ||
| 2185 | |||
| 2186 | fs_initcall(rb_init_debugfs); | ||
