Diffstat (limited to 'kernel/trace/ring_buffer.c')
 -rw-r--r--  kernel/trace/ring_buffer.c | 176
 1 file changed, 173 insertions(+), 3 deletions(-)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index cedf4e26828..668bbb5ef2b 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -16,14 +16,49 @@
 #include <linux/list.h>
 #include <linux/fs.h>
 
+#include "trace.h"
+
+/* Global flag to disable all recording to ring buffers */
+static int ring_buffers_off __read_mostly;
+
+/**
+ * tracing_on - enable all tracing buffers
+ *
+ * This function enables all tracing buffers that may have been
+ * disabled with tracing_off.
+ */
+void tracing_on(void)
+{
+	ring_buffers_off = 0;
+}
+
+/**
+ * tracing_off - turn off all tracing buffers
+ *
+ * This function stops all tracing buffers from recording data.
+ * It does not disable any overhead the tracers themselves may
+ * be causing. This function simply causes all recording to
+ * the ring buffers to fail.
+ */
+void tracing_off(void)
+{
+	ring_buffers_off = 1;
+}
+
 /* Up this if you want to test the TIME_EXTENTS and normalization */
 #define DEBUG_SHIFT 0
 
 /* FIXME!!! */
 u64 ring_buffer_time_stamp(int cpu)
 {
+	u64 time;
+
+	preempt_disable_notrace();
 	/* shift to debug/test normalization and TIME_EXTENTS */
-	return sched_clock() << DEBUG_SHIFT;
+	time = sched_clock() << DEBUG_SHIFT;
+	preempt_enable_notrace();
+
+	return time;
 }
 
 void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
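The tracing_on()/tracing_off() pair added above gives kernel code a cheap way to freeze the ring buffers the moment something interesting happens, without removing the tracers or their overhead. A minimal sketch of that usage, assuming the declarations are exported through the tracing headers; the helper below and its message are hypothetical:

#include <linux/kernel.h>	/* printk() */

/* Hypothetical detection hook: preserve the trace leading up to a problem. */
static void my_report_anomaly(const char *reason)
{
	/*
	 * Stop all recording so the events that led here are not
	 * overwritten; tracing_on() re-enables recording once the
	 * buffers have been inspected.
	 */
	tracing_off();
	printk(KERN_WARNING "tracing stopped: %s\n", reason);
}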
@@ -503,6 +538,12 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
 	LIST_HEAD(pages);
 	int i, cpu;
 
+	/*
+	 * Always succeed at resizing a non-existent buffer:
+	 */
+	if (!buffer)
+		return size;
+
 	size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
 	size *= BUF_PAGE_SIZE;
 	buffer_size = buffer->pages * BUF_PAGE_SIZE;
@@ -576,6 +617,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
 		list_del_init(&page->list);
 		free_buffer_page(page);
 	}
+	mutex_unlock(&buffer->mutex);
 	return -ENOMEM;
 }
 
@@ -1022,8 +1064,23 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 	struct ring_buffer_event *event;
 	u64 ts, delta;
 	int commit = 0;
+	int nr_loops = 0;
 
  again:
+	/*
+	 * We allow for interrupts to reenter here and do a trace.
+	 * If one does, it will cause this original code to loop
+	 * back here. Even with heavy interrupts happening, this
+	 * should only happen a few times in a row. If this happens
+	 * 1000 times in a row, there must be either an interrupt
+	 * storm or we have something buggy.
+	 * Bail!
+	 */
+	if (unlikely(++nr_loops > 1000)) {
+		RB_WARN_ON(cpu_buffer, 1);
+		return NULL;
+	}
+
 	ts = ring_buffer_time_stamp(cpu_buffer->cpu);
 
 	/*
@@ -1045,7 +1102,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 
 		/* Did the write stamp get updated already? */
 		if (unlikely(ts < cpu_buffer->write_stamp))
-			goto again;
+			delta = 0;
 
 		if (test_time_stamp(delta)) {
 
@@ -1118,6 +1175,9 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
 	struct ring_buffer_event *event;
 	int cpu, resched;
 
+	if (ring_buffers_off)
+		return NULL;
+
 	if (atomic_read(&buffer->record_disabled))
 		return NULL;
 
@@ -1155,7 +1215,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
 
  out:
 	if (resched)
-		preempt_enable_notrace();
+		preempt_enable_no_resched_notrace();
 	else
 		preempt_enable_notrace();
 	return NULL;
@@ -1234,6 +1294,9 @@ int ring_buffer_write(struct ring_buffer *buffer,
 	int ret = -EBUSY;
 	int cpu, resched;
 
+	if (ring_buffers_off)
+		return -EBUSY;
+
 	if (atomic_read(&buffer->record_disabled))
 		return -EBUSY;
 
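With the global flag set, both producer entry points now refuse to record: ring_buffer_lock_reserve() returns NULL and ring_buffer_write() returns -EBUSY, just as when record_disabled is set, so existing callers need no new handling. A sketch of a producer coping with either outcome; the buffer and payload are hypothetical, and ring_buffer_write() is assumed to take a length plus a data pointer as elsewhere in this file:

#include <linux/ring_buffer.h>

/* Hypothetical producer: recording may be refused while tracing is off. */
static int my_record_u64(struct ring_buffer *buffer, u64 value)
{
	int ret;

	/* Fails with -EBUSY when ring_buffers_off or record_disabled is set. */
	ret = ring_buffer_write(buffer, sizeof(value), &value);
	if (ret)
		return ret;	/* event was dropped */

	return 0;
}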
@@ -1532,10 +1595,23 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 {
 	struct buffer_page *reader = NULL;
 	unsigned long flags;
+	int nr_loops = 0;
 
 	spin_lock_irqsave(&cpu_buffer->lock, flags);
 
  again:
+	/*
+	 * This should normally only loop twice. But because the
+	 * start of the reader inserts an empty page, it causes
+	 * a case where we will loop three times. There should be no
+	 * reason to loop four times (that I know of).
+	 */
+	if (unlikely(++nr_loops > 3)) {
+		RB_WARN_ON(cpu_buffer, 1);
+		reader = NULL;
+		goto out;
+	}
+
 	reader = cpu_buffer->reader_page;
 
 	/* If there's more to read, return this page */
@@ -1665,6 +1741,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_event *event;
 	struct buffer_page *reader;
+	int nr_loops = 0;
 
 	if (!cpu_isset(cpu, buffer->cpumask))
 		return NULL;
@@ -1672,6 +1749,19 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 	cpu_buffer = buffer->buffers[cpu];
 
  again:
+	/*
+	 * We repeat when a timestamp is encountered. It is possible
+	 * to get multiple timestamps from an interrupt entering just
+	 * as one timestamp is about to be written. The max times
+	 * that this can happen is the number of nested interrupts we
+	 * can have. Nesting 10 deep of interrupts is clearly
+	 * an anomaly.
+	 */
+	if (unlikely(++nr_loops > 10)) {
+		RB_WARN_ON(cpu_buffer, 1);
+		return NULL;
+	}
+
 	reader = rb_get_reader_page(cpu_buffer);
 	if (!reader)
 		return NULL;
@@ -1722,6 +1812,7 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 	struct ring_buffer *buffer;
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_event *event;
+	int nr_loops = 0;
 
 	if (ring_buffer_iter_empty(iter))
 		return NULL;
@@ -1730,6 +1821,19 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 	buffer = cpu_buffer->buffer;
 
  again:
+	/*
+	 * We repeat when a timestamp is encountered. It is possible
+	 * to get multiple timestamps from an interrupt entering just
+	 * as one timestamp is about to be written. The max times
+	 * that this can happen is the number of nested interrupts we
+	 * can have. Nesting 10 deep of interrupts is clearly
+	 * an anomaly.
+	 */
+	if (unlikely(++nr_loops > 10)) {
+		RB_WARN_ON(cpu_buffer, 1);
+		return NULL;
+	}
+
 	if (rb_per_cpu_empty(cpu_buffer))
 		return NULL;
 
@@ -2014,3 +2118,69 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
 	return 0;
 }
 
+static ssize_t
+rb_simple_read(struct file *filp, char __user *ubuf,
+	       size_t cnt, loff_t *ppos)
+{
+	int *p = filp->private_data;
+	char buf[64];
+	int r;
+
+	/* !ring_buffers_off == tracing_on */
+	r = sprintf(buf, "%d\n", !*p);
+
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t
+rb_simple_write(struct file *filp, const char __user *ubuf,
+		size_t cnt, loff_t *ppos)
+{
+	int *p = filp->private_data;
+	char buf[64];
+	long val;
+	int ret;
+
+	if (cnt >= sizeof(buf))
+		return -EINVAL;
+
+	if (copy_from_user(&buf, ubuf, cnt))
+		return -EFAULT;
+
+	buf[cnt] = 0;
+
+	ret = strict_strtoul(buf, 10, &val);
+	if (ret < 0)
+		return ret;
+
+	/* !ring_buffers_off == tracing_on */
+	*p = !val;
+
+	(*ppos)++;
+
+	return cnt;
+}
+
+static struct file_operations rb_simple_fops = {
+	.open		= tracing_open_generic,
+	.read		= rb_simple_read,
+	.write		= rb_simple_write,
+};
+
+
+static __init int rb_init_debugfs(void)
+{
+	struct dentry *d_tracer;
+	struct dentry *entry;
+
+	d_tracer = tracing_init_dentry();
+
+	entry = debugfs_create_file("tracing_on", 0644, d_tracer,
+				    &ring_buffers_off, &rb_simple_fops);
+	if (!entry)
+		pr_warning("Could not create debugfs 'tracing_on' entry\n");
+
+	return 0;
+}
+
+fs_initcall(rb_init_debugfs);
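The new debugfs file lands next to the other tracing controls, so recording can also be toggled from user space: assuming debugfs is mounted in the usual place, writing 0 to <debugfs>/tracing/tracing_on stops all recording (e.g. echo 0 > /sys/kernel/debug/tracing/tracing_on) and writing any non-zero value resumes it, while reading the file reports 1 when recording is enabled, per the !ring_buffers_off == tracing_on convention in the handlers above.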