author     Ingo Molnar <mingo@elte.hu>    2008-11-20 03:03:38 -0500
committer  Ingo Molnar <mingo@elte.hu>    2008-11-20 03:03:38 -0500
commit     90accd6fabf9b2fa2705945a4c601877a75d43bf (patch)
tree       d393cb54f0228b1313139e4e14adf4f5cf236b59 /kernel/trace/ring_buffer.c
parent     b43d196c4d3fe46d6dda7c987c47792612b80b1b (diff)
parent     ee2f6cc7f9ea2542ad46070ed62ba7aa04d08871 (diff)
Merge branch 'linus' into x86/memory-corruption-check
Diffstat (limited to 'kernel/trace/ring_buffer.c')
 kernel/trace/ring_buffer.c (-rw-r--r--)   179
1 files changed, 175 insertions, 4 deletions
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 94af1fe56bb4..036456cbb4f7 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -16,14 +16,49 @@
 #include <linux/list.h>
 #include <linux/fs.h>
 
+#include "trace.h"
+
+/* Global flag to disable all recording to ring buffers */
+static int ring_buffers_off __read_mostly;
+
+/**
+ * tracing_on - enable all tracing buffers
+ *
+ * This function enables all tracing buffers that may have been
+ * disabled with tracing_off.
+ */
+void tracing_on(void)
+{
+        ring_buffers_off = 0;
+}
+
+/**
+ * tracing_off - turn off all tracing buffers
+ *
+ * This function stops all tracing buffers from recording data.
+ * It does not disable any overhead the tracers themselves may
+ * be causing. This function simply causes all recording to
+ * the ring buffers to fail.
+ */
+void tracing_off(void)
+{
+        ring_buffers_off = 1;
+}
+
 /* Up this if you want to test the TIME_EXTENTS and normalization */
 #define DEBUG_SHIFT 0
 
 /* FIXME!!! */
 u64 ring_buffer_time_stamp(int cpu)
 {
+        u64 time;
+
+        preempt_disable_notrace();
         /* shift to debug/test normalization and TIME_EXTENTS */
-        return sched_clock() << DEBUG_SHIFT;
+        time = sched_clock() << DEBUG_SHIFT;
+        preempt_enable_notrace();
+
+        return time;
 }
 
 void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
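The tracing_on()/tracing_off() pair introduced above is meant to be called from elsewhere in the kernel, typically to freeze the ring buffers the moment a problem is detected so that the trace leading up to it is not overwritten. A minimal sketch of that usage, with a hypothetical detection site (only tracing_on() and tracing_off() come from this patch):

    /* Hypothetical caller: stop all ring-buffer recording when trouble is
     * detected, so the events leading up to it survive for inspection. */
    static void check_for_corruption(int corrupted)
    {
            if (corrupted) {
                    tracing_off();  /* subsequent reserve/write calls are refused */
                    WARN_ON(1);     /* leave a marker in the kernel log */
            }
    }

    /* After the problem has been analysed or recovered from: */
    static void resume_recording(void)
    {
            tracing_on();
    }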
@@ -130,7 +165,7 @@ struct buffer_page {
 static inline void free_buffer_page(struct buffer_page *bpage)
 {
         if (bpage->page)
-                __free_page(bpage->page);
+                free_page((unsigned long)bpage->page);
         kfree(bpage);
 }
 
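The one-line change above is a type fix: the cast implies that bpage->page holds a page's kernel address rather than a struct page pointer, so it has to be released with free_page() on that address instead of __free_page() on a struct page. A minimal sketch of the matching pair, assuming the allocation side stores the address returned by __get_free_page() (that side is not shown in this hunk):

    /* Allocation and free must agree on what bpage->page holds:
     * a page address, not a struct page *. */
    unsigned long addr = __get_free_page(GFP_KERNEL);
    if (!addr)
            return -ENOMEM;
    bpage->page = (void *)addr;

    /* ... later, in free_buffer_page(): */
    free_page((unsigned long)bpage->page);  /* not __free_page(bpage->page) */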
@@ -503,6 +538,12 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
        LIST_HEAD(pages);
        int i, cpu;
 
+       /*
+        * Always succeed at resizing a non-existent buffer:
+        */
+       if (!buffer)
+               return size;
+
        size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
        size *= BUF_PAGE_SIZE;
        buffer_size = buffer->pages * BUF_PAGE_SIZE;
@@ -966,7 +1007,9 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
        if (unlikely(*delta > (1ULL << 59) && !once++)) {
                printk(KERN_WARNING "Delta way too big! %llu"
                       " ts=%llu write stamp = %llu\n",
-                      *delta, *ts, cpu_buffer->write_stamp);
+                      (unsigned long long)*delta,
+                      (unsigned long long)*ts,
+                      (unsigned long long)cpu_buffer->write_stamp);
                WARN_ON(1);
        }
 
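The casts added above are a printk format fix rather than a behavioural change: u64 is defined as unsigned long on some 64-bit architectures, so passing it straight to a %llu conversion produces format warnings there. Casting each argument to unsigned long long keeps the single format string correct everywhere, e.g.:

    u64 ts = ring_buffer_time_stamp(cpu);
    printk(KERN_INFO "ts=%llu\n", (unsigned long long)ts); /* portable on 32- and 64-bit */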
@@ -1020,8 +1063,23 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
        struct ring_buffer_event *event;
        u64 ts, delta;
        int commit = 0;
+       int nr_loops = 0;
 
  again:
+       /*
+        * We allow for interrupts to reenter here and do a trace.
+        * If one does, it will cause this original code to loop
+        * back here. Even with heavy interrupts happening, this
+        * should only happen a few times in a row. If this happens
+        * 1000 times in a row, there must be either an interrupt
+        * storm or we have something buggy.
+        * Bail!
+        */
+       if (unlikely(++nr_loops > 1000)) {
+               RB_WARN_ON(cpu_buffer, 1);
+               return NULL;
+       }
+
        ts = ring_buffer_time_stamp(cpu_buffer->cpu);
 
        /*
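The nr_loops counter added here is a defensive bound on an optimistic retry loop: the code may loop back to again: when an interrupt sneaks in and records its own event between the timestamp read and the commit, but that can legitimately happen only as often as interrupts can nest, so anything beyond a generous limit is treated as a bug or an interrupt storm and the reservation is abandoned with a warning instead of livelocking. The same bound, with smaller limits, reappears in the reader paths further down. A kernel-style sketch of the bare pattern, relying on unlikely() and WARN_ON() from the usual kernel headers (try_reserve_once() and MAX_RETRIES are illustrative names, not part of this patch):

    #define MAX_RETRIES 1000                /* far above any plausible interrupt-nesting depth */

    extern void *try_reserve_once(void);    /* hypothetical optimistic operation */

    static void *reserve_with_bound(void)
    {
            int nr_loops = 0;
            void *slot;

     again:
            /* Give up loudly instead of spinning forever when something is wrong. */
            if (unlikely(++nr_loops > MAX_RETRIES)) {
                    WARN_ON(1);
                    return NULL;
            }

            slot = try_reserve_once();      /* may fail if a nested writer raced us */
            if (!slot)
                    goto again;

            return slot;
    }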
@@ -1043,7 +1101,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 
        /* Did the write stamp get updated already? */
        if (unlikely(ts < cpu_buffer->write_stamp))
-               goto again;
+               delta = 0;
 
        if (test_time_stamp(delta)) {
 
@@ -1116,6 +1174,9 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
        struct ring_buffer_event *event;
        int cpu, resched;
 
+       if (ring_buffers_off)
+               return NULL;
+
        if (atomic_read(&buffer->record_disabled))
                return NULL;
 
@@ -1232,6 +1293,9 @@ int ring_buffer_write(struct ring_buffer *buffer,
        int ret = -EBUSY;
        int cpu, resched;
 
+       if (ring_buffers_off)
+               return -EBUSY;
+
        if (atomic_read(&buffer->record_disabled))
                return -EBUSY;
 
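With the two early exits above, every producer has to tolerate recording being refused while tracing is off: ring_buffer_lock_reserve() signals it by returning NULL and ring_buffer_write() by returning -EBUSY. A hedged sketch of a caller that simply drops its event in that case, assuming the three-argument ring_buffer_write(buffer, length, data) form of this era (record_sample() itself is made up for illustration):

    /* Hypothetical producer: treat "tracing is off" as a quiet drop,
     * not a hard error. */
    static int record_sample(struct ring_buffer *buffer, u64 value)
    {
            int ret;

            ret = ring_buffer_write(buffer, sizeof(value), &value);
            if (ret == -EBUSY)
                    return 0;       /* recording disabled: drop the sample */

            return ret;
    }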
@@ -1530,10 +1594,23 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 {
        struct buffer_page *reader = NULL;
        unsigned long flags;
+       int nr_loops = 0;
 
        spin_lock_irqsave(&cpu_buffer->lock, flags);
 
  again:
+       /*
+        * This should normally only loop twice. But because the
+        * start of the reader inserts an empty page, it causes
+        * a case where we will loop three times. There should be no
+        * reason to loop four times (that I know of).
+        */
+       if (unlikely(++nr_loops > 3)) {
+               RB_WARN_ON(cpu_buffer, 1);
+               reader = NULL;
+               goto out;
+       }
+
        reader = cpu_buffer->reader_page;
 
        /* If there's more to read, return this page */
@@ -1663,6 +1740,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
        struct ring_buffer_per_cpu *cpu_buffer;
        struct ring_buffer_event *event;
        struct buffer_page *reader;
+       int nr_loops = 0;
 
        if (!cpu_isset(cpu, buffer->cpumask))
                return NULL;
@@ -1670,6 +1748,19 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
        cpu_buffer = buffer->buffers[cpu];
 
  again:
+       /*
+        * We repeat when a timestamp is encountered. It is possible
+        * to get multiple timestamps from an interrupt entering just
+        * as one timestamp is about to be written. The max times
+        * that this can happen is the number of nested interrupts we
+        * can have. Nesting 10 deep of interrupts is clearly
+        * an anomaly.
+        */
+       if (unlikely(++nr_loops > 10)) {
+               RB_WARN_ON(cpu_buffer, 1);
+               return NULL;
+       }
+
        reader = rb_get_reader_page(cpu_buffer);
        if (!reader)
                return NULL;
@@ -1720,6 +1811,7 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
        struct ring_buffer *buffer;
        struct ring_buffer_per_cpu *cpu_buffer;
        struct ring_buffer_event *event;
+       int nr_loops = 0;
 
        if (ring_buffer_iter_empty(iter))
                return NULL;
@@ -1728,6 +1820,19 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
        buffer = cpu_buffer->buffer;
 
  again:
+       /*
+        * We repeat when a timestamp is encountered. It is possible
+        * to get multiple timestamps from an interrupt entering just
+        * as one timestamp is about to be written. The max times
+        * that this can happen is the number of nested interrupts we
+        * can have. Nesting 10 deep of interrupts is clearly
+        * an anomaly.
+        */
+       if (unlikely(++nr_loops > 10)) {
+               RB_WARN_ON(cpu_buffer, 1);
+               return NULL;
+       }
+
        if (rb_per_cpu_empty(cpu_buffer))
                return NULL;
 
@@ -2012,3 +2117,69 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
        return 0;
 }
 
+static ssize_t
+rb_simple_read(struct file *filp, char __user *ubuf,
+              size_t cnt, loff_t *ppos)
+{
+       int *p = filp->private_data;
+       char buf[64];
+       int r;
+
+       /* !ring_buffers_off == tracing_on */
+       r = sprintf(buf, "%d\n", !*p);
+
+       return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t
+rb_simple_write(struct file *filp, const char __user *ubuf,
+               size_t cnt, loff_t *ppos)
+{
+       int *p = filp->private_data;
+       char buf[64];
+       long val;
+       int ret;
+
+       if (cnt >= sizeof(buf))
+               return -EINVAL;
+
+       if (copy_from_user(&buf, ubuf, cnt))
+               return -EFAULT;
+
+       buf[cnt] = 0;
+
+       ret = strict_strtoul(buf, 10, &val);
+       if (ret < 0)
+               return ret;
+
+       /* !ring_buffers_off == tracing_on */
+       *p = !val;
+
+       (*ppos)++;
+
+       return cnt;
+}
+
+static struct file_operations rb_simple_fops = {
+       .open           = tracing_open_generic,
+       .read           = rb_simple_read,
+       .write          = rb_simple_write,
+};
+
+
+static __init int rb_init_debugfs(void)
+{
+       struct dentry *d_tracer;
+       struct dentry *entry;
+
+       d_tracer = tracing_init_dentry();
+
+       entry = debugfs_create_file("tracing_on", 0644, d_tracer,
+                                   &ring_buffers_off, &rb_simple_fops);
+       if (!entry)
+               pr_warning("Could not create debugfs 'tracing_on' entry\n");
+
+       return 0;
+}
+
+fs_initcall(rb_init_debugfs);
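The debugfs hook at the end exposes the same switch to user space: the file stores the logical inverse of ring_buffers_off, so reading it reports 1 while recording is enabled, and writing 0 or 1 turns recording off or on. With debugfs mounted in the usual place that means something like `echo 0 > /sys/kernel/debug/tracing/tracing_on` to stop recording and `echo 1` to the same file to resume; the exact path depends on where debugfs is mounted and on the directory returned by tracing_init_dentry().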
