aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorSteven Rostedt <srostedt@redhat.com>2009-12-07 09:11:39 -0500
committerSteven Rostedt <rostedt@goodmis.org>2009-12-09 13:55:26 -0500
commita63ce5b306855bccdacba95c03bfc293316c8ae3 (patch)
treeb6d91266e9333a9cf74d6171bbee0acc0e18096b /kernel
parent29bf4a5e3fed3dde3eb629a0cb1762c1e9217458 (diff)
tracing: Buffer the output of seq_file in case of filled buffer
If the seq_read fills the buffer it will call s_start again on the next itertation with the same position. This causes a problem with the function_graph tracer because it consumes the iteration in order to determine leaf functions. What happens is that the iterator stores the entry, and the function graph plugin will look at the next entry. If that next entry is a return of the same function and task, then the function is a leaf and the function_graph plugin calls ring_buffer_read which moves the ring buffer iterator forward (the trace iterator still points to the function start entry). The copying of the trace_seq to the seq_file buffer will fail if the seq_file buffer is full. The seq_read will not show this entry. The next read by userspace will cause seq_read to again call s_start which will reuse the trace iterator entry (the function start entry). But the function return entry was already consumed. The function graph plugin will think that this entry is a nested function and not a leaf. To solve this, the trace code now checks the return status of the seq_printf (trace_print_seq). If the writing to the seq_file buffer fails, we set a flag in the iterator (leftover) and we do not reset the trace_seq buffer. On the next call to s_start, we check the leftover flag, and if it is set, we just reuse the trace_seq buffer and do not call into the plugin print functions. Before this patch: 2) | fput() { 2) | __fput() { 2) 0.550 us | inotify_inode_queue_event(); 2) | __fsnotify_parent() { 2) 0.540 us | inotify_dentry_parent_queue_event(); After the patch: 2) | fput() { 2) | __fput() { 2) 0.550 us | inotify_inode_queue_event(); 2) 0.548 us | __fsnotify_parent(); 2) 0.540 us | inotify_dentry_parent_queue_event(); [ Updated the patch to fix a missing return 0 from the trace_print_seq() stub when CONFIG_TRACING is disabled. Reported-by: Ingo Molnar <mingo@elte.hu> ] Reported-by: Jiri Olsa <jolsa@redhat.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/trace/trace.c35
-rw-r--r--kernel/trace/trace_output.c14
2 files changed, 43 insertions, 6 deletions
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index dc937e1baa91..484114d70743 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1516,6 +1516,8 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos)
1516 int i = (int)*pos; 1516 int i = (int)*pos;
1517 void *ent; 1517 void *ent;
1518 1518
1519 WARN_ON_ONCE(iter->leftover);
1520
1519 (*pos)++; 1521 (*pos)++;
1520 1522
1521 /* can't go backwards */ 1523 /* can't go backwards */
@@ -1614,8 +1616,16 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1614 ; 1616 ;
1615 1617
1616 } else { 1618 } else {
1617 l = *pos - 1; 1619 /*
1618 p = s_next(m, p, &l); 1620 * If we overflowed the seq_file before, then we want
1621 * to just reuse the trace_seq buffer again.
1622 */
1623 if (iter->leftover)
1624 p = iter;
1625 else {
1626 l = *pos - 1;
1627 p = s_next(m, p, &l);
1628 }
1619 } 1629 }
1620 1630
1621 trace_event_read_lock(); 1631 trace_event_read_lock();
@@ -1923,6 +1933,7 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter)
1923static int s_show(struct seq_file *m, void *v) 1933static int s_show(struct seq_file *m, void *v)
1924{ 1934{
1925 struct trace_iterator *iter = v; 1935 struct trace_iterator *iter = v;
1936 int ret;
1926 1937
1927 if (iter->ent == NULL) { 1938 if (iter->ent == NULL) {
1928 if (iter->tr) { 1939 if (iter->tr) {
@@ -1942,9 +1953,27 @@ static int s_show(struct seq_file *m, void *v)
1942 if (!(trace_flags & TRACE_ITER_VERBOSE)) 1953 if (!(trace_flags & TRACE_ITER_VERBOSE))
1943 print_func_help_header(m); 1954 print_func_help_header(m);
1944 } 1955 }
1956 } else if (iter->leftover) {
1957 /*
1958 * If we filled the seq_file buffer earlier, we
1959 * want to just show it now.
1960 */
1961 ret = trace_print_seq(m, &iter->seq);
1962
1963 /* ret should this time be zero, but you never know */
1964 iter->leftover = ret;
1965
1945 } else { 1966 } else {
1946 print_trace_line(iter); 1967 print_trace_line(iter);
1947 trace_print_seq(m, &iter->seq); 1968 ret = trace_print_seq(m, &iter->seq);
1969 /*
1970 * If we overflow the seq_file buffer, then it will
1971 * ask us for this data again at start up.
1972 * Use that instead.
1973 * ret is 0 if seq_file write succeeded.
1974 * -1 otherwise.
1975 */
1976 iter->leftover = ret;
1948 } 1977 }
1949 1978
1950 return 0; 1979 return 0;
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index b6c12c6a1bcd..e5cf90fef34e 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -23,13 +23,21 @@ static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
23 23
24static int next_event_type = __TRACE_LAST_TYPE + 1; 24static int next_event_type = __TRACE_LAST_TYPE + 1;
25 25
26void trace_print_seq(struct seq_file *m, struct trace_seq *s) 26int trace_print_seq(struct seq_file *m, struct trace_seq *s)
27{ 27{
28 int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len; 28 int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
29 int ret;
30
31 ret = seq_write(m, s->buffer, len);
29 32
30 seq_write(m, s->buffer, len); 33 /*
34 * Only reset this buffer if we successfully wrote to the
35 * seq_file buffer.
36 */
37 if (!ret)
38 trace_seq_init(s);
31 39
32 trace_seq_init(s); 40 return ret;
33} 41}
34 42
35enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter) 43enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter)