author     Ingo Molnar <mingo@kernel.org>    2012-05-07 05:03:52 -0400
committer  Ingo Molnar <mingo@kernel.org>    2012-05-07 05:03:52 -0400
commit     19631cb3d67c24c8b1fa58bc69bc2fed8d15095d (patch)
tree       a56d9e11f23e3433f9eaa5b3f9dec7bef378c37e
parent     1fa2e84db3f95adab8d9c2aa245e9a0ebf32248a (diff)
parent     59a094c994a138049b41a44bc29cff9407d51c5b (diff)
Merge branch 'tip/perf/core-4' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace into perf/core
-rw-r--r--  arch/x86/Kconfig                 1
-rw-r--r--  arch/x86/include/asm/ftrace.h    3
-rw-r--r--  arch/x86/kernel/ftrace.c       511
-rw-r--r--  arch/x86/kernel/nmi.c           10
-rw-r--r--  arch/x86/kernel/traps.c          8
-rw-r--r--  include/linux/ftrace.h           6
-rw-r--r--  include/linux/kernel.h          13
-rw-r--r--  include/linux/ring_buffer.h      6
-rw-r--r--  kernel/trace/ring_buffer.c     248
-rw-r--r--  kernel/trace/trace.c           379
-rw-r--r--  kernel/trace/trace.h             4
-rw-r--r--  kernel/trace/trace_printk.c      4
12 files changed, 810 insertions, 383 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1d14cc6b79ad..1324139612e1 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -40,7 +40,6 @@ config X86
40 select HAVE_FUNCTION_GRAPH_TRACER 40 select HAVE_FUNCTION_GRAPH_TRACER
41 select HAVE_FUNCTION_GRAPH_FP_TEST 41 select HAVE_FUNCTION_GRAPH_FP_TEST
42 select HAVE_FUNCTION_TRACE_MCOUNT_TEST 42 select HAVE_FUNCTION_TRACE_MCOUNT_TEST
43 select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE
44 select HAVE_SYSCALL_TRACEPOINTS 43 select HAVE_SYSCALL_TRACEPOINTS
45 select HAVE_KVM 44 select HAVE_KVM
46 select HAVE_ARCH_KGDB 45 select HAVE_ARCH_KGDB
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 268c783ab1c0..18d9005d9e4f 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -34,6 +34,7 @@
34 34
35#ifndef __ASSEMBLY__ 35#ifndef __ASSEMBLY__
36extern void mcount(void); 36extern void mcount(void);
37extern int modifying_ftrace_code;
37 38
38static inline unsigned long ftrace_call_adjust(unsigned long addr) 39static inline unsigned long ftrace_call_adjust(unsigned long addr)
39{ 40{
@@ -50,6 +51,8 @@ struct dyn_arch_ftrace {
50 /* No extra data needed for x86 */ 51 /* No extra data needed for x86 */
51}; 52};
52 53
54int ftrace_int3_handler(struct pt_regs *regs);
55
53#endif /* CONFIG_DYNAMIC_FTRACE */ 56#endif /* CONFIG_DYNAMIC_FTRACE */
54#endif /* __ASSEMBLY__ */ 57#endif /* __ASSEMBLY__ */
55#endif /* CONFIG_FUNCTION_TRACER */ 58#endif /* CONFIG_FUNCTION_TRACER */
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index c9a281f272fd..4243e8bbdcb1 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -24,40 +24,21 @@
24#include <trace/syscall.h> 24#include <trace/syscall.h>
25 25
26#include <asm/cacheflush.h> 26#include <asm/cacheflush.h>
27#include <asm/kprobes.h>
27#include <asm/ftrace.h> 28#include <asm/ftrace.h>
28#include <asm/nops.h> 29#include <asm/nops.h>
29#include <asm/nmi.h>
30
31 30
32#ifdef CONFIG_DYNAMIC_FTRACE 31#ifdef CONFIG_DYNAMIC_FTRACE
33 32
34/*
35 * modifying_code is set to notify NMIs that they need to use
36 * memory barriers when entering or exiting. But we don't want
37 * to burden NMIs with unnecessary memory barriers when code
38 * modification is not being done (which is most of the time).
39 *
40 * A mutex is already held when ftrace_arch_code_modify_prepare
41 * and post_process are called. No locks need to be taken here.
42 *
43 * Stop machine will make sure currently running NMIs are done
44 * and new NMIs will see the updated variable before we need
45 * to worry about NMIs doing memory barriers.
46 */
47static int modifying_code __read_mostly;
48static DEFINE_PER_CPU(int, save_modifying_code);
49
50int ftrace_arch_code_modify_prepare(void) 33int ftrace_arch_code_modify_prepare(void)
51{ 34{
52 set_kernel_text_rw(); 35 set_kernel_text_rw();
53 set_all_modules_text_rw(); 36 set_all_modules_text_rw();
54 modifying_code = 1;
55 return 0; 37 return 0;
56} 38}
57 39
58int ftrace_arch_code_modify_post_process(void) 40int ftrace_arch_code_modify_post_process(void)
59{ 41{
60 modifying_code = 0;
61 set_all_modules_text_ro(); 42 set_all_modules_text_ro();
62 set_kernel_text_ro(); 43 set_kernel_text_ro();
63 return 0; 44 return 0;
@@ -90,134 +71,6 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
90 return calc.code; 71 return calc.code;
91} 72}
92 73
93/*
94 * Modifying code must take extra care. On an SMP machine, if
95 * the code being modified is also being executed on another CPU
96 * that CPU will have undefined results and possibly take a GPF.
97 * We use kstop_machine to stop other CPUs from executing code.
98 * But this does not stop NMIs from happening. We still need
99 * to protect against that. We separate out the modification of
100 * the code to take care of this.
101 *
102 * Two buffers are added: An IP buffer and a "code" buffer.
103 *
104 * 1) Put the instruction pointer into the IP buffer
105 * and the new code into the "code" buffer.
106 * 2) Wait for any running NMIs to finish and set a flag that says
107 * we are modifying code, it is done in an atomic operation.
108 * 3) Write the code
109 * 4) clear the flag.
110 * 5) Wait for any running NMIs to finish.
111 *
112 * If an NMI is executed, the first thing it does is to call
113 * "ftrace_nmi_enter". This will check if the flag is set to write
114 * and if it is, it will write what is in the IP and "code" buffers.
115 *
116 * The trick is, it does not matter if everyone is writing the same
117 * content to the code location. Also, if a CPU is executing code
118 * it is OK to write to that code location if the contents being written
119 * are the same as what exists.
120 */
121
122#define MOD_CODE_WRITE_FLAG (1 << 31) /* set when NMI should do the write */
123static atomic_t nmi_running = ATOMIC_INIT(0);
124static int mod_code_status; /* holds return value of text write */
125static void *mod_code_ip; /* holds the IP to write to */
126static const void *mod_code_newcode; /* holds the text to write to the IP */
127
128static unsigned nmi_wait_count;
129static atomic_t nmi_update_count = ATOMIC_INIT(0);
130
131int ftrace_arch_read_dyn_info(char *buf, int size)
132{
133 int r;
134
135 r = snprintf(buf, size, "%u %u",
136 nmi_wait_count,
137 atomic_read(&nmi_update_count));
138 return r;
139}
140
141static void clear_mod_flag(void)
142{
143 int old = atomic_read(&nmi_running);
144
145 for (;;) {
146 int new = old & ~MOD_CODE_WRITE_FLAG;
147
148 if (old == new)
149 break;
150
151 old = atomic_cmpxchg(&nmi_running, old, new);
152 }
153}
154
155static void ftrace_mod_code(void)
156{
157 /*
158 * Yes, more than one CPU process can be writing to mod_code_status.
159 * (and the code itself)
160 * But if one were to fail, then they all should, and if one were
161 * to succeed, then they all should.
162 */
163 mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
164 MCOUNT_INSN_SIZE);
165
166 /* if we fail, then kill any new writers */
167 if (mod_code_status)
168 clear_mod_flag();
169}
170
171void ftrace_nmi_enter(void)
172{
173 __this_cpu_write(save_modifying_code, modifying_code);
174
175 if (!__this_cpu_read(save_modifying_code))
176 return;
177
178 if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
179 smp_rmb();
180 ftrace_mod_code();
181 atomic_inc(&nmi_update_count);
182 }
183 /* Must have previous changes seen before executions */
184 smp_mb();
185}
186
187void ftrace_nmi_exit(void)
188{
189 if (!__this_cpu_read(save_modifying_code))
190 return;
191
192 /* Finish all executions before clearing nmi_running */
193 smp_mb();
194 atomic_dec(&nmi_running);
195}
196
197static void wait_for_nmi_and_set_mod_flag(void)
198{
199 if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG))
200 return;
201
202 do {
203 cpu_relax();
204 } while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG));
205
206 nmi_wait_count++;
207}
208
209static void wait_for_nmi(void)
210{
211 if (!atomic_read(&nmi_running))
212 return;
213
214 do {
215 cpu_relax();
216 } while (atomic_read(&nmi_running));
217
218 nmi_wait_count++;
219}
220
221static inline int 74static inline int
222within(unsigned long addr, unsigned long start, unsigned long end) 75within(unsigned long addr, unsigned long start, unsigned long end)
223{ 76{
@@ -238,26 +91,7 @@ do_ftrace_mod_code(unsigned long ip, const void *new_code)
238 if (within(ip, (unsigned long)_text, (unsigned long)_etext)) 91 if (within(ip, (unsigned long)_text, (unsigned long)_etext))
239 ip = (unsigned long)__va(__pa(ip)); 92 ip = (unsigned long)__va(__pa(ip));
240 93
241 mod_code_ip = (void *)ip; 94 return probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE);
242 mod_code_newcode = new_code;
243
244 /* The buffers need to be visible before we let NMIs write them */
245 smp_mb();
246
247 wait_for_nmi_and_set_mod_flag();
248
249 /* Make sure all running NMIs have finished before we write the code */
250 smp_mb();
251
252 ftrace_mod_code();
253
254 /* Make sure the write happens before clearing the bit */
255 smp_mb();
256
257 clear_mod_flag();
258 wait_for_nmi();
259
260 return mod_code_status;
261} 95}
262 96
263static const unsigned char *ftrace_nop_replace(void) 97static const unsigned char *ftrace_nop_replace(void)
@@ -334,6 +168,347 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
334 return ret; 168 return ret;
335} 169}
336 170
171int modifying_ftrace_code __read_mostly;
172
173/*
174 * A breakpoint was added to the code address we are about to
175 * modify, and this is the handler that will just skip over it.
176 * We are either changing a nop into a trace call, or a trace
177 * call to a nop. While the change is taking place, we treat
178 * it just like it was a nop.
179 */
180int ftrace_int3_handler(struct pt_regs *regs)
181{
182 if (WARN_ON_ONCE(!regs))
183 return 0;
184
185 if (!ftrace_location(regs->ip - 1))
186 return 0;
187
188 regs->ip += MCOUNT_INSN_SIZE - 1;
189
190 return 1;
191}
192
193static int ftrace_write(unsigned long ip, const char *val, int size)
194{
195 /*
196 * On x86_64, kernel text mappings are mapped read-only with
197 * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead
198 * of the kernel text mapping to modify the kernel text.
199 *
200 * For 32-bit kernels, these mappings are the same and we can use
201 * kernel identity mapping to modify code.
202 */
203 if (within(ip, (unsigned long)_text, (unsigned long)_etext))
204 ip = (unsigned long)__va(__pa(ip));
205
206 return probe_kernel_write((void *)ip, val, size);
207}
208
209static int add_break(unsigned long ip, const char *old)
210{
211 unsigned char replaced[MCOUNT_INSN_SIZE];
212 unsigned char brk = BREAKPOINT_INSTRUCTION;
213
214 if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
215 return -EFAULT;
216
217 /* Make sure it is what we expect it to be */
218 if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0)
219 return -EINVAL;
220
221 if (ftrace_write(ip, &brk, 1))
222 return -EPERM;
223
224 return 0;
225}
226
227static int add_brk_on_call(struct dyn_ftrace *rec, unsigned long addr)
228{
229 unsigned const char *old;
230 unsigned long ip = rec->ip;
231
232 old = ftrace_call_replace(ip, addr);
233
234 return add_break(rec->ip, old);
235}
236
237
238static int add_brk_on_nop(struct dyn_ftrace *rec)
239{
240 unsigned const char *old;
241
242 old = ftrace_nop_replace();
243
244 return add_break(rec->ip, old);
245}
246
247static int add_breakpoints(struct dyn_ftrace *rec, int enable)
248{
249 unsigned long ftrace_addr;
250 int ret;
251
252 ret = ftrace_test_record(rec, enable);
253
254 ftrace_addr = (unsigned long)FTRACE_ADDR;
255
256 switch (ret) {
257 case FTRACE_UPDATE_IGNORE:
258 return 0;
259
260 case FTRACE_UPDATE_MAKE_CALL:
261 /* converting nop to call */
262 return add_brk_on_nop(rec);
263
264 case FTRACE_UPDATE_MAKE_NOP:
265 /* converting a call to a nop */
266 return add_brk_on_call(rec, ftrace_addr);
267 }
268 return 0;
269}
270
271/*
272 * On error, we need to remove breakpoints. This needs to
273 * be done carefully. If the address does not currently have a
274 * breakpoint, we know we are done. Otherwise, we look at the
275 * remaining 4 bytes of the instruction. If it matches a nop
276 * we replace the breakpoint with the nop. Otherwise we replace
277 * it with the call instruction.
278 */
279static int remove_breakpoint(struct dyn_ftrace *rec)
280{
281 unsigned char ins[MCOUNT_INSN_SIZE];
282 unsigned char brk = BREAKPOINT_INSTRUCTION;
283 const unsigned char *nop;
284 unsigned long ftrace_addr;
285 unsigned long ip = rec->ip;
286
287 /* If we fail the read, just give up */
288 if (probe_kernel_read(ins, (void *)ip, MCOUNT_INSN_SIZE))
289 return -EFAULT;
290
291 /* If this does not have a breakpoint, we are done */
292 if (ins[0] != brk)
293 return -1;
294
295 nop = ftrace_nop_replace();
296
297 /*
298 * If the last 4 bytes of the instruction do not match
299 * a nop, then we assume that this is a call to ftrace_addr.
300 */
301 if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) {
302 /*
303 * For extra paranoia, we check if the breakpoint is on
304 * a call that would actually jump to the ftrace_addr.
305 * If not, don't touch the breakpoint, as we may just create
306 * a disaster.
307 */
308 ftrace_addr = (unsigned long)FTRACE_ADDR;
309 nop = ftrace_call_replace(ip, ftrace_addr);
310
311 if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0)
312 return -EINVAL;
313 }
314
315 return probe_kernel_write((void *)ip, &nop[0], 1);
316}
317
318static int add_update_code(unsigned long ip, unsigned const char *new)
319{
320 /* skip breakpoint */
321 ip++;
322 new++;
323 if (ftrace_write(ip, new, MCOUNT_INSN_SIZE - 1))
324 return -EPERM;
325 return 0;
326}
327
328static int add_update_call(struct dyn_ftrace *rec, unsigned long addr)
329{
330 unsigned long ip = rec->ip;
331 unsigned const char *new;
332
333 new = ftrace_call_replace(ip, addr);
334 return add_update_code(ip, new);
335}
336
337static int add_update_nop(struct dyn_ftrace *rec)
338{
339 unsigned long ip = rec->ip;
340 unsigned const char *new;
341
342 new = ftrace_nop_replace();
343 return add_update_code(ip, new);
344}
345
346static int add_update(struct dyn_ftrace *rec, int enable)
347{
348 unsigned long ftrace_addr;
349 int ret;
350
351 ret = ftrace_test_record(rec, enable);
352
353 ftrace_addr = (unsigned long)FTRACE_ADDR;
354
355 switch (ret) {
356 case FTRACE_UPDATE_IGNORE:
357 return 0;
358
359 case FTRACE_UPDATE_MAKE_CALL:
360 /* converting nop to call */
361 return add_update_call(rec, ftrace_addr);
362
363 case FTRACE_UPDATE_MAKE_NOP:
364 /* converting a call to a nop */
365 return add_update_nop(rec);
366 }
367
368 return 0;
369}
370
371static int finish_update_call(struct dyn_ftrace *rec, unsigned long addr)
372{
373 unsigned long ip = rec->ip;
374 unsigned const char *new;
375
376 new = ftrace_call_replace(ip, addr);
377
378 if (ftrace_write(ip, new, 1))
379 return -EPERM;
380
381 return 0;
382}
383
384static int finish_update_nop(struct dyn_ftrace *rec)
385{
386 unsigned long ip = rec->ip;
387 unsigned const char *new;
388
389 new = ftrace_nop_replace();
390
391 if (ftrace_write(ip, new, 1))
392 return -EPERM;
393 return 0;
394}
395
396static int finish_update(struct dyn_ftrace *rec, int enable)
397{
398 unsigned long ftrace_addr;
399 int ret;
400
401 ret = ftrace_update_record(rec, enable);
402
403 ftrace_addr = (unsigned long)FTRACE_ADDR;
404
405 switch (ret) {
406 case FTRACE_UPDATE_IGNORE:
407 return 0;
408
409 case FTRACE_UPDATE_MAKE_CALL:
410 /* converting nop to call */
411 return finish_update_call(rec, ftrace_addr);
412
413 case FTRACE_UPDATE_MAKE_NOP:
414 /* converting a call to a nop */
415 return finish_update_nop(rec);
416 }
417
418 return 0;
419}
420
421static void do_sync_core(void *data)
422{
423 sync_core();
424}
425
426static void run_sync(void)
427{
428 int enable_irqs = irqs_disabled();
429
430 /* We may be called with interrupts disabled (on bootup). */
431 if (enable_irqs)
432 local_irq_enable();
433 on_each_cpu(do_sync_core, NULL, 1);
434 if (enable_irqs)
435 local_irq_disable();
436}
437
438static void ftrace_replace_code(int enable)
439{
440 struct ftrace_rec_iter *iter;
441 struct dyn_ftrace *rec;
442 const char *report = "adding breakpoints";
443 int count = 0;
444 int ret;
445
446 for_ftrace_rec_iter(iter) {
447 rec = ftrace_rec_iter_record(iter);
448
449 ret = add_breakpoints(rec, enable);
450 if (ret)
451 goto remove_breakpoints;
452 count++;
453 }
454
455 run_sync();
456
457 report = "updating code";
458
459 for_ftrace_rec_iter(iter) {
460 rec = ftrace_rec_iter_record(iter);
461
462 ret = add_update(rec, enable);
463 if (ret)
464 goto remove_breakpoints;
465 }
466
467 run_sync();
468
469 report = "removing breakpoints";
470
471 for_ftrace_rec_iter(iter) {
472 rec = ftrace_rec_iter_record(iter);
473
474 ret = finish_update(rec, enable);
475 if (ret)
476 goto remove_breakpoints;
477 }
478
479 run_sync();
480
481 return;
482
483 remove_breakpoints:
484 ftrace_bug(ret, rec ? rec->ip : 0);
485 printk(KERN_WARNING "Failed on %s (%d):\n", report, count);
486 for_ftrace_rec_iter(iter) {
487 rec = ftrace_rec_iter_record(iter);
488 remove_breakpoint(rec);
489 }
490}
491
492void arch_ftrace_update_code(int command)
493{
494 modifying_ftrace_code++;
495
496 if (command & FTRACE_UPDATE_CALLS)
497 ftrace_replace_code(1);
498 else if (command & FTRACE_DISABLE_CALLS)
499 ftrace_replace_code(0);
500
501 if (command & FTRACE_UPDATE_TRACE_FUNC)
502 ftrace_update_ftrace_func(ftrace_trace_function);
503
504 if (command & FTRACE_START_FUNC_RET)
505 ftrace_enable_ftrace_graph_caller();
506 else if (command & FTRACE_STOP_FUNC_RET)
507 ftrace_disable_ftrace_graph_caller();
508
509 modifying_ftrace_code--;
510}
511
337int __init ftrace_dyn_arch_init(void *data) 512int __init ftrace_dyn_arch_init(void *data)
338{ 513{
339 /* The return code is returned via data */ 514
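
To make the new int3-based update sequence concrete, here is a hedged, user-space sketch. None of this is kernel code: the buffer, the byte values and the printing are invented, and only the ordering (breakpoint first, tail bytes second, first byte last, with a core sync between the passes) mirrors ftrace_replace_code() above.

#include <stdio.h>
#include <string.h>

#define SITE_SIZE 5      /* MCOUNT_INSN_SIZE on x86 */
#define INT3      0xcc   /* BREAKPOINT_INSTRUCTION  */

/* one mcount call site, initially a 5-byte NOP */
static unsigned char site[SITE_SIZE] = { 0x0f, 0x1f, 0x44, 0x00, 0x00 };
/* the call instruction we want to end up with (offset bytes are dummies) */
static const unsigned char call[SITE_SIZE] = { 0xe8, 0x12, 0x34, 0x56, 0x78 };

static void dump(const char *stage)
{
    printf("%-24s:", stage);
    for (int i = 0; i < SITE_SIZE; i++)
        printf(" %02x", site[i]);
    printf("\n");
}

int main(void)
{
    dump("initial nop");

    site[0] = INT3;                            /* pass 1: add_brk_on_nop()     */
    dump("breakpoint added");                  /* run_sync() in the kernel     */

    memcpy(site + 1, call + 1, SITE_SIZE - 1); /* pass 2: add_update_code()    */
    dump("tail bytes rewritten");              /* run_sync()                   */

    site[0] = call[0];                         /* pass 3: finish_update_call() */
    dump("call instruction live");             /* run_sync()                   */

    /* At every intermediate step the site is either a valid NOP, a valid
     * call, or starts with int3, and the int3 case is exactly what
     * ftrace_int3_handler() skips over. */
    return 0;
}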
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 47acaf319165..eb1539eac393 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -84,7 +84,7 @@ __setup("unknown_nmi_panic", setup_unknown_nmi_panic);
84 84
85#define nmi_to_desc(type) (&nmi_desc[type]) 85#define nmi_to_desc(type) (&nmi_desc[type])
86 86
87static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) 87static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
88{ 88{
89 struct nmi_desc *desc = nmi_to_desc(type); 89 struct nmi_desc *desc = nmi_to_desc(type);
90 struct nmiaction *a; 90 struct nmiaction *a;
@@ -209,7 +209,7 @@ void unregister_nmi_handler(unsigned int type, const char *name)
209 209
210EXPORT_SYMBOL_GPL(unregister_nmi_handler); 210EXPORT_SYMBOL_GPL(unregister_nmi_handler);
211 211
212static notrace __kprobes void 212static __kprobes void
213pci_serr_error(unsigned char reason, struct pt_regs *regs) 213pci_serr_error(unsigned char reason, struct pt_regs *regs)
214{ 214{
215 pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n", 215 pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
@@ -236,7 +236,7 @@ pci_serr_error(unsigned char reason, struct pt_regs *regs)
236 outb(reason, NMI_REASON_PORT); 236 outb(reason, NMI_REASON_PORT);
237} 237}
238 238
239static notrace __kprobes void 239static __kprobes void
240io_check_error(unsigned char reason, struct pt_regs *regs) 240io_check_error(unsigned char reason, struct pt_regs *regs)
241{ 241{
242 unsigned long i; 242 unsigned long i;
@@ -263,7 +263,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
263 outb(reason, NMI_REASON_PORT); 263 outb(reason, NMI_REASON_PORT);
264} 264}
265 265
266static notrace __kprobes void 266static __kprobes void
267unknown_nmi_error(unsigned char reason, struct pt_regs *regs) 267unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
268{ 268{
269 int handled; 269 int handled;
@@ -305,7 +305,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
305static DEFINE_PER_CPU(bool, swallow_nmi); 305static DEFINE_PER_CPU(bool, swallow_nmi);
306static DEFINE_PER_CPU(unsigned long, last_nmi_rip); 306static DEFINE_PER_CPU(unsigned long, last_nmi_rip);
307 307
308static notrace __kprobes void default_do_nmi(struct pt_regs *regs) 308static __kprobes void default_do_nmi(struct pt_regs *regs)
309{ 309{
310 unsigned char reason = 0; 310 unsigned char reason = 0;
311 int handled; 311 int handled;
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index ff9281f16029..92d5756d85fc 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -50,6 +50,7 @@
50#include <asm/processor.h> 50#include <asm/processor.h>
51#include <asm/debugreg.h> 51#include <asm/debugreg.h>
52#include <linux/atomic.h> 52#include <linux/atomic.h>
53#include <asm/ftrace.h>
53#include <asm/traps.h> 54#include <asm/traps.h>
54#include <asm/desc.h> 55#include <asm/desc.h>
55#include <asm/i387.h> 56#include <asm/i387.h>
@@ -303,8 +304,13 @@ gp_in_kernel:
303} 304}
304 305
305/* May run on IST stack. */ 306/* May run on IST stack. */
306dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) 307dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code)
307{ 308{
309#ifdef CONFIG_DYNAMIC_FTRACE
310 /* ftrace must be first, everything else may cause a recursive crash */
311 if (unlikely(modifying_ftrace_code) && ftrace_int3_handler(regs))
312 return;
313#endif
308#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP 314#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
309 if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, 315 if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
310 SIGTRAP) == NOTIFY_STOP) 316 SIGTRAP) == NOTIFY_STOP)
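
The ordering inside do_int3() matters: the ftrace check runs before the kgdb and notifier paths because, per the comment added above, anything else may cause a recursive crash while a call site is half-rewritten. The fixup itself is just pointer arithmetic; below is a hedged, stand-alone check of it, where the address is made up and only the constant matches the x86 code above.

#include <assert.h>
#include <stdint.h>

#define MCOUNT_INSN_SIZE 5   /* size of the call/nop site on x86 */

int main(void)
{
    uint64_t site = 0xffffffff81000100ULL; /* hypothetical mcount site        */
    uint64_t ip   = site + 1;              /* int3 is 1 byte; the trap pushes
                                              the address of the next byte    */

    /* ftrace_int3_handler(): ip - 1 is the site address that gets looked up
     * via ftrace_location(); advancing by MCOUNT_INSN_SIZE - 1 then resumes
     * execution at the first instruction after the patched site. */
    assert(ip - 1 == site);
    ip += MCOUNT_INSN_SIZE - 1;
    assert(ip == site + MCOUNT_INSN_SIZE);
    return 0;
}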
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 72a6cabb4d5b..0b5590330bca 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -286,6 +286,12 @@ struct ftrace_rec_iter *ftrace_rec_iter_start(void);
286struct ftrace_rec_iter *ftrace_rec_iter_next(struct ftrace_rec_iter *iter); 286struct ftrace_rec_iter *ftrace_rec_iter_next(struct ftrace_rec_iter *iter);
287struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter); 287struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter);
288 288
289#define for_ftrace_rec_iter(iter) \
290 for (iter = ftrace_rec_iter_start(); \
291 iter; \
292 iter = ftrace_rec_iter_next(iter))
293
294
289int ftrace_update_record(struct dyn_ftrace *rec, int enable); 295int ftrace_update_record(struct dyn_ftrace *rec, int enable);
290int ftrace_test_record(struct dyn_ftrace *rec, int enable); 296int ftrace_test_record(struct dyn_ftrace *rec, int enable);
291void ftrace_run_stop_machine(int command); 297void ftrace_run_stop_machine(int command);
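
The new for_ftrace_rec_iter() helper is what lets an architecture batch-process every mcount record itself instead of taking one callback per record. A hedged usage sketch follows; the function name and the pr_debug() are illustrative, while the iteration pattern is the one ftrace_replace_code() in the x86 hunk above uses.

#include <linux/ftrace.h>
#include <linux/printk.h>

/* Walk every registered mcount site, as the x86 breakpoint patcher does. */
static void walk_all_mcount_sites(void)
{
    struct ftrace_rec_iter *iter;
    struct dyn_ftrace *rec;

    for_ftrace_rec_iter(iter) {
        rec = ftrace_rec_iter_record(iter);
        pr_debug("mcount site at %pS\n", (void *)rec->ip);
    }
}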
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 645231c373c8..c0d34420a913 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -480,15 +480,16 @@ do { \
480 480
481#define trace_printk(fmt, args...) \ 481#define trace_printk(fmt, args...) \
482do { \ 482do { \
483 static const char *trace_printk_fmt \
484 __attribute__((section("__trace_printk_fmt"))) = \
485 __builtin_constant_p(fmt) ? fmt : NULL; \
486 \
483 __trace_printk_check_format(fmt, ##args); \ 487 __trace_printk_check_format(fmt, ##args); \
484 if (__builtin_constant_p(fmt)) { \
485 static const char *trace_printk_fmt \
486 __attribute__((section("__trace_printk_fmt"))) = \
487 __builtin_constant_p(fmt) ? fmt : NULL; \
488 \ 488 \
489 if (__builtin_constant_p(fmt)) \
489 __trace_bprintk(_THIS_IP_, trace_printk_fmt, ##args); \ 490 __trace_bprintk(_THIS_IP_, trace_printk_fmt, ##args); \
490 } else \ 491 else \
491 __trace_printk(_THIS_IP_, fmt, ##args); \ 492 __trace_printk(_THIS_IP_, fmt, ##args); \
492} while (0) 493} while (0)
493 494
494extern __printf(2, 3) 495extern __printf(2, 3)
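
As far as this hunk shows, the point of hoisting trace_printk_fmt out of the conditional is that the __trace_printk_fmt section entry now exists for every trace_printk() call site, constant format or not, while the __builtin_constant_p() test only selects between the binary __trace_bprintk() fast path and plain __trace_printk(). A hedged illustration with made-up call sites:

#include <linux/kernel.h>

/* Illustrative call sites only: which branch of the reworked macro each one
 * takes.  'cpu' and 'dyn_fmt' are placeholders. */
static void trace_printk_example(int cpu, const char *dyn_fmt)
{
    /* Compile-time constant format: trace_printk_fmt points at it, the
     * string lands in the __trace_printk_fmt section, and the fast binary
     * path __trace_bprintk() is used. */
    trace_printk("resized ring buffer on cpu %d\n", cpu);

    /* Non-constant format: trace_printk_fmt is NULL and the slower
     * __trace_printk() formats the string at trace time. */
    trace_printk(dyn_fmt, cpu);
}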
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 7be2e88f23fd..6c8835f74f79 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -96,9 +96,11 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k
96 __ring_buffer_alloc((size), (flags), &__key); \ 96 __ring_buffer_alloc((size), (flags), &__key); \
97}) 97})
98 98
99#define RING_BUFFER_ALL_CPUS -1
100
99void ring_buffer_free(struct ring_buffer *buffer); 101void ring_buffer_free(struct ring_buffer *buffer);
100 102
101int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size); 103int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size, int cpu);
102 104
103void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val); 105void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val);
104 106
@@ -129,7 +131,7 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts);
129void ring_buffer_iter_reset(struct ring_buffer_iter *iter); 131void ring_buffer_iter_reset(struct ring_buffer_iter *iter);
130int ring_buffer_iter_empty(struct ring_buffer_iter *iter); 132int ring_buffer_iter_empty(struct ring_buffer_iter *iter);
131 133
132unsigned long ring_buffer_size(struct ring_buffer *buffer); 134unsigned long ring_buffer_size(struct ring_buffer *buffer, int cpu);
133 135
134void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu); 136void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu);
135void ring_buffer_reset(struct ring_buffer *buffer); 137void ring_buffer_reset(struct ring_buffer *buffer);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index cf8d11e91efd..2d5eb3320827 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -449,6 +449,7 @@ struct ring_buffer_per_cpu {
449 raw_spinlock_t reader_lock; /* serialize readers */ 449 raw_spinlock_t reader_lock; /* serialize readers */
450 arch_spinlock_t lock; 450 arch_spinlock_t lock;
451 struct lock_class_key lock_key; 451 struct lock_class_key lock_key;
452 unsigned int nr_pages;
452 struct list_head *pages; 453 struct list_head *pages;
453 struct buffer_page *head_page; /* read from head */ 454 struct buffer_page *head_page; /* read from head */
454 struct buffer_page *tail_page; /* write to tail */ 455 struct buffer_page *tail_page; /* write to tail */
@@ -466,10 +467,12 @@ struct ring_buffer_per_cpu {
466 unsigned long read_bytes; 467 unsigned long read_bytes;
467 u64 write_stamp; 468 u64 write_stamp;
468 u64 read_stamp; 469 u64 read_stamp;
470 /* ring buffer pages to update, > 0 to add, < 0 to remove */
471 int nr_pages_to_update;
472 struct list_head new_pages; /* new pages to add */
469}; 473};
470 474
471struct ring_buffer { 475struct ring_buffer {
472 unsigned pages;
473 unsigned flags; 476 unsigned flags;
474 int cpus; 477 int cpus;
475 atomic_t record_disabled; 478 atomic_t record_disabled;
@@ -963,14 +966,10 @@ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
963 return 0; 966 return 0;
964} 967}
965 968
966static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, 969static int __rb_allocate_pages(int nr_pages, struct list_head *pages, int cpu)
967 unsigned nr_pages)
968{ 970{
971 int i;
969 struct buffer_page *bpage, *tmp; 972 struct buffer_page *bpage, *tmp;
970 LIST_HEAD(pages);
971 unsigned i;
972
973 WARN_ON(!nr_pages);
974 973
975 for (i = 0; i < nr_pages; i++) { 974 for (i = 0; i < nr_pages; i++) {
976 struct page *page; 975 struct page *page;
@@ -981,15 +980,13 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
981 */ 980 */
982 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), 981 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
983 GFP_KERNEL | __GFP_NORETRY, 982 GFP_KERNEL | __GFP_NORETRY,
984 cpu_to_node(cpu_buffer->cpu)); 983 cpu_to_node(cpu));
985 if (!bpage) 984 if (!bpage)
986 goto free_pages; 985 goto free_pages;
987 986
988 rb_check_bpage(cpu_buffer, bpage); 987 list_add(&bpage->list, pages);
989 988
990 list_add(&bpage->list, &pages); 989 page = alloc_pages_node(cpu_to_node(cpu),
991
992 page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu),
993 GFP_KERNEL | __GFP_NORETRY, 0); 990 GFP_KERNEL | __GFP_NORETRY, 0);
994 if (!page) 991 if (!page)
995 goto free_pages; 992 goto free_pages;
@@ -997,6 +994,27 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
997 rb_init_page(bpage->page); 994 rb_init_page(bpage->page);
998 } 995 }
999 996
997 return 0;
998
999free_pages:
1000 list_for_each_entry_safe(bpage, tmp, pages, list) {
1001 list_del_init(&bpage->list);
1002 free_buffer_page(bpage);
1003 }
1004
1005 return -ENOMEM;
1006}
1007
1008static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
1009 unsigned nr_pages)
1010{
1011 LIST_HEAD(pages);
1012
1013 WARN_ON(!nr_pages);
1014
1015 if (__rb_allocate_pages(nr_pages, &pages, cpu_buffer->cpu))
1016 return -ENOMEM;
1017
1000 /* 1018 /*
1001 * The ring buffer page list is a circular list that does not 1019 * The ring buffer page list is a circular list that does not
1002 * start and end with a list head. All page list items point to 1020 * start and end with a list head. All page list items point to
@@ -1005,20 +1023,15 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
1005 cpu_buffer->pages = pages.next; 1023 cpu_buffer->pages = pages.next;
1006 list_del(&pages); 1024 list_del(&pages);
1007 1025
1026 cpu_buffer->nr_pages = nr_pages;
1027
1008 rb_check_pages(cpu_buffer); 1028 rb_check_pages(cpu_buffer);
1009 1029
1010 return 0; 1030 return 0;
1011
1012 free_pages:
1013 list_for_each_entry_safe(bpage, tmp, &pages, list) {
1014 list_del_init(&bpage->list);
1015 free_buffer_page(bpage);
1016 }
1017 return -ENOMEM;
1018} 1031}
1019 1032
1020static struct ring_buffer_per_cpu * 1033static struct ring_buffer_per_cpu *
1021rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) 1034rb_allocate_cpu_buffer(struct ring_buffer *buffer, int nr_pages, int cpu)
1022{ 1035{
1023 struct ring_buffer_per_cpu *cpu_buffer; 1036 struct ring_buffer_per_cpu *cpu_buffer;
1024 struct buffer_page *bpage; 1037 struct buffer_page *bpage;
@@ -1052,7 +1065,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
1052 1065
1053 INIT_LIST_HEAD(&cpu_buffer->reader_page->list); 1066 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
1054 1067
1055 ret = rb_allocate_pages(cpu_buffer, buffer->pages); 1068 ret = rb_allocate_pages(cpu_buffer, nr_pages);
1056 if (ret < 0) 1069 if (ret < 0)
1057 goto fail_free_reader; 1070 goto fail_free_reader;
1058 1071
@@ -1113,7 +1126,7 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
1113{ 1126{
1114 struct ring_buffer *buffer; 1127 struct ring_buffer *buffer;
1115 int bsize; 1128 int bsize;
1116 int cpu; 1129 int cpu, nr_pages;
1117 1130
1118 /* keep it in its own cache line */ 1131 /* keep it in its own cache line */
1119 buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()), 1132 buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
@@ -1124,14 +1137,14 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
1124 if (!alloc_cpumask_var(&buffer->cpumask, GFP_KERNEL)) 1137 if (!alloc_cpumask_var(&buffer->cpumask, GFP_KERNEL))
1125 goto fail_free_buffer; 1138 goto fail_free_buffer;
1126 1139
1127 buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); 1140 nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
1128 buffer->flags = flags; 1141 buffer->flags = flags;
1129 buffer->clock = trace_clock_local; 1142 buffer->clock = trace_clock_local;
1130 buffer->reader_lock_key = key; 1143 buffer->reader_lock_key = key;
1131 1144
1132 /* need at least two pages */ 1145 /* need at least two pages */
1133 if (buffer->pages < 2) 1146 if (nr_pages < 2)
1134 buffer->pages = 2; 1147 nr_pages = 2;
1135 1148
1136 /* 1149 /*
1137 * In case of non-hotplug cpu, if the ring-buffer is allocated 1150 * In case of non-hotplug cpu, if the ring-buffer is allocated
@@ -1154,7 +1167,7 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
1154 1167
1155 for_each_buffer_cpu(buffer, cpu) { 1168 for_each_buffer_cpu(buffer, cpu) {
1156 buffer->buffers[cpu] = 1169 buffer->buffers[cpu] =
1157 rb_allocate_cpu_buffer(buffer, cpu); 1170 rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
1158 if (!buffer->buffers[cpu]) 1171 if (!buffer->buffers[cpu])
1159 goto fail_free_buffers; 1172 goto fail_free_buffers;
1160 } 1173 }
@@ -1276,6 +1289,18 @@ out:
1276 raw_spin_unlock_irq(&cpu_buffer->reader_lock); 1289 raw_spin_unlock_irq(&cpu_buffer->reader_lock);
1277} 1290}
1278 1291
1292static void update_pages_handler(struct ring_buffer_per_cpu *cpu_buffer)
1293{
1294 if (cpu_buffer->nr_pages_to_update > 0)
1295 rb_insert_pages(cpu_buffer, &cpu_buffer->new_pages,
1296 cpu_buffer->nr_pages_to_update);
1297 else
1298 rb_remove_pages(cpu_buffer, -cpu_buffer->nr_pages_to_update);
1299 cpu_buffer->nr_pages += cpu_buffer->nr_pages_to_update;
1300 /* reset this value */
1301 cpu_buffer->nr_pages_to_update = 0;
1302}
1303
1279/** 1304/**
1280 * ring_buffer_resize - resize the ring buffer 1305 * ring_buffer_resize - resize the ring buffer
1281 * @buffer: the buffer to resize. 1306 * @buffer: the buffer to resize.
@@ -1285,14 +1310,12 @@ out:
1285 * 1310 *
1286 * Returns -1 on failure. 1311 * Returns -1 on failure.
1287 */ 1312 */
1288int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) 1313int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size,
1314 int cpu_id)
1289{ 1315{
1290 struct ring_buffer_per_cpu *cpu_buffer; 1316 struct ring_buffer_per_cpu *cpu_buffer;
1291 unsigned nr_pages, rm_pages, new_pages; 1317 unsigned nr_pages;
1292 struct buffer_page *bpage, *tmp; 1318 int cpu;
1293 unsigned long buffer_size;
1294 LIST_HEAD(pages);
1295 int i, cpu;
1296 1319
1297 /* 1320 /*
1298 * Always succeed at resizing a non-existent buffer: 1321 * Always succeed at resizing a non-existent buffer:
@@ -1302,15 +1325,11 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1302 1325
1303 size = DIV_ROUND_UP(size, BUF_PAGE_SIZE); 1326 size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
1304 size *= BUF_PAGE_SIZE; 1327 size *= BUF_PAGE_SIZE;
1305 buffer_size = buffer->pages * BUF_PAGE_SIZE;
1306 1328
1307 /* we need a minimum of two pages */ 1329 /* we need a minimum of two pages */
1308 if (size < BUF_PAGE_SIZE * 2) 1330 if (size < BUF_PAGE_SIZE * 2)
1309 size = BUF_PAGE_SIZE * 2; 1331 size = BUF_PAGE_SIZE * 2;
1310 1332
1311 if (size == buffer_size)
1312 return size;
1313
1314 atomic_inc(&buffer->record_disabled); 1333 atomic_inc(&buffer->record_disabled);
1315 1334
1316 /* Make sure all writers are done with this buffer. */ 1335 /* Make sure all writers are done with this buffer. */
@@ -1321,68 +1340,56 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1321 1340
1322 nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); 1341 nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
1323 1342
1324 if (size < buffer_size) { 1343 if (cpu_id == RING_BUFFER_ALL_CPUS) {
1325 1344 /* calculate the pages to update */
1326 /* easy case, just free pages */
1327 if (RB_WARN_ON(buffer, nr_pages >= buffer->pages))
1328 goto out_fail;
1329
1330 rm_pages = buffer->pages - nr_pages;
1331
1332 for_each_buffer_cpu(buffer, cpu) { 1345 for_each_buffer_cpu(buffer, cpu) {
1333 cpu_buffer = buffer->buffers[cpu]; 1346 cpu_buffer = buffer->buffers[cpu];
1334 rb_remove_pages(cpu_buffer, rm_pages);
1335 }
1336 goto out;
1337 }
1338 1347
1339 /* 1348 cpu_buffer->nr_pages_to_update = nr_pages -
1340 * This is a bit more difficult. We only want to add pages 1349 cpu_buffer->nr_pages;
1341 * when we can allocate enough for all CPUs. We do this
1342 * by allocating all the pages and storing them on a local
1343 * link list. If we succeed in our allocation, then we
1344 * add these pages to the cpu_buffers. Otherwise we just free
1345 * them all and return -ENOMEM;
1346 */
1347 if (RB_WARN_ON(buffer, nr_pages <= buffer->pages))
1348 goto out_fail;
1349 1350
1350 new_pages = nr_pages - buffer->pages; 1351 /*
1352 * nothing more to do for removing pages or no update
1353 */
1354 if (cpu_buffer->nr_pages_to_update <= 0)
1355 continue;
1351 1356
1352 for_each_buffer_cpu(buffer, cpu) {
1353 for (i = 0; i < new_pages; i++) {
1354 struct page *page;
1355 /* 1357 /*
1356 * __GFP_NORETRY flag makes sure that the allocation 1358 * to add pages, make sure all new pages can be
1357 * fails gracefully without invoking oom-killer and 1359 * allocated without receiving ENOMEM
1358 * the system is not destabilized.
1359 */ 1360 */
1360 bpage = kzalloc_node(ALIGN(sizeof(*bpage), 1361 INIT_LIST_HEAD(&cpu_buffer->new_pages);
1361 cache_line_size()), 1362 if (__rb_allocate_pages(cpu_buffer->nr_pages_to_update,
1362 GFP_KERNEL | __GFP_NORETRY, 1363 &cpu_buffer->new_pages, cpu))
1363 cpu_to_node(cpu)); 1364 /* not enough memory for new pages */
1364 if (!bpage) 1365 goto no_mem;
1365 goto free_pages;
1366 list_add(&bpage->list, &pages);
1367 page = alloc_pages_node(cpu_to_node(cpu),
1368 GFP_KERNEL | __GFP_NORETRY, 0);
1369 if (!page)
1370 goto free_pages;
1371 bpage->page = page_address(page);
1372 rb_init_page(bpage->page);
1373 } 1366 }
1374 }
1375 1367
1376 for_each_buffer_cpu(buffer, cpu) { 1368 /* wait for all the updates to complete */
1377 cpu_buffer = buffer->buffers[cpu]; 1369 for_each_buffer_cpu(buffer, cpu) {
1378 rb_insert_pages(cpu_buffer, &pages, new_pages); 1370 cpu_buffer = buffer->buffers[cpu];
1379 } 1371 if (cpu_buffer->nr_pages_to_update) {
1372 update_pages_handler(cpu_buffer);
1373 }
1374 }
1375 } else {
1376 cpu_buffer = buffer->buffers[cpu_id];
1377 if (nr_pages == cpu_buffer->nr_pages)
1378 goto out;
1380 1379
1381 if (RB_WARN_ON(buffer, !list_empty(&pages))) 1380 cpu_buffer->nr_pages_to_update = nr_pages -
1382 goto out_fail; 1381 cpu_buffer->nr_pages;
1382
1383 INIT_LIST_HEAD(&cpu_buffer->new_pages);
1384 if (cpu_buffer->nr_pages_to_update > 0 &&
1385 __rb_allocate_pages(cpu_buffer->nr_pages_to_update,
1386 &cpu_buffer->new_pages, cpu_id))
1387 goto no_mem;
1388
1389 update_pages_handler(cpu_buffer);
1390 }
1383 1391
1384 out: 1392 out:
1385 buffer->pages = nr_pages;
1386 put_online_cpus(); 1393 put_online_cpus();
1387 mutex_unlock(&buffer->mutex); 1394 mutex_unlock(&buffer->mutex);
1388 1395
@@ -1390,25 +1397,24 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1390 1397
1391 return size; 1398 return size;
1392 1399
1393 free_pages: 1400 no_mem:
1394 list_for_each_entry_safe(bpage, tmp, &pages, list) { 1401 for_each_buffer_cpu(buffer, cpu) {
1395 list_del_init(&bpage->list); 1402 struct buffer_page *bpage, *tmp;
1396 free_buffer_page(bpage); 1403 cpu_buffer = buffer->buffers[cpu];
1404 /* reset this number regardless */
1405 cpu_buffer->nr_pages_to_update = 0;
1406 if (list_empty(&cpu_buffer->new_pages))
1407 continue;
1408 list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages,
1409 list) {
1410 list_del_init(&bpage->list);
1411 free_buffer_page(bpage);
1412 }
1397 } 1413 }
1398 put_online_cpus(); 1414 put_online_cpus();
1399 mutex_unlock(&buffer->mutex); 1415 mutex_unlock(&buffer->mutex);
1400 atomic_dec(&buffer->record_disabled); 1416 atomic_dec(&buffer->record_disabled);
1401 return -ENOMEM; 1417 return -ENOMEM;
1402
1403 /*
1404 * Something went totally wrong, and we are too paranoid
1405 * to even clean up the mess.
1406 */
1407 out_fail:
1408 put_online_cpus();
1409 mutex_unlock(&buffer->mutex);
1410 atomic_dec(&buffer->record_disabled);
1411 return -1;
1412} 1418}
1413EXPORT_SYMBOL_GPL(ring_buffer_resize); 1419EXPORT_SYMBOL_GPL(ring_buffer_resize);
1414 1420
@@ -1510,7 +1516,7 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
1510 * assign the commit to the tail. 1516 * assign the commit to the tail.
1511 */ 1517 */
1512 again: 1518 again:
1513 max_count = cpu_buffer->buffer->pages * 100; 1519 max_count = cpu_buffer->nr_pages * 100;
1514 1520
1515 while (cpu_buffer->commit_page != cpu_buffer->tail_page) { 1521 while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
1516 if (RB_WARN_ON(cpu_buffer, !(--max_count))) 1522 if (RB_WARN_ON(cpu_buffer, !(--max_count)))
@@ -3588,9 +3594,18 @@ EXPORT_SYMBOL_GPL(ring_buffer_read);
3588 * ring_buffer_size - return the size of the ring buffer (in bytes) 3594 * ring_buffer_size - return the size of the ring buffer (in bytes)
3589 * @buffer: The ring buffer. 3595 * @buffer: The ring buffer.
3590 */ 3596 */
3591unsigned long ring_buffer_size(struct ring_buffer *buffer) 3597unsigned long ring_buffer_size(struct ring_buffer *buffer, int cpu)
3592{ 3598{
3593 return BUF_PAGE_SIZE * buffer->pages; 3599 /*
3600 * Earlier, this method returned
3601 * BUF_PAGE_SIZE * buffer->pages
3602 * Since the nr_pages field is now removed, we have converted this to
3603 * return the per cpu buffer value.
3604 */
3605 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3606 return 0;
3607
3608 return BUF_PAGE_SIZE * buffer->buffers[cpu]->nr_pages;
3594} 3609}
3595EXPORT_SYMBOL_GPL(ring_buffer_size); 3610EXPORT_SYMBOL_GPL(ring_buffer_size);
3596 3611
@@ -3765,8 +3780,11 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
3765 !cpumask_test_cpu(cpu, buffer_b->cpumask)) 3780 !cpumask_test_cpu(cpu, buffer_b->cpumask))
3766 goto out; 3781 goto out;
3767 3782
3783 cpu_buffer_a = buffer_a->buffers[cpu];
3784 cpu_buffer_b = buffer_b->buffers[cpu];
3785
3768 /* At least make sure the two buffers are somewhat the same */ 3786 /* At least make sure the two buffers are somewhat the same */
3769 if (buffer_a->pages != buffer_b->pages) 3787 if (cpu_buffer_a->nr_pages != cpu_buffer_b->nr_pages)
3770 goto out; 3788 goto out;
3771 3789
3772 ret = -EAGAIN; 3790 ret = -EAGAIN;
@@ -3780,9 +3798,6 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
3780 if (atomic_read(&buffer_b->record_disabled)) 3798 if (atomic_read(&buffer_b->record_disabled))
3781 goto out; 3799 goto out;
3782 3800
3783 cpu_buffer_a = buffer_a->buffers[cpu];
3784 cpu_buffer_b = buffer_b->buffers[cpu];
3785
3786 if (atomic_read(&cpu_buffer_a->record_disabled)) 3801 if (atomic_read(&cpu_buffer_a->record_disabled))
3787 goto out; 3802 goto out;
3788 3803
@@ -4071,6 +4086,8 @@ static int rb_cpu_notify(struct notifier_block *self,
4071 struct ring_buffer *buffer = 4086 struct ring_buffer *buffer =
4072 container_of(self, struct ring_buffer, cpu_notify); 4087 container_of(self, struct ring_buffer, cpu_notify);
4073 long cpu = (long)hcpu; 4088 long cpu = (long)hcpu;
4089 int cpu_i, nr_pages_same;
4090 unsigned int nr_pages;
4074 4091
4075 switch (action) { 4092 switch (action) {
4076 case CPU_UP_PREPARE: 4093 case CPU_UP_PREPARE:
@@ -4078,8 +4095,23 @@ static int rb_cpu_notify(struct notifier_block *self,
4078 if (cpumask_test_cpu(cpu, buffer->cpumask)) 4095 if (cpumask_test_cpu(cpu, buffer->cpumask))
4079 return NOTIFY_OK; 4096 return NOTIFY_OK;
4080 4097
4098 nr_pages = 0;
4099 nr_pages_same = 1;
4100 /* check if all cpu sizes are same */
4101 for_each_buffer_cpu(buffer, cpu_i) {
4102 /* fill in the size from first enabled cpu */
4103 if (nr_pages == 0)
4104 nr_pages = buffer->buffers[cpu_i]->nr_pages;
4105 if (nr_pages != buffer->buffers[cpu_i]->nr_pages) {
4106 nr_pages_same = 0;
4107 break;
4108 }
4109 }
4110 /* allocate minimum pages, user can later expand it */
4111 if (!nr_pages_same)
4112 nr_pages = 2;
4081 buffer->buffers[cpu] = 4113 buffer->buffers[cpu] =
4082 rb_allocate_cpu_buffer(buffer, cpu); 4114 rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
4083 if (!buffer->buffers[cpu]) { 4115 if (!buffer->buffers[cpu]) {
4084 WARN(1, "failed to allocate ring buffer on CPU %ld\n", 4116 WARN(1, "failed to allocate ring buffer on CPU %ld\n",
4085 cpu); 4117 cpu);
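
With the buffer-wide pages field gone, every resize is expressed as a per-cpu delta in nr_pages_to_update, and new pages are allocated up front so the operation can still fail cleanly with -ENOMEM before any buffer is touched. Below is a hedged, stand-alone model of that bookkeeping; the page counts are invented and only the sign convention of nr_pages_to_update matches the kernel code.

#include <stdio.h>

int main(void)
{
    int nr_pages[2] = { 10, 4 };   /* current per-cpu sizes, in pages    */
    int requested   = 7;           /* new size for RING_BUFFER_ALL_CPUS  */

    for (int cpu = 0; cpu < 2; cpu++) {
        int delta = requested - nr_pages[cpu];  /* nr_pages_to_update */

        if (delta > 0)
            printf("cpu%d: pre-allocate %d pages, then splice them in\n",
                   cpu, delta);
        else if (delta < 0)
            printf("cpu%d: remove %d pages directly\n", cpu, -delta);
        else
            printf("cpu%d: nothing to do\n", cpu);

        nr_pages[cpu] += delta;     /* what update_pages_handler() does */
    }
    return 0;
}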
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ed7b5d1e12f4..f11a285ee5bb 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -629,7 +629,6 @@ ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
629static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) 629static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
630{ 630{
631 int len; 631 int len;
632 void *ret;
633 632
634 if (s->len <= s->readpos) 633 if (s->len <= s->readpos)
635 return -EBUSY; 634 return -EBUSY;
@@ -637,9 +636,7 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
637 len = s->len - s->readpos; 636 len = s->len - s->readpos;
638 if (cnt > len) 637 if (cnt > len)
639 cnt = len; 638 cnt = len;
640 ret = memcpy(buf, s->buffer + s->readpos, cnt); 639 memcpy(buf, s->buffer + s->readpos, cnt);
641 if (!ret)
642 return -EFAULT;
643 640
644 s->readpos += cnt; 641 s->readpos += cnt;
645 return cnt; 642 return cnt;
@@ -841,7 +838,8 @@ __acquires(kernel_lock)
841 838
842 /* If we expanded the buffers, make sure the max is expanded too */ 839 /* If we expanded the buffers, make sure the max is expanded too */
843 if (ring_buffer_expanded && type->use_max_tr) 840 if (ring_buffer_expanded && type->use_max_tr)
844 ring_buffer_resize(max_tr.buffer, trace_buf_size); 841 ring_buffer_resize(max_tr.buffer, trace_buf_size,
842 RING_BUFFER_ALL_CPUS);
845 843
846 /* the test is responsible for initializing and enabling */ 844 /* the test is responsible for initializing and enabling */
847 pr_info("Testing tracer %s: ", type->name); 845 pr_info("Testing tracer %s: ", type->name);
@@ -857,7 +855,8 @@ __acquires(kernel_lock)
857 855
858 /* Shrink the max buffer again */ 856 /* Shrink the max buffer again */
859 if (ring_buffer_expanded && type->use_max_tr) 857 if (ring_buffer_expanded && type->use_max_tr)
860 ring_buffer_resize(max_tr.buffer, 1); 858 ring_buffer_resize(max_tr.buffer, 1,
859 RING_BUFFER_ALL_CPUS);
861 860
862 printk(KERN_CONT "PASSED\n"); 861 printk(KERN_CONT "PASSED\n");
863 } 862 }
@@ -1498,25 +1497,119 @@ static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1498 1497
1499#endif /* CONFIG_STACKTRACE */ 1498#endif /* CONFIG_STACKTRACE */
1500 1499
1500/* created for use with alloc_percpu */
1501struct trace_buffer_struct {
1502 char buffer[TRACE_BUF_SIZE];
1503};
1504
1505static struct trace_buffer_struct *trace_percpu_buffer;
1506static struct trace_buffer_struct *trace_percpu_sirq_buffer;
1507static struct trace_buffer_struct *trace_percpu_irq_buffer;
1508static struct trace_buffer_struct *trace_percpu_nmi_buffer;
1509
1510/*
1511 * The buffer used is dependent on the context. There is a per cpu
1512 * buffer for normal context, softirq context, hard irq context and
1513 * for NMI context. This allows for lockless recording.
1514 *
1515 * Note, if the buffers failed to be allocated, then this returns NULL
1516 */
1517static char *get_trace_buf(void)
1518{
1519 struct trace_buffer_struct *percpu_buffer;
1520 struct trace_buffer_struct *buffer;
1521
1522 /*
1523 * If we have allocated per cpu buffers, then we do not
1524 * need to do any locking.
1525 */
1526 if (in_nmi())
1527 percpu_buffer = trace_percpu_nmi_buffer;
1528 else if (in_irq())
1529 percpu_buffer = trace_percpu_irq_buffer;
1530 else if (in_softirq())
1531 percpu_buffer = trace_percpu_sirq_buffer;
1532 else
1533 percpu_buffer = trace_percpu_buffer;
1534
1535 if (!percpu_buffer)
1536 return NULL;
1537
1538 buffer = per_cpu_ptr(percpu_buffer, smp_processor_id());
1539
1540 return buffer->buffer;
1541}
1542
1543static int alloc_percpu_trace_buffer(void)
1544{
1545 struct trace_buffer_struct *buffers;
1546 struct trace_buffer_struct *sirq_buffers;
1547 struct trace_buffer_struct *irq_buffers;
1548 struct trace_buffer_struct *nmi_buffers;
1549
1550 buffers = alloc_percpu(struct trace_buffer_struct);
1551 if (!buffers)
1552 goto err_warn;
1553
1554 sirq_buffers = alloc_percpu(struct trace_buffer_struct);
1555 if (!sirq_buffers)
1556 goto err_sirq;
1557
1558 irq_buffers = alloc_percpu(struct trace_buffer_struct);
1559 if (!irq_buffers)
1560 goto err_irq;
1561
1562 nmi_buffers = alloc_percpu(struct trace_buffer_struct);
1563 if (!nmi_buffers)
1564 goto err_nmi;
1565
1566 trace_percpu_buffer = buffers;
1567 trace_percpu_sirq_buffer = sirq_buffers;
1568 trace_percpu_irq_buffer = irq_buffers;
1569 trace_percpu_nmi_buffer = nmi_buffers;
1570
1571 return 0;
1572
1573 err_nmi:
1574 free_percpu(irq_buffers);
1575 err_irq:
1576 free_percpu(sirq_buffers);
1577 err_sirq:
1578 free_percpu(buffers);
1579 err_warn:
1580 WARN(1, "Could not allocate percpu trace_printk buffer");
1581 return -ENOMEM;
1582}
1583
1584void trace_printk_init_buffers(void)
1585{
1586 static int buffers_allocated;
1587
1588 if (buffers_allocated)
1589 return;
1590
1591 if (alloc_percpu_trace_buffer())
1592 return;
1593
1594 pr_info("ftrace: Allocated trace_printk buffers\n");
1595
1596 buffers_allocated = 1;
1597}
1598
1501/** 1599/**
1502 * trace_vbprintk - write binary msg to tracing buffer 1600 * trace_vbprintk - write binary msg to tracing buffer
1503 * 1601 *
1504 */ 1602 */
1505int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) 1603int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1506{ 1604{
1507 static arch_spinlock_t trace_buf_lock =
1508 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
1509 static u32 trace_buf[TRACE_BUF_SIZE];
1510
1511 struct ftrace_event_call *call = &event_bprint; 1605 struct ftrace_event_call *call = &event_bprint;
1512 struct ring_buffer_event *event; 1606 struct ring_buffer_event *event;
1513 struct ring_buffer *buffer; 1607 struct ring_buffer *buffer;
1514 struct trace_array *tr = &global_trace; 1608 struct trace_array *tr = &global_trace;
1515 struct trace_array_cpu *data;
1516 struct bprint_entry *entry; 1609 struct bprint_entry *entry;
1517 unsigned long flags; 1610 unsigned long flags;
1518 int disable; 1611 char *tbuffer;
1519 int cpu, len = 0, size, pc; 1612 int len = 0, size, pc;
1520 1613
1521 if (unlikely(tracing_selftest_running || tracing_disabled)) 1614 if (unlikely(tracing_selftest_running || tracing_disabled))
1522 return 0; 1615 return 0;
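
The reason for four separate per-cpu scratch buffers is nesting: a trace_printk() in normal context can be interrupted by one in softirq, irq or NMI context, and giving each context its own buffer is what makes the removal of trace_buf_lock in the hunks below safe. A hedged user-space model of that idea follows; the enum names and sizes are invented, and only the one-buffer-per-context principle mirrors get_trace_buf().

#include <stdio.h>

enum ctx { CTX_NORMAL, CTX_SOFTIRQ, CTX_IRQ, CTX_NMI, CTX_MAX };

/* stand-ins for trace_percpu_buffer, _sirq_, _irq_ and _nmi_buffer */
static char bufs[CTX_MAX][64];

static char *get_buf(enum ctx c)
{
    return bufs[c];
}

int main(void)
{
    /* normal context is halfway through formatting its message ... */
    snprintf(get_buf(CTX_NORMAL), sizeof(bufs[0]), "resize cpu %d", 2);

    /* ... an NMI arrives and formats its own message ... */
    snprintf(get_buf(CTX_NMI), sizeof(bufs[0]), "nmi on cpu %d", 2);

    /* ... and the interrupted message is still intact, no lock needed: */
    printf("%s | %s\n", get_buf(CTX_NORMAL), get_buf(CTX_NMI));
    return 0;
}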
@@ -1526,43 +1619,36 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1526 1619
1527 pc = preempt_count(); 1620 pc = preempt_count();
1528 preempt_disable_notrace(); 1621 preempt_disable_notrace();
1529 cpu = raw_smp_processor_id();
1530 data = tr->data[cpu];
1531 1622
1532 disable = atomic_inc_return(&data->disabled); 1623 tbuffer = get_trace_buf();
1533 if (unlikely(disable != 1)) 1624 if (!tbuffer) {
1625 len = 0;
1534 goto out; 1626 goto out;
1627 }
1535 1628
1536 /* Lockdep uses trace_printk for lock tracing */ 1629 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
1537 local_irq_save(flags);
1538 arch_spin_lock(&trace_buf_lock);
1539 len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
1540 1630
1541 if (len > TRACE_BUF_SIZE || len < 0) 1631 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
1542 goto out_unlock; 1632 goto out;
1543 1633
1634 local_save_flags(flags);
1544 size = sizeof(*entry) + sizeof(u32) * len; 1635 size = sizeof(*entry) + sizeof(u32) * len;
1545 buffer = tr->buffer; 1636 buffer = tr->buffer;
1546 event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, 1637 event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
1547 flags, pc); 1638 flags, pc);
1548 if (!event) 1639 if (!event)
1549 goto out_unlock; 1640 goto out;
1550 entry = ring_buffer_event_data(event); 1641 entry = ring_buffer_event_data(event);
1551 entry->ip = ip; 1642 entry->ip = ip;
1552 entry->fmt = fmt; 1643 entry->fmt = fmt;
1553 1644
1554 memcpy(entry->buf, trace_buf, sizeof(u32) * len); 1645 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
1555 if (!filter_check_discard(call, entry, buffer, event)) { 1646 if (!filter_check_discard(call, entry, buffer, event)) {
1556 ring_buffer_unlock_commit(buffer, event); 1647 ring_buffer_unlock_commit(buffer, event);
1557 ftrace_trace_stack(buffer, flags, 6, pc); 1648 ftrace_trace_stack(buffer, flags, 6, pc);
1558 } 1649 }
1559 1650
1560out_unlock:
1561 arch_spin_unlock(&trace_buf_lock);
1562 local_irq_restore(flags);
1563
1564out: 1651out:
1565 atomic_dec_return(&data->disabled);
1566 preempt_enable_notrace(); 1652 preempt_enable_notrace();
1567 unpause_graph_tracing(); 1653 unpause_graph_tracing();
1568 1654
@@ -1588,58 +1674,53 @@ int trace_array_printk(struct trace_array *tr,
1588int trace_array_vprintk(struct trace_array *tr, 1674int trace_array_vprintk(struct trace_array *tr,
1589 unsigned long ip, const char *fmt, va_list args) 1675 unsigned long ip, const char *fmt, va_list args)
1590{ 1676{
1591 static arch_spinlock_t trace_buf_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1592 static char trace_buf[TRACE_BUF_SIZE];
1593
1594 struct ftrace_event_call *call = &event_print; 1677 struct ftrace_event_call *call = &event_print;
1595 struct ring_buffer_event *event; 1678 struct ring_buffer_event *event;
1596 struct ring_buffer *buffer; 1679 struct ring_buffer *buffer;
1597 struct trace_array_cpu *data; 1680 int len = 0, size, pc;
1598 int cpu, len = 0, size, pc;
1599 struct print_entry *entry; 1681 struct print_entry *entry;
1600 unsigned long irq_flags; 1682 unsigned long flags;
1601 int disable; 1683 char *tbuffer;
1602 1684
1603 if (tracing_disabled || tracing_selftest_running) 1685 if (tracing_disabled || tracing_selftest_running)
1604 return 0; 1686 return 0;
1605 1687
1688 /* Don't pollute graph traces with trace_vprintk internals */
1689 pause_graph_tracing();
1690
1606 pc = preempt_count(); 1691 pc = preempt_count();
1607 preempt_disable_notrace(); 1692 preempt_disable_notrace();
1608 cpu = raw_smp_processor_id();
1609 data = tr->data[cpu];
1610 1693
1611 disable = atomic_inc_return(&data->disabled); 1694
1612 if (unlikely(disable != 1)) 1695 tbuffer = get_trace_buf();
1696 if (!tbuffer) {
1697 len = 0;
1613 goto out; 1698 goto out;
1699 }
1614 1700
1615 pause_graph_tracing(); 1701 len = vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
1616 raw_local_irq_save(irq_flags); 1702 if (len > TRACE_BUF_SIZE)
1617 arch_spin_lock(&trace_buf_lock); 1703 goto out;
1618 len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
1619 1704
1705 local_save_flags(flags);
1620 size = sizeof(*entry) + len + 1; 1706 size = sizeof(*entry) + len + 1;
1621 buffer = tr->buffer; 1707 buffer = tr->buffer;
1622 event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, 1708 event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
1623 irq_flags, pc); 1709 flags, pc);
1624 if (!event) 1710 if (!event)
1625 goto out_unlock; 1711 goto out;
1626 entry = ring_buffer_event_data(event); 1712 entry = ring_buffer_event_data(event);
1627 entry->ip = ip; 1713 entry->ip = ip;
1628 1714
1629 memcpy(&entry->buf, trace_buf, len); 1715 memcpy(&entry->buf, tbuffer, len);
1630 entry->buf[len] = '\0'; 1716 entry->buf[len] = '\0';
1631 if (!filter_check_discard(call, entry, buffer, event)) { 1717 if (!filter_check_discard(call, entry, buffer, event)) {
1632 ring_buffer_unlock_commit(buffer, event); 1718 ring_buffer_unlock_commit(buffer, event);
1633 ftrace_trace_stack(buffer, irq_flags, 6, pc); 1719 ftrace_trace_stack(buffer, flags, 6, pc);
1634 } 1720 }
1635
1636 out_unlock:
1637 arch_spin_unlock(&trace_buf_lock);
1638 raw_local_irq_restore(irq_flags);
1639 unpause_graph_tracing();
1640 out: 1721 out:
1641 atomic_dec_return(&data->disabled);
1642 preempt_enable_notrace(); 1722 preempt_enable_notrace();
1723 unpause_graph_tracing();
1643 1724
1644 return len; 1725 return len;
1645} 1726}
@@ -2974,7 +3055,14 @@ int tracer_init(struct tracer *t, struct trace_array *tr)
2974 return t->init(tr); 3055 return t->init(tr);
2975} 3056}
2976 3057
2977static int __tracing_resize_ring_buffer(unsigned long size) 3058static void set_buffer_entries(struct trace_array *tr, unsigned long val)
3059{
3060 int cpu;
3061 for_each_tracing_cpu(cpu)
3062 tr->data[cpu]->entries = val;
3063}
3064
3065static int __tracing_resize_ring_buffer(unsigned long size, int cpu)
2978{ 3066{
2979 int ret; 3067 int ret;
2980 3068
@@ -2985,19 +3073,32 @@ static int __tracing_resize_ring_buffer(unsigned long size)
 	 */
 	ring_buffer_expanded = 1;
 
-	ret = ring_buffer_resize(global_trace.buffer, size);
+	ret = ring_buffer_resize(global_trace.buffer, size, cpu);
 	if (ret < 0)
 		return ret;
 
 	if (!current_trace->use_max_tr)
 		goto out;
 
-	ret = ring_buffer_resize(max_tr.buffer, size);
+	ret = ring_buffer_resize(max_tr.buffer, size, cpu);
 	if (ret < 0) {
-		int r;
+		int r = 0;
+
+		if (cpu == RING_BUFFER_ALL_CPUS) {
+			int i;
+			for_each_tracing_cpu(i) {
+				r = ring_buffer_resize(global_trace.buffer,
+						global_trace.data[i]->entries,
+						i);
+				if (r < 0)
+					break;
+			}
+		} else {
+			r = ring_buffer_resize(global_trace.buffer,
+					global_trace.data[cpu]->entries,
+					cpu);
+		}
 
-		r = ring_buffer_resize(global_trace.buffer,
-				       global_trace.entries);
 		if (r < 0) {
 			/*
 			 * AARGH! We are left with different
@@ -3019,14 +3120,21 @@ static int __tracing_resize_ring_buffer(unsigned long size)
 		return ret;
 	}
 
-	max_tr.entries = size;
+	if (cpu == RING_BUFFER_ALL_CPUS)
+		set_buffer_entries(&max_tr, size);
+	else
+		max_tr.data[cpu]->entries = size;
+
  out:
-	global_trace.entries = size;
+	if (cpu == RING_BUFFER_ALL_CPUS)
+		set_buffer_entries(&global_trace, size);
+	else
+		global_trace.data[cpu]->entries = size;
 
 	return ret;
 }
 
-static ssize_t tracing_resize_ring_buffer(unsigned long size)
+static ssize_t tracing_resize_ring_buffer(unsigned long size, int cpu_id)
 {
 	int cpu, ret = size;
 
@@ -3042,12 +3150,19 @@ static ssize_t tracing_resize_ring_buffer(unsigned long size)
 			atomic_inc(&max_tr.data[cpu]->disabled);
 	}
 
-	if (size != global_trace.entries)
-		ret = __tracing_resize_ring_buffer(size);
+	if (cpu_id != RING_BUFFER_ALL_CPUS) {
+		/* make sure, this cpu is enabled in the mask */
+		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
+			ret = -EINVAL;
+			goto out;
+		}
+	}
 
+	ret = __tracing_resize_ring_buffer(size, cpu_id);
 	if (ret < 0)
 		ret = -ENOMEM;
 
+out:
 	for_each_tracing_cpu(cpu) {
 		if (global_trace.data[cpu])
 			atomic_dec(&global_trace.data[cpu]->disabled);
@@ -3078,7 +3193,8 @@ int tracing_update_buffers(void)
 
 	mutex_lock(&trace_types_lock);
 	if (!ring_buffer_expanded)
-		ret = __tracing_resize_ring_buffer(trace_buf_size);
+		ret = __tracing_resize_ring_buffer(trace_buf_size,
+						RING_BUFFER_ALL_CPUS);
 	mutex_unlock(&trace_types_lock);
 
 	return ret;
@@ -3102,7 +3218,8 @@ static int tracing_set_tracer(const char *buf)
 	mutex_lock(&trace_types_lock);
 
 	if (!ring_buffer_expanded) {
-		ret = __tracing_resize_ring_buffer(trace_buf_size);
+		ret = __tracing_resize_ring_buffer(trace_buf_size,
+						RING_BUFFER_ALL_CPUS);
 		if (ret < 0)
 			goto out;
 		ret = 0;
@@ -3128,8 +3245,8 @@ static int tracing_set_tracer(const char *buf)
 		 * The max_tr ring buffer has some state (e.g. ring->clock) and
 		 * we want preserve it.
 		 */
-		ring_buffer_resize(max_tr.buffer, 1);
-		max_tr.entries = 1;
+		ring_buffer_resize(max_tr.buffer, 1, RING_BUFFER_ALL_CPUS);
+		set_buffer_entries(&max_tr, 1);
 	}
 	destroy_trace_option_files(topts);
 
@@ -3137,10 +3254,17 @@ static int tracing_set_tracer(const char *buf)
 
 	topts = create_trace_option_files(current_trace);
 	if (current_trace->use_max_tr) {
-		ret = ring_buffer_resize(max_tr.buffer, global_trace.entries);
-		if (ret < 0)
-			goto out;
-		max_tr.entries = global_trace.entries;
+		int cpu;
+		/* we need to make per cpu buffer sizes equivalent */
+		for_each_tracing_cpu(cpu) {
+			ret = ring_buffer_resize(max_tr.buffer,
+						global_trace.data[cpu]->entries,
+						cpu);
+			if (ret < 0)
+				goto out;
+			max_tr.data[cpu]->entries =
+					global_trace.data[cpu]->entries;
+		}
 	}
 
 	if (t->init) {
@@ -3642,30 +3766,82 @@ out_err:
 	goto out;
 }
 
+struct ftrace_entries_info {
+	struct trace_array	*tr;
+	int			cpu;
+};
+
+static int tracing_entries_open(struct inode *inode, struct file *filp)
+{
+	struct ftrace_entries_info *info;
+
+	if (tracing_disabled)
+		return -ENODEV;
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return -ENOMEM;
+
+	info->tr = &global_trace;
+	info->cpu = (unsigned long)inode->i_private;
+
+	filp->private_data = info;
+
+	return 0;
+}
+
 static ssize_t
 tracing_entries_read(struct file *filp, char __user *ubuf,
 		     size_t cnt, loff_t *ppos)
 {
-	struct trace_array *tr = filp->private_data;
-	char buf[96];
-	int r;
+	struct ftrace_entries_info *info = filp->private_data;
+	struct trace_array *tr = info->tr;
+	char buf[64];
+	int r = 0;
+	ssize_t ret;
 
 	mutex_lock(&trace_types_lock);
-	if (!ring_buffer_expanded)
-		r = sprintf(buf, "%lu (expanded: %lu)\n",
-			    tr->entries >> 10,
-			    trace_buf_size >> 10);
-	else
-		r = sprintf(buf, "%lu\n", tr->entries >> 10);
+
+	if (info->cpu == RING_BUFFER_ALL_CPUS) {
+		int cpu, buf_size_same;
+		unsigned long size;
+
+		size = 0;
+		buf_size_same = 1;
+		/* check if all cpu sizes are same */
+		for_each_tracing_cpu(cpu) {
+			/* fill in the size from first enabled cpu */
+			if (size == 0)
+				size = tr->data[cpu]->entries;
+			if (size != tr->data[cpu]->entries) {
+				buf_size_same = 0;
+				break;
+			}
+		}
+
+		if (buf_size_same) {
+			if (!ring_buffer_expanded)
+				r = sprintf(buf, "%lu (expanded: %lu)\n",
+					    size >> 10,
+					    trace_buf_size >> 10);
+			else
+				r = sprintf(buf, "%lu\n", size >> 10);
+		} else
+			r = sprintf(buf, "X\n");
+	} else
+		r = sprintf(buf, "%lu\n", tr->data[info->cpu]->entries >> 10);
+
 	mutex_unlock(&trace_types_lock);
 
-	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+	return ret;
 }
 
 static ssize_t
 tracing_entries_write(struct file *filp, const char __user *ubuf,
 		      size_t cnt, loff_t *ppos)
 {
+	struct ftrace_entries_info *info = filp->private_data;
 	unsigned long val;
 	int ret;
 
@@ -3680,7 +3856,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
 	/* value is in KB */
 	val <<= 10;
 
-	ret = tracing_resize_ring_buffer(val);
+	ret = tracing_resize_ring_buffer(val, info->cpu);
 	if (ret < 0)
 		return ret;
 
@@ -3689,6 +3865,16 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
 	return cnt;
 }
 
+static int
+tracing_entries_release(struct inode *inode, struct file *filp)
+{
+	struct ftrace_entries_info *info = filp->private_data;
+
+	kfree(info);
+
+	return 0;
+}
+
 static ssize_t
 tracing_total_entries_read(struct file *filp, char __user *ubuf,
 				size_t cnt, loff_t *ppos)
@@ -3700,7 +3886,7 @@ tracing_total_entries_read(struct file *filp, char __user *ubuf,
 
 	mutex_lock(&trace_types_lock);
 	for_each_tracing_cpu(cpu) {
-		size += tr->entries >> 10;
+		size += tr->data[cpu]->entries >> 10;
 		if (!ring_buffer_expanded)
 			expanded_size += trace_buf_size >> 10;
 	}
@@ -3734,7 +3920,7 @@ tracing_free_buffer_release(struct inode *inode, struct file *filp)
 	if (trace_flags & TRACE_ITER_STOP_ON_FREE)
 		tracing_off();
 	/* resize the ring buffer to 0 */
-	tracing_resize_ring_buffer(0);
+	tracing_resize_ring_buffer(0, RING_BUFFER_ALL_CPUS);
 
 	return 0;
 }
@@ -3933,9 +4119,10 @@ static const struct file_operations tracing_pipe_fops = {
 };
 
 static const struct file_operations tracing_entries_fops = {
-	.open		= tracing_open_generic,
+	.open		= tracing_entries_open,
 	.read		= tracing_entries_read,
 	.write		= tracing_entries_write,
+	.release	= tracing_entries_release,
 	.llseek		= generic_file_llseek,
 };
 
@@ -4387,6 +4574,9 @@ static void tracing_init_debugfs_percpu(long cpu)
 
 	trace_create_file("stats", 0444, d_cpu,
 			(void *) cpu, &tracing_stats_fops);
+
+	trace_create_file("buffer_size_kb", 0444, d_cpu,
+			(void *) cpu, &tracing_entries_fops);
 }
 
 #ifdef CONFIG_FTRACE_SELFTEST
@@ -4716,7 +4906,7 @@ static __init int tracer_init_debugfs(void)
 			(void *) TRACE_PIPE_ALL_CPU, &tracing_pipe_fops);
 
 	trace_create_file("buffer_size_kb", 0644, d_tracer,
-			&global_trace, &tracing_entries_fops);
+			(void *) RING_BUFFER_ALL_CPUS, &tracing_entries_fops);
 
 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
 			&global_trace, &tracing_total_entries_fops);
@@ -4955,6 +5145,10 @@ __init static int tracer_alloc_buffers(void)
 	if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
 		goto out_free_buffer_mask;
 
+	/* Only allocate trace_printk buffers if a trace_printk exists */
+	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
+		trace_printk_init_buffers();
+
 	/* To save memory, keep the ring buffer size to its minimum */
 	if (ring_buffer_expanded)
 		ring_buf_size = trace_buf_size;
@@ -4973,7 +5167,6 @@ __init static int tracer_alloc_buffers(void)
 		WARN_ON(1);
 		goto out_free_cpumask;
 	}
-	global_trace.entries = ring_buffer_size(global_trace.buffer);
 	if (global_trace.buffer_disabled)
 		tracing_off();
 
@@ -4986,7 +5179,6 @@ __init static int tracer_alloc_buffers(void)
 		ring_buffer_free(global_trace.buffer);
 		goto out_free_cpumask;
 	}
-	max_tr.entries = 1;
 #endif
 
 	/* Allocate the first page for all buffers */
@@ -4995,6 +5187,11 @@ __init static int tracer_alloc_buffers(void)
 		max_tr.data[i] = &per_cpu(max_tr_data, i);
 	}
 
+	set_buffer_entries(&global_trace, ring_buf_size);
+#ifdef CONFIG_TRACER_MAX_TRACE
+	set_buffer_entries(&max_tr, 1);
+#endif
+
 	trace_init_cmdlines();
 
 	register_tracer(&nop_trace);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 95059f091a24..1c8b7c6f7b3b 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -131,6 +131,7 @@ struct trace_array_cpu {
 	atomic_t		disabled;
 	void			*buffer_page;	/* ring buffer spare */
 
+	unsigned long		entries;
 	unsigned long		saved_latency;
 	unsigned long		critical_start;
 	unsigned long		critical_end;
@@ -152,7 +153,6 @@ struct trace_array_cpu {
  */
 struct trace_array {
 	struct ring_buffer	*buffer;
-	unsigned long		entries;
 	int			cpu;
 	int			buffer_disabled;
 	cycle_t			time_start;
@@ -826,6 +826,8 @@ extern struct list_head ftrace_events;
 extern const char *__start___trace_bprintk_fmt[];
 extern const char *__stop___trace_bprintk_fmt[];
 
+void trace_printk_init_buffers(void);
+
 #undef FTRACE_ENTRY
 #define FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter)	\
 	extern struct ftrace_event_call					\
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index 6fd4ffd042f9..a9077c1b4ad3 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -51,6 +51,10 @@ void hold_module_trace_bprintk_format(const char **start, const char **end)
 	const char **iter;
 	char *fmt;
 
+	/* allocate the trace_printk per cpu buffers */
+	if (start != end)
+		trace_printk_init_buffers();
+
 	mutex_lock(&btrace_mutex);
 	for (iter = start; iter < end; iter++) {
 		struct trace_bprintk_fmt *tb_fmt = lookup_format(*iter);