author     Ingo Molnar <mingo@kernel.org>  2012-05-14 08:43:40 -0400
committer  Ingo Molnar <mingo@kernel.org>  2012-05-14 08:43:40 -0400
commit     9cba26e66d09bf394ae5a739627a1dc8b7cae6f4 (patch)
tree       f03743d576a0c7826b9921ad47e70370ebe80a22 /kernel
parent     ec83db0f78cd44c3b586ec1c3a348d1a8a389797 (diff)
parent     73eff9f56e15598c8399c0b86899fd889b97f085 (diff)
Merge branch 'perf/uprobes' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/uprobes
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/cred.c                 |    2
-rw-r--r--  kernel/events/core.c          |   25
-rw-r--r--  kernel/irq/Kconfig            |    2
-rw-r--r--  kernel/irq/irqdomain.c        |   47
-rw-r--r--  kernel/irq_work.c             |    1
-rw-r--r--  kernel/itimer.c               |    8
-rw-r--r--  kernel/panic.c                |    2
-rw-r--r--  kernel/sched/core.c           |    9
-rw-r--r--  kernel/time/Kconfig           |    4
-rw-r--r--  kernel/time/tick-broadcast.c  |    4
-rw-r--r--  kernel/time/tick-sched.c      |    4
-rw-r--r--  kernel/trace/ftrace.c         |   44
-rw-r--r--  kernel/trace/ring_buffer.c    |  248
-rw-r--r--  kernel/trace/trace.c          |  453
-rw-r--r--  kernel/trace/trace.h          |    4
-rw-r--r--  kernel/trace/trace_printk.c   |    4
16 files changed, 506 insertions(+), 355 deletions(-)
diff --git a/kernel/cred.c b/kernel/cred.c
index 97b36eeca4c9..e70683d9ec32 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -386,6 +386,8 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
386 struct cred *new; 386 struct cred *new;
387 int ret; 387 int ret;
388 388
389 p->replacement_session_keyring = NULL;
390
389 if ( 391 if (
390#ifdef CONFIG_KEYS 392#ifdef CONFIG_KEYS
391 !p->cred->thread_keyring && 393 !p->cred->thread_keyring &&
diff --git a/kernel/events/core.c b/kernel/events/core.c
index a6a9ec4cd8f5..e82c7a1face9 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2039,8 +2039,8 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
2039 * accessing the event control register. If a NMI hits, then it will 2039 * accessing the event control register. If a NMI hits, then it will
2040 * not restart the event. 2040 * not restart the event.
2041 */ 2041 */
2042void __perf_event_task_sched_out(struct task_struct *task, 2042static void __perf_event_task_sched_out(struct task_struct *task,
2043 struct task_struct *next) 2043 struct task_struct *next)
2044{ 2044{
2045 int ctxn; 2045 int ctxn;
2046 2046
@@ -2279,8 +2279,8 @@ static void perf_branch_stack_sched_in(struct task_struct *prev,
2279 * accessing the event control register. If a NMI hits, then it will 2279 * accessing the event control register. If a NMI hits, then it will
2280 * keep the event running. 2280 * keep the event running.
2281 */ 2281 */
2282void __perf_event_task_sched_in(struct task_struct *prev, 2282static void __perf_event_task_sched_in(struct task_struct *prev,
2283 struct task_struct *task) 2283 struct task_struct *task)
2284{ 2284{
2285 struct perf_event_context *ctx; 2285 struct perf_event_context *ctx;
2286 int ctxn; 2286 int ctxn;
@@ -2305,6 +2305,12 @@ void __perf_event_task_sched_in(struct task_struct *prev,
2305 perf_branch_stack_sched_in(prev, task); 2305 perf_branch_stack_sched_in(prev, task);
2306} 2306}
2307 2307
2308void __perf_event_task_sched(struct task_struct *prev, struct task_struct *next)
2309{
2310 __perf_event_task_sched_out(prev, next);
2311 __perf_event_task_sched_in(prev, next);
2312}
2313
2308static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) 2314static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
2309{ 2315{
2310 u64 frequency = event->attr.sample_freq; 2316 u64 frequency = event->attr.sample_freq;
@@ -4957,7 +4963,7 @@ void __perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
4957 if (rctx < 0) 4963 if (rctx < 0)
4958 return; 4964 return;
4959 4965
4960 perf_sample_data_init(&data, addr); 4966 perf_sample_data_init(&data, addr, 0);
4961 4967
4962 do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, &data, regs); 4968 do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, &data, regs);
4963 4969
@@ -5215,7 +5221,7 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
5215 .data = record, 5221 .data = record,
5216 }; 5222 };
5217 5223
5218 perf_sample_data_init(&data, addr); 5224 perf_sample_data_init(&data, addr, 0);
5219 data.raw = &raw; 5225 data.raw = &raw;
5220 5226
5221 hlist_for_each_entry_rcu(event, node, head, hlist_entry) { 5227 hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
@@ -5318,7 +5324,7 @@ void perf_bp_event(struct perf_event *bp, void *data)
5318 struct perf_sample_data sample; 5324 struct perf_sample_data sample;
5319 struct pt_regs *regs = data; 5325 struct pt_regs *regs = data;
5320 5326
5321 perf_sample_data_init(&sample, bp->attr.bp_addr); 5327 perf_sample_data_init(&sample, bp->attr.bp_addr, 0);
5322 5328
5323 if (!bp->hw.state && !perf_exclude_event(bp, regs)) 5329 if (!bp->hw.state && !perf_exclude_event(bp, regs))
5324 perf_swevent_event(bp, 1, &sample, regs); 5330 perf_swevent_event(bp, 1, &sample, regs);
@@ -5344,13 +5350,12 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
5344 5350
5345 event->pmu->read(event); 5351 event->pmu->read(event);
5346 5352
5347 perf_sample_data_init(&data, 0); 5353 perf_sample_data_init(&data, 0, event->hw.last_period);
5348 data.period = event->hw.last_period;
5349 regs = get_irq_regs(); 5354 regs = get_irq_regs();
5350 5355
5351 if (regs && !perf_exclude_event(event, regs)) { 5356 if (regs && !perf_exclude_event(event, regs)) {
5352 if (!(event->attr.exclude_idle && is_idle_task(current))) 5357 if (!(event->attr.exclude_idle && is_idle_task(current)))
5353 if (perf_event_overflow(event, &data, regs)) 5358 if (__perf_event_overflow(event, 1, &data, regs))
5354 ret = HRTIMER_NORESTART; 5359 ret = HRTIMER_NORESTART;
5355 } 5360 }
5356 5361
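Two API points are visible in the kernel/events/core.c hunks above: perf_sample_data_init() now takes the sample period as a third argument (so callers no longer assign data.period separately), and the sched-out/sched-in callbacks become static, reached through the new combined __perf_event_task_sched() entry point. A minimal sketch of how a software-event caller migrates to the three-argument initializer follows; the surrounding demo_emit_sample() function is hypothetical, written as if it lived inside kernel/events/core.c where perf_exclude_event() and perf_swevent_event() are visible.

#include <linux/perf_event.h>

/* sketch: before vs. after the perf_sample_data_init() change */
static void demo_emit_sample(struct perf_event *event, u64 addr,
			     struct pt_regs *regs)
{
	struct perf_sample_data data;

	/* old style:
	 *   perf_sample_data_init(&data, addr);
	 *   data.period = event->hw.last_period;
	 */

	/* new style: the period is part of the initializer */
	perf_sample_data_init(&data, addr, event->hw.last_period);

	if (!perf_exclude_event(event, regs))
		perf_swevent_event(event, 1, &data, regs);
}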
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index cf1a4a68ce44..d1a758bc972a 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -62,7 +62,7 @@ config IRQ_DOMAIN_DEBUG
62 help 62 help
63 This option will show the mapping relationship between hardware irq 63 This option will show the mapping relationship between hardware irq
64 numbers and Linux irq numbers. The mapping is exposed via debugfs 64 numbers and Linux irq numbers. The mapping is exposed via debugfs
65 in the file "virq_mapping". 65 in the file "irq_domain_mapping".
66 66
67 If you don't know what this means you don't need it. 67 If you don't know what this means you don't need it.
68 68
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 3601f3fbf67c..0e0ba5f840b2 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -23,7 +23,6 @@ static LIST_HEAD(irq_domain_list);
23static DEFINE_MUTEX(irq_domain_mutex); 23static DEFINE_MUTEX(irq_domain_mutex);
24 24
25static DEFINE_MUTEX(revmap_trees_mutex); 25static DEFINE_MUTEX(revmap_trees_mutex);
26static unsigned int irq_virq_count = NR_IRQS;
27static struct irq_domain *irq_default_domain; 26static struct irq_domain *irq_default_domain;
28 27
29/** 28/**
@@ -184,13 +183,16 @@ struct irq_domain *irq_domain_add_linear(struct device_node *of_node,
184} 183}
185 184
186struct irq_domain *irq_domain_add_nomap(struct device_node *of_node, 185struct irq_domain *irq_domain_add_nomap(struct device_node *of_node,
186 unsigned int max_irq,
187 const struct irq_domain_ops *ops, 187 const struct irq_domain_ops *ops,
188 void *host_data) 188 void *host_data)
189{ 189{
190 struct irq_domain *domain = irq_domain_alloc(of_node, 190 struct irq_domain *domain = irq_domain_alloc(of_node,
191 IRQ_DOMAIN_MAP_NOMAP, ops, host_data); 191 IRQ_DOMAIN_MAP_NOMAP, ops, host_data);
192 if (domain) 192 if (domain) {
193 domain->revmap_data.nomap.max_irq = max_irq ? max_irq : ~0;
193 irq_domain_add(domain); 194 irq_domain_add(domain);
195 }
194 return domain; 196 return domain;
195} 197}
196 198
@@ -262,22 +264,6 @@ void irq_set_default_host(struct irq_domain *domain)
262 irq_default_domain = domain; 264 irq_default_domain = domain;
263} 265}
264 266
265/**
266 * irq_set_virq_count() - Set the maximum number of linux irqs
267 * @count: number of linux irqs, capped with NR_IRQS
268 *
269 * This is mainly for use by platforms like iSeries who want to program
270 * the virtual irq number in the controller to avoid the reverse mapping
271 */
272void irq_set_virq_count(unsigned int count)
273{
274 pr_debug("irq: Trying to set virq count to %d\n", count);
275
276 BUG_ON(count < NUM_ISA_INTERRUPTS);
277 if (count < NR_IRQS)
278 irq_virq_count = count;
279}
280
281static int irq_setup_virq(struct irq_domain *domain, unsigned int virq, 267static int irq_setup_virq(struct irq_domain *domain, unsigned int virq,
282 irq_hw_number_t hwirq) 268 irq_hw_number_t hwirq)
283{ 269{
@@ -320,13 +306,12 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain)
320 pr_debug("irq: create_direct virq allocation failed\n"); 306 pr_debug("irq: create_direct virq allocation failed\n");
321 return 0; 307 return 0;
322 } 308 }
323 if (virq >= irq_virq_count) { 309 if (virq >= domain->revmap_data.nomap.max_irq) {
324 pr_err("ERROR: no free irqs available below %i maximum\n", 310 pr_err("ERROR: no free irqs available below %i maximum\n",
325 irq_virq_count); 311 domain->revmap_data.nomap.max_irq);
326 irq_free_desc(virq); 312 irq_free_desc(virq);
327 return 0; 313 return 0;
328 } 314 }
329
330 pr_debug("irq: create_direct obtained virq %d\n", virq); 315 pr_debug("irq: create_direct obtained virq %d\n", virq);
331 316
332 if (irq_setup_virq(domain, virq, virq)) { 317 if (irq_setup_virq(domain, virq, virq)) {
@@ -350,7 +335,8 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain)
350unsigned int irq_create_mapping(struct irq_domain *domain, 335unsigned int irq_create_mapping(struct irq_domain *domain,
351 irq_hw_number_t hwirq) 336 irq_hw_number_t hwirq)
352{ 337{
353 unsigned int virq, hint; 338 unsigned int hint;
339 int virq;
354 340
355 pr_debug("irq: irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq); 341 pr_debug("irq: irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq);
356 342
@@ -377,13 +363,13 @@ unsigned int irq_create_mapping(struct irq_domain *domain,
377 return irq_domain_legacy_revmap(domain, hwirq); 363 return irq_domain_legacy_revmap(domain, hwirq);
378 364
379 /* Allocate a virtual interrupt number */ 365 /* Allocate a virtual interrupt number */
380 hint = hwirq % irq_virq_count; 366 hint = hwirq % nr_irqs;
381 if (hint == 0) 367 if (hint == 0)
382 hint++; 368 hint++;
383 virq = irq_alloc_desc_from(hint, 0); 369 virq = irq_alloc_desc_from(hint, 0);
384 if (!virq) 370 if (virq <= 0)
385 virq = irq_alloc_desc_from(1, 0); 371 virq = irq_alloc_desc_from(1, 0);
386 if (!virq) { 372 if (virq <= 0) {
387 pr_debug("irq: -> virq allocation failed\n"); 373 pr_debug("irq: -> virq allocation failed\n");
388 return 0; 374 return 0;
389 } 375 }
@@ -515,7 +501,7 @@ unsigned int irq_find_mapping(struct irq_domain *domain,
515 irq_hw_number_t hwirq) 501 irq_hw_number_t hwirq)
516{ 502{
517 unsigned int i; 503 unsigned int i;
518 unsigned int hint = hwirq % irq_virq_count; 504 unsigned int hint = hwirq % nr_irqs;
519 505
520 /* Look for default domain if nececssary */ 506 /* Look for default domain if nececssary */
521 if (domain == NULL) 507 if (domain == NULL)
@@ -536,7 +522,7 @@ unsigned int irq_find_mapping(struct irq_domain *domain,
536 if (data && (data->domain == domain) && (data->hwirq == hwirq)) 522 if (data && (data->domain == domain) && (data->hwirq == hwirq))
537 return i; 523 return i;
538 i++; 524 i++;
539 if (i >= irq_virq_count) 525 if (i >= nr_irqs)
540 i = 1; 526 i = 1;
541 } while(i != hint); 527 } while(i != hint);
542 return 0; 528 return 0;
@@ -642,8 +628,9 @@ static int virq_debug_show(struct seq_file *m, void *private)
642 void *data; 628 void *data;
643 int i; 629 int i;
644 630
645 seq_printf(m, "%-5s %-7s %-15s %-18s %s\n", "virq", "hwirq", 631 seq_printf(m, "%-5s %-7s %-15s %-*s %s\n", "irq", "hwirq",
646 "chip name", "chip data", "domain name"); 632 "chip name", (int)(2 * sizeof(void *) + 2), "chip data",
633 "domain name");
647 634
648 for (i = 1; i < nr_irqs; i++) { 635 for (i = 1; i < nr_irqs; i++) {
649 desc = irq_to_desc(i); 636 desc = irq_to_desc(i);
@@ -666,7 +653,7 @@ static int virq_debug_show(struct seq_file *m, void *private)
666 seq_printf(m, "%-15s ", p); 653 seq_printf(m, "%-15s ", p);
667 654
668 data = irq_desc_get_chip_data(desc); 655 data = irq_desc_get_chip_data(desc);
669 seq_printf(m, "0x%16p ", data); 656 seq_printf(m, data ? "0x%p " : " %p ", data);
670 657
671 if (desc->irq_data.domain && desc->irq_data.domain->of_node) 658 if (desc->irq_data.domain && desc->irq_data.domain->of_node)
672 p = desc->irq_data.domain->of_node->full_name; 659 p = desc->irq_data.domain->of_node->full_name;
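The irqdomain.c hunks replace the global irq_virq_count / irq_set_virq_count() mechanism with a per-domain limit: irq_domain_add_nomap() gains a max_irq argument stored in revmap_data.nomap.max_irq (0 meaning "no limit"), and irq_create_direct_mapping() now checks against that instead of the old global. A hedged sketch of a driver-side caller; demo_irq_ops, demo_init() and the 1024 ceiling are invented for illustration, not part of the patch.

#include <linux/irqdomain.h>
#include <linux/of.h>

/* ops left empty here; a real controller fills in .map/.xlate etc. */
static const struct irq_domain_ops demo_irq_ops;

static struct irq_domain *demo_domain;

static int demo_init(struct device_node *np)
{
	/* old: irq_set_virq_count(1024);
	 *      demo_domain = irq_domain_add_nomap(np, &demo_irq_ops, NULL);
	 * new: the cap travels with the domain itself: */
	demo_domain = irq_domain_add_nomap(np, 1024, &demo_irq_ops, NULL);
	if (!demo_domain)
		return -ENOMEM;

	/* direct mappings now fail once the per-domain max_irq is exceeded */
	return irq_create_direct_mapping(demo_domain) ? 0 : -ENOSPC;
}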
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index 0c56d44b9fd5..1588e3b2871b 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -11,6 +11,7 @@
11#include <linux/irq_work.h> 11#include <linux/irq_work.h>
12#include <linux/percpu.h> 12#include <linux/percpu.h>
13#include <linux/hardirq.h> 13#include <linux/hardirq.h>
14#include <linux/irqflags.h>
14#include <asm/processor.h> 15#include <asm/processor.h>
15 16
16/* 17/*
diff --git a/kernel/itimer.c b/kernel/itimer.c
index 22000c3db0dd..8d262b467573 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -284,8 +284,12 @@ SYSCALL_DEFINE3(setitimer, int, which, struct itimerval __user *, value,
284 if (value) { 284 if (value) {
285 if(copy_from_user(&set_buffer, value, sizeof(set_buffer))) 285 if(copy_from_user(&set_buffer, value, sizeof(set_buffer)))
286 return -EFAULT; 286 return -EFAULT;
287 } else 287 } else {
288 memset((char *) &set_buffer, 0, sizeof(set_buffer)); 288 memset(&set_buffer, 0, sizeof(set_buffer));
289 printk_once(KERN_WARNING "%s calls setitimer() with new_value NULL pointer."
290 " Misfeature support will be removed\n",
291 current->comm);
292 }
289 293
290 error = do_setitimer(which, &set_buffer, ovalue ? &get_buffer : NULL); 294 error = do_setitimer(which, &set_buffer, ovalue ? &get_buffer : NULL);
291 if (error || !ovalue) 295 if (error || !ovalue)
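The itimer.c hunk keeps accepting a NULL new_value for setitimer() (treating it as "clear the timer") but now warns once per offending program that this misfeature is slated for removal. A small userspace example that would trigger the printk_once(); the portable equivalent is to pass an explicitly zeroed struct itimerval, as shown in the second call.

#include <stdio.h>
#include <string.h>
#include <sys/time.h>

int main(void)
{
	struct itimerval old, zero;

	/* triggers the new one-time kernel warning: new_value == NULL */
	if (setitimer(ITIMER_REAL, NULL, &old) == 0)
		printf("old interval: %ld.%06ld s\n",
		       (long)old.it_value.tv_sec, (long)old.it_value.tv_usec);

	/* preferred, equivalent request: explicitly zeroed new value */
	memset(&zero, 0, sizeof(zero));
	setitimer(ITIMER_REAL, &zero, &old);
	return 0;
}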
diff --git a/kernel/panic.c b/kernel/panic.c
index 80aed44e345a..8ed89a175d79 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -97,7 +97,7 @@ void panic(const char *fmt, ...)
97 /* 97 /*
98 * Avoid nested stack-dumping if a panic occurs during oops processing 98 * Avoid nested stack-dumping if a panic occurs during oops processing
99 */ 99 */
100 if (!oops_in_progress) 100 if (!test_taint(TAINT_DIE) && oops_in_progress <= 1)
101 dump_stack(); 101 dump_stack();
102#endif 102#endif
103 103
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 4603b9d8f30a..5c692a0a555d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1913,7 +1913,7 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev,
1913 struct task_struct *next) 1913 struct task_struct *next)
1914{ 1914{
1915 sched_info_switch(prev, next); 1915 sched_info_switch(prev, next);
1916 perf_event_task_sched_out(prev, next); 1916 perf_event_task_sched(prev, next);
1917 fire_sched_out_preempt_notifiers(prev, next); 1917 fire_sched_out_preempt_notifiers(prev, next);
1918 prepare_lock_switch(rq, next); 1918 prepare_lock_switch(rq, next);
1919 prepare_arch_switch(next); 1919 prepare_arch_switch(next);
@@ -1956,13 +1956,6 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
1956 */ 1956 */
1957 prev_state = prev->state; 1957 prev_state = prev->state;
1958 finish_arch_switch(prev); 1958 finish_arch_switch(prev);
1959#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
1960 local_irq_disable();
1961#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
1962 perf_event_task_sched_in(prev, current);
1963#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
1964 local_irq_enable();
1965#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
1966 finish_lock_switch(rq, prev); 1959 finish_lock_switch(rq, prev);
1967 finish_arch_post_lock_switch(); 1960 finish_arch_post_lock_switch();
1968 1961
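On the scheduler side, the separate sched-out call in prepare_task_switch() and the interrupts-on-context-switch dance around perf_event_task_sched_in() in finish_task_switch() collapse into a single hook at switch-out time. A sketch of how such a hook is typically reached through a static-key guard in the perf header; this wrapper is not part of the diff shown here and is offered only as an assumption of the header-side wiring.

#include <linux/jump_label.h>
#include <linux/sched.h>

/* assumed declarations, normally provided by linux/perf_event.h */
extern struct static_key perf_sched_events;
extern void __perf_event_task_sched(struct task_struct *prev,
				    struct task_struct *next);

/* one static-key-guarded call site replaces the old out/in pair */
static inline void perf_event_task_sched(struct task_struct *prev,
					 struct task_struct *next)
{
	if (static_key_false(&perf_sched_events))
		__perf_event_task_sched(prev, next);
}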
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 2cf9cc7aa103..a20dc8a3c949 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -1,6 +1,10 @@
1# 1#
2# Timer subsystem related configuration options 2# Timer subsystem related configuration options
3# 3#
4
5# Core internal switch. Selected by NO_HZ / HIGH_RES_TIMERS. This is
6# only related to the tick functionality. Oneshot clockevent devices
7# are supported independ of this.
4config TICK_ONESHOT 8config TICK_ONESHOT
5 bool 9 bool
6 10
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index e883f57a3cd3..bf57abdc7bd0 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -575,10 +575,12 @@ void tick_broadcast_switch_to_oneshot(void)
575 unsigned long flags; 575 unsigned long flags;
576 576
577 raw_spin_lock_irqsave(&tick_broadcast_lock, flags); 577 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
578
579 tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
580
578 if (cpumask_empty(tick_get_broadcast_mask())) 581 if (cpumask_empty(tick_get_broadcast_mask()))
579 goto end; 582 goto end;
580 583
581 tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
582 bc = tick_broadcast_device.evtdev; 584 bc = tick_broadcast_device.evtdev;
583 if (bc) 585 if (bc)
584 tick_broadcast_setup_oneshot(bc); 586 tick_broadcast_setup_oneshot(bc);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 3526038f2836..6a3a5b9ff561 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -534,9 +534,9 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
534 hrtimer_get_expires(&ts->sched_timer), 0)) 534 hrtimer_get_expires(&ts->sched_timer), 0))
535 break; 535 break;
536 } 536 }
537 /* Update jiffies and reread time */ 537 /* Reread time and update jiffies */
538 tick_do_update_jiffies64(now);
539 now = ktime_get(); 538 now = ktime_get();
539 tick_do_update_jiffies64(now);
540 } 540 }
541} 541}
542 542
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 0fa92f677c92..cf81f27ce6c6 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2469,57 +2469,35 @@ static int
2469ftrace_avail_open(struct inode *inode, struct file *file) 2469ftrace_avail_open(struct inode *inode, struct file *file)
2470{ 2470{
2471 struct ftrace_iterator *iter; 2471 struct ftrace_iterator *iter;
2472 int ret;
2473 2472
2474 if (unlikely(ftrace_disabled)) 2473 if (unlikely(ftrace_disabled))
2475 return -ENODEV; 2474 return -ENODEV;
2476 2475
2477 iter = kzalloc(sizeof(*iter), GFP_KERNEL); 2476 iter = __seq_open_private(file, &show_ftrace_seq_ops, sizeof(*iter));
2478 if (!iter) 2477 if (iter) {
2479 return -ENOMEM; 2478 iter->pg = ftrace_pages_start;
2480 2479 iter->ops = &global_ops;
2481 iter->pg = ftrace_pages_start;
2482 iter->ops = &global_ops;
2483
2484 ret = seq_open(file, &show_ftrace_seq_ops);
2485 if (!ret) {
2486 struct seq_file *m = file->private_data;
2487
2488 m->private = iter;
2489 } else {
2490 kfree(iter);
2491 } 2480 }
2492 2481
2493 return ret; 2482 return iter ? 0 : -ENOMEM;
2494} 2483}
2495 2484
2496static int 2485static int
2497ftrace_enabled_open(struct inode *inode, struct file *file) 2486ftrace_enabled_open(struct inode *inode, struct file *file)
2498{ 2487{
2499 struct ftrace_iterator *iter; 2488 struct ftrace_iterator *iter;
2500 int ret;
2501 2489
2502 if (unlikely(ftrace_disabled)) 2490 if (unlikely(ftrace_disabled))
2503 return -ENODEV; 2491 return -ENODEV;
2504 2492
2505 iter = kzalloc(sizeof(*iter), GFP_KERNEL); 2493 iter = __seq_open_private(file, &show_ftrace_seq_ops, sizeof(*iter));
2506 if (!iter) 2494 if (iter) {
2507 return -ENOMEM; 2495 iter->pg = ftrace_pages_start;
2508 2496 iter->flags = FTRACE_ITER_ENABLED;
2509 iter->pg = ftrace_pages_start; 2497 iter->ops = &global_ops;
2510 iter->flags = FTRACE_ITER_ENABLED;
2511 iter->ops = &global_ops;
2512
2513 ret = seq_open(file, &show_ftrace_seq_ops);
2514 if (!ret) {
2515 struct seq_file *m = file->private_data;
2516
2517 m->private = iter;
2518 } else {
2519 kfree(iter);
2520 } 2498 }
2521 2499
2522 return ret; 2500 return iter ? 0 : -ENOMEM;
2523} 2501}
2524 2502
2525static void ftrace_filter_reset(struct ftrace_hash *hash) 2503static void ftrace_filter_reset(struct ftrace_hash *hash)
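Both ftrace open routines above drop the open-coded kzalloc() + seq_open() + m->private dance in favour of __seq_open_private(), which allocates the zeroed iterator, opens the seq_file and links the two in one call, freeing the allocation itself on failure. The pattern applies to any seq_file user; a generic hedged sketch follows, with the demo_* names invented for illustration.

#include <linux/fs.h>
#include <linux/seq_file.h>

struct demo_iter {
	unsigned long flags;		/* per-open iteration state */
};

/* a real user fills in .start/.next/.stop/.show */
static const struct seq_operations demo_seq_ops;

static int demo_open(struct inode *inode, struct file *file)
{
	struct demo_iter *iter;

	/* allocates the iterator (zeroed), calls seq_open(), and stores
	 * the iterator in the seq_file's ->private in one step */
	iter = __seq_open_private(file, &demo_seq_ops, sizeof(*iter));
	if (!iter)
		return -ENOMEM;

	iter->flags = 0;
	return 0;
}

static int demo_release(struct inode *inode, struct file *file)
{
	/* pairs with __seq_open_private(): frees seq_file and iterator */
	return seq_release_private(inode, file);
}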
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index cf8d11e91efd..2d5eb3320827 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -449,6 +449,7 @@ struct ring_buffer_per_cpu {
449 raw_spinlock_t reader_lock; /* serialize readers */ 449 raw_spinlock_t reader_lock; /* serialize readers */
450 arch_spinlock_t lock; 450 arch_spinlock_t lock;
451 struct lock_class_key lock_key; 451 struct lock_class_key lock_key;
452 unsigned int nr_pages;
452 struct list_head *pages; 453 struct list_head *pages;
453 struct buffer_page *head_page; /* read from head */ 454 struct buffer_page *head_page; /* read from head */
454 struct buffer_page *tail_page; /* write to tail */ 455 struct buffer_page *tail_page; /* write to tail */
@@ -466,10 +467,12 @@ struct ring_buffer_per_cpu {
466 unsigned long read_bytes; 467 unsigned long read_bytes;
467 u64 write_stamp; 468 u64 write_stamp;
468 u64 read_stamp; 469 u64 read_stamp;
470 /* ring buffer pages to update, > 0 to add, < 0 to remove */
471 int nr_pages_to_update;
472 struct list_head new_pages; /* new pages to add */
469}; 473};
470 474
471struct ring_buffer { 475struct ring_buffer {
472 unsigned pages;
473 unsigned flags; 476 unsigned flags;
474 int cpus; 477 int cpus;
475 atomic_t record_disabled; 478 atomic_t record_disabled;
@@ -963,14 +966,10 @@ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
963 return 0; 966 return 0;
964} 967}
965 968
966static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, 969static int __rb_allocate_pages(int nr_pages, struct list_head *pages, int cpu)
967 unsigned nr_pages)
968{ 970{
971 int i;
969 struct buffer_page *bpage, *tmp; 972 struct buffer_page *bpage, *tmp;
970 LIST_HEAD(pages);
971 unsigned i;
972
973 WARN_ON(!nr_pages);
974 973
975 for (i = 0; i < nr_pages; i++) { 974 for (i = 0; i < nr_pages; i++) {
976 struct page *page; 975 struct page *page;
@@ -981,15 +980,13 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
981 */ 980 */
982 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), 981 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
983 GFP_KERNEL | __GFP_NORETRY, 982 GFP_KERNEL | __GFP_NORETRY,
984 cpu_to_node(cpu_buffer->cpu)); 983 cpu_to_node(cpu));
985 if (!bpage) 984 if (!bpage)
986 goto free_pages; 985 goto free_pages;
987 986
988 rb_check_bpage(cpu_buffer, bpage); 987 list_add(&bpage->list, pages);
989 988
990 list_add(&bpage->list, &pages); 989 page = alloc_pages_node(cpu_to_node(cpu),
991
992 page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu),
993 GFP_KERNEL | __GFP_NORETRY, 0); 990 GFP_KERNEL | __GFP_NORETRY, 0);
994 if (!page) 991 if (!page)
995 goto free_pages; 992 goto free_pages;
@@ -997,6 +994,27 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
997 rb_init_page(bpage->page); 994 rb_init_page(bpage->page);
998 } 995 }
999 996
997 return 0;
998
999free_pages:
1000 list_for_each_entry_safe(bpage, tmp, pages, list) {
1001 list_del_init(&bpage->list);
1002 free_buffer_page(bpage);
1003 }
1004
1005 return -ENOMEM;
1006}
1007
1008static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
1009 unsigned nr_pages)
1010{
1011 LIST_HEAD(pages);
1012
1013 WARN_ON(!nr_pages);
1014
1015 if (__rb_allocate_pages(nr_pages, &pages, cpu_buffer->cpu))
1016 return -ENOMEM;
1017
1000 /* 1018 /*
1001 * The ring buffer page list is a circular list that does not 1019 * The ring buffer page list is a circular list that does not
1002 * start and end with a list head. All page list items point to 1020 * start and end with a list head. All page list items point to
@@ -1005,20 +1023,15 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
1005 cpu_buffer->pages = pages.next; 1023 cpu_buffer->pages = pages.next;
1006 list_del(&pages); 1024 list_del(&pages);
1007 1025
1026 cpu_buffer->nr_pages = nr_pages;
1027
1008 rb_check_pages(cpu_buffer); 1028 rb_check_pages(cpu_buffer);
1009 1029
1010 return 0; 1030 return 0;
1011
1012 free_pages:
1013 list_for_each_entry_safe(bpage, tmp, &pages, list) {
1014 list_del_init(&bpage->list);
1015 free_buffer_page(bpage);
1016 }
1017 return -ENOMEM;
1018} 1031}
1019 1032
1020static struct ring_buffer_per_cpu * 1033static struct ring_buffer_per_cpu *
1021rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) 1034rb_allocate_cpu_buffer(struct ring_buffer *buffer, int nr_pages, int cpu)
1022{ 1035{
1023 struct ring_buffer_per_cpu *cpu_buffer; 1036 struct ring_buffer_per_cpu *cpu_buffer;
1024 struct buffer_page *bpage; 1037 struct buffer_page *bpage;
@@ -1052,7 +1065,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
1052 1065
1053 INIT_LIST_HEAD(&cpu_buffer->reader_page->list); 1066 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
1054 1067
1055 ret = rb_allocate_pages(cpu_buffer, buffer->pages); 1068 ret = rb_allocate_pages(cpu_buffer, nr_pages);
1056 if (ret < 0) 1069 if (ret < 0)
1057 goto fail_free_reader; 1070 goto fail_free_reader;
1058 1071
@@ -1113,7 +1126,7 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
1113{ 1126{
1114 struct ring_buffer *buffer; 1127 struct ring_buffer *buffer;
1115 int bsize; 1128 int bsize;
1116 int cpu; 1129 int cpu, nr_pages;
1117 1130
1118 /* keep it in its own cache line */ 1131 /* keep it in its own cache line */
1119 buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()), 1132 buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
@@ -1124,14 +1137,14 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
1124 if (!alloc_cpumask_var(&buffer->cpumask, GFP_KERNEL)) 1137 if (!alloc_cpumask_var(&buffer->cpumask, GFP_KERNEL))
1125 goto fail_free_buffer; 1138 goto fail_free_buffer;
1126 1139
1127 buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); 1140 nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
1128 buffer->flags = flags; 1141 buffer->flags = flags;
1129 buffer->clock = trace_clock_local; 1142 buffer->clock = trace_clock_local;
1130 buffer->reader_lock_key = key; 1143 buffer->reader_lock_key = key;
1131 1144
1132 /* need at least two pages */ 1145 /* need at least two pages */
1133 if (buffer->pages < 2) 1146 if (nr_pages < 2)
1134 buffer->pages = 2; 1147 nr_pages = 2;
1135 1148
1136 /* 1149 /*
1137 * In case of non-hotplug cpu, if the ring-buffer is allocated 1150 * In case of non-hotplug cpu, if the ring-buffer is allocated
@@ -1154,7 +1167,7 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
1154 1167
1155 for_each_buffer_cpu(buffer, cpu) { 1168 for_each_buffer_cpu(buffer, cpu) {
1156 buffer->buffers[cpu] = 1169 buffer->buffers[cpu] =
1157 rb_allocate_cpu_buffer(buffer, cpu); 1170 rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
1158 if (!buffer->buffers[cpu]) 1171 if (!buffer->buffers[cpu])
1159 goto fail_free_buffers; 1172 goto fail_free_buffers;
1160 } 1173 }
@@ -1276,6 +1289,18 @@ out:
1276 raw_spin_unlock_irq(&cpu_buffer->reader_lock); 1289 raw_spin_unlock_irq(&cpu_buffer->reader_lock);
1277} 1290}
1278 1291
1292static void update_pages_handler(struct ring_buffer_per_cpu *cpu_buffer)
1293{
1294 if (cpu_buffer->nr_pages_to_update > 0)
1295 rb_insert_pages(cpu_buffer, &cpu_buffer->new_pages,
1296 cpu_buffer->nr_pages_to_update);
1297 else
1298 rb_remove_pages(cpu_buffer, -cpu_buffer->nr_pages_to_update);
1299 cpu_buffer->nr_pages += cpu_buffer->nr_pages_to_update;
1300 /* reset this value */
1301 cpu_buffer->nr_pages_to_update = 0;
1302}
1303
1279/** 1304/**
1280 * ring_buffer_resize - resize the ring buffer 1305 * ring_buffer_resize - resize the ring buffer
1281 * @buffer: the buffer to resize. 1306 * @buffer: the buffer to resize.
@@ -1285,14 +1310,12 @@ out:
1285 * 1310 *
1286 * Returns -1 on failure. 1311 * Returns -1 on failure.
1287 */ 1312 */
1288int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) 1313int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size,
1314 int cpu_id)
1289{ 1315{
1290 struct ring_buffer_per_cpu *cpu_buffer; 1316 struct ring_buffer_per_cpu *cpu_buffer;
1291 unsigned nr_pages, rm_pages, new_pages; 1317 unsigned nr_pages;
1292 struct buffer_page *bpage, *tmp; 1318 int cpu;
1293 unsigned long buffer_size;
1294 LIST_HEAD(pages);
1295 int i, cpu;
1296 1319
1297 /* 1320 /*
1298 * Always succeed at resizing a non-existent buffer: 1321 * Always succeed at resizing a non-existent buffer:
@@ -1302,15 +1325,11 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1302 1325
1303 size = DIV_ROUND_UP(size, BUF_PAGE_SIZE); 1326 size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
1304 size *= BUF_PAGE_SIZE; 1327 size *= BUF_PAGE_SIZE;
1305 buffer_size = buffer->pages * BUF_PAGE_SIZE;
1306 1328
1307 /* we need a minimum of two pages */ 1329 /* we need a minimum of two pages */
1308 if (size < BUF_PAGE_SIZE * 2) 1330 if (size < BUF_PAGE_SIZE * 2)
1309 size = BUF_PAGE_SIZE * 2; 1331 size = BUF_PAGE_SIZE * 2;
1310 1332
1311 if (size == buffer_size)
1312 return size;
1313
1314 atomic_inc(&buffer->record_disabled); 1333 atomic_inc(&buffer->record_disabled);
1315 1334
1316 /* Make sure all writers are done with this buffer. */ 1335 /* Make sure all writers are done with this buffer. */
@@ -1321,68 +1340,56 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1321 1340
1322 nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); 1341 nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
1323 1342
1324 if (size < buffer_size) { 1343 if (cpu_id == RING_BUFFER_ALL_CPUS) {
1325 1344 /* calculate the pages to update */
1326 /* easy case, just free pages */
1327 if (RB_WARN_ON(buffer, nr_pages >= buffer->pages))
1328 goto out_fail;
1329
1330 rm_pages = buffer->pages - nr_pages;
1331
1332 for_each_buffer_cpu(buffer, cpu) { 1345 for_each_buffer_cpu(buffer, cpu) {
1333 cpu_buffer = buffer->buffers[cpu]; 1346 cpu_buffer = buffer->buffers[cpu];
1334 rb_remove_pages(cpu_buffer, rm_pages);
1335 }
1336 goto out;
1337 }
1338 1347
1339 /* 1348 cpu_buffer->nr_pages_to_update = nr_pages -
1340 * This is a bit more difficult. We only want to add pages 1349 cpu_buffer->nr_pages;
1341 * when we can allocate enough for all CPUs. We do this
1342 * by allocating all the pages and storing them on a local
1343 * link list. If we succeed in our allocation, then we
1344 * add these pages to the cpu_buffers. Otherwise we just free
1345 * them all and return -ENOMEM;
1346 */
1347 if (RB_WARN_ON(buffer, nr_pages <= buffer->pages))
1348 goto out_fail;
1349 1350
1350 new_pages = nr_pages - buffer->pages; 1351 /*
1352 * nothing more to do for removing pages or no update
1353 */
1354 if (cpu_buffer->nr_pages_to_update <= 0)
1355 continue;
1351 1356
1352 for_each_buffer_cpu(buffer, cpu) {
1353 for (i = 0; i < new_pages; i++) {
1354 struct page *page;
1355 /* 1357 /*
1356 * __GFP_NORETRY flag makes sure that the allocation 1358 * to add pages, make sure all new pages can be
1357 * fails gracefully without invoking oom-killer and 1359 * allocated without receiving ENOMEM
1358 * the system is not destabilized.
1359 */ 1360 */
1360 bpage = kzalloc_node(ALIGN(sizeof(*bpage), 1361 INIT_LIST_HEAD(&cpu_buffer->new_pages);
1361 cache_line_size()), 1362 if (__rb_allocate_pages(cpu_buffer->nr_pages_to_update,
1362 GFP_KERNEL | __GFP_NORETRY, 1363 &cpu_buffer->new_pages, cpu))
1363 cpu_to_node(cpu)); 1364 /* not enough memory for new pages */
1364 if (!bpage) 1365 goto no_mem;
1365 goto free_pages;
1366 list_add(&bpage->list, &pages);
1367 page = alloc_pages_node(cpu_to_node(cpu),
1368 GFP_KERNEL | __GFP_NORETRY, 0);
1369 if (!page)
1370 goto free_pages;
1371 bpage->page = page_address(page);
1372 rb_init_page(bpage->page);
1373 } 1366 }
1374 }
1375 1367
1376 for_each_buffer_cpu(buffer, cpu) { 1368 /* wait for all the updates to complete */
1377 cpu_buffer = buffer->buffers[cpu]; 1369 for_each_buffer_cpu(buffer, cpu) {
1378 rb_insert_pages(cpu_buffer, &pages, new_pages); 1370 cpu_buffer = buffer->buffers[cpu];
1379 } 1371 if (cpu_buffer->nr_pages_to_update) {
1372 update_pages_handler(cpu_buffer);
1373 }
1374 }
1375 } else {
1376 cpu_buffer = buffer->buffers[cpu_id];
1377 if (nr_pages == cpu_buffer->nr_pages)
1378 goto out;
1380 1379
1381 if (RB_WARN_ON(buffer, !list_empty(&pages))) 1380 cpu_buffer->nr_pages_to_update = nr_pages -
1382 goto out_fail; 1381 cpu_buffer->nr_pages;
1382
1383 INIT_LIST_HEAD(&cpu_buffer->new_pages);
1384 if (cpu_buffer->nr_pages_to_update > 0 &&
1385 __rb_allocate_pages(cpu_buffer->nr_pages_to_update,
1386 &cpu_buffer->new_pages, cpu_id))
1387 goto no_mem;
1388
1389 update_pages_handler(cpu_buffer);
1390 }
1383 1391
1384 out: 1392 out:
1385 buffer->pages = nr_pages;
1386 put_online_cpus(); 1393 put_online_cpus();
1387 mutex_unlock(&buffer->mutex); 1394 mutex_unlock(&buffer->mutex);
1388 1395
@@ -1390,25 +1397,24 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1390 1397
1391 return size; 1398 return size;
1392 1399
1393 free_pages: 1400 no_mem:
1394 list_for_each_entry_safe(bpage, tmp, &pages, list) { 1401 for_each_buffer_cpu(buffer, cpu) {
1395 list_del_init(&bpage->list); 1402 struct buffer_page *bpage, *tmp;
1396 free_buffer_page(bpage); 1403 cpu_buffer = buffer->buffers[cpu];
1404 /* reset this number regardless */
1405 cpu_buffer->nr_pages_to_update = 0;
1406 if (list_empty(&cpu_buffer->new_pages))
1407 continue;
1408 list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages,
1409 list) {
1410 list_del_init(&bpage->list);
1411 free_buffer_page(bpage);
1412 }
1397 } 1413 }
1398 put_online_cpus(); 1414 put_online_cpus();
1399 mutex_unlock(&buffer->mutex); 1415 mutex_unlock(&buffer->mutex);
1400 atomic_dec(&buffer->record_disabled); 1416 atomic_dec(&buffer->record_disabled);
1401 return -ENOMEM; 1417 return -ENOMEM;
1402
1403 /*
1404 * Something went totally wrong, and we are too paranoid
1405 * to even clean up the mess.
1406 */
1407 out_fail:
1408 put_online_cpus();
1409 mutex_unlock(&buffer->mutex);
1410 atomic_dec(&buffer->record_disabled);
1411 return -1;
1412} 1418}
1413EXPORT_SYMBOL_GPL(ring_buffer_resize); 1419EXPORT_SYMBOL_GPL(ring_buffer_resize);
1414 1420
@@ -1510,7 +1516,7 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
1510 * assign the commit to the tail. 1516 * assign the commit to the tail.
1511 */ 1517 */
1512 again: 1518 again:
1513 max_count = cpu_buffer->buffer->pages * 100; 1519 max_count = cpu_buffer->nr_pages * 100;
1514 1520
1515 while (cpu_buffer->commit_page != cpu_buffer->tail_page) { 1521 while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
1516 if (RB_WARN_ON(cpu_buffer, !(--max_count))) 1522 if (RB_WARN_ON(cpu_buffer, !(--max_count)))
@@ -3588,9 +3594,18 @@ EXPORT_SYMBOL_GPL(ring_buffer_read);
3588 * ring_buffer_size - return the size of the ring buffer (in bytes) 3594 * ring_buffer_size - return the size of the ring buffer (in bytes)
3589 * @buffer: The ring buffer. 3595 * @buffer: The ring buffer.
3590 */ 3596 */
3591unsigned long ring_buffer_size(struct ring_buffer *buffer) 3597unsigned long ring_buffer_size(struct ring_buffer *buffer, int cpu)
3592{ 3598{
3593 return BUF_PAGE_SIZE * buffer->pages; 3599 /*
3600 * Earlier, this method returned
3601 * BUF_PAGE_SIZE * buffer->nr_pages
3602 * Since the nr_pages field is now removed, we have converted this to
3603 * return the per cpu buffer value.
3604 */
3605 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3606 return 0;
3607
3608 return BUF_PAGE_SIZE * buffer->buffers[cpu]->nr_pages;
3594} 3609}
3595EXPORT_SYMBOL_GPL(ring_buffer_size); 3610EXPORT_SYMBOL_GPL(ring_buffer_size);
3596 3611
@@ -3765,8 +3780,11 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
3765 !cpumask_test_cpu(cpu, buffer_b->cpumask)) 3780 !cpumask_test_cpu(cpu, buffer_b->cpumask))
3766 goto out; 3781 goto out;
3767 3782
3783 cpu_buffer_a = buffer_a->buffers[cpu];
3784 cpu_buffer_b = buffer_b->buffers[cpu];
3785
3768 /* At least make sure the two buffers are somewhat the same */ 3786 /* At least make sure the two buffers are somewhat the same */
3769 if (buffer_a->pages != buffer_b->pages) 3787 if (cpu_buffer_a->nr_pages != cpu_buffer_b->nr_pages)
3770 goto out; 3788 goto out;
3771 3789
3772 ret = -EAGAIN; 3790 ret = -EAGAIN;
@@ -3780,9 +3798,6 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
3780 if (atomic_read(&buffer_b->record_disabled)) 3798 if (atomic_read(&buffer_b->record_disabled))
3781 goto out; 3799 goto out;
3782 3800
3783 cpu_buffer_a = buffer_a->buffers[cpu];
3784 cpu_buffer_b = buffer_b->buffers[cpu];
3785
3786 if (atomic_read(&cpu_buffer_a->record_disabled)) 3801 if (atomic_read(&cpu_buffer_a->record_disabled))
3787 goto out; 3802 goto out;
3788 3803
@@ -4071,6 +4086,8 @@ static int rb_cpu_notify(struct notifier_block *self,
4071 struct ring_buffer *buffer = 4086 struct ring_buffer *buffer =
4072 container_of(self, struct ring_buffer, cpu_notify); 4087 container_of(self, struct ring_buffer, cpu_notify);
4073 long cpu = (long)hcpu; 4088 long cpu = (long)hcpu;
4089 int cpu_i, nr_pages_same;
4090 unsigned int nr_pages;
4074 4091
4075 switch (action) { 4092 switch (action) {
4076 case CPU_UP_PREPARE: 4093 case CPU_UP_PREPARE:
@@ -4078,8 +4095,23 @@ static int rb_cpu_notify(struct notifier_block *self,
4078 if (cpumask_test_cpu(cpu, buffer->cpumask)) 4095 if (cpumask_test_cpu(cpu, buffer->cpumask))
4079 return NOTIFY_OK; 4096 return NOTIFY_OK;
4080 4097
4098 nr_pages = 0;
4099 nr_pages_same = 1;
4100 /* check if all cpu sizes are same */
4101 for_each_buffer_cpu(buffer, cpu_i) {
4102 /* fill in the size from first enabled cpu */
4103 if (nr_pages == 0)
4104 nr_pages = buffer->buffers[cpu_i]->nr_pages;
4105 if (nr_pages != buffer->buffers[cpu_i]->nr_pages) {
4106 nr_pages_same = 0;
4107 break;
4108 }
4109 }
4110 /* allocate minimum pages, user can later expand it */
4111 if (!nr_pages_same)
4112 nr_pages = 2;
4081 buffer->buffers[cpu] = 4113 buffer->buffers[cpu] =
4082 rb_allocate_cpu_buffer(buffer, cpu); 4114 rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
4083 if (!buffer->buffers[cpu]) { 4115 if (!buffer->buffers[cpu]) {
4084 WARN(1, "failed to allocate ring buffer on CPU %ld\n", 4116 WARN(1, "failed to allocate ring buffer on CPU %ld\n",
4085 cpu); 4117 cpu);
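The ring_buffer.c changes move the page count from the global struct ring_buffer into each ring_buffer_per_cpu (nr_pages, nr_pages_to_update, new_pages), so ring_buffer_resize() and ring_buffer_size() now take a CPU argument and RING_BUFFER_ALL_CPUS addresses every per-cpu buffer at once. A hedged usage sketch from a hypothetical caller; demo_resize() and demo_buffer are not from the patch.

#include <linux/printk.h>
#include <linux/ring_buffer.h>

static int demo_resize(struct ring_buffer *demo_buffer, unsigned long bytes)
{
	int ret;

	/* grow or shrink every per-cpu buffer to the same size */
	ret = ring_buffer_resize(demo_buffer, bytes, RING_BUFFER_ALL_CPUS);
	if (ret < 0)
		return ret;

	/* or touch only CPU 0; other CPUs keep their current nr_pages */
	ret = ring_buffer_resize(demo_buffer, bytes * 2, 0);
	if (ret < 0)
		return ret;

	/* the size is now reported per CPU as well */
	pr_info("cpu0 ring buffer: %lu bytes\n",
		ring_buffer_size(demo_buffer, 0));
	return 0;
}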
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ed7b5d1e12f4..48ef4960ec90 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -87,18 +87,6 @@ static int tracing_disabled = 1;
87 87
88DEFINE_PER_CPU(int, ftrace_cpu_disabled); 88DEFINE_PER_CPU(int, ftrace_cpu_disabled);
89 89
90static inline void ftrace_disable_cpu(void)
91{
92 preempt_disable();
93 __this_cpu_inc(ftrace_cpu_disabled);
94}
95
96static inline void ftrace_enable_cpu(void)
97{
98 __this_cpu_dec(ftrace_cpu_disabled);
99 preempt_enable();
100}
101
102cpumask_var_t __read_mostly tracing_buffer_mask; 90cpumask_var_t __read_mostly tracing_buffer_mask;
103 91
104/* 92/*
@@ -629,7 +617,6 @@ ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
629static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) 617static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
630{ 618{
631 int len; 619 int len;
632 void *ret;
633 620
634 if (s->len <= s->readpos) 621 if (s->len <= s->readpos)
635 return -EBUSY; 622 return -EBUSY;
@@ -637,9 +624,7 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
637 len = s->len - s->readpos; 624 len = s->len - s->readpos;
638 if (cnt > len) 625 if (cnt > len)
639 cnt = len; 626 cnt = len;
640 ret = memcpy(buf, s->buffer + s->readpos, cnt); 627 memcpy(buf, s->buffer + s->readpos, cnt);
641 if (!ret)
642 return -EFAULT;
643 628
644 s->readpos += cnt; 629 s->readpos += cnt;
645 return cnt; 630 return cnt;
@@ -751,8 +736,6 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
751 736
752 arch_spin_lock(&ftrace_max_lock); 737 arch_spin_lock(&ftrace_max_lock);
753 738
754 ftrace_disable_cpu();
755
756 ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu); 739 ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu);
757 740
758 if (ret == -EBUSY) { 741 if (ret == -EBUSY) {
@@ -766,8 +749,6 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
766 "Failed to swap buffers due to commit in progress\n"); 749 "Failed to swap buffers due to commit in progress\n");
767 } 750 }
768 751
769 ftrace_enable_cpu();
770
771 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY); 752 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
772 753
773 __update_max_tr(tr, tsk, cpu); 754 __update_max_tr(tr, tsk, cpu);
@@ -841,7 +822,8 @@ __acquires(kernel_lock)
841 822
842 /* If we expanded the buffers, make sure the max is expanded too */ 823 /* If we expanded the buffers, make sure the max is expanded too */
843 if (ring_buffer_expanded && type->use_max_tr) 824 if (ring_buffer_expanded && type->use_max_tr)
844 ring_buffer_resize(max_tr.buffer, trace_buf_size); 825 ring_buffer_resize(max_tr.buffer, trace_buf_size,
826 RING_BUFFER_ALL_CPUS);
845 827
846 /* the test is responsible for initializing and enabling */ 828 /* the test is responsible for initializing and enabling */
847 pr_info("Testing tracer %s: ", type->name); 829 pr_info("Testing tracer %s: ", type->name);
@@ -857,7 +839,8 @@ __acquires(kernel_lock)
857 839
858 /* Shrink the max buffer again */ 840 /* Shrink the max buffer again */
859 if (ring_buffer_expanded && type->use_max_tr) 841 if (ring_buffer_expanded && type->use_max_tr)
860 ring_buffer_resize(max_tr.buffer, 1); 842 ring_buffer_resize(max_tr.buffer, 1,
843 RING_BUFFER_ALL_CPUS);
861 844
862 printk(KERN_CONT "PASSED\n"); 845 printk(KERN_CONT "PASSED\n");
863 } 846 }
@@ -917,13 +900,6 @@ out:
917 mutex_unlock(&trace_types_lock); 900 mutex_unlock(&trace_types_lock);
918} 901}
919 902
920static void __tracing_reset(struct ring_buffer *buffer, int cpu)
921{
922 ftrace_disable_cpu();
923 ring_buffer_reset_cpu(buffer, cpu);
924 ftrace_enable_cpu();
925}
926
927void tracing_reset(struct trace_array *tr, int cpu) 903void tracing_reset(struct trace_array *tr, int cpu)
928{ 904{
929 struct ring_buffer *buffer = tr->buffer; 905 struct ring_buffer *buffer = tr->buffer;
@@ -932,7 +908,7 @@ void tracing_reset(struct trace_array *tr, int cpu)
932 908
933 /* Make sure all commits have finished */ 909 /* Make sure all commits have finished */
934 synchronize_sched(); 910 synchronize_sched();
935 __tracing_reset(buffer, cpu); 911 ring_buffer_reset_cpu(buffer, cpu);
936 912
937 ring_buffer_record_enable(buffer); 913 ring_buffer_record_enable(buffer);
938} 914}
@@ -950,7 +926,7 @@ void tracing_reset_online_cpus(struct trace_array *tr)
950 tr->time_start = ftrace_now(tr->cpu); 926 tr->time_start = ftrace_now(tr->cpu);
951 927
952 for_each_online_cpu(cpu) 928 for_each_online_cpu(cpu)
953 __tracing_reset(buffer, cpu); 929 ring_buffer_reset_cpu(buffer, cpu);
954 930
955 ring_buffer_record_enable(buffer); 931 ring_buffer_record_enable(buffer);
956} 932}
@@ -1498,25 +1474,119 @@ static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1498 1474
1499#endif /* CONFIG_STACKTRACE */ 1475#endif /* CONFIG_STACKTRACE */
1500 1476
1477/* created for use with alloc_percpu */
1478struct trace_buffer_struct {
1479 char buffer[TRACE_BUF_SIZE];
1480};
1481
1482static struct trace_buffer_struct *trace_percpu_buffer;
1483static struct trace_buffer_struct *trace_percpu_sirq_buffer;
1484static struct trace_buffer_struct *trace_percpu_irq_buffer;
1485static struct trace_buffer_struct *trace_percpu_nmi_buffer;
1486
1487/*
1488 * The buffer used is dependent on the context. There is a per cpu
1489 * buffer for normal context, softirq contex, hard irq context and
1490 * for NMI context. Thise allows for lockless recording.
1491 *
1492 * Note, if the buffers failed to be allocated, then this returns NULL
1493 */
1494static char *get_trace_buf(void)
1495{
1496 struct trace_buffer_struct *percpu_buffer;
1497 struct trace_buffer_struct *buffer;
1498
1499 /*
1500 * If we have allocated per cpu buffers, then we do not
1501 * need to do any locking.
1502 */
1503 if (in_nmi())
1504 percpu_buffer = trace_percpu_nmi_buffer;
1505 else if (in_irq())
1506 percpu_buffer = trace_percpu_irq_buffer;
1507 else if (in_softirq())
1508 percpu_buffer = trace_percpu_sirq_buffer;
1509 else
1510 percpu_buffer = trace_percpu_buffer;
1511
1512 if (!percpu_buffer)
1513 return NULL;
1514
1515 buffer = per_cpu_ptr(percpu_buffer, smp_processor_id());
1516
1517 return buffer->buffer;
1518}
1519
1520static int alloc_percpu_trace_buffer(void)
1521{
1522 struct trace_buffer_struct *buffers;
1523 struct trace_buffer_struct *sirq_buffers;
1524 struct trace_buffer_struct *irq_buffers;
1525 struct trace_buffer_struct *nmi_buffers;
1526
1527 buffers = alloc_percpu(struct trace_buffer_struct);
1528 if (!buffers)
1529 goto err_warn;
1530
1531 sirq_buffers = alloc_percpu(struct trace_buffer_struct);
1532 if (!sirq_buffers)
1533 goto err_sirq;
1534
1535 irq_buffers = alloc_percpu(struct trace_buffer_struct);
1536 if (!irq_buffers)
1537 goto err_irq;
1538
1539 nmi_buffers = alloc_percpu(struct trace_buffer_struct);
1540 if (!nmi_buffers)
1541 goto err_nmi;
1542
1543 trace_percpu_buffer = buffers;
1544 trace_percpu_sirq_buffer = sirq_buffers;
1545 trace_percpu_irq_buffer = irq_buffers;
1546 trace_percpu_nmi_buffer = nmi_buffers;
1547
1548 return 0;
1549
1550 err_nmi:
1551 free_percpu(irq_buffers);
1552 err_irq:
1553 free_percpu(sirq_buffers);
1554 err_sirq:
1555 free_percpu(buffers);
1556 err_warn:
1557 WARN(1, "Could not allocate percpu trace_printk buffer");
1558 return -ENOMEM;
1559}
1560
1561void trace_printk_init_buffers(void)
1562{
1563 static int buffers_allocated;
1564
1565 if (buffers_allocated)
1566 return;
1567
1568 if (alloc_percpu_trace_buffer())
1569 return;
1570
1571 pr_info("ftrace: Allocated trace_printk buffers\n");
1572
1573 buffers_allocated = 1;
1574}
1575
1501/** 1576/**
1502 * trace_vbprintk - write binary msg to tracing buffer 1577 * trace_vbprintk - write binary msg to tracing buffer
1503 * 1578 *
1504 */ 1579 */
1505int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) 1580int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1506{ 1581{
1507 static arch_spinlock_t trace_buf_lock =
1508 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
1509 static u32 trace_buf[TRACE_BUF_SIZE];
1510
1511 struct ftrace_event_call *call = &event_bprint; 1582 struct ftrace_event_call *call = &event_bprint;
1512 struct ring_buffer_event *event; 1583 struct ring_buffer_event *event;
1513 struct ring_buffer *buffer; 1584 struct ring_buffer *buffer;
1514 struct trace_array *tr = &global_trace; 1585 struct trace_array *tr = &global_trace;
1515 struct trace_array_cpu *data;
1516 struct bprint_entry *entry; 1586 struct bprint_entry *entry;
1517 unsigned long flags; 1587 unsigned long flags;
1518 int disable; 1588 char *tbuffer;
1519 int cpu, len = 0, size, pc; 1589 int len = 0, size, pc;
1520 1590
1521 if (unlikely(tracing_selftest_running || tracing_disabled)) 1591 if (unlikely(tracing_selftest_running || tracing_disabled))
1522 return 0; 1592 return 0;
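The block above replaces the shared static buffer plus arch_spin_lock in trace_vbprintk()/trace_array_vprintk() with four alloc_percpu() buffer sets, one per execution context (task, softirq, hardirq, NMI), making the printk paths lockless. The context-selection idea generalises; below is a minimal self-contained sketch of the same pattern, with all demo_* names invented for illustration and the error-path cleanup simplified relative to alloc_percpu_trace_buffer().

#include <linux/hardirq.h>
#include <linux/percpu.h>

#define DEMO_BUF_SIZE 1024

struct demo_buffer {
	char buf[DEMO_BUF_SIZE];
};

/* index 0: task context, 1: softirq, 2: hardirq, 3: NMI */
static struct demo_buffer __percpu *demo_buffers[4];

static int demo_init_bufs(void)
{
	int i;

	for (i = 0; i < 4; i++) {
		demo_buffers[i] = alloc_percpu(struct demo_buffer);
		if (!demo_buffers[i])
			return -ENOMEM;	/* earlier allocations leak in this sketch */
	}
	return 0;
}

static char *demo_get_buf(void)
{
	struct demo_buffer __percpu *pcpu;

	if (in_nmi())
		pcpu = demo_buffers[3];
	else if (in_irq())
		pcpu = demo_buffers[2];
	else if (in_softirq())
		pcpu = demo_buffers[1];
	else
		pcpu = demo_buffers[0];

	if (!pcpu)
		return NULL;

	/* caller must keep preemption disabled while using the buffer */
	return this_cpu_ptr(pcpu)->buf;
}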
@@ -1526,43 +1596,36 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1526 1596
1527 pc = preempt_count(); 1597 pc = preempt_count();
1528 preempt_disable_notrace(); 1598 preempt_disable_notrace();
1529 cpu = raw_smp_processor_id();
1530 data = tr->data[cpu];
1531 1599
1532 disable = atomic_inc_return(&data->disabled); 1600 tbuffer = get_trace_buf();
1533 if (unlikely(disable != 1)) 1601 if (!tbuffer) {
1602 len = 0;
1534 goto out; 1603 goto out;
1604 }
1535 1605
1536 /* Lockdep uses trace_printk for lock tracing */ 1606 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
1537 local_irq_save(flags);
1538 arch_spin_lock(&trace_buf_lock);
1539 len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
1540 1607
1541 if (len > TRACE_BUF_SIZE || len < 0) 1608 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
1542 goto out_unlock; 1609 goto out;
1543 1610
1611 local_save_flags(flags);
1544 size = sizeof(*entry) + sizeof(u32) * len; 1612 size = sizeof(*entry) + sizeof(u32) * len;
1545 buffer = tr->buffer; 1613 buffer = tr->buffer;
1546 event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, 1614 event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
1547 flags, pc); 1615 flags, pc);
1548 if (!event) 1616 if (!event)
1549 goto out_unlock; 1617 goto out;
1550 entry = ring_buffer_event_data(event); 1618 entry = ring_buffer_event_data(event);
1551 entry->ip = ip; 1619 entry->ip = ip;
1552 entry->fmt = fmt; 1620 entry->fmt = fmt;
1553 1621
1554 memcpy(entry->buf, trace_buf, sizeof(u32) * len); 1622 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
1555 if (!filter_check_discard(call, entry, buffer, event)) { 1623 if (!filter_check_discard(call, entry, buffer, event)) {
1556 ring_buffer_unlock_commit(buffer, event); 1624 ring_buffer_unlock_commit(buffer, event);
1557 ftrace_trace_stack(buffer, flags, 6, pc); 1625 ftrace_trace_stack(buffer, flags, 6, pc);
1558 } 1626 }
1559 1627
1560out_unlock:
1561 arch_spin_unlock(&trace_buf_lock);
1562 local_irq_restore(flags);
1563
1564out: 1628out:
1565 atomic_dec_return(&data->disabled);
1566 preempt_enable_notrace(); 1629 preempt_enable_notrace();
1567 unpause_graph_tracing(); 1630 unpause_graph_tracing();
1568 1631
@@ -1588,58 +1651,53 @@ int trace_array_printk(struct trace_array *tr,
1588int trace_array_vprintk(struct trace_array *tr, 1651int trace_array_vprintk(struct trace_array *tr,
1589 unsigned long ip, const char *fmt, va_list args) 1652 unsigned long ip, const char *fmt, va_list args)
1590{ 1653{
1591 static arch_spinlock_t trace_buf_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1592 static char trace_buf[TRACE_BUF_SIZE];
1593
1594 struct ftrace_event_call *call = &event_print; 1654 struct ftrace_event_call *call = &event_print;
1595 struct ring_buffer_event *event; 1655 struct ring_buffer_event *event;
1596 struct ring_buffer *buffer; 1656 struct ring_buffer *buffer;
1597 struct trace_array_cpu *data; 1657 int len = 0, size, pc;
1598 int cpu, len = 0, size, pc;
1599 struct print_entry *entry; 1658 struct print_entry *entry;
1600 unsigned long irq_flags; 1659 unsigned long flags;
1601 int disable; 1660 char *tbuffer;
1602 1661
1603 if (tracing_disabled || tracing_selftest_running) 1662 if (tracing_disabled || tracing_selftest_running)
1604 return 0; 1663 return 0;
1605 1664
1665 /* Don't pollute graph traces with trace_vprintk internals */
1666 pause_graph_tracing();
1667
1606 pc = preempt_count(); 1668 pc = preempt_count();
1607 preempt_disable_notrace(); 1669 preempt_disable_notrace();
1608 cpu = raw_smp_processor_id();
1609 data = tr->data[cpu];
1610 1670
1611 disable = atomic_inc_return(&data->disabled); 1671
1612 if (unlikely(disable != 1)) 1672 tbuffer = get_trace_buf();
1673 if (!tbuffer) {
1674 len = 0;
1613 goto out; 1675 goto out;
1676 }
1614 1677
1615 pause_graph_tracing(); 1678 len = vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
1616 raw_local_irq_save(irq_flags); 1679 if (len > TRACE_BUF_SIZE)
1617 arch_spin_lock(&trace_buf_lock); 1680 goto out;
1618 len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
1619 1681
1682 local_save_flags(flags);
1620 size = sizeof(*entry) + len + 1; 1683 size = sizeof(*entry) + len + 1;
1621 buffer = tr->buffer; 1684 buffer = tr->buffer;
1622 event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, 1685 event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
1623 irq_flags, pc); 1686 flags, pc);
1624 if (!event) 1687 if (!event)
1625 goto out_unlock; 1688 goto out;
1626 entry = ring_buffer_event_data(event); 1689 entry = ring_buffer_event_data(event);
1627 entry->ip = ip; 1690 entry->ip = ip;
1628 1691
1629 memcpy(&entry->buf, trace_buf, len); 1692 memcpy(&entry->buf, tbuffer, len);
1630 entry->buf[len] = '\0'; 1693 entry->buf[len] = '\0';
1631 if (!filter_check_discard(call, entry, buffer, event)) { 1694 if (!filter_check_discard(call, entry, buffer, event)) {
1632 ring_buffer_unlock_commit(buffer, event); 1695 ring_buffer_unlock_commit(buffer, event);
1633 ftrace_trace_stack(buffer, irq_flags, 6, pc); 1696 ftrace_trace_stack(buffer, flags, 6, pc);
1634 } 1697 }
1635
1636 out_unlock:
1637 arch_spin_unlock(&trace_buf_lock);
1638 raw_local_irq_restore(irq_flags);
1639 unpause_graph_tracing();
1640 out: 1698 out:
1641 atomic_dec_return(&data->disabled);
1642 preempt_enable_notrace(); 1699 preempt_enable_notrace();
1700 unpause_graph_tracing();
1643 1701
1644 return len; 1702 return len;
1645} 1703}
@@ -1652,14 +1710,9 @@ EXPORT_SYMBOL_GPL(trace_vprintk);
1652 1710
1653static void trace_iterator_increment(struct trace_iterator *iter) 1711static void trace_iterator_increment(struct trace_iterator *iter)
1654{ 1712{
1655 /* Don't allow ftrace to trace into the ring buffers */
1656 ftrace_disable_cpu();
1657
1658 iter->idx++; 1713 iter->idx++;
1659 if (iter->buffer_iter[iter->cpu]) 1714 if (iter->buffer_iter[iter->cpu])
1660 ring_buffer_read(iter->buffer_iter[iter->cpu], NULL); 1715 ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
1661
1662 ftrace_enable_cpu();
1663} 1716}
1664 1717
1665static struct trace_entry * 1718static struct trace_entry *
@@ -1669,17 +1722,12 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
1669 struct ring_buffer_event *event; 1722 struct ring_buffer_event *event;
1670 struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu]; 1723 struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu];
1671 1724
1672 /* Don't allow ftrace to trace into the ring buffers */
1673 ftrace_disable_cpu();
1674
1675 if (buf_iter) 1725 if (buf_iter)
1676 event = ring_buffer_iter_peek(buf_iter, ts); 1726 event = ring_buffer_iter_peek(buf_iter, ts);
1677 else 1727 else
1678 event = ring_buffer_peek(iter->tr->buffer, cpu, ts, 1728 event = ring_buffer_peek(iter->tr->buffer, cpu, ts,
1679 lost_events); 1729 lost_events);
1680 1730
1681 ftrace_enable_cpu();
1682
1683 if (event) { 1731 if (event) {
1684 iter->ent_size = ring_buffer_event_length(event); 1732 iter->ent_size = ring_buffer_event_length(event);
1685 return ring_buffer_event_data(event); 1733 return ring_buffer_event_data(event);
@@ -1769,11 +1817,8 @@ void *trace_find_next_entry_inc(struct trace_iterator *iter)
1769 1817
1770static void trace_consume(struct trace_iterator *iter) 1818static void trace_consume(struct trace_iterator *iter)
1771{ 1819{
1772 /* Don't allow ftrace to trace into the ring buffers */
1773 ftrace_disable_cpu();
1774 ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts, 1820 ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts,
1775 &iter->lost_events); 1821 &iter->lost_events);
1776 ftrace_enable_cpu();
1777} 1822}
1778 1823
1779static void *s_next(struct seq_file *m, void *v, loff_t *pos) 1824static void *s_next(struct seq_file *m, void *v, loff_t *pos)
@@ -1862,16 +1907,12 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1862 iter->cpu = 0; 1907 iter->cpu = 0;
1863 iter->idx = -1; 1908 iter->idx = -1;
1864 1909
1865 ftrace_disable_cpu();
1866
1867 if (cpu_file == TRACE_PIPE_ALL_CPU) { 1910 if (cpu_file == TRACE_PIPE_ALL_CPU) {
1868 for_each_tracing_cpu(cpu) 1911 for_each_tracing_cpu(cpu)
1869 tracing_iter_reset(iter, cpu); 1912 tracing_iter_reset(iter, cpu);
1870 } else 1913 } else
1871 tracing_iter_reset(iter, cpu_file); 1914 tracing_iter_reset(iter, cpu_file);
1872 1915
1873 ftrace_enable_cpu();
1874
1875 iter->leftover = 0; 1916 iter->leftover = 0;
1876 for (p = iter; p && l < *pos; p = s_next(m, p, &l)) 1917 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
1877 ; 1918 ;
@@ -2332,15 +2373,13 @@ static struct trace_iterator *
2332__tracing_open(struct inode *inode, struct file *file) 2373__tracing_open(struct inode *inode, struct file *file)
2333{ 2374{
2334 long cpu_file = (long) inode->i_private; 2375 long cpu_file = (long) inode->i_private;
2335 void *fail_ret = ERR_PTR(-ENOMEM);
2336 struct trace_iterator *iter; 2376 struct trace_iterator *iter;
2337 struct seq_file *m; 2377 int cpu;
2338 int cpu, ret;
2339 2378
2340 if (tracing_disabled) 2379 if (tracing_disabled)
2341 return ERR_PTR(-ENODEV); 2380 return ERR_PTR(-ENODEV);
2342 2381
2343 iter = kzalloc(sizeof(*iter), GFP_KERNEL); 2382 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
2344 if (!iter) 2383 if (!iter)
2345 return ERR_PTR(-ENOMEM); 2384 return ERR_PTR(-ENOMEM);
2346 2385
@@ -2397,32 +2436,15 @@ __tracing_open(struct inode *inode, struct file *file)
2397 tracing_iter_reset(iter, cpu); 2436 tracing_iter_reset(iter, cpu);
2398 } 2437 }
2399 2438
2400 ret = seq_open(file, &tracer_seq_ops);
2401 if (ret < 0) {
2402 fail_ret = ERR_PTR(ret);
2403 goto fail_buffer;
2404 }
2405
2406 m = file->private_data;
2407 m->private = iter;
2408
2409 mutex_unlock(&trace_types_lock); 2439 mutex_unlock(&trace_types_lock);
2410 2440
2411 return iter; 2441 return iter;
2412 2442
2413 fail_buffer:
2414 for_each_tracing_cpu(cpu) {
2415 if (iter->buffer_iter[cpu])
2416 ring_buffer_read_finish(iter->buffer_iter[cpu]);
2417 }
2418 free_cpumask_var(iter->started);
2419 tracing_start();
2420 fail: 2443 fail:
2421 mutex_unlock(&trace_types_lock); 2444 mutex_unlock(&trace_types_lock);
2422 kfree(iter->trace); 2445 kfree(iter->trace);
2423 kfree(iter); 2446 seq_release_private(inode, file);
2424 2447 return ERR_PTR(-ENOMEM);
2425 return fail_ret;
2426} 2448}
2427 2449
2428int tracing_open_generic(struct inode *inode, struct file *filp) 2450int tracing_open_generic(struct inode *inode, struct file *filp)
@@ -2458,11 +2480,10 @@ static int tracing_release(struct inode *inode, struct file *file)
2458 tracing_start(); 2480 tracing_start();
2459 mutex_unlock(&trace_types_lock); 2481 mutex_unlock(&trace_types_lock);
2460 2482
2461 seq_release(inode, file);
2462 mutex_destroy(&iter->mutex); 2483 mutex_destroy(&iter->mutex);
2463 free_cpumask_var(iter->started); 2484 free_cpumask_var(iter->started);
2464 kfree(iter->trace); 2485 kfree(iter->trace);
2465 kfree(iter); 2486 seq_release_private(inode, file);
2466 return 0; 2487 return 0;
2467} 2488}
2468 2489
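The two hunks above replace the open-coded kzalloc() + seq_open() sequence in __tracing_open() (and the matching kfree() + seq_release() in tracing_release()) with __seq_open_private() and seq_release_private(), which allocate and free the iterator together with the seq_file and let the error path funnel through a single seq_release_private() call. As a minimal sketch of that pairing, here is an invented single-record "demo" file; the seq_file helpers are the real API, but none of the demo_* names come from this patch:

/* Minimal sketch (not from this patch): a single-record seq_file that pairs
 * __seq_open_private() with seq_release_private() the same way
 * __tracing_open() and tracing_release() do after this change.
 */
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/seq_file.h>

struct demo_iter {
        int pos;                        /* lives in seq_file->private */
};

static void *demo_start(struct seq_file *m, loff_t *pos)
{
        return *pos == 0 ? m->private : NULL;   /* one record only */
}

static void *demo_next(struct seq_file *m, void *v, loff_t *pos)
{
        (*pos)++;
        return NULL;
}

static void demo_stop(struct seq_file *m, void *v)
{
}

static int demo_show(struct seq_file *m, void *v)
{
        struct demo_iter *iter = v;

        seq_printf(m, "pos=%d\n", iter->pos);
        return 0;
}

static const struct seq_operations demo_seq_ops = {
        .start  = demo_start,
        .next   = demo_next,
        .stop   = demo_stop,
        .show   = demo_show,
};

static int demo_open(struct inode *inode, struct file *file)
{
        struct demo_iter *iter;

        /* Allocates the iterator, zeroes it and wires it to
         * seq_file->private in one call, so any later failure can be
         * unwound with plain seq_release_private(). */
        iter = __seq_open_private(file, &demo_seq_ops, sizeof(*iter));
        if (!iter)
                return -ENOMEM;
        return 0;
}

/* Would be handed to e.g. debugfs_create_file() from a module. */
static const struct file_operations demo_fops = {
        .owner          = THIS_MODULE,
        .open           = demo_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release_private, /* frees iter and the seq_file */
};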
@@ -2974,7 +2995,14 @@ int tracer_init(struct tracer *t, struct trace_array *tr)
2974 return t->init(tr); 2995 return t->init(tr);
2975} 2996}
2976 2997
2977static int __tracing_resize_ring_buffer(unsigned long size) 2998static void set_buffer_entries(struct trace_array *tr, unsigned long val)
2999{
3000 int cpu;
3001 for_each_tracing_cpu(cpu)
3002 tr->data[cpu]->entries = val;
3003}
3004
3005static int __tracing_resize_ring_buffer(unsigned long size, int cpu)
2978{ 3006{
2979 int ret; 3007 int ret;
2980 3008
@@ -2985,19 +3013,32 @@ static int __tracing_resize_ring_buffer(unsigned long size)
2985 */ 3013 */
2986 ring_buffer_expanded = 1; 3014 ring_buffer_expanded = 1;
2987 3015
2988 ret = ring_buffer_resize(global_trace.buffer, size); 3016 ret = ring_buffer_resize(global_trace.buffer, size, cpu);
2989 if (ret < 0) 3017 if (ret < 0)
2990 return ret; 3018 return ret;
2991 3019
2992 if (!current_trace->use_max_tr) 3020 if (!current_trace->use_max_tr)
2993 goto out; 3021 goto out;
2994 3022
2995 ret = ring_buffer_resize(max_tr.buffer, size); 3023 ret = ring_buffer_resize(max_tr.buffer, size, cpu);
2996 if (ret < 0) { 3024 if (ret < 0) {
2997 int r; 3025 int r = 0;
3026
3027 if (cpu == RING_BUFFER_ALL_CPUS) {
3028 int i;
3029 for_each_tracing_cpu(i) {
3030 r = ring_buffer_resize(global_trace.buffer,
3031 global_trace.data[i]->entries,
3032 i);
3033 if (r < 0)
3034 break;
3035 }
3036 } else {
3037 r = ring_buffer_resize(global_trace.buffer,
3038 global_trace.data[cpu]->entries,
3039 cpu);
3040 }
2998 3041
2999 r = ring_buffer_resize(global_trace.buffer,
3000 global_trace.entries);
3001 if (r < 0) { 3042 if (r < 0) {
3002 /* 3043 /*
3003 * AARGH! We are left with different 3044 * AARGH! We are left with different
@@ -3019,14 +3060,21 @@ static int __tracing_resize_ring_buffer(unsigned long size)
3019 return ret; 3060 return ret;
3020 } 3061 }
3021 3062
3022 max_tr.entries = size; 3063 if (cpu == RING_BUFFER_ALL_CPUS)
3064 set_buffer_entries(&max_tr, size);
3065 else
3066 max_tr.data[cpu]->entries = size;
3067
3023 out: 3068 out:
3024 global_trace.entries = size; 3069 if (cpu == RING_BUFFER_ALL_CPUS)
3070 set_buffer_entries(&global_trace, size);
3071 else
3072 global_trace.data[cpu]->entries = size;
3025 3073
3026 return ret; 3074 return ret;
3027} 3075}
3028 3076
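ring_buffer_resize() now takes a cpu argument (a CPU id or RING_BUFFER_ALL_CPUS), so the error path above has to undo a partial resize CPU by CPU instead of restoring one global size. The standalone program below is only a model of that control flow, under simplified assumptions: buffer_resize() is an invented stand-in for ring_buffer_resize(), a single entries[] array plays the role of trace_array_cpu->entries, and the per-CPU bookkeeping is deliberately updated last, which is what the rollback relies on.

#include <stdio.h>

#define NR_CPUS   4
#define ALL_CPUS  (-1)

/* Per-CPU sizes in KB; only updated once both resizes have succeeded. */
static long entries[NR_CPUS] = { 1408, 1408, 1408, 1408 };

static int buffer_resize(const char *buf, int cpu, long size_kb)
{
        /* Toy stand-in for ring_buffer_resize(); always succeeds here. */
        printf("%-6s cpu%d -> %ld KB\n", buf, cpu, size_kb);
        return 0;
}

static int tracing_resize(long size_kb, int cpu)
{
        int i, ret;

        /* Resize the main buffer first (one CPU or all of them). */
        ret = buffer_resize("global", cpu, size_kb);
        if (ret < 0)
                return ret;

        /* Then the snapshot buffer.  If this fails, put the main buffer
         * back to the per-CPU sizes still recorded in entries[]. */
        ret = buffer_resize("max", cpu, size_kb);
        if (ret < 0) {
                if (cpu == ALL_CPUS) {
                        for (i = 0; i < NR_CPUS; i++)
                                buffer_resize("global", i, entries[i]);
                } else {
                        buffer_resize("global", cpu, entries[cpu]);
                }
                return ret;
        }

        /* Only now is the bookkeeping updated. */
        if (cpu == ALL_CPUS) {
                for (i = 0; i < NR_CPUS; i++)
                        entries[i] = size_kb;
        } else {
                entries[cpu] = size_kb;
        }
        return 0;
}

int main(void)
{
        tracing_resize(4096, ALL_CPUS); /* resize every CPU */
        tracing_resize(8192, 2);        /* resize just CPU 2 */
        return 0;
}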
3029static ssize_t tracing_resize_ring_buffer(unsigned long size) 3077static ssize_t tracing_resize_ring_buffer(unsigned long size, int cpu_id)
3030{ 3078{
3031 int cpu, ret = size; 3079 int cpu, ret = size;
3032 3080
@@ -3042,12 +3090,19 @@ static ssize_t tracing_resize_ring_buffer(unsigned long size)
3042 atomic_inc(&max_tr.data[cpu]->disabled); 3090 atomic_inc(&max_tr.data[cpu]->disabled);
3043 } 3091 }
3044 3092
3045 if (size != global_trace.entries) 3093 if (cpu_id != RING_BUFFER_ALL_CPUS) {
3046 ret = __tracing_resize_ring_buffer(size); 3094 /* make sure this cpu is enabled in the mask */
3095 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
3096 ret = -EINVAL;
3097 goto out;
3098 }
3099 }
3047 3100
3101 ret = __tracing_resize_ring_buffer(size, cpu_id);
3048 if (ret < 0) 3102 if (ret < 0)
3049 ret = -ENOMEM; 3103 ret = -ENOMEM;
3050 3104
3105out:
3051 for_each_tracing_cpu(cpu) { 3106 for_each_tracing_cpu(cpu) {
3052 if (global_trace.data[cpu]) 3107 if (global_trace.data[cpu])
3053 atomic_dec(&global_trace.data[cpu]->disabled); 3108 atomic_dec(&global_trace.data[cpu]->disabled);
@@ -3078,7 +3133,8 @@ int tracing_update_buffers(void)
3078 3133
3079 mutex_lock(&trace_types_lock); 3134 mutex_lock(&trace_types_lock);
3080 if (!ring_buffer_expanded) 3135 if (!ring_buffer_expanded)
3081 ret = __tracing_resize_ring_buffer(trace_buf_size); 3136 ret = __tracing_resize_ring_buffer(trace_buf_size,
3137 RING_BUFFER_ALL_CPUS);
3082 mutex_unlock(&trace_types_lock); 3138 mutex_unlock(&trace_types_lock);
3083 3139
3084 return ret; 3140 return ret;
@@ -3102,7 +3158,8 @@ static int tracing_set_tracer(const char *buf)
3102 mutex_lock(&trace_types_lock); 3158 mutex_lock(&trace_types_lock);
3103 3159
3104 if (!ring_buffer_expanded) { 3160 if (!ring_buffer_expanded) {
3105 ret = __tracing_resize_ring_buffer(trace_buf_size); 3161 ret = __tracing_resize_ring_buffer(trace_buf_size,
3162 RING_BUFFER_ALL_CPUS);
3106 if (ret < 0) 3163 if (ret < 0)
3107 goto out; 3164 goto out;
3108 ret = 0; 3165 ret = 0;
@@ -3128,8 +3185,8 @@ static int tracing_set_tracer(const char *buf)
3128 * The max_tr ring buffer has some state (e.g. ring->clock) and 3185 * The max_tr ring buffer has some state (e.g. ring->clock) and
3129 * we want to preserve it. 3186 * we want to preserve it.
3130 */ 3187 */
3131 ring_buffer_resize(max_tr.buffer, 1); 3188 ring_buffer_resize(max_tr.buffer, 1, RING_BUFFER_ALL_CPUS);
3132 max_tr.entries = 1; 3189 set_buffer_entries(&max_tr, 1);
3133 } 3190 }
3134 destroy_trace_option_files(topts); 3191 destroy_trace_option_files(topts);
3135 3192
@@ -3137,10 +3194,17 @@ static int tracing_set_tracer(const char *buf)
3137 3194
3138 topts = create_trace_option_files(current_trace); 3195 topts = create_trace_option_files(current_trace);
3139 if (current_trace->use_max_tr) { 3196 if (current_trace->use_max_tr) {
3140 ret = ring_buffer_resize(max_tr.buffer, global_trace.entries); 3197 int cpu;
3141 if (ret < 0) 3198 /* we need to make per cpu buffer sizes equivalent */
3142 goto out; 3199 for_each_tracing_cpu(cpu) {
3143 max_tr.entries = global_trace.entries; 3200 ret = ring_buffer_resize(max_tr.buffer,
3201 global_trace.data[cpu]->entries,
3202 cpu);
3203 if (ret < 0)
3204 goto out;
3205 max_tr.data[cpu]->entries =
3206 global_trace.data[cpu]->entries;
3207 }
3144 } 3208 }
3145 3209
3146 if (t->init) { 3210 if (t->init) {
@@ -3642,30 +3706,82 @@ out_err:
3642 goto out; 3706 goto out;
3643} 3707}
3644 3708
3709struct ftrace_entries_info {
3710 struct trace_array *tr;
3711 int cpu;
3712};
3713
3714static int tracing_entries_open(struct inode *inode, struct file *filp)
3715{
3716 struct ftrace_entries_info *info;
3717
3718 if (tracing_disabled)
3719 return -ENODEV;
3720
3721 info = kzalloc(sizeof(*info), GFP_KERNEL);
3722 if (!info)
3723 return -ENOMEM;
3724
3725 info->tr = &global_trace;
3726 info->cpu = (unsigned long)inode->i_private;
3727
3728 filp->private_data = info;
3729
3730 return 0;
3731}
3732
3645static ssize_t 3733static ssize_t
3646tracing_entries_read(struct file *filp, char __user *ubuf, 3734tracing_entries_read(struct file *filp, char __user *ubuf,
3647 size_t cnt, loff_t *ppos) 3735 size_t cnt, loff_t *ppos)
3648{ 3736{
3649 struct trace_array *tr = filp->private_data; 3737 struct ftrace_entries_info *info = filp->private_data;
3650 char buf[96]; 3738 struct trace_array *tr = info->tr;
3651 int r; 3739 char buf[64];
3740 int r = 0;
3741 ssize_t ret;
3652 3742
3653 mutex_lock(&trace_types_lock); 3743 mutex_lock(&trace_types_lock);
3654 if (!ring_buffer_expanded) 3744
3655 r = sprintf(buf, "%lu (expanded: %lu)\n", 3745 if (info->cpu == RING_BUFFER_ALL_CPUS) {
3656 tr->entries >> 10, 3746 int cpu, buf_size_same;
3657 trace_buf_size >> 10); 3747 unsigned long size;
3658 else 3748
3659 r = sprintf(buf, "%lu\n", tr->entries >> 10); 3749 size = 0;
3750 buf_size_same = 1;
3751 /* check if all cpu sizes are same */
3752 for_each_tracing_cpu(cpu) {
3753 /* fill in the size from first enabled cpu */
3754 if (size == 0)
3755 size = tr->data[cpu]->entries;
3756 if (size != tr->data[cpu]->entries) {
3757 buf_size_same = 0;
3758 break;
3759 }
3760 }
3761
3762 if (buf_size_same) {
3763 if (!ring_buffer_expanded)
3764 r = sprintf(buf, "%lu (expanded: %lu)\n",
3765 size >> 10,
3766 trace_buf_size >> 10);
3767 else
3768 r = sprintf(buf, "%lu\n", size >> 10);
3769 } else
3770 r = sprintf(buf, "X\n");
3771 } else
3772 r = sprintf(buf, "%lu\n", tr->data[info->cpu]->entries >> 10);
3773
3660 mutex_unlock(&trace_types_lock); 3774 mutex_unlock(&trace_types_lock);
3661 3775
3662 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 3776 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3777 return ret;
3663} 3778}
3664 3779
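The read path above prints a single number only while every CPU's buffer is the same size, and "X" once they diverge. The short standalone program below reproduces just that comparison (sizes are kept in bytes and reported in KB, as in tracing_entries_read()); the sample sizes are made up.

#include <stdio.h>

static void print_size(const unsigned long *entries, int nr_cpus)
{
        unsigned long size = 0;
        int cpu, same = 1;

        for (cpu = 0; cpu < nr_cpus; cpu++) {
                if (size == 0)                  /* seed from the first CPU */
                        size = entries[cpu];
                if (entries[cpu] != size) {     /* any mismatch -> "X" */
                        same = 0;
                        break;
                }
        }

        if (same)
                printf("%lu\n", size >> 10);    /* bytes reported as KB */
        else
                printf("X\n");
}

int main(void)
{
        unsigned long equal[]  = { 1441792, 1441792, 1441792, 1441792 };
        unsigned long differ[] = { 1441792, 4194304, 1441792, 1441792 };

        print_size(equal, 4);   /* prints 1408 */
        print_size(differ, 4);  /* prints X    */
        return 0;
}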
3665static ssize_t 3780static ssize_t
3666tracing_entries_write(struct file *filp, const char __user *ubuf, 3781tracing_entries_write(struct file *filp, const char __user *ubuf,
3667 size_t cnt, loff_t *ppos) 3782 size_t cnt, loff_t *ppos)
3668{ 3783{
3784 struct ftrace_entries_info *info = filp->private_data;
3669 unsigned long val; 3785 unsigned long val;
3670 int ret; 3786 int ret;
3671 3787
@@ -3680,7 +3796,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
3680 /* value is in KB */ 3796 /* value is in KB */
3681 val <<= 10; 3797 val <<= 10;
3682 3798
3683 ret = tracing_resize_ring_buffer(val); 3799 ret = tracing_resize_ring_buffer(val, info->cpu);
3684 if (ret < 0) 3800 if (ret < 0)
3685 return ret; 3801 return ret;
3686 3802
@@ -3689,6 +3805,16 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
3689 return cnt; 3805 return cnt;
3690} 3806}
3691 3807
3808static int
3809tracing_entries_release(struct inode *inode, struct file *filp)
3810{
3811 struct ftrace_entries_info *info = filp->private_data;
3812
3813 kfree(info);
3814
3815 return 0;
3816}
3817
3692static ssize_t 3818static ssize_t
3693tracing_total_entries_read(struct file *filp, char __user *ubuf, 3819tracing_total_entries_read(struct file *filp, char __user *ubuf,
3694 size_t cnt, loff_t *ppos) 3820 size_t cnt, loff_t *ppos)
@@ -3700,7 +3826,7 @@ tracing_total_entries_read(struct file *filp, char __user *ubuf,
3700 3826
3701 mutex_lock(&trace_types_lock); 3827 mutex_lock(&trace_types_lock);
3702 for_each_tracing_cpu(cpu) { 3828 for_each_tracing_cpu(cpu) {
3703 size += tr->entries >> 10; 3829 size += tr->data[cpu]->entries >> 10;
3704 if (!ring_buffer_expanded) 3830 if (!ring_buffer_expanded)
3705 expanded_size += trace_buf_size >> 10; 3831 expanded_size += trace_buf_size >> 10;
3706 } 3832 }
@@ -3734,7 +3860,7 @@ tracing_free_buffer_release(struct inode *inode, struct file *filp)
3734 if (trace_flags & TRACE_ITER_STOP_ON_FREE) 3860 if (trace_flags & TRACE_ITER_STOP_ON_FREE)
3735 tracing_off(); 3861 tracing_off();
3736 /* resize the ring buffer to 0 */ 3862 /* resize the ring buffer to 0 */
3737 tracing_resize_ring_buffer(0); 3863 tracing_resize_ring_buffer(0, RING_BUFFER_ALL_CPUS);
3738 3864
3739 return 0; 3865 return 0;
3740} 3866}
@@ -3933,9 +4059,10 @@ static const struct file_operations tracing_pipe_fops = {
3933}; 4059};
3934 4060
3935static const struct file_operations tracing_entries_fops = { 4061static const struct file_operations tracing_entries_fops = {
3936 .open = tracing_open_generic, 4062 .open = tracing_entries_open,
3937 .read = tracing_entries_read, 4063 .read = tracing_entries_read,
3938 .write = tracing_entries_write, 4064 .write = tracing_entries_write,
4065 .release = tracing_entries_release,
3939 .llseek = generic_file_llseek, 4066 .llseek = generic_file_llseek,
3940}; 4067};
3941 4068
@@ -4387,6 +4514,9 @@ static void tracing_init_debugfs_percpu(long cpu)
4387 4514
4388 trace_create_file("stats", 0444, d_cpu, 4515 trace_create_file("stats", 0444, d_cpu,
4389 (void *) cpu, &tracing_stats_fops); 4516 (void *) cpu, &tracing_stats_fops);
4517
4518 trace_create_file("buffer_size_kb", 0444, d_cpu,
4519 (void *) cpu, &tracing_entries_fops);
4390} 4520}
4391 4521
4392#ifdef CONFIG_FTRACE_SELFTEST 4522#ifdef CONFIG_FTRACE_SELFTEST
@@ -4716,7 +4846,7 @@ static __init int tracer_init_debugfs(void)
4716 (void *) TRACE_PIPE_ALL_CPU, &tracing_pipe_fops); 4846 (void *) TRACE_PIPE_ALL_CPU, &tracing_pipe_fops);
4717 4847
4718 trace_create_file("buffer_size_kb", 0644, d_tracer, 4848 trace_create_file("buffer_size_kb", 0644, d_tracer,
4719 &global_trace, &tracing_entries_fops); 4849 (void *) RING_BUFFER_ALL_CPUS, &tracing_entries_fops);
4720 4850
4721 trace_create_file("buffer_total_size_kb", 0444, d_tracer, 4851 trace_create_file("buffer_total_size_kb", 0444, d_tracer,
4722 &global_trace, &tracing_total_entries_fops); 4852 &global_trace, &tracing_total_entries_fops);
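With the per-CPU files created in tracing_init_debugfs_percpu() and the top-level buffer_size_kb now keyed by RING_BUFFER_ALL_CPUS, the sizes can be inspected from userspace. The read-only demo below is an illustration, assuming debugfs is mounted at /sys/kernel/debug and the process may read it; if the per-CPU sizes have been changed independently, the top-level file reports "X" as implemented in tracing_entries_read().

#include <stdio.h>

int main(void)
{
        char path[128], line[64];
        FILE *f;
        int cpu;

        /* Top-level file: one value if all CPUs agree, "X" otherwise. */
        f = fopen("/sys/kernel/debug/tracing/buffer_size_kb", "r");
        if (f) {
                if (fgets(line, sizeof(line), f))
                        printf("all cpus: %s", line);
                fclose(f);
        }

        /* Per-CPU files added by this patch; stop at the first missing CPU. */
        for (cpu = 0; cpu < 64; cpu++) {
                snprintf(path, sizeof(path),
                         "/sys/kernel/debug/tracing/per_cpu/cpu%d/buffer_size_kb",
                         cpu);
                f = fopen(path, "r");
                if (!f)
                        break;
                if (fgets(line, sizeof(line), f))
                        printf("cpu%d: %s", cpu, line);
                fclose(f);
        }
        return 0;
}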
@@ -4955,6 +5085,10 @@ __init static int tracer_alloc_buffers(void)
4955 if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL)) 5085 if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
4956 goto out_free_buffer_mask; 5086 goto out_free_buffer_mask;
4957 5087
5088 /* Only allocate trace_printk buffers if a trace_printk exists */
5089 if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
5090 trace_printk_init_buffers();
5091
4958 /* To save memory, keep the ring buffer size to its minimum */ 5092 /* To save memory, keep the ring buffer size to its minimum */
4959 if (ring_buffer_expanded) 5093 if (ring_buffer_expanded)
4960 ring_buf_size = trace_buf_size; 5094 ring_buf_size = trace_buf_size;
@@ -4973,7 +5107,6 @@ __init static int tracer_alloc_buffers(void)
4973 WARN_ON(1); 5107 WARN_ON(1);
4974 goto out_free_cpumask; 5108 goto out_free_cpumask;
4975 } 5109 }
4976 global_trace.entries = ring_buffer_size(global_trace.buffer);
4977 if (global_trace.buffer_disabled) 5110 if (global_trace.buffer_disabled)
4978 tracing_off(); 5111 tracing_off();
4979 5112
@@ -4986,7 +5119,6 @@ __init static int tracer_alloc_buffers(void)
4986 ring_buffer_free(global_trace.buffer); 5119 ring_buffer_free(global_trace.buffer);
4987 goto out_free_cpumask; 5120 goto out_free_cpumask;
4988 } 5121 }
4989 max_tr.entries = 1;
4990#endif 5122#endif
4991 5123
4992 /* Allocate the first page for all buffers */ 5124 /* Allocate the first page for all buffers */
@@ -4995,6 +5127,11 @@ __init static int tracer_alloc_buffers(void)
4995 max_tr.data[i] = &per_cpu(max_tr_data, i); 5127 max_tr.data[i] = &per_cpu(max_tr_data, i);
4996 } 5128 }
4997 5129
5130 set_buffer_entries(&global_trace, ring_buf_size);
5131#ifdef CONFIG_TRACER_MAX_TRACE
5132 set_buffer_entries(&max_tr, 1);
5133#endif
5134
4998 trace_init_cmdlines(); 5135 trace_init_cmdlines();
4999 5136
5000 register_tracer(&nop_trace); 5137 register_tracer(&nop_trace);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 1bcdbec95a11..a7d28e033a96 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -136,6 +136,7 @@ struct trace_array_cpu {
136 atomic_t disabled; 136 atomic_t disabled;
137 void *buffer_page; /* ring buffer spare */ 137 void *buffer_page; /* ring buffer spare */
138 138
139 unsigned long entries;
139 unsigned long saved_latency; 140 unsigned long saved_latency;
140 unsigned long critical_start; 141 unsigned long critical_start;
141 unsigned long critical_end; 142 unsigned long critical_end;
@@ -157,7 +158,6 @@ struct trace_array_cpu {
157 */ 158 */
158struct trace_array { 159struct trace_array {
159 struct ring_buffer *buffer; 160 struct ring_buffer *buffer;
160 unsigned long entries;
161 int cpu; 161 int cpu;
162 int buffer_disabled; 162 int buffer_disabled;
163 cycle_t time_start; 163 cycle_t time_start;
@@ -831,6 +831,8 @@ extern struct list_head ftrace_events;
831extern const char *__start___trace_bprintk_fmt[]; 831extern const char *__start___trace_bprintk_fmt[];
832extern const char *__stop___trace_bprintk_fmt[]; 832extern const char *__stop___trace_bprintk_fmt[];
833 833
834void trace_printk_init_buffers(void);
835
834#undef FTRACE_ENTRY 836#undef FTRACE_ENTRY
835#define FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter) \ 837#define FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter) \
836 extern struct ftrace_event_call \ 838 extern struct ftrace_event_call \
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index 6fd4ffd042f9..a9077c1b4ad3 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -51,6 +51,10 @@ void hold_module_trace_bprintk_format(const char **start, const char **end)
51 const char **iter; 51 const char **iter;
52 char *fmt; 52 char *fmt;
53 53
54 /* allocate the trace_printk per cpu buffers */
55 if (start != end)
56 trace_printk_init_buffers();
57
54 mutex_lock(&btrace_mutex); 58 mutex_lock(&btrace_mutex);
55 for (iter = start; iter < end; iter++) { 59 for (iter = start; iter < end; iter++) {
56 struct trace_bprintk_fmt *tb_fmt = lookup_format(*iter); 60 struct trace_bprintk_fmt *tb_fmt = lookup_format(*iter);
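Both checks added in this patch (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt in tracer_alloc_buffers() and start != end in hold_module_trace_bprintk_format()) boil down to "allocate the trace_printk per-cpu buffers only if at least one format string was actually placed in the dedicated section". The userspace sketch below shows the same start/stop-symbol idiom with an invented demo_fmt section and demo_* names; it is not kernel code, it assumes GCC or Clang on an ELF target, and the symbols are declared weak so the program still links if the section ends up empty.

#include <stdio.h>

/* The linker defines __start_<sec>/__stop_<sec> for sections whose names are
 * valid C identifiers; declared weak so an empty build still links. */
extern const char *__start_demo_fmt[] __attribute__((weak));
extern const char *__stop_demo_fmt[] __attribute__((weak));

/* Record the format string in the demo_fmt section, then print it. */
#define DEMO_PRINTK(fmt)                                                \
        ({                                                              \
                static const char *__fmt                                \
                        __attribute__((section("demo_fmt"), used)) = fmt; \
                puts(__fmt);                                            \
        })

static void demo_init_buffers(void)
{
        printf("allocating buffers for %td format string(s)\n",
               __stop_demo_fmt - __start_demo_fmt);
}

int main(void)
{
        /* Remove this line and demo_init_buffers() is never called. */
        DEMO_PRINTK("hello from the demo_fmt section");

        if (__stop_demo_fmt != __start_demo_fmt)
                demo_init_buffers();
        return 0;
}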