Diffstat (limited to 'kernel')
-rw-r--r--  kernel/exit.c                 5
-rw-r--r--  kernel/hrtimer.c             17
-rw-r--r--  kernel/sched.c               10
-rw-r--r--  kernel/sched_debug.c         41
-rw-r--r--  kernel/sched_fair.c          17
-rw-r--r--  kernel/softirq.c              7
-rw-r--r--  kernel/time/tick-sched.c      4
-rw-r--r--  kernel/trace/ftrace.c         8
-rw-r--r--  kernel/trace/ring_buffer.c  353
9 files changed, 309 insertions, 153 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index 80137a5d9467..ae2b92be5fae 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -141,6 +141,11 @@ static void __exit_signal(struct task_struct *tsk)
 	if (sig) {
 		flush_sigqueue(&sig->shared_pending);
 		taskstats_tgid_free(sig);
+		/*
+		 * Make sure ->signal can't go away under rq->lock,
+		 * see account_group_exec_runtime().
+		 */
+		task_rq_unlock_wait(tsk);
 		__cleanup_signal(sig);
 	}
 }
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 2b465dfde426..95d3949f2ae5 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1209,6 +1209,7 @@ static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base)
 		enum hrtimer_restart (*fn)(struct hrtimer *);
 		struct hrtimer *timer;
 		int restart;
+		int emulate_hardirq_ctx = 0;
 
 		timer = list_entry(cpu_base->cb_pending.next,
 				   struct hrtimer, cb_entry);
@@ -1217,10 +1218,24 @@ static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base)
 		timer_stats_account_hrtimer(timer);
 
 		fn = timer->function;
+		/*
+		 * A timer might have been added to the cb_pending list
+		 * when it was migrated during a cpu-offline operation.
+		 * Emulate hardirq context for such timers.
+		 */
+		if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU ||
+		    timer->cb_mode == HRTIMER_CB_IRQSAFE_UNLOCKED)
+			emulate_hardirq_ctx = 1;
+
 		__remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0);
 		spin_unlock_irq(&cpu_base->lock);
 
-		restart = fn(timer);
+		if (unlikely(emulate_hardirq_ctx)) {
+			local_irq_disable();
+			restart = fn(timer);
+			local_irq_enable();
+		} else
+			restart = fn(timer);
 
 		spin_lock_irq(&cpu_base->lock);
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 57c933ffbee1..50a21f964679 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -399,7 +399,7 @@ struct cfs_rq {
 	 */
 	struct sched_entity *curr, *next, *last;
 
-	unsigned long nr_spread_over;
+	unsigned int nr_spread_over;
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	struct rq *rq;	/* cpu runqueue to which this cfs_rq is attached */
@@ -969,6 +969,14 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
 	}
 }
 
+void task_rq_unlock_wait(struct task_struct *p)
+{
+	struct rq *rq = task_rq(p);
+
+	smp_mb(); /* spin-unlock-wait is not a full memory barrier */
+	spin_unlock_wait(&rq->lock);
+}
+
 static void __task_rq_unlock(struct rq *rq)
 	__releases(rq->lock)
 {
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 5ae17762ec32..48ecc51e7701 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -144,7 +144,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 	last = __pick_last_entity(cfs_rq);
 	if (last)
 		max_vruntime = last->vruntime;
-	min_vruntime = rq->cfs.min_vruntime;
+	min_vruntime = cfs_rq->min_vruntime;
 	rq0_min_vruntime = per_cpu(runqueues, 0).cfs.min_vruntime;
 	spin_unlock_irqrestore(&rq->lock, flags);
 	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "MIN_vruntime",
@@ -161,26 +161,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 			SPLIT_NS(spread0));
 	SEQ_printf(m, "  .%-30s: %ld\n", "nr_running", cfs_rq->nr_running);
 	SEQ_printf(m, "  .%-30s: %ld\n", "load", cfs_rq->load.weight);
-#ifdef CONFIG_SCHEDSTATS
-#define P(n) SEQ_printf(m, "  .%-30s: %d\n", #n, rq->n);
-
-	P(yld_exp_empty);
-	P(yld_act_empty);
-	P(yld_both_empty);
-	P(yld_count);
 
-	P(sched_switch);
-	P(sched_count);
-	P(sched_goidle);
-
-	P(ttwu_count);
-	P(ttwu_local);
-
-	P(bkl_count);
-
-#undef P
-#endif
-	SEQ_printf(m, "  .%-30s: %ld\n", "nr_spread_over",
+	SEQ_printf(m, "  .%-30s: %d\n", "nr_spread_over",
 			cfs_rq->nr_spread_over);
 #ifdef CONFIG_FAIR_GROUP_SCHED
 #ifdef CONFIG_SMP
@@ -260,6 +242,25 @@ static void print_cpu(struct seq_file *m, int cpu)
 #undef P
 #undef PN
 
+#ifdef CONFIG_SCHEDSTATS
+#define P(n) SEQ_printf(m, "  .%-30s: %d\n", #n, rq->n);
+
+	P(yld_exp_empty);
+	P(yld_act_empty);
+	P(yld_both_empty);
+	P(yld_count);
+
+	P(sched_switch);
+	P(sched_count);
+	P(sched_goidle);
+
+	P(ttwu_count);
+	P(ttwu_local);
+
+	P(bkl_count);
+
+#undef P
+#endif
 	print_cfs_stats(m, cpu);
 	print_rt_stats(m, cpu);
 
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 51aa3e102acb..98345e45b059 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -716,6 +716,15 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
 	__enqueue_entity(cfs_rq, se);
 }
 
+static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	if (cfs_rq->last == se)
+		cfs_rq->last = NULL;
+
+	if (cfs_rq->next == se)
+		cfs_rq->next = NULL;
+}
+
 static void
 dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
 {
@@ -738,11 +747,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
 #endif
 	}
 
-	if (cfs_rq->last == se)
-		cfs_rq->last = NULL;
-
-	if (cfs_rq->next == se)
-		cfs_rq->next = NULL;
+	clear_buddies(cfs_rq, se);
 
 	if (se != cfs_rq->curr)
 		__dequeue_entity(cfs_rq, se);
@@ -977,6 +982,8 @@ static void yield_task_fair(struct rq *rq)
 	if (unlikely(cfs_rq->nr_running == 1))
 		return;
 
+	clear_buddies(cfs_rq, se);
+
 	if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) {
 		update_rq_clock(rq);
 		/*
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 7110daeb9a90..e7c69a720d69 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -269,10 +269,11 @@ void irq_enter(void)
 {
 	int cpu = smp_processor_id();
 
-	if (idle_cpu(cpu) && !in_interrupt())
+	if (idle_cpu(cpu) && !in_interrupt()) {
+		__irq_enter();
 		tick_check_idle(cpu);
-
-	__irq_enter();
+	} else
+		__irq_enter();
 }
 
 #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 5bbb1044f847..342fc9ccab46 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -568,6 +568,9 @@ static void tick_nohz_switch_to_nohz(void)
  */
 static void tick_nohz_kick_tick(int cpu)
 {
+#if 0
+	/* Switch back to 2.6.27 behaviour */
+
 	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
 	ktime_t delta, now;
 
@@ -584,6 +587,7 @@ static void tick_nohz_kick_tick(int cpu)
 		return;
 
 	tick_nohz_restart(ts, now);
+#endif
 }
 
 #else
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 4d2e751bfb11..25b803559f17 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -212,7 +212,6 @@ enum {
 };
 
 static int ftrace_filtered;
-static int tracing_on;
 
 static LIST_HEAD(ftrace_new_addrs);
 
@@ -533,13 +532,10 @@ static int __ftrace_modify_code(void *data)
 {
 	int *command = data;
 
-	if (*command & FTRACE_ENABLE_CALLS) {
+	if (*command & FTRACE_ENABLE_CALLS)
 		ftrace_replace_code(1);
-		tracing_on = 1;
-	} else if (*command & FTRACE_DISABLE_CALLS) {
+	else if (*command & FTRACE_DISABLE_CALLS)
 		ftrace_replace_code(0);
-		tracing_on = 0;
-	}
 
 	if (*command & FTRACE_UPDATE_TRACE_FUNC)
 		ftrace_update_ftrace_func(ftrace_trace_function);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index a6b8f9d7ac96..c04c433fbc59 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -18,6 +18,35 @@
 
 #include "trace.h"
 
+/* Global flag to disable all recording to ring buffers */
+static int ring_buffers_off __read_mostly;
+
+/**
+ * tracing_on - enable all tracing buffers
+ *
+ * This function enables all tracing buffers that may have been
+ * disabled with tracing_off.
+ */
+void tracing_on(void)
+{
+	ring_buffers_off = 0;
+}
+
+/**
+ * tracing_off - turn off all tracing buffers
+ *
+ * This function stops all tracing buffers from recording data.
+ * It does not disable any overhead the tracers themselves may
+ * be causing. This function simply causes all recording to
+ * the ring buffers to fail.
+ */
+void tracing_off(void)
+{
+	ring_buffers_off = 1;
+}
+
+#include "trace.h"
+
 /* Up this if you want to test the TIME_EXTENTS and normalization */
 #define DEBUG_SHIFT 0
 
@@ -154,6 +183,7 @@ static inline int test_time_stamp(u64 delta)
 struct ring_buffer_per_cpu {
 	int				cpu;
 	struct ring_buffer		*buffer;
+	spinlock_t			reader_lock; /* serialize readers */
 	raw_spinlock_t			lock;
 	struct lock_class_key		lock_key;
 	struct list_head		pages;
@@ -190,60 +220,14 @@ struct ring_buffer_iter {
 
 /* buffer may be either ring_buffer or ring_buffer_per_cpu */
 #define RB_WARN_ON(buffer, cond)				\
-	do {							\
-		if (unlikely(cond)) {				\
+	({							\
+		int _____ret = unlikely(cond);			\
+		if (_____ret) {					\
 			atomic_inc(&buffer->record_disabled);	\
 			WARN_ON(1);				\
 		}						\
-	} while (0)
-
-#define RB_WARN_ON_RET(buffer, cond)				\
-	do {							\
-		if (unlikely(cond)) {				\
-			atomic_inc(&buffer->record_disabled);	\
-			WARN_ON(1);				\
-			return;					\
-		}						\
-	} while (0)
-
-#define RB_WARN_ON_RET_INT(buffer, cond)			\
-	do {							\
-		if (unlikely(cond)) {				\
-			atomic_inc(&buffer->record_disabled);	\
-			WARN_ON(1);				\
-			return -1;				\
-		}						\
-	} while (0)
-
-#define RB_WARN_ON_RET_NULL(buffer, cond)			\
-	do {							\
-		if (unlikely(cond)) {				\
-			atomic_inc(&buffer->record_disabled);	\
-			WARN_ON(1);				\
-			return NULL;				\
-		}						\
-	} while (0)
-
-#define RB_WARN_ON_ONCE(buffer, cond)				\
-	do {							\
-		static int once;				\
-		if (unlikely(cond) && !once) {			\
-			once++;					\
-			atomic_inc(&buffer->record_disabled);	\
-			WARN_ON(1);				\
-		}						\
-	} while (0)
-
-/* buffer must be ring_buffer not per_cpu */
-#define RB_WARN_ON_UNLOCK(buffer, cond)				\
-	do {							\
-		if (unlikely(cond)) {				\
-			mutex_unlock(&buffer->mutex);		\
-			atomic_inc(&buffer->record_disabled);	\
-			WARN_ON(1);				\
-			return -1;				\
-		}						\
-	} while (0)
+		_____ret;					\
+	})
 
 /**
  * check_pages - integrity check of buffer pages
@@ -257,14 +241,18 @@ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
 	struct list_head *head = &cpu_buffer->pages;
 	struct buffer_page *page, *tmp;
 
-	RB_WARN_ON_RET_INT(cpu_buffer, head->next->prev != head);
-	RB_WARN_ON_RET_INT(cpu_buffer, head->prev->next != head);
+	if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
+		return -1;
+	if (RB_WARN_ON(cpu_buffer, head->prev->next != head))
+		return -1;
 
 	list_for_each_entry_safe(page, tmp, head, list) {
-		RB_WARN_ON_RET_INT(cpu_buffer,
-				   page->list.next->prev != &page->list);
-		RB_WARN_ON_RET_INT(cpu_buffer,
-				   page->list.prev->next != &page->list);
+		if (RB_WARN_ON(cpu_buffer,
+			       page->list.next->prev != &page->list))
+			return -1;
+		if (RB_WARN_ON(cpu_buffer,
+			       page->list.prev->next != &page->list))
+			return -1;
 	}
 
 	return 0;
@@ -321,6 +309,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
 
 	cpu_buffer->cpu = cpu;
 	cpu_buffer->buffer = buffer;
+	spin_lock_init(&cpu_buffer->reader_lock);
 	cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
 	INIT_LIST_HEAD(&cpu_buffer->pages);
 
@@ -470,13 +459,15 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
 	synchronize_sched();
 
 	for (i = 0; i < nr_pages; i++) {
-		RB_WARN_ON_RET(cpu_buffer, list_empty(&cpu_buffer->pages));
+		if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages)))
+			return;
 		p = cpu_buffer->pages.next;
 		page = list_entry(p, struct buffer_page, list);
 		list_del_init(&page->list);
 		free_buffer_page(page);
 	}
-	RB_WARN_ON_RET(cpu_buffer, list_empty(&cpu_buffer->pages));
+	if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages)))
+		return;
 
 	rb_reset_cpu(cpu_buffer);
 
@@ -498,7 +489,8 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
 	synchronize_sched();
 
 	for (i = 0; i < nr_pages; i++) {
-		RB_WARN_ON_RET(cpu_buffer, list_empty(pages));
+		if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
+			return;
 		p = pages->next;
 		page = list_entry(p, struct buffer_page, list);
 		list_del_init(&page->list);
@@ -553,7 +545,10 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
 	if (size < buffer_size) {
 
 		/* easy case, just free pages */
-		RB_WARN_ON_UNLOCK(buffer, nr_pages >= buffer->pages);
+		if (RB_WARN_ON(buffer, nr_pages >= buffer->pages)) {
+			mutex_unlock(&buffer->mutex);
+			return -1;
+		}
 
 		rm_pages = buffer->pages - nr_pages;
 
@@ -572,7 +567,10 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
 	 * add these pages to the cpu_buffers. Otherwise we just free
 	 * them all and return -ENOMEM;
 	 */
-	RB_WARN_ON_UNLOCK(buffer, nr_pages <= buffer->pages);
+	if (RB_WARN_ON(buffer, nr_pages <= buffer->pages)) {
+		mutex_unlock(&buffer->mutex);
+		return -1;
+	}
 
 	new_pages = nr_pages - buffer->pages;
 
@@ -596,7 +594,10 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
 		rb_insert_pages(cpu_buffer, &pages, new_pages);
 	}
 
-	RB_WARN_ON_UNLOCK(buffer, !list_empty(&pages));
+	if (RB_WARN_ON(buffer, !list_empty(&pages))) {
+		mutex_unlock(&buffer->mutex);
+		return -1;
+	}
 
  out:
 	buffer->pages = nr_pages;
@@ -684,7 +685,8 @@ static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
 	     head += rb_event_length(event)) {
 
 		event = __rb_page_index(cpu_buffer->head_page, head);
-		RB_WARN_ON_RET(cpu_buffer, rb_null_event(event));
+		if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
+			return;
 		/* Only count data entries */
 		if (event->type != RINGBUF_TYPE_DATA)
 			continue;
@@ -737,8 +739,9 @@ rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
 	addr &= PAGE_MASK;
 
 	while (cpu_buffer->commit_page->page != (void *)addr) {
-		RB_WARN_ON(cpu_buffer,
-			   cpu_buffer->commit_page == cpu_buffer->tail_page);
+		if (RB_WARN_ON(cpu_buffer,
+			  cpu_buffer->commit_page == cpu_buffer->tail_page))
+			return;
 		cpu_buffer->commit_page->commit =
 			cpu_buffer->commit_page->write;
 		rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
@@ -894,7 +897,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 		reader_page = cpu_buffer->reader_page;
 
 		/* we grabbed the lock before incrementing */
-		RB_WARN_ON(cpu_buffer, next_page == reader_page);
+		if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
+			goto out_unlock;
 
 		/*
 		 * If for some reason, we had an interrupt storm that made
@@ -971,7 +975,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 
 	/* We reserved something on the buffer */
 
-	RB_WARN_ON_RET_NULL(cpu_buffer, write > BUF_PAGE_SIZE);
+	if (RB_WARN_ON(cpu_buffer, write > BUF_PAGE_SIZE))
+		return NULL;
 
 	event = __rb_page_index(tail_page, tail);
 	rb_update_event(event, type, length);
@@ -1070,10 +1075,8 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 		 * storm or we have something buggy.
 		 * Bail!
 		 */
-		if (unlikely(++nr_loops > 1000)) {
-			RB_WARN_ON(cpu_buffer, 1);
+		if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
 			return NULL;
-		}
 
 		ts = ring_buffer_time_stamp(cpu_buffer->cpu);
 
@@ -1169,6 +1172,9 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
 	struct ring_buffer_event *event;
 	int cpu, resched;
 
+	if (ring_buffers_off)
+		return NULL;
+
 	if (atomic_read(&buffer->record_disabled))
 		return NULL;
 
@@ -1278,6 +1284,9 @@ int ring_buffer_write(struct ring_buffer *buffer,
 	int ret = -EBUSY;
 	int cpu, resched;
 
+	if (ring_buffers_off)
+		return -EBUSY;
+
 	if (atomic_read(&buffer->record_disabled))
 		return -EBUSY;
 
@@ -1476,6 +1485,9 @@ unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
 void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
 {
 	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
+	unsigned long flags;
+
+	spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 
 	/* Iterator usage is expected to have record disabled */
 	if (list_empty(&cpu_buffer->reader_page->list)) {
@@ -1489,6 +1501,8 @@ void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
 		iter->read_stamp = cpu_buffer->read_stamp;
 	else
 		iter->read_stamp = iter->head_page->time_stamp;
+
+	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 }
 
 /**
@@ -1584,8 +1598,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	 * a case where we will loop three times. There should be no
 	 * reason to loop four times (that I know of).
 	 */
-	if (unlikely(++nr_loops > 3)) {
-		RB_WARN_ON(cpu_buffer, 1);
+	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) {
 		reader = NULL;
 		goto out;
 	}
@@ -1597,8 +1610,9 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 		goto out;
 
 	/* Never should we have an index greater than the size */
-	RB_WARN_ON(cpu_buffer,
-		   cpu_buffer->reader_page->read > rb_page_size(reader));
+	if (RB_WARN_ON(cpu_buffer,
+		       cpu_buffer->reader_page->read > rb_page_size(reader)))
+		goto out;
 
 	/* check if we caught up to the tail */
 	reader = NULL;
@@ -1652,7 +1666,8 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
 	reader = rb_get_reader_page(cpu_buffer);
 
 	/* This function should not be called when buffer is empty */
-	RB_WARN_ON_RET(cpu_buffer, !reader);
+	if (RB_WARN_ON(cpu_buffer, !reader))
+		return;
 
 	event = rb_reader_event(cpu_buffer);
 
@@ -1679,8 +1694,9 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
 	 * Check if we are at the end of the buffer.
 	 */
 	if (iter->head >= rb_page_size(iter->head_page)) {
-		RB_WARN_ON_RET(buffer,
-			       iter->head_page == cpu_buffer->commit_page);
+		if (RB_WARN_ON(buffer,
+			       iter->head_page == cpu_buffer->commit_page))
+			return;
 		rb_inc_iter(iter);
 		return;
 	}
@@ -1693,9 +1709,10 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
 	 * This should not be called to advance the header if we are
 	 * at the tail of the buffer.
 	 */
-	RB_WARN_ON_RET(cpu_buffer,
-		       (iter->head_page == cpu_buffer->commit_page) &&
-		       (iter->head + length > rb_commit_index(cpu_buffer)));
+	if (RB_WARN_ON(cpu_buffer,
+		       (iter->head_page == cpu_buffer->commit_page) &&
+		       (iter->head + length > rb_commit_index(cpu_buffer))))
+		return;
 
 	rb_update_iter_read_stamp(iter, event);
 
@@ -1707,17 +1724,8 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
 	rb_advance_iter(iter);
 }
 
-/**
- * ring_buffer_peek - peek at the next event to be read
- * @buffer: The ring buffer to read
- * @cpu: The cpu to peak at
- * @ts: The timestamp counter of this event.
- *
- * This will return the event that will be read next, but does
- * not consume the data.
- */
-struct ring_buffer_event *
-ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
+static struct ring_buffer_event *
+rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_event *event;
@@ -1738,10 +1746,8 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 	 * can have. Nesting 10 deep of interrupts is clearly
 	 * an anomaly.
 	 */
-	if (unlikely(++nr_loops > 10)) {
-		RB_WARN_ON(cpu_buffer, 1);
+	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
 		return NULL;
-	}
 
 	reader = rb_get_reader_page(cpu_buffer);
 	if (!reader)
@@ -1779,16 +1785,8 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 	return NULL;
 }
 
-/**
- * ring_buffer_iter_peek - peek at the next event to be read
- * @iter: The ring buffer iterator
- * @ts: The timestamp counter of this event.
- *
- * This will return the event that will be read next, but does
- * not increment the iterator.
- */
-struct ring_buffer_event *
-ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
+static struct ring_buffer_event *
+rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 {
 	struct ring_buffer *buffer;
 	struct ring_buffer_per_cpu *cpu_buffer;
@@ -1810,10 +1808,8 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 	 * can have. Nesting 10 deep of interrupts is clearly
 	 * an anomaly.
 	 */
-	if (unlikely(++nr_loops > 10)) {
-		RB_WARN_ON(cpu_buffer, 1);
+	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
 		return NULL;
-	}
 
 	if (rb_per_cpu_empty(cpu_buffer))
 		return NULL;
@@ -1850,6 +1846,51 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 }
 
 /**
+ * ring_buffer_peek - peek at the next event to be read
+ * @buffer: The ring buffer to read
+ * @cpu: The cpu to peak at
+ * @ts: The timestamp counter of this event.
+ *
+ * This will return the event that will be read next, but does
+ * not consume the data.
+ */
+struct ring_buffer_event *
+ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
+{
+	struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
+	struct ring_buffer_event *event;
+	unsigned long flags;
+
+	spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+	event = rb_buffer_peek(buffer, cpu, ts);
+	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+
+	return event;
+}
+
+/**
+ * ring_buffer_iter_peek - peek at the next event to be read
+ * @iter: The ring buffer iterator
+ * @ts: The timestamp counter of this event.
+ *
+ * This will return the event that will be read next, but does
+ * not increment the iterator.
+ */
+struct ring_buffer_event *
+ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
+{
+	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
+	struct ring_buffer_event *event;
+	unsigned long flags;
+
+	spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+	event = rb_iter_peek(iter, ts);
+	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+
+	return event;
+}
+
+/**
  * ring_buffer_consume - return an event and consume it
  * @buffer: The ring buffer to get the next event from
  *
@@ -1860,19 +1901,24 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 struct ring_buffer_event *
 ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
 {
-	struct ring_buffer_per_cpu *cpu_buffer;
+	struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
 	struct ring_buffer_event *event;
+	unsigned long flags;
 
 	if (!cpu_isset(cpu, buffer->cpumask))
 		return NULL;
 
-	event = ring_buffer_peek(buffer, cpu, ts);
+	spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+
+	event = rb_buffer_peek(buffer, cpu, ts);
 	if (!event)
-		return NULL;
+		goto out;
 
-	cpu_buffer = buffer->buffers[cpu];
 	rb_advance_reader(cpu_buffer);
 
+ out:
+	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+
 	return event;
 }
 
@@ -1909,11 +1955,11 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
 	atomic_inc(&cpu_buffer->record_disabled);
 	synchronize_sched();
 
-	local_irq_save(flags);
+	spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 	__raw_spin_lock(&cpu_buffer->lock);
 	ring_buffer_iter_reset(iter);
 	__raw_spin_unlock(&cpu_buffer->lock);
-	local_irq_restore(flags);
+	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 
 	return iter;
 }
@@ -1945,12 +1991,17 @@ struct ring_buffer_event *
 ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
 {
 	struct ring_buffer_event *event;
+	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
+	unsigned long flags;
 
-	event = ring_buffer_iter_peek(iter, ts);
+	spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+	event = rb_iter_peek(iter, ts);
 	if (!event)
-		return NULL;
+		goto out;
 
 	rb_advance_iter(iter);
+ out:
+	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 
 	return event;
 }
@@ -1999,13 +2050,15 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
 	if (!cpu_isset(cpu, buffer->cpumask))
 		return;
 
-	local_irq_save(flags);
+	spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+
 	__raw_spin_lock(&cpu_buffer->lock);
 
 	rb_reset_cpu(cpu_buffer);
 
 	__raw_spin_unlock(&cpu_buffer->lock);
-	local_irq_restore(flags);
+
+	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 }
 
 /**
@@ -2103,3 +2156,69 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
 	return 0;
 }
 
+static ssize_t
+rb_simple_read(struct file *filp, char __user *ubuf,
+	       size_t cnt, loff_t *ppos)
+{
+	int *p = filp->private_data;
+	char buf[64];
+	int r;
+
+	/* !ring_buffers_off == tracing_on */
+	r = sprintf(buf, "%d\n", !*p);
+
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t
+rb_simple_write(struct file *filp, const char __user *ubuf,
+		size_t cnt, loff_t *ppos)
+{
+	int *p = filp->private_data;
+	char buf[64];
+	long val;
+	int ret;
+
+	if (cnt >= sizeof(buf))
+		return -EINVAL;
+
+	if (copy_from_user(&buf, ubuf, cnt))
+		return -EFAULT;
+
+	buf[cnt] = 0;
+
+	ret = strict_strtoul(buf, 10, &val);
+	if (ret < 0)
+		return ret;
+
+	/* !ring_buffers_off == tracing_on */
+	*p = !val;
+
+	(*ppos)++;
+
+	return cnt;
+}
+
+static struct file_operations rb_simple_fops = {
+	.open		= tracing_open_generic,
+	.read		= rb_simple_read,
+	.write		= rb_simple_write,
+};
+
+
+static __init int rb_init_debugfs(void)
+{
+	struct dentry *d_tracer;
+	struct dentry *entry;
+
+	d_tracer = tracing_init_dentry();
+
+	entry = debugfs_create_file("tracing_on", 0644, d_tracer,
+				    &ring_buffers_off, &rb_simple_fops);
+	if (!entry)
+		pr_warning("Could not create debugfs 'tracing_on' entry\n");
+
+	return 0;
+}
+
+fs_initcall(rb_init_debugfs);