aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/events/core.c104
-rw-r--r--kernel/exit.c16
-rw-r--r--kernel/sched/core.c19
-rw-r--r--kernel/sched/fair.c34
-rw-r--r--kernel/sched/rt.c5
-rw-r--r--kernel/watchdog.c2
6 files changed, 124 insertions, 56 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 32b48c889711..ba36013cfb21 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2300,6 +2300,9 @@ do { \
2300 return div64_u64(dividend, divisor); 2300 return div64_u64(dividend, divisor);
2301} 2301}
2302 2302
2303static DEFINE_PER_CPU(int, perf_throttled_count);
2304static DEFINE_PER_CPU(u64, perf_throttled_seq);
2305
2303static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) 2306static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
2304{ 2307{
2305 struct hw_perf_event *hwc = &event->hw; 2308 struct hw_perf_event *hwc = &event->hw;
@@ -2325,16 +2328,29 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
2325 } 2328 }
2326} 2329}
2327 2330
2328static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period) 2331/*
2332 * Combine freq adjustment with unthrottling to avoid two passes over the
2333 * events. At the same time, make sure that having freq events does not change
2334 * the rate of unthrottling, as that would introduce bias.
2335 */
2336static void perf_adjust_freq_unthr_context(struct perf_event_context *ctx,
2337 int needs_unthr)
2329{ 2338{
2330 struct perf_event *event; 2339 struct perf_event *event;
2331 struct hw_perf_event *hwc; 2340 struct hw_perf_event *hwc;
2332 u64 interrupts, now; 2341 u64 now, period = TICK_NSEC;
2333 s64 delta; 2342 s64 delta;
2334 2343
2335 if (!ctx->nr_freq) 2344 /*
2345 * only need to iterate over all events if either:
2346 * - the context has events in frequency mode (needs freq adjust), or
2347 * - there are events to unthrottle on this cpu
2348 */
2349 if (!(ctx->nr_freq || needs_unthr))
2336 return; 2350 return;
2337 2351
2352 raw_spin_lock(&ctx->lock);
2353
2338 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 2354 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
2339 if (event->state != PERF_EVENT_STATE_ACTIVE) 2355 if (event->state != PERF_EVENT_STATE_ACTIVE)
2340 continue; 2356 continue;
@@ -2344,13 +2360,8 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
2344 2360
2345 hwc = &event->hw; 2361 hwc = &event->hw;
2346 2362
2347 interrupts = hwc->interrupts; 2363 if (needs_unthr && hwc->interrupts == MAX_INTERRUPTS) {
2348 hwc->interrupts = 0; 2364 hwc->interrupts = 0;
2349
2350 /*
2351 * unthrottle events on the tick
2352 */
2353 if (interrupts == MAX_INTERRUPTS) {
2354 perf_log_throttle(event, 1); 2365 perf_log_throttle(event, 1);
2355 event->pmu->start(event, 0); 2366 event->pmu->start(event, 0);
2356 } 2367 }
@@ -2358,14 +2369,26 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
2358 if (!event->attr.freq || !event->attr.sample_freq) 2369 if (!event->attr.freq || !event->attr.sample_freq)
2359 continue; 2370 continue;
2360 2371
2361 event->pmu->read(event); 2372 /*
2373 * stop the event and update event->count
2374 */
2375 event->pmu->stop(event, PERF_EF_UPDATE);
2376
2362 now = local64_read(&event->count); 2377 now = local64_read(&event->count);
2363 delta = now - hwc->freq_count_stamp; 2378 delta = now - hwc->freq_count_stamp;
2364 hwc->freq_count_stamp = now; 2379 hwc->freq_count_stamp = now;
2365 2380
2381 /*
2382 * restart the event
2383 * reload only if value has changed
2384 */
2366 if (delta > 0) 2385 if (delta > 0)
2367 perf_adjust_period(event, period, delta); 2386 perf_adjust_period(event, period, delta);
2387
2388 event->pmu->start(event, delta > 0 ? PERF_EF_RELOAD : 0);
2368 } 2389 }
2390
2391 raw_spin_unlock(&ctx->lock);
2369} 2392}
2370 2393
2371/* 2394/*
@@ -2388,16 +2411,13 @@ static void rotate_ctx(struct perf_event_context *ctx)
2388 */ 2411 */
2389static void perf_rotate_context(struct perf_cpu_context *cpuctx) 2412static void perf_rotate_context(struct perf_cpu_context *cpuctx)
2390{ 2413{
2391 u64 interval = (u64)cpuctx->jiffies_interval * TICK_NSEC;
2392 struct perf_event_context *ctx = NULL; 2414 struct perf_event_context *ctx = NULL;
2393 int rotate = 0, remove = 1, freq = 0; 2415 int rotate = 0, remove = 1;
2394 2416
2395 if (cpuctx->ctx.nr_events) { 2417 if (cpuctx->ctx.nr_events) {
2396 remove = 0; 2418 remove = 0;
2397 if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active) 2419 if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
2398 rotate = 1; 2420 rotate = 1;
2399 if (cpuctx->ctx.nr_freq)
2400 freq = 1;
2401 } 2421 }
2402 2422
2403 ctx = cpuctx->task_ctx; 2423 ctx = cpuctx->task_ctx;
@@ -2405,37 +2425,26 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx)
2405 remove = 0; 2425 remove = 0;
2406 if (ctx->nr_events != ctx->nr_active) 2426 if (ctx->nr_events != ctx->nr_active)
2407 rotate = 1; 2427 rotate = 1;
2408 if (ctx->nr_freq)
2409 freq = 1;
2410 } 2428 }
2411 2429
2412 if (!rotate && !freq) 2430 if (!rotate)
2413 goto done; 2431 goto done;
2414 2432
2415 perf_ctx_lock(cpuctx, cpuctx->task_ctx); 2433 perf_ctx_lock(cpuctx, cpuctx->task_ctx);
2416 perf_pmu_disable(cpuctx->ctx.pmu); 2434 perf_pmu_disable(cpuctx->ctx.pmu);
2417 2435
2418 if (freq) { 2436 cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
2419 perf_ctx_adjust_freq(&cpuctx->ctx, interval); 2437 if (ctx)
2420 if (ctx) 2438 ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);
2421 perf_ctx_adjust_freq(ctx, interval);
2422 }
2423
2424 if (rotate) {
2425 cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
2426 if (ctx)
2427 ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);
2428 2439
2429 rotate_ctx(&cpuctx->ctx); 2440 rotate_ctx(&cpuctx->ctx);
2430 if (ctx) 2441 if (ctx)
2431 rotate_ctx(ctx); 2442 rotate_ctx(ctx);
2432 2443
2433 perf_event_sched_in(cpuctx, ctx, current); 2444 perf_event_sched_in(cpuctx, ctx, current);
2434 }
2435 2445
2436 perf_pmu_enable(cpuctx->ctx.pmu); 2446 perf_pmu_enable(cpuctx->ctx.pmu);
2437 perf_ctx_unlock(cpuctx, cpuctx->task_ctx); 2447 perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
2438
2439done: 2448done:
2440 if (remove) 2449 if (remove)
2441 list_del_init(&cpuctx->rotation_list); 2450 list_del_init(&cpuctx->rotation_list);
@@ -2445,10 +2454,22 @@ void perf_event_task_tick(void)
2445{ 2454{
2446 struct list_head *head = &__get_cpu_var(rotation_list); 2455 struct list_head *head = &__get_cpu_var(rotation_list);
2447 struct perf_cpu_context *cpuctx, *tmp; 2456 struct perf_cpu_context *cpuctx, *tmp;
2457 struct perf_event_context *ctx;
2458 int throttled;
2448 2459
2449 WARN_ON(!irqs_disabled()); 2460 WARN_ON(!irqs_disabled());
2450 2461
2462 __this_cpu_inc(perf_throttled_seq);
2463 throttled = __this_cpu_xchg(perf_throttled_count, 0);
2464
2451 list_for_each_entry_safe(cpuctx, tmp, head, rotation_list) { 2465 list_for_each_entry_safe(cpuctx, tmp, head, rotation_list) {
2466 ctx = &cpuctx->ctx;
2467 perf_adjust_freq_unthr_context(ctx, throttled);
2468
2469 ctx = cpuctx->task_ctx;
2470 if (ctx)
2471 perf_adjust_freq_unthr_context(ctx, throttled);
2472
2452 if (cpuctx->jiffies_interval == 1 || 2473 if (cpuctx->jiffies_interval == 1 ||
2453 !(jiffies % cpuctx->jiffies_interval)) 2474 !(jiffies % cpuctx->jiffies_interval))
2454 perf_rotate_context(cpuctx); 2475 perf_rotate_context(cpuctx);
@@ -4509,6 +4530,7 @@ static int __perf_event_overflow(struct perf_event *event,
4509{ 4530{
4510 int events = atomic_read(&event->event_limit); 4531 int events = atomic_read(&event->event_limit);
4511 struct hw_perf_event *hwc = &event->hw; 4532 struct hw_perf_event *hwc = &event->hw;
4533 u64 seq;
4512 int ret = 0; 4534 int ret = 0;
4513 4535
4514 /* 4536 /*
@@ -4518,14 +4540,20 @@ static int __perf_event_overflow(struct perf_event *event,
4518 if (unlikely(!is_sampling_event(event))) 4540 if (unlikely(!is_sampling_event(event)))
4519 return 0; 4541 return 0;
4520 4542
4521 if (unlikely(hwc->interrupts >= max_samples_per_tick)) { 4543 seq = __this_cpu_read(perf_throttled_seq);
4522 if (throttle) { 4544 if (seq != hwc->interrupts_seq) {
4545 hwc->interrupts_seq = seq;
4546 hwc->interrupts = 1;
4547 } else {
4548 hwc->interrupts++;
4549 if (unlikely(throttle
4550 && hwc->interrupts >= max_samples_per_tick)) {
4551 __this_cpu_inc(perf_throttled_count);
4523 hwc->interrupts = MAX_INTERRUPTS; 4552 hwc->interrupts = MAX_INTERRUPTS;
4524 perf_log_throttle(event, 0); 4553 perf_log_throttle(event, 0);
4525 ret = 1; 4554 ret = 1;
4526 } 4555 }
4527 } else 4556 }
4528 hwc->interrupts++;
4529 4557
4530 if (event->attr.freq) { 4558 if (event->attr.freq) {
4531 u64 now = perf_clock(); 4559 u64 now = perf_clock();
diff --git a/kernel/exit.c b/kernel/exit.c
index 294b1709170d..4b4042f9bc6a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1038,6 +1038,22 @@ void do_exit(long code)
1038 if (tsk->nr_dirtied) 1038 if (tsk->nr_dirtied)
1039 __this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied); 1039 __this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied);
1040 exit_rcu(); 1040 exit_rcu();
1041
1042 /*
1043 * The setting of TASK_RUNNING by try_to_wake_up() may be delayed
1044 * when the following two conditions become true.
1045 * - There is a race condition on mmap_sem (it is acquired by
1046 * exit_mm()), and
1047 * - An SMI occurs before setting TASK_RUNNING.
1048 * (or the hypervisor of a virtual machine switches to another guest)
1049 * As a result, we may become TASK_RUNNING after becoming TASK_DEAD.
1050 *
1051 * To avoid it, we have to wait for tsk->pi_lock, which is held by
1052 * try_to_wake_up(), to be released.
1053 */
1054 smp_mb();
1055 raw_spin_unlock_wait(&tsk->pi_lock);
1056
1041 /* causes final put_task_struct in finish_task_switch(). */ 1057 /* causes final put_task_struct in finish_task_switch(). */
1042 tsk->state = TASK_DEAD; 1058 tsk->state = TASK_DEAD;
1043 tsk->flags |= PF_NOFREEZE; /* tell freezer to ignore us */ 1059 tsk->flags |= PF_NOFREEZE; /* tell freezer to ignore us */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index df00cb09263e..5255c9d2e053 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -74,6 +74,7 @@
74 74
75#include <asm/tlb.h> 75#include <asm/tlb.h>
76#include <asm/irq_regs.h> 76#include <asm/irq_regs.h>
77#include <asm/mutex.h>
77#ifdef CONFIG_PARAVIRT 78#ifdef CONFIG_PARAVIRT
78#include <asm/paravirt.h> 79#include <asm/paravirt.h>
79#endif 80#endif
@@ -723,9 +724,6 @@ static void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
723 p->sched_class->dequeue_task(rq, p, flags); 724 p->sched_class->dequeue_task(rq, p, flags);
724} 725}
725 726
726/*
727 * activate_task - move a task to the runqueue.
728 */
729void activate_task(struct rq *rq, struct task_struct *p, int flags) 727void activate_task(struct rq *rq, struct task_struct *p, int flags)
730{ 728{
731 if (task_contributes_to_load(p)) 729 if (task_contributes_to_load(p))
@@ -734,9 +732,6 @@ void activate_task(struct rq *rq, struct task_struct *p, int flags)
734 enqueue_task(rq, p, flags); 732 enqueue_task(rq, p, flags);
735} 733}
736 734
737/*
738 * deactivate_task - remove a task from the runqueue.
739 */
740void deactivate_task(struct rq *rq, struct task_struct *p, int flags) 735void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
741{ 736{
742 if (task_contributes_to_load(p)) 737 if (task_contributes_to_load(p))
@@ -4134,7 +4129,7 @@ recheck:
4134 on_rq = p->on_rq; 4129 on_rq = p->on_rq;
4135 running = task_current(rq, p); 4130 running = task_current(rq, p);
4136 if (on_rq) 4131 if (on_rq)
4137 deactivate_task(rq, p, 0); 4132 dequeue_task(rq, p, 0);
4138 if (running) 4133 if (running)
4139 p->sched_class->put_prev_task(rq, p); 4134 p->sched_class->put_prev_task(rq, p);
4140 4135
@@ -4147,7 +4142,7 @@ recheck:
4147 if (running) 4142 if (running)
4148 p->sched_class->set_curr_task(rq); 4143 p->sched_class->set_curr_task(rq);
4149 if (on_rq) 4144 if (on_rq)
4150 activate_task(rq, p, 0); 4145 enqueue_task(rq, p, 0);
4151 4146
4152 check_class_changed(rq, p, prev_class, oldprio); 4147 check_class_changed(rq, p, prev_class, oldprio);
4153 task_rq_unlock(rq, p, &flags); 4148 task_rq_unlock(rq, p, &flags);
@@ -4998,9 +4993,9 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
4998 * placed properly. 4993 * placed properly.
4999 */ 4994 */
5000 if (p->on_rq) { 4995 if (p->on_rq) {
5001 deactivate_task(rq_src, p, 0); 4996 dequeue_task(rq_src, p, 0);
5002 set_task_cpu(p, dest_cpu); 4997 set_task_cpu(p, dest_cpu);
5003 activate_task(rq_dest, p, 0); 4998 enqueue_task(rq_dest, p, 0);
5004 check_preempt_curr(rq_dest, p, 0); 4999 check_preempt_curr(rq_dest, p, 0);
5005 } 5000 }
5006done: 5001done:
@@ -7032,10 +7027,10 @@ static void normalize_task(struct rq *rq, struct task_struct *p)
7032 7027
7033 on_rq = p->on_rq; 7028 on_rq = p->on_rq;
7034 if (on_rq) 7029 if (on_rq)
7035 deactivate_task(rq, p, 0); 7030 dequeue_task(rq, p, 0);
7036 __setscheduler(rq, p, SCHED_NORMAL, 0); 7031 __setscheduler(rq, p, SCHED_NORMAL, 0);
7037 if (on_rq) { 7032 if (on_rq) {
7038 activate_task(rq, p, 0); 7033 enqueue_task(rq, p, 0);
7039 resched_task(rq->curr); 7034 resched_task(rq->curr);
7040 } 7035 }
7041 7036
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 84adb2d66cbd..7c6414fc669d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4866,6 +4866,15 @@ static void nohz_balancer_kick(int cpu)
4866 return; 4866 return;
4867} 4867}
4868 4868
4869static inline void clear_nohz_tick_stopped(int cpu)
4870{
4871 if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))) {
4872 cpumask_clear_cpu(cpu, nohz.idle_cpus_mask);
4873 atomic_dec(&nohz.nr_cpus);
4874 clear_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
4875 }
4876}
4877
4869static inline void set_cpu_sd_state_busy(void) 4878static inline void set_cpu_sd_state_busy(void)
4870{ 4879{
4871 struct sched_domain *sd; 4880 struct sched_domain *sd;
@@ -4904,6 +4913,12 @@ void select_nohz_load_balancer(int stop_tick)
4904{ 4913{
4905 int cpu = smp_processor_id(); 4914 int cpu = smp_processor_id();
4906 4915
4916 /*
4917 * If this cpu is going down, then nothing needs to be done.
4918 */
4919 if (!cpu_active(cpu))
4920 return;
4921
4907 if (stop_tick) { 4922 if (stop_tick) {
4908 if (test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu))) 4923 if (test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))
4909 return; 4924 return;
@@ -4914,6 +4929,18 @@ void select_nohz_load_balancer(int stop_tick)
4914 } 4929 }
4915 return; 4930 return;
4916} 4931}
4932
4933static int __cpuinit sched_ilb_notifier(struct notifier_block *nfb,
4934 unsigned long action, void *hcpu)
4935{
4936 switch (action & ~CPU_TASKS_FROZEN) {
4937 case CPU_DYING:
4938 clear_nohz_tick_stopped(smp_processor_id());
4939 return NOTIFY_OK;
4940 default:
4941 return NOTIFY_DONE;
4942 }
4943}
4917#endif 4944#endif
4918 4945
4919static DEFINE_SPINLOCK(balancing); 4946static DEFINE_SPINLOCK(balancing);
@@ -5070,11 +5097,7 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu)
5070 * busy tick after returning from idle, we will update the busy stats. 5097 * busy tick after returning from idle, we will update the busy stats.
5071 */ 5098 */
5072 set_cpu_sd_state_busy(); 5099 set_cpu_sd_state_busy();
5073 if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))) { 5100 clear_nohz_tick_stopped(cpu);
5074 clear_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
5075 cpumask_clear_cpu(cpu, nohz.idle_cpus_mask);
5076 atomic_dec(&nohz.nr_cpus);
5077 }
5078 5101
5079 /* 5102 /*
5080 * None are in tickless mode and hence no need for NOHZ idle load 5103 * None are in tickless mode and hence no need for NOHZ idle load
@@ -5590,6 +5613,7 @@ __init void init_sched_fair_class(void)
5590 5613
5591#ifdef CONFIG_NO_HZ 5614#ifdef CONFIG_NO_HZ
5592 zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT); 5615 zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT);
5616 cpu_notifier(sched_ilb_notifier, 0);
5593#endif 5617#endif
5594#endif /* SMP */ 5618#endif /* SMP */
5595 5619
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 3640ebbb466b..f42ae7fb5ec5 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1587,6 +1587,11 @@ static int push_rt_task(struct rq *rq)
1587 if (!next_task) 1587 if (!next_task)
1588 return 0; 1588 return 0;
1589 1589
1590#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
1591 if (unlikely(task_running(rq, next_task)))
1592 return 0;
1593#endif
1594
1590retry: 1595retry:
1591 if (unlikely(next_task == rq->curr)) { 1596 if (unlikely(next_task == rq->curr)) {
1592 WARN_ON(1); 1597 WARN_ON(1);
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 1d7bca7f4f52..d117262deba3 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -296,7 +296,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
296 if (__this_cpu_read(soft_watchdog_warn) == true) 296 if (__this_cpu_read(soft_watchdog_warn) == true)
297 return HRTIMER_RESTART; 297 return HRTIMER_RESTART;
298 298
299 printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n", 299 printk(KERN_EMERG "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
300 smp_processor_id(), duration, 300 smp_processor_id(), duration,
301 current->comm, task_pid_nr(current)); 301 current->comm, task_pid_nr(current));
302 print_modules(); 302 print_modules();