aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
author: Ingo Molnar <mingo@elte.hu> 2009-01-31 11:32:31 -0500
committer: Ingo Molnar <mingo@elte.hu> 2009-02-05 16:30:01 -0500
commit: 9d45cf9e36bf9bcf16df6e1cbf049807c8402823 (patch)
tree: 2118a16701418af10d215d2174df7ee0a5cbe6bd /kernel
parent: a146649bc19d5eba4f5bfac6720c5f252d517a71 (diff)
parent: 0cd5c3c80a0ebd68c08312fa7d8c13149cc61c4c (diff)
Merge branch 'x86/urgent' into x86/apic

Conflicts:
	arch/x86/mach-default/setup.c

Semantic merge:
	arch/x86/kernel/irqinit_32.c

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/cgroup.c 28
-rw-r--r-- kernel/cpuset.c 13
-rw-r--r-- kernel/hrtimer.c 41
-rw-r--r-- kernel/irq/chip.c 2
-rw-r--r-- kernel/irq/numa_migrate.c 7
-rw-r--r-- kernel/module.c 35
-rw-r--r-- kernel/sched.c 10
-rw-r--r-- kernel/sched_fair.c 32
-rw-r--r-- kernel/signal.c 2
-rw-r--r-- kernel/smp.c 36
-rw-r--r-- kernel/time/tick-common.c 26
-rw-r--r-- kernel/trace/ftrace.c 27
-rw-r--r-- kernel/trace/ring_buffer.c 15
-rw-r--r-- kernel/trace/trace.c 5
-rw-r--r-- kernel/trace/trace_irqsoff.c 1
-rw-r--r-- kernel/trace/trace_sched_wakeup.c 1
16 files changed, 229 insertions, 52 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index c29831076e7..5a54ff42874 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1115,8 +1115,10 @@ static void cgroup_kill_sb(struct super_block *sb) {
1115 } 1115 }
1116 write_unlock(&css_set_lock); 1116 write_unlock(&css_set_lock);
1117 1117
1118 list_del(&root->root_list); 1118 if (!list_empty(&root->root_list)) {
1119 root_count--; 1119 list_del(&root->root_list);
1120 root_count--;
1121 }
1120 1122
1121 mutex_unlock(&cgroup_mutex); 1123 mutex_unlock(&cgroup_mutex);
1122 1124
@@ -2434,7 +2436,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
2434 2436
2435 err_remove: 2437 err_remove:
2436 2438
2439 cgroup_lock_hierarchy(root);
2437 list_del(&cgrp->sibling); 2440 list_del(&cgrp->sibling);
2441 cgroup_unlock_hierarchy(root);
2438 root->number_of_cgroups--; 2442 root->number_of_cgroups--;
2439 2443
2440 err_destroy: 2444 err_destroy:
@@ -2507,7 +2511,7 @@ static int cgroup_clear_css_refs(struct cgroup *cgrp)
2507 for_each_subsys(cgrp->root, ss) { 2511 for_each_subsys(cgrp->root, ss) {
2508 struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; 2512 struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
2509 int refcnt; 2513 int refcnt;
2510 do { 2514 while (1) {
2511 /* We can only remove a CSS with a refcnt==1 */ 2515 /* We can only remove a CSS with a refcnt==1 */
2512 refcnt = atomic_read(&css->refcnt); 2516 refcnt = atomic_read(&css->refcnt);
2513 if (refcnt > 1) { 2517 if (refcnt > 1) {
@@ -2521,7 +2525,10 @@ static int cgroup_clear_css_refs(struct cgroup *cgrp)
2521 * css_tryget() to spin until we set the 2525 * css_tryget() to spin until we set the
2522 * CSS_REMOVED bits or abort 2526 * CSS_REMOVED bits or abort
2523 */ 2527 */
2524 } while (atomic_cmpxchg(&css->refcnt, refcnt, 0) != refcnt); 2528 if (atomic_cmpxchg(&css->refcnt, refcnt, 0) == refcnt)
2529 break;
2530 cpu_relax();
2531 }
2525 } 2532 }
2526 done: 2533 done:
2527 for_each_subsys(cgrp->root, ss) { 2534 for_each_subsys(cgrp->root, ss) {
@@ -2991,20 +2998,21 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
2991 mutex_unlock(&cgroup_mutex); 2998 mutex_unlock(&cgroup_mutex);
2992 return 0; 2999 return 0;
2993 } 3000 }
2994 task_lock(tsk);
2995 cg = tsk->cgroups;
2996 parent = task_cgroup(tsk, subsys->subsys_id);
2997 3001
2998 /* Pin the hierarchy */ 3002 /* Pin the hierarchy */
2999 if (!atomic_inc_not_zero(&parent->root->sb->s_active)) { 3003 if (!atomic_inc_not_zero(&root->sb->s_active)) {
3000 /* We race with the final deactivate_super() */ 3004 /* We race with the final deactivate_super() */
3001 mutex_unlock(&cgroup_mutex); 3005 mutex_unlock(&cgroup_mutex);
3002 return 0; 3006 return 0;
3003 } 3007 }
3004 3008
3005 /* Keep the cgroup alive */ 3009 /* Keep the cgroup alive */
3010 task_lock(tsk);
3011 parent = task_cgroup(tsk, subsys->subsys_id);
3012 cg = tsk->cgroups;
3006 get_css_set(cg); 3013 get_css_set(cg);
3007 task_unlock(tsk); 3014 task_unlock(tsk);
3015
3008 mutex_unlock(&cgroup_mutex); 3016 mutex_unlock(&cgroup_mutex);
3009 3017
3010 /* Now do the VFS work to create a cgroup */ 3018 /* Now do the VFS work to create a cgroup */
@@ -3043,7 +3051,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
3043 mutex_unlock(&inode->i_mutex); 3051 mutex_unlock(&inode->i_mutex);
3044 put_css_set(cg); 3052 put_css_set(cg);
3045 3053
3046 deactivate_super(parent->root->sb); 3054 deactivate_super(root->sb);
3047 /* The cgroup is still accessible in the VFS, but 3055 /* The cgroup is still accessible in the VFS, but
3048 * we're not going to try to rmdir() it at this 3056 * we're not going to try to rmdir() it at this
3049 * point. */ 3057 * point. */
@@ -3069,7 +3077,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
3069 mutex_lock(&cgroup_mutex); 3077 mutex_lock(&cgroup_mutex);
3070 put_css_set(cg); 3078 put_css_set(cg);
3071 mutex_unlock(&cgroup_mutex); 3079 mutex_unlock(&cgroup_mutex);
3072 deactivate_super(parent->root->sb); 3080 deactivate_super(root->sb);
3073 return ret; 3081 return ret;
3074} 3082}
3075 3083
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index a85678865c5..f76db9dcaa0 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -61,6 +61,14 @@
61#include <linux/cgroup.h> 61#include <linux/cgroup.h>
62 62
63/* 63/*
64 * Workqueue for cpuset related tasks.
65 *
66 * Using kevent workqueue may cause deadlock when memory_migrate
67 * is set. So we create a separate workqueue thread for cpuset.
68 */
69static struct workqueue_struct *cpuset_wq;
70
71/*
64 * Tracks how many cpusets are currently defined in system. 72 * Tracks how many cpusets are currently defined in system.
65 * When there is only one cpuset (the root cpuset) we can 73 * When there is only one cpuset (the root cpuset) we can
66 * short circuit some hooks. 74 * short circuit some hooks.
@@ -831,7 +839,7 @@ static DECLARE_WORK(rebuild_sched_domains_work, do_rebuild_sched_domains);
831 */ 839 */
832static void async_rebuild_sched_domains(void) 840static void async_rebuild_sched_domains(void)
833{ 841{
834 schedule_work(&rebuild_sched_domains_work); 842 queue_work(cpuset_wq, &rebuild_sched_domains_work);
835} 843}
836 844
837/* 845/*
@@ -2111,6 +2119,9 @@ void __init cpuset_init_smp(void)
2111 2119
2112 hotcpu_notifier(cpuset_track_online_cpus, 0); 2120 hotcpu_notifier(cpuset_track_online_cpus, 0);
2113 hotplug_memory_notifier(cpuset_track_online_nodes, 10); 2121 hotplug_memory_notifier(cpuset_track_online_nodes, 10);
2122
2123 cpuset_wq = create_singlethread_workqueue("cpuset");
2124 BUG_ON(!cpuset_wq);
2114} 2125}
2115 2126
2116/** 2127/**
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index f33afb0407b..f394d2a42ca 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -501,6 +501,13 @@ static void hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base)
501 continue; 501 continue;
502 timer = rb_entry(base->first, struct hrtimer, node); 502 timer = rb_entry(base->first, struct hrtimer, node);
503 expires = ktime_sub(hrtimer_get_expires(timer), base->offset); 503 expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
504 /*
505 * clock_was_set() has changed base->offset so the
506 * result might be negative. Fix it up to prevent a
507 * false positive in clockevents_program_event()
508 */
509 if (expires.tv64 < 0)
510 expires.tv64 = 0;
504 if (expires.tv64 < cpu_base->expires_next.tv64) 511 if (expires.tv64 < cpu_base->expires_next.tv64)
505 cpu_base->expires_next = expires; 512 cpu_base->expires_next = expires;
506 } 513 }
@@ -1158,6 +1165,29 @@ static void __run_hrtimer(struct hrtimer *timer)
1158 1165
1159#ifdef CONFIG_HIGH_RES_TIMERS 1166#ifdef CONFIG_HIGH_RES_TIMERS
1160 1167
1168static int force_clock_reprogram;
1169
1170/*
1171 * After 5 iteration's attempts, we consider that hrtimer_interrupt()
1172 * is hanging, which could happen with something that slows the interrupt
1173 * such as the tracing. Then we force the clock reprogramming for each future
1174 * hrtimer interrupts to avoid infinite loops and use the min_delta_ns
1175 * threshold that we will overwrite.
1176 * The next tick event will be scheduled to 3 times we currently spend on
1177 * hrtimer_interrupt(). This gives a good compromise, the cpus will spend
1178 * 1/4 of their time to process the hrtimer interrupts. This is enough to
1179 * let it running without serious starvation.
1180 */
1181
1182static inline void
1183hrtimer_interrupt_hanging(struct clock_event_device *dev,
1184 ktime_t try_time)
1185{
1186 force_clock_reprogram = 1;
1187 dev->min_delta_ns = (unsigned long)try_time.tv64 * 3;
1188 printk(KERN_WARNING "hrtimer: interrupt too slow, "
1189 "forcing clock min delta to %lu ns\n", dev->min_delta_ns);
1190}
1161/* 1191/*
1162 * High resolution timer interrupt 1192 * High resolution timer interrupt
1163 * Called with interrupts disabled 1193 * Called with interrupts disabled
@@ -1167,6 +1197,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
1167 struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); 1197 struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
1168 struct hrtimer_clock_base *base; 1198 struct hrtimer_clock_base *base;
1169 ktime_t expires_next, now; 1199 ktime_t expires_next, now;
1200 int nr_retries = 0;
1170 int i; 1201 int i;
1171 1202
1172 BUG_ON(!cpu_base->hres_active); 1203 BUG_ON(!cpu_base->hres_active);
@@ -1174,6 +1205,10 @@ void hrtimer_interrupt(struct clock_event_device *dev)
1174 dev->next_event.tv64 = KTIME_MAX; 1205 dev->next_event.tv64 = KTIME_MAX;
1175 1206
1176 retry: 1207 retry:
1208 /* 5 retries is enough to notice a hang */
1209 if (!(++nr_retries % 5))
1210 hrtimer_interrupt_hanging(dev, ktime_sub(ktime_get(), now));
1211
1177 now = ktime_get(); 1212 now = ktime_get();
1178 1213
1179 expires_next.tv64 = KTIME_MAX; 1214 expires_next.tv64 = KTIME_MAX;
@@ -1226,7 +1261,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
1226 1261
1227 /* Reprogramming necessary ? */ 1262 /* Reprogramming necessary ? */
1228 if (expires_next.tv64 != KTIME_MAX) { 1263 if (expires_next.tv64 != KTIME_MAX) {
1229 if (tick_program_event(expires_next, 0)) 1264 if (tick_program_event(expires_next, force_clock_reprogram))
1230 goto retry; 1265 goto retry;
1231 } 1266 }
1232} 1267}
@@ -1580,6 +1615,10 @@ static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self,
1580 break; 1615 break;
1581 1616
1582#ifdef CONFIG_HOTPLUG_CPU 1617#ifdef CONFIG_HOTPLUG_CPU
1618 case CPU_DYING:
1619 case CPU_DYING_FROZEN:
1620 clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DYING, &scpu);
1621 break;
1583 case CPU_DEAD: 1622 case CPU_DEAD:
1584 case CPU_DEAD_FROZEN: 1623 case CPU_DEAD_FROZEN:
1585 { 1624 {
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index c248eba98b4..122fef4b0bd 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -386,6 +386,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
386out_unlock: 386out_unlock:
387 spin_unlock(&desc->lock); 387 spin_unlock(&desc->lock);
388} 388}
389EXPORT_SYMBOL_GPL(handle_level_irq);
389 390
390/** 391/**
391 * handle_fasteoi_irq - irq handler for transparent controllers 392 * handle_fasteoi_irq - irq handler for transparent controllers
@@ -596,6 +597,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
596 } 597 }
597 spin_unlock_irqrestore(&desc->lock, flags); 598 spin_unlock_irqrestore(&desc->lock, flags);
598} 599}
600EXPORT_SYMBOL_GPL(__set_irq_handler);
599 601
600void 602void
601set_irq_chip_and_handler(unsigned int irq, struct irq_chip *chip, 603set_irq_chip_and_handler(unsigned int irq, struct irq_chip *chip,
diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c
index 666260e4c06..7f9b80434e3 100644
--- a/kernel/irq/numa_migrate.c
+++ b/kernel/irq/numa_migrate.c
@@ -78,7 +78,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
78 desc = irq_desc_ptrs[irq]; 78 desc = irq_desc_ptrs[irq];
79 79
80 if (desc && old_desc != desc) 80 if (desc && old_desc != desc)
81 goto out_unlock; 81 goto out_unlock;
82 82
83 node = cpu_to_node(cpu); 83 node = cpu_to_node(cpu);
84 desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node); 84 desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
@@ -97,10 +97,15 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
97 } 97 }
98 98
99 irq_desc_ptrs[irq] = desc; 99 irq_desc_ptrs[irq] = desc;
100 spin_unlock_irqrestore(&sparse_irq_lock, flags);
100 101
101 /* free the old one */ 102 /* free the old one */
102 free_one_irq_desc(old_desc, desc); 103 free_one_irq_desc(old_desc, desc);
104 spin_unlock(&old_desc->lock);
103 kfree(old_desc); 105 kfree(old_desc);
106 spin_lock(&desc->lock);
107
108 return desc;
104 109
105out_unlock: 110out_unlock:
106 spin_unlock_irqrestore(&sparse_irq_lock, flags); 111 spin_unlock_irqrestore(&sparse_irq_lock, flags);
diff --git a/kernel/module.c b/kernel/module.c
index e8b51d41dd7..ba22484a987 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -573,13 +573,13 @@ static char last_unloaded_module[MODULE_NAME_LEN+1];
573/* Init the unload section of the module. */ 573/* Init the unload section of the module. */
574static void module_unload_init(struct module *mod) 574static void module_unload_init(struct module *mod)
575{ 575{
576 unsigned int i; 576 int cpu;
577 577
578 INIT_LIST_HEAD(&mod->modules_which_use_me); 578 INIT_LIST_HEAD(&mod->modules_which_use_me);
579 for (i = 0; i < NR_CPUS; i++) 579 for_each_possible_cpu(cpu)
580 local_set(&mod->ref[i].count, 0); 580 local_set(__module_ref_addr(mod, cpu), 0);
581 /* Hold reference count during initialization. */ 581 /* Hold reference count during initialization. */
582 local_set(&mod->ref[raw_smp_processor_id()].count, 1); 582 local_set(__module_ref_addr(mod, raw_smp_processor_id()), 1);
583 /* Backwards compatibility macros put refcount during init. */ 583 /* Backwards compatibility macros put refcount during init. */
584 mod->waiter = current; 584 mod->waiter = current;
585} 585}
@@ -717,10 +717,11 @@ static int try_stop_module(struct module *mod, int flags, int *forced)
717 717
718unsigned int module_refcount(struct module *mod) 718unsigned int module_refcount(struct module *mod)
719{ 719{
720 unsigned int i, total = 0; 720 unsigned int total = 0;
721 int cpu;
721 722
722 for (i = 0; i < NR_CPUS; i++) 723 for_each_possible_cpu(cpu)
723 total += local_read(&mod->ref[i].count); 724 total += local_read(__module_ref_addr(mod, cpu));
724 return total; 725 return total;
725} 726}
726EXPORT_SYMBOL(module_refcount); 727EXPORT_SYMBOL(module_refcount);
@@ -894,7 +895,7 @@ void module_put(struct module *module)
894{ 895{
895 if (module) { 896 if (module) {
896 unsigned int cpu = get_cpu(); 897 unsigned int cpu = get_cpu();
897 local_dec(&module->ref[cpu].count); 898 local_dec(__module_ref_addr(module, cpu));
898 /* Maybe they're waiting for us to drop reference? */ 899 /* Maybe they're waiting for us to drop reference? */
899 if (unlikely(!module_is_live(module))) 900 if (unlikely(!module_is_live(module)))
900 wake_up_process(module->waiter); 901 wake_up_process(module->waiter);
@@ -1464,7 +1465,10 @@ static void free_module(struct module *mod)
1464 kfree(mod->args); 1465 kfree(mod->args);
1465 if (mod->percpu) 1466 if (mod->percpu)
1466 percpu_modfree(mod->percpu); 1467 percpu_modfree(mod->percpu);
1467 1468#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
1469 if (mod->refptr)
1470 percpu_modfree(mod->refptr);
1471#endif
1468 /* Free lock-classes: */ 1472 /* Free lock-classes: */
1469 lockdep_free_key_range(mod->module_core, mod->core_size); 1473 lockdep_free_key_range(mod->module_core, mod->core_size);
1470 1474
@@ -2011,6 +2015,14 @@ static noinline struct module *load_module(void __user *umod,
2011 if (err < 0) 2015 if (err < 0)
2012 goto free_mod; 2016 goto free_mod;
2013 2017
2018#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
2019 mod->refptr = percpu_modalloc(sizeof(local_t), __alignof__(local_t),
2020 mod->name);
2021 if (!mod->refptr) {
2022 err = -ENOMEM;
2023 goto free_mod;
2024 }
2025#endif
2014 if (pcpuindex) { 2026 if (pcpuindex) {
2015 /* We have a special allocation for this section. */ 2027 /* We have a special allocation for this section. */
2016 percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size, 2028 percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size,
@@ -2018,7 +2030,7 @@ static noinline struct module *load_module(void __user *umod,
2018 mod->name); 2030 mod->name);
2019 if (!percpu) { 2031 if (!percpu) {
2020 err = -ENOMEM; 2032 err = -ENOMEM;
2021 goto free_mod; 2033 goto free_percpu;
2022 } 2034 }
2023 sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC; 2035 sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
2024 mod->percpu = percpu; 2036 mod->percpu = percpu;
@@ -2282,6 +2294,9 @@ static noinline struct module *load_module(void __user *umod,
2282 free_percpu: 2294 free_percpu:
2283 if (percpu) 2295 if (percpu)
2284 percpu_modfree(percpu); 2296 percpu_modfree(percpu);
2297#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
2298 percpu_modfree(mod->refptr);
2299#endif
2285 free_mod: 2300 free_mod:
2286 kfree(args); 2301 kfree(args);
2287 free_hdr: 2302 free_hdr:
diff --git a/kernel/sched.c b/kernel/sched.c
index c71d7d501ed..400756169aa 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2266,6 +2266,16 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
2266 if (!sched_feat(SYNC_WAKEUPS)) 2266 if (!sched_feat(SYNC_WAKEUPS))
2267 sync = 0; 2267 sync = 0;
2268 2268
2269 if (!sync) {
2270 if (current->se.avg_overlap < sysctl_sched_migration_cost &&
2271 p->se.avg_overlap < sysctl_sched_migration_cost)
2272 sync = 1;
2273 } else {
2274 if (current->se.avg_overlap >= sysctl_sched_migration_cost ||
2275 p->se.avg_overlap >= sysctl_sched_migration_cost)
2276 sync = 0;
2277 }
2278
2269#ifdef CONFIG_SMP 2279#ifdef CONFIG_SMP
2270 if (sched_feat(LB_WAKEUP_UPDATE)) { 2280 if (sched_feat(LB_WAKEUP_UPDATE)) {
2271 struct sched_domain *sd; 2281 struct sched_domain *sd;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 5cc1c162044..a7e50ba185a 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -719,7 +719,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
719 __enqueue_entity(cfs_rq, se); 719 __enqueue_entity(cfs_rq, se);
720} 720}
721 721
722static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) 722static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
723{ 723{
724 if (cfs_rq->last == se) 724 if (cfs_rq->last == se)
725 cfs_rq->last = NULL; 725 cfs_rq->last = NULL;
@@ -728,6 +728,12 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
728 cfs_rq->next = NULL; 728 cfs_rq->next = NULL;
729} 729}
730 730
731static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
732{
733 for_each_sched_entity(se)
734 __clear_buddies(cfs_rq_of(se), se);
735}
736
731static void 737static void
732dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) 738dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
733{ 739{
@@ -768,8 +774,14 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
768 774
769 ideal_runtime = sched_slice(cfs_rq, curr); 775 ideal_runtime = sched_slice(cfs_rq, curr);
770 delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; 776 delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
771 if (delta_exec > ideal_runtime) 777 if (delta_exec > ideal_runtime) {
772 resched_task(rq_of(cfs_rq)->curr); 778 resched_task(rq_of(cfs_rq)->curr);
779 /*
780 * The current task ran long enough, ensure it doesn't get
781 * re-elected due to buddy favours.
782 */
783 clear_buddies(cfs_rq, curr);
784 }
773} 785}
774 786
775static void 787static void
@@ -1179,20 +1191,15 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
1179 int idx, unsigned long load, unsigned long this_load, 1191 int idx, unsigned long load, unsigned long this_load,
1180 unsigned int imbalance) 1192 unsigned int imbalance)
1181{ 1193{
1182 struct task_struct *curr = this_rq->curr;
1183 struct task_group *tg;
1184 unsigned long tl = this_load; 1194 unsigned long tl = this_load;
1185 unsigned long tl_per_task; 1195 unsigned long tl_per_task;
1196 struct task_group *tg;
1186 unsigned long weight; 1197 unsigned long weight;
1187 int balanced; 1198 int balanced;
1188 1199
1189 if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS)) 1200 if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS))
1190 return 0; 1201 return 0;
1191 1202
1192 if (sync && (curr->se.avg_overlap > sysctl_sched_migration_cost ||
1193 p->se.avg_overlap > sysctl_sched_migration_cost))
1194 sync = 0;
1195
1196 /* 1203 /*
1197 * If sync wakeup then subtract the (maximum possible) 1204 * If sync wakeup then subtract the (maximum possible)
1198 * effect of the currently running task from the load 1205 * effect of the currently running task from the load
@@ -1419,9 +1426,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
1419 if (!sched_feat(WAKEUP_PREEMPT)) 1426 if (!sched_feat(WAKEUP_PREEMPT))
1420 return; 1427 return;
1421 1428
1422 if (sched_feat(WAKEUP_OVERLAP) && (sync || 1429 if (sched_feat(WAKEUP_OVERLAP) && sync) {
1423 (se->avg_overlap < sysctl_sched_migration_cost &&
1424 pse->avg_overlap < sysctl_sched_migration_cost))) {
1425 resched_task(curr); 1430 resched_task(curr);
1426 return; 1431 return;
1427 } 1432 }
@@ -1452,6 +1457,11 @@ static struct task_struct *pick_next_task_fair(struct rq *rq)
1452 1457
1453 do { 1458 do {
1454 se = pick_next_entity(cfs_rq); 1459 se = pick_next_entity(cfs_rq);
1460 /*
1461 * If se was a buddy, clear it so that it will have to earn
1462 * the favour again.
1463 */
1464 __clear_buddies(cfs_rq, se);
1455 set_next_entity(cfs_rq, se); 1465 set_next_entity(cfs_rq, se);
1456 cfs_rq = group_cfs_rq(se); 1466 cfs_rq = group_cfs_rq(se);
1457 } while (cfs_rq); 1467 } while (cfs_rq);
diff --git a/kernel/signal.c b/kernel/signal.c
index e73759783dc..b6b36768b75 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -909,7 +909,9 @@ static void print_fatal_signal(struct pt_regs *regs, int signr)
909 } 909 }
910#endif 910#endif
911 printk("\n"); 911 printk("\n");
912 preempt_disable();
912 show_regs(regs); 913 show_regs(regs);
914 preempt_enable();
913} 915}
914 916
915static int __init setup_print_fatal_signals(char *str) 917static int __init setup_print_fatal_signals(char *str)
diff --git a/kernel/smp.c b/kernel/smp.c
index 5cfa0e5e3e8..bbedbb7efe3 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -18,6 +18,7 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(call_function_lock);
18enum { 18enum {
19 CSD_FLAG_WAIT = 0x01, 19 CSD_FLAG_WAIT = 0x01,
20 CSD_FLAG_ALLOC = 0x02, 20 CSD_FLAG_ALLOC = 0x02,
21 CSD_FLAG_LOCK = 0x04,
21}; 22};
22 23
23struct call_function_data { 24struct call_function_data {
@@ -186,6 +187,9 @@ void generic_smp_call_function_single_interrupt(void)
186 if (data_flags & CSD_FLAG_WAIT) { 187 if (data_flags & CSD_FLAG_WAIT) {
187 smp_wmb(); 188 smp_wmb();
188 data->flags &= ~CSD_FLAG_WAIT; 189 data->flags &= ~CSD_FLAG_WAIT;
190 } else if (data_flags & CSD_FLAG_LOCK) {
191 smp_wmb();
192 data->flags &= ~CSD_FLAG_LOCK;
189 } else if (data_flags & CSD_FLAG_ALLOC) 193 } else if (data_flags & CSD_FLAG_ALLOC)
190 kfree(data); 194 kfree(data);
191 } 195 }
@@ -196,6 +200,8 @@ void generic_smp_call_function_single_interrupt(void)
196 } 200 }
197} 201}
198 202
203static DEFINE_PER_CPU(struct call_single_data, csd_data);
204
199/* 205/*
200 * smp_call_function_single - Run a function on a specific CPU 206 * smp_call_function_single - Run a function on a specific CPU
201 * @func: The function to run. This must be fast and non-blocking. 207 * @func: The function to run. This must be fast and non-blocking.
@@ -224,14 +230,38 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
224 func(info); 230 func(info);
225 local_irq_restore(flags); 231 local_irq_restore(flags);
226 } else if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) { 232 } else if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) {
227 struct call_single_data *data = NULL; 233 struct call_single_data *data;
228 234
229 if (!wait) { 235 if (!wait) {
236 /*
237 * We are calling a function on a single CPU
238 * and we are not going to wait for it to finish.
239 * We first try to allocate the data, but if we
240 * fail, we fall back to use a per cpu data to pass
241 * the information to that CPU. Since all callers
242 * of this code will use the same data, we must
243 * synchronize the callers to prevent a new caller
244 * from corrupting the data before the callee
245 * can access it.
246 *
247 * The CSD_FLAG_LOCK is used to let us know when
248 * the IPI handler is done with the data.
249 * The first caller will set it, and the callee
250 * will clear it. The next caller must wait for
251 * it to clear before we set it again. This
252 * will make sure the callee is done with the
253 * data before a new caller will use it.
254 */
230 data = kmalloc(sizeof(*data), GFP_ATOMIC); 255 data = kmalloc(sizeof(*data), GFP_ATOMIC);
231 if (data) 256 if (data)
232 data->flags = CSD_FLAG_ALLOC; 257 data->flags = CSD_FLAG_ALLOC;
233 } 258 else {
234 if (!data) { 259 data = &per_cpu(csd_data, me);
260 while (data->flags & CSD_FLAG_LOCK)
261 cpu_relax();
262 data->flags = CSD_FLAG_LOCK;
263 }
264 } else {
235 data = &d; 265 data = &d;
236 data->flags = CSD_FLAG_WAIT; 266 data->flags = CSD_FLAG_WAIT;
237 } 267 }
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 63e05d423a0..21a5ca84951 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -274,6 +274,21 @@ out_bc:
274} 274}
275 275
276/* 276/*
277 * Transfer the do_timer job away from a dying cpu.
278 *
279 * Called with interrupts disabled.
280 */
281static void tick_handover_do_timer(int *cpup)
282{
283 if (*cpup == tick_do_timer_cpu) {
284 int cpu = cpumask_first(cpu_online_mask);
285
286 tick_do_timer_cpu = (cpu < nr_cpu_ids) ? cpu :
287 TICK_DO_TIMER_NONE;
288 }
289}
290
291/*
277 * Shutdown an event device on a given cpu: 292 * Shutdown an event device on a given cpu:
278 * 293 *
279 * This is called on a life CPU, when a CPU is dead. So we cannot 294 * This is called on a life CPU, when a CPU is dead. So we cannot
@@ -297,13 +312,6 @@ static void tick_shutdown(unsigned int *cpup)
297 clockevents_exchange_device(dev, NULL); 312 clockevents_exchange_device(dev, NULL);
298 td->evtdev = NULL; 313 td->evtdev = NULL;
299 } 314 }
300 /* Transfer the do_timer job away from this cpu */
301 if (*cpup == tick_do_timer_cpu) {
302 int cpu = cpumask_first(cpu_online_mask);
303
304 tick_do_timer_cpu = (cpu < nr_cpu_ids) ? cpu :
305 TICK_DO_TIMER_NONE;
306 }
307 spin_unlock_irqrestore(&tick_device_lock, flags); 315 spin_unlock_irqrestore(&tick_device_lock, flags);
308} 316}
309 317
@@ -357,6 +365,10 @@ static int tick_notify(struct notifier_block *nb, unsigned long reason,
357 tick_broadcast_oneshot_control(reason); 365 tick_broadcast_oneshot_control(reason);
358 break; 366 break;
359 367
368 case CLOCK_EVT_NOTIFY_CPU_DYING:
369 tick_handover_do_timer(dev);
370 break;
371
360 case CLOCK_EVT_NOTIFY_CPU_DEAD: 372 case CLOCK_EVT_NOTIFY_CPU_DEAD:
361 tick_shutdown_broadcast_oneshot(dev); 373 tick_shutdown_broadcast_oneshot(dev);
362 tick_shutdown_broadcast(dev); 374 tick_shutdown_broadcast(dev);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 2f32969c09d..7dcf6e9f2b0 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -17,6 +17,7 @@
17#include <linux/clocksource.h> 17#include <linux/clocksource.h>
18#include <linux/kallsyms.h> 18#include <linux/kallsyms.h>
19#include <linux/seq_file.h> 19#include <linux/seq_file.h>
20#include <linux/suspend.h>
20#include <linux/debugfs.h> 21#include <linux/debugfs.h>
21#include <linux/hardirq.h> 22#include <linux/hardirq.h>
22#include <linux/kthread.h> 23#include <linux/kthread.h>
@@ -1965,6 +1966,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
1965#ifdef CONFIG_FUNCTION_GRAPH_TRACER 1966#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1966 1967
1967static atomic_t ftrace_graph_active; 1968static atomic_t ftrace_graph_active;
1969static struct notifier_block ftrace_suspend_notifier;
1968 1970
1969int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace) 1971int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
1970{ 1972{
@@ -2043,6 +2045,27 @@ static int start_graph_tracing(void)
2043 return ret; 2045 return ret;
2044} 2046}
2045 2047
2048/*
2049 * Hibernation protection.
2050 * The state of the current task is too much unstable during
2051 * suspend/restore to disk. We want to protect against that.
2052 */
2053static int
2054ftrace_suspend_notifier_call(struct notifier_block *bl, unsigned long state,
2055 void *unused)
2056{
2057 switch (state) {
2058 case PM_HIBERNATION_PREPARE:
2059 pause_graph_tracing();
2060 break;
2061
2062 case PM_POST_HIBERNATION:
2063 unpause_graph_tracing();
2064 break;
2065 }
2066 return NOTIFY_DONE;
2067}
2068
2046int register_ftrace_graph(trace_func_graph_ret_t retfunc, 2069int register_ftrace_graph(trace_func_graph_ret_t retfunc,
2047 trace_func_graph_ent_t entryfunc) 2070 trace_func_graph_ent_t entryfunc)
2048{ 2071{
@@ -2050,6 +2073,9 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
2050 2073
2051 mutex_lock(&ftrace_sysctl_lock); 2074 mutex_lock(&ftrace_sysctl_lock);
2052 2075
2076 ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call;
2077 register_pm_notifier(&ftrace_suspend_notifier);
2078
2053 atomic_inc(&ftrace_graph_active); 2079 atomic_inc(&ftrace_graph_active);
2054 ret = start_graph_tracing(); 2080 ret = start_graph_tracing();
2055 if (ret) { 2081 if (ret) {
@@ -2075,6 +2101,7 @@ void unregister_ftrace_graph(void)
2075 ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; 2101 ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
2076 ftrace_graph_entry = ftrace_graph_entry_stub; 2102 ftrace_graph_entry = ftrace_graph_entry_stub;
2077 ftrace_shutdown(FTRACE_STOP_FUNC_RET); 2103 ftrace_shutdown(FTRACE_STOP_FUNC_RET);
2104 unregister_pm_notifier(&ftrace_suspend_notifier);
2078 2105
2079 mutex_unlock(&ftrace_sysctl_lock); 2106 mutex_unlock(&ftrace_sysctl_lock);
2080} 2107}
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 8b0daf0662e..bd38c5cfd8a 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -246,7 +246,7 @@ static inline int test_time_stamp(u64 delta)
246 return 0; 246 return 0;
247} 247}
248 248
249#define BUF_PAGE_SIZE (PAGE_SIZE - sizeof(struct buffer_data_page)) 249#define BUF_PAGE_SIZE (PAGE_SIZE - offsetof(struct buffer_data_page, data))
250 250
251/* 251/*
252 * head_page == tail_page && head == tail then buffer is empty. 252 * head_page == tail_page && head == tail then buffer is empty.
@@ -1025,12 +1025,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1025 } 1025 }
1026 1026
1027 if (next_page == head_page) { 1027 if (next_page == head_page) {
1028 if (!(buffer->flags & RB_FL_OVERWRITE)) { 1028 if (!(buffer->flags & RB_FL_OVERWRITE))
1029 /* reset write */
1030 if (tail <= BUF_PAGE_SIZE)
1031 local_set(&tail_page->write, tail);
1032 goto out_unlock; 1029 goto out_unlock;
1033 }
1034 1030
1035 /* tail_page has not moved yet? */ 1031 /* tail_page has not moved yet? */
1036 if (tail_page == cpu_buffer->tail_page) { 1032 if (tail_page == cpu_buffer->tail_page) {
@@ -1105,6 +1101,10 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1105 return event; 1101 return event;
1106 1102
1107 out_unlock: 1103 out_unlock:
1104 /* reset write */
1105 if (tail <= BUF_PAGE_SIZE)
1106 local_set(&tail_page->write, tail);
1107
1108 __raw_spin_unlock(&cpu_buffer->lock); 1108 __raw_spin_unlock(&cpu_buffer->lock);
1109 local_irq_restore(flags); 1109 local_irq_restore(flags);
1110 return NULL; 1110 return NULL;
@@ -2174,6 +2174,9 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
2174 2174
2175 cpu_buffer->overrun = 0; 2175 cpu_buffer->overrun = 0;
2176 cpu_buffer->entries = 0; 2176 cpu_buffer->entries = 0;
2177
2178 cpu_buffer->write_stamp = 0;
2179 cpu_buffer->read_stamp = 0;
2177} 2180}
2178 2181
2179/** 2182/**
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c580233add9..17bb88d86ac 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -40,7 +40,7 @@
40 40
41#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE) 41#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE)
42 42
43unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; 43unsigned long __read_mostly tracing_max_latency;
44unsigned long __read_mostly tracing_thresh; 44unsigned long __read_mostly tracing_thresh;
45 45
46/* 46/*
@@ -3736,7 +3736,7 @@ static struct notifier_block trace_die_notifier = {
3736 * it if we decide to change what log level the ftrace dump 3736 * it if we decide to change what log level the ftrace dump
3737 * should be at. 3737 * should be at.
3738 */ 3738 */
3739#define KERN_TRACE KERN_INFO 3739#define KERN_TRACE KERN_EMERG
3740 3740
3741static void 3741static void
3742trace_printk_seq(struct trace_seq *s) 3742trace_printk_seq(struct trace_seq *s)
@@ -3770,6 +3770,7 @@ void ftrace_dump(void)
3770 dump_ran = 1; 3770 dump_ran = 1;
3771 3771
3772 /* No turning back! */ 3772 /* No turning back! */
3773 tracing_off();
3773 ftrace_kill(); 3774 ftrace_kill();
3774 3775
3775 for_each_tracing_cpu(cpu) { 3776 for_each_tracing_cpu(cpu) {
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 7c2e326bbc8..62a78d94353 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -380,6 +380,7 @@ static void stop_irqsoff_tracer(struct trace_array *tr)
380 380
381static void __irqsoff_tracer_init(struct trace_array *tr) 381static void __irqsoff_tracer_init(struct trace_array *tr)
382{ 382{
383 tracing_max_latency = 0;
383 irqsoff_trace = tr; 384 irqsoff_trace = tr;
384 /* make sure that the tracer is visible */ 385 /* make sure that the tracer is visible */
385 smp_wmb(); 386 smp_wmb();
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 43586b689e3..42ae1e77b6b 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -333,6 +333,7 @@ static void stop_wakeup_tracer(struct trace_array *tr)
333 333
334static int wakeup_tracer_init(struct trace_array *tr) 334static int wakeup_tracer_init(struct trace_array *tr)
335{ 335{
336 tracing_max_latency = 0;
336 wakeup_trace = tr; 337 wakeup_trace = tr;
337 start_wakeup_tracer(tr); 338 start_wakeup_tracer(tr);
338 return 0; 339 return 0;