Diffstat (limited to 'kernel')
-rw-r--r--	kernel/acct.c	3
-rw-r--r--	kernel/audit_tree.c	13
-rw-r--r--	kernel/bounds.c	2
-rw-r--r--	kernel/cpu.c	26
-rw-r--r--	kernel/cpuset.c	18
-rw-r--r--	kernel/exit.c	40
-rw-r--r--	kernel/fork.c	15
-rw-r--r--	kernel/futex.c	60
-rw-r--r--	kernel/hrtimer.c	171
-rw-r--r--	kernel/hw_breakpoint.c	146
-rw-r--r--	kernel/irq/autoprobe.c	20
-rw-r--r--	kernel/irq/chip.c	86
-rw-r--r--	kernel/irq/handle.c	22
-rw-r--r--	kernel/irq/internals.h	2
-rw-r--r--	kernel/irq/manage.c	50
-rw-r--r--	kernel/irq/migration.c	2
-rw-r--r--	kernel/irq/numa_migrate.c	8
-rw-r--r--	kernel/irq/pm.c	8
-rw-r--r--	kernel/irq/proc.c	4
-rw-r--r--	kernel/irq/spurious.c	14
-rw-r--r--	kernel/kexec.c	61
-rw-r--r--	kernel/kgdb.c	56
-rw-r--r--	kernel/ksysfs.c	21
-rw-r--r--	kernel/kthread.c	23
-rw-r--r--	kernel/lockdep.c	47
-rw-r--r--	kernel/module.c	191
-rw-r--r--	kernel/mutex-debug.h	12
-rw-r--r--	kernel/panic.c	3
-rw-r--r--	kernel/params.c	8
-rw-r--r--	kernel/perf_event.c	198
-rw-r--r--	kernel/pid.c	12
-rw-r--r--	kernel/power/console.c	7
-rw-r--r--	kernel/printk.c	119
-rw-r--r--	kernel/rcutorture.c	8
-rw-r--r--	kernel/relay.c	2
-rw-r--r--	kernel/resource.c	26
-rw-r--r--	kernel/rtmutex-debug.c	4
-rw-r--r--	kernel/rtmutex.c	106
-rw-r--r--	kernel/sched.c	836
-rw-r--r--	kernel/sched_clock.c	23
-rw-r--r--	kernel/sched_cpupri.c	10
-rw-r--r--	kernel/sched_cpupri.h	2
-rw-r--r--	kernel/sched_debug.c	17
-rw-r--r--	kernel/sched_fair.c	208
-rw-r--r--	kernel/sched_features.h	5
-rw-r--r--	kernel/sched_idletask.c	8
-rw-r--r--	kernel/sched_rt.c	66
-rw-r--r--	kernel/signal.c	63
-rw-r--r--	kernel/smp.c	35
-rw-r--r--	kernel/softirq.c	4
-rw-r--r--	kernel/softlockup.c	54
-rw-r--r--	kernel/spinlock.c	306
-rw-r--r--	kernel/sys.c	10
-rw-r--r--	kernel/sysctl.c	50
-rw-r--r--	kernel/sysctl_binary.c	7
-rw-r--r--	kernel/time/clockevents.c	32
-rw-r--r--	kernel/time/tick-broadcast.c	42
-rw-r--r--	kernel/time/tick-common.c	20
-rw-r--r--	kernel/time/tick-internal.h	1
-rw-r--r--	kernel/time/timecompare.c	2
-rw-r--r--	kernel/time/timer_list.c	15
-rw-r--r--	kernel/time/timer_stats.c	18
-rw-r--r--	kernel/timer.c	2
-rw-r--r--	kernel/trace/ftrace.c	30
-rw-r--r--	kernel/trace/power-traces.c	2
-rw-r--r--	kernel/trace/ring_buffer.c	45
-rw-r--r--	kernel/trace/trace.c	307
-rw-r--r--	kernel/trace/trace.h	27
-rw-r--r--	kernel/trace/trace_clock.c	8
-rw-r--r--	kernel/trace/trace_event_profile.c	6
-rw-r--r--	kernel/trace/trace_events.c	41
-rw-r--r--	kernel/trace/trace_export.c	4
-rw-r--r--	kernel/trace/trace_functions_graph.c	169
-rw-r--r--	kernel/trace/trace_hw_branches.c	51
-rw-r--r--	kernel/trace/trace_irqsoff.c	2
-rw-r--r--	kernel/trace/trace_kprobe.c	46
-rw-r--r--	kernel/trace/trace_ksym.c	61
-rw-r--r--	kernel/trace/trace_output.c	75
-rw-r--r--	kernel/trace/trace_sched_wakeup.c	16
-rw-r--r--	kernel/trace/trace_selftest.c	4
-rw-r--r--	kernel/trace/trace_stack.c	16
-rw-r--r--	kernel/trace/trace_syscalls.c	18
82 files changed, 2446 insertions, 1932 deletions
diff --git a/kernel/acct.c b/kernel/acct.c
index 9a4715a2f6bf..a6605ca921b6 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -536,7 +536,8 @@ static void do_acct_process(struct bsd_acct_struct *acct,
 	do_div(elapsed, AHZ);
 	ac.ac_btime = get_seconds() - elapsed;
 	/* we really need to bite the bullet and change layout */
-	current_uid_gid(&ac.ac_uid, &ac.ac_gid);
+	ac.ac_uid = orig_cred->uid;
+	ac.ac_gid = orig_cred->gid;
 #if ACCT_VERSION==2
 	ac.ac_ahz = AHZ;
 #endif
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 2451dc6f3282..4b05bd9479db 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -277,7 +277,7 @@ static void untag_chunk(struct node *p)
 		owner->root = NULL;
 	}
 
-	for (i = j = 0; i < size; i++, j++) {
+	for (i = j = 0; j <= size; i++, j++) {
 		struct audit_tree *s;
 		if (&chunk->owners[j] == p) {
 			list_del_init(&p->list);
@@ -290,7 +290,7 @@ static void untag_chunk(struct node *p)
 		if (!s) /* result of earlier fallback */
 			continue;
 		get_tree(s);
-		list_replace_init(&chunk->owners[i].list, &new->owners[j].list);
+		list_replace_init(&chunk->owners[j].list, &new->owners[i].list);
 	}
 
 	list_replace_rcu(&chunk->hash, &new->hash);
@@ -373,15 +373,17 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
 	for (n = 0; n < old->count; n++) {
 		if (old->owners[n].owner == tree) {
 			spin_unlock(&hash_lock);
-			put_inotify_watch(watch);
+			put_inotify_watch(&old->watch);
 			return 0;
 		}
 	}
 	spin_unlock(&hash_lock);
 
 	chunk = alloc_chunk(old->count + 1);
-	if (!chunk)
+	if (!chunk) {
+		put_inotify_watch(&old->watch);
 		return -ENOMEM;
+	}
 
 	mutex_lock(&inode->inotify_mutex);
 	if (inotify_clone_watch(&old->watch, &chunk->watch) < 0) {
@@ -425,7 +427,8 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
 	spin_unlock(&hash_lock);
 	inotify_evict_watch(&old->watch);
 	mutex_unlock(&inode->inotify_mutex);
-	put_inotify_watch(&old->watch);
+	put_inotify_watch(&old->watch); /* pair to inotify_find_watch */
+	put_inotify_watch(&old->watch); /* and kill it */
 	return 0;
 }
 
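
The tag_chunk() hunks above plug a reference leak: the earlier inotify_find_watch() takes a reference on old->watch, so every early return — including the new -ENOMEM path — must drop it, and the success path now drops two references (one pairing the find, one killing the watch). A runnable sketch of that pairing discipline, with stand-in types rather than the real inotify API:

    #include <stdio.h>

    struct watch { int refs; };

    static void get_watch(struct watch *w) { w->refs++; }
    static void put_watch(struct watch *w) { w->refs--; }

    /* Modeled on tag_chunk(): the lookup takes a reference that every
     * return path, including the allocation-failure one, must drop. */
    static int tag_chunk_sketch(struct watch *old, int alloc_fails)
    {
        get_watch(old);              /* as inotify_find_watch() would */
        if (alloc_fails) {
            put_watch(old);          /* the drop the patch adds on -ENOMEM */
            return -12;              /* stand-in for -ENOMEM */
        }
        put_watch(old);              /* normal path pairs it as well */
        return 0;
    }

    int main(void)
    {
        struct watch w = { 1 };
        printf("ret=%d refs=%d\n", tag_chunk_sketch(&w, 1), w.refs);
        return 0;
    }
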
diff --git a/kernel/bounds.c b/kernel/bounds.c
index 3c5301381837..98a51f26c136 100644
--- a/kernel/bounds.c
+++ b/kernel/bounds.c
@@ -12,7 +12,7 @@
 
 void foo(void)
 {
-	/* The enum constants to put into include/linux/bounds.h */
+	/* The enum constants to put into include/generated/bounds.h */
 	DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS);
 	DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES);
 	/* End of constants */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 7c4e2713df0a..1c8ddd6ee940 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -209,9 +209,12 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 		return -ENOMEM;
 
 	cpu_hotplug_begin();
+	set_cpu_active(cpu, false);
 	err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
 					hcpu, -1, &nr_calls);
 	if (err == NOTIFY_BAD) {
+		set_cpu_active(cpu, true);
+
 		nr_calls--;
 		__raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
 					hcpu, nr_calls, NULL);
@@ -223,11 +226,11 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 
 	/* Ensure that we are not runnable on dying cpu */
 	cpumask_copy(old_allowed, &current->cpus_allowed);
-	set_cpus_allowed_ptr(current,
-			     cpumask_of(cpumask_any_but(cpu_online_mask, cpu)));
+	set_cpus_allowed_ptr(current, cpu_active_mask);
 
 	err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
 	if (err) {
+		set_cpu_active(cpu, true);
 		/* CPU didn't die: tell everyone.  Can't complain. */
 		if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
 					    hcpu) == NOTIFY_BAD)
@@ -278,23 +281,8 @@ int __ref cpu_down(unsigned int cpu)
 		goto out;
 	}
 
-	set_cpu_active(cpu, false);
-
-	/*
-	 * Make sure the all cpus did the reschedule and are not
-	 * using stale version of the cpu_active_mask.
-	 * This is not strictly necessary becuase stop_machine()
-	 * that we run down the line already provides the required
-	 * synchronization. But it's really a side effect and we do not
-	 * want to depend on the innards of the stop_machine here.
-	 */
-	synchronize_sched();
-
 	err = _cpu_down(cpu, 0);
 
-	if (cpu_online(cpu))
-		set_cpu_active(cpu, true);
-
 out:
 	cpu_maps_update_done();
 	stop_machine_destroy();
@@ -383,10 +371,12 @@ int disable_nonboot_cpus(void)
 		return error;
 	cpu_maps_update_begin();
 	first_cpu = cpumask_first(cpu_online_mask);
-	/* We take down all of the non-boot CPUs in one shot to avoid races
+	/*
+	 * We take down all of the non-boot CPUs in one shot to avoid races
 	 * with the userspace trying to use the CPU hotplug at the same time
 	 */
 	cpumask_clear(frozen_cpus);
+
 	printk("Disabling non-boot CPUs ...\n");
 	for_each_online_cpu(cpu) {
 		if (cpu == first_cpu)
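
A pattern worth noting in the _cpu_down() hunks: the patch clears the active bit before CPU_DOWN_PREPARE runs and sets it back on every failure path, instead of fixing it up after the fact in cpu_down(). A minimal user-space sketch of that "flip early, restore on failure" shape (the names here are illustrative, not kernel API):

    #include <stdbool.h>
    #include <stdio.h>

    static bool cpu_active = true;

    static int notify_down_prepare(void) { return -1; /* pretend a veto */ }

    static int take_cpu_down_sketch(void)
    {
        cpu_active = false;          /* flip state before the work starts */
        if (notify_down_prepare()) {
            cpu_active = true;       /* every failure path restores it */
            return -1;
        }
        /* ... the actual teardown would run here ... */
        return 0;
    }

    int main(void)
    {
        printf("ret=%d active=%d\n", take_cpu_down_sketch(), cpu_active);
        return 0;
    }
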
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 3cf2183b472d..ba401fab459f 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -737,7 +737,7 @@ static void do_rebuild_sched_domains(struct work_struct *unused)
 {
 }
 
-static int generate_sched_domains(struct cpumask **domains,
+static int generate_sched_domains(cpumask_var_t **domains,
 			struct sched_domain_attr **attributes)
 {
 	*domains = NULL;
@@ -872,7 +872,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
 		if (retval < 0)
 			return retval;
 
-		if (!cpumask_subset(trialcs->cpus_allowed, cpu_online_mask))
+		if (!cpumask_subset(trialcs->cpus_allowed, cpu_active_mask))
 			return -EINVAL;
 	}
 	retval = validate_change(cs, trialcs);
@@ -2010,7 +2010,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
 		}
 
 		/* Continue past cpusets with all cpus, mems online */
-		if (cpumask_subset(cp->cpus_allowed, cpu_online_mask) &&
+		if (cpumask_subset(cp->cpus_allowed, cpu_active_mask) &&
 		    nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
 			continue;
 
@@ -2019,7 +2019,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
 		/* Remove offline cpus and mems from this cpuset. */
 		mutex_lock(&callback_mutex);
 		cpumask_and(cp->cpus_allowed, cp->cpus_allowed,
-			    cpu_online_mask);
+			    cpu_active_mask);
 		nodes_and(cp->mems_allowed, cp->mems_allowed,
 			  node_states[N_HIGH_MEMORY]);
 		mutex_unlock(&callback_mutex);
@@ -2057,8 +2057,10 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
 	switch (phase) {
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
+	case CPU_DOWN_PREPARE:
+	case CPU_DOWN_PREPARE_FROZEN:
+	case CPU_DOWN_FAILED:
+	case CPU_DOWN_FAILED_FROZEN:
 		break;
 
 	default:
@@ -2067,7 +2069,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
 
 	cgroup_lock();
 	mutex_lock(&callback_mutex);
-	cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask);
+	cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
 	mutex_unlock(&callback_mutex);
 	scan_for_empty_cpusets(&top_cpuset);
 	ndoms = generate_sched_domains(&doms, &attr);
@@ -2114,7 +2116,7 @@ static int cpuset_track_online_nodes(struct notifier_block *self,
 
 void __init cpuset_init_smp(void)
 {
-	cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask);
+	cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
 	top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
 
 	hotcpu_notifier(cpuset_track_online_cpus, 0);
diff --git a/kernel/exit.c b/kernel/exit.c
index 1143012951e9..546774a31a66 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -68,10 +68,10 @@ static void __unhash_process(struct task_struct *p)
 		detach_pid(p, PIDTYPE_SID);
 
 		list_del_rcu(&p->tasks);
+		list_del_init(&p->sibling);
 		__get_cpu_var(process_counts)--;
 	}
 	list_del_rcu(&p->thread_group);
-	list_del_init(&p->sibling);
 }
 
 /*
@@ -736,12 +736,9 @@ static struct task_struct *find_new_reaper(struct task_struct *father)
 /*
  * Any that need to be release_task'd are put on the @dead list.
  */
-static void reparent_thread(struct task_struct *father, struct task_struct *p,
+static void reparent_leader(struct task_struct *father, struct task_struct *p,
 				struct list_head *dead)
 {
-	if (p->pdeath_signal)
-		group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p);
-
 	list_move_tail(&p->sibling, &p->real_parent->children);
 
 	if (task_detached(p))
@@ -780,12 +777,18 @@ static void forget_original_parent(struct task_struct *father)
 	reaper = find_new_reaper(father);
 
 	list_for_each_entry_safe(p, n, &father->children, sibling) {
-		p->real_parent = reaper;
-		if (p->parent == father) {
-			BUG_ON(task_ptrace(p));
-			p->parent = p->real_parent;
-		}
-		reparent_thread(father, p, &dead_children);
+		struct task_struct *t = p;
+		do {
+			t->real_parent = reaper;
+			if (t->parent == father) {
+				BUG_ON(task_ptrace(t));
+				t->parent = t->real_parent;
+			}
+			if (t->pdeath_signal)
+				group_send_sig_info(t->pdeath_signal,
+						    SEND_SIG_NOINFO, t);
+		} while_each_thread(p, t);
+		reparent_leader(father, p, &dead_children);
 	}
 	write_unlock_irq(&tasklist_lock);
 
@@ -933,7 +936,7 @@ NORET_TYPE void do_exit(long code)
 	 * an exiting task cleaning up the robust pi futexes.
 	 */
 	smp_mb();
-	spin_unlock_wait(&tsk->pi_lock);
+	raw_spin_unlock_wait(&tsk->pi_lock);
 
 	if (unlikely(in_atomic()))
 		printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
@@ -971,7 +974,7 @@ NORET_TYPE void do_exit(long code)
 	exit_thread();
 	cgroup_exit(tsk, 1);
 
-	if (group_dead && tsk->signal->leader)
+	if (group_dead)
 		disassociate_ctty(1);
 
 	module_put(task_thread_info(tsk)->exec_domain->module);
@@ -1551,14 +1554,9 @@ static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk)
 	struct task_struct *p;
 
 	list_for_each_entry(p, &tsk->children, sibling) {
-		/*
-		 * Do not consider detached threads.
-		 */
-		if (!task_detached(p)) {
-			int ret = wait_consider_task(wo, 0, p);
-			if (ret)
-				return ret;
-		}
+		int ret = wait_consider_task(wo, 0, p);
+		if (ret)
+			return ret;
 	}
 
 	return 0;
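
The forget_original_parent() hunk replaces a per-leader walk with a do/while over every thread in the group, so pdeath_signal is now delivered to each thread rather than only to the leader. Roughly, while_each_thread(p, t) expands to a traversal of a circular thread list; a runnable sketch with stand-in types:

    #include <stdio.h>

    struct task { int pid; struct task *next_thread; };

    /* Roughly what the do { ... } while_each_thread(p, t) loop does:
     * visit every thread in the circular group list exactly once. */
    static void visit_threads(struct task *leader)
    {
        struct task *t = leader;
        do {
            printf("thread %d\n", t->pid); /* e.g. send pdeath_signal */
            t = t->next_thread;
        } while (t != leader);
    }

    int main(void)
    {
        struct task c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
        c.next_thread = &a;              /* close the circular list */
        visit_threads(&a);
        return 0;
    }
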
diff --git a/kernel/fork.c b/kernel/fork.c
index 1415dc4598ae..5b2959b3ffc2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -939,9 +939,9 @@ SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
 
 static void rt_mutex_init_task(struct task_struct *p)
 {
-	spin_lock_init(&p->pi_lock);
+	raw_spin_lock_init(&p->pi_lock);
 #ifdef CONFIG_RT_MUTEXES
-	plist_head_init(&p->pi_waiters, &p->pi_lock);
+	plist_head_init_raw(&p->pi_waiters, &p->pi_lock);
 	p->pi_blocked_on = NULL;
 #endif
 }
@@ -1127,6 +1127,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #ifdef CONFIG_DEBUG_MUTEXES
 	p->blocked_on = NULL; /* not blocked yet */
 #endif
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+	p->memcg_batch.do_batch = 0;
+	p->memcg_batch.memcg = NULL;
+#endif
 
 	p->bts = NULL;
 
@@ -1206,9 +1210,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->sas_ss_sp = p->sas_ss_size = 0;
 
 	/*
-	 * Syscall tracing should be turned off in the child regardless
-	 * of CLONE_PTRACE.
+	 * Syscall tracing and stepping should be turned off in the
+	 * child regardless of CLONE_PTRACE.
 	 */
+	user_disable_single_step(p);
 	clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
 #ifdef TIF_SYSCALL_EMU
 	clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
@@ -1286,7 +1291,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	}
 
 	if (likely(p->pid)) {
-		list_add_tail(&p->sibling, &p->real_parent->children);
 		tracehook_finish_clone(p, clone_flags, trace);
 
 		if (thread_group_leader(p)) {
@@ -1298,6 +1302,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 				p->signal->tty = tty_kref_get(current->signal->tty);
 			attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
 			attach_pid(p, PIDTYPE_SID, task_session(current));
+			list_add_tail(&p->sibling, &p->real_parent->children);
 			list_add_tail_rcu(&p->tasks, &init_task.tasks);
 			__get_cpu_var(process_counts)++;
 		}
diff --git a/kernel/futex.c b/kernel/futex.c
index fb65e822fc41..8e3c3ffe1b9a 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -304,8 +304,14 @@ void put_futex_key(int fshared, union futex_key *key)
  */
 static int fault_in_user_writeable(u32 __user *uaddr)
 {
-	int ret = get_user_pages(current, current->mm, (unsigned long)uaddr,
-				 1, 1, 0, NULL, NULL);
+	struct mm_struct *mm = current->mm;
+	int ret;
+
+	down_read(&mm->mmap_sem);
+	ret = get_user_pages(current, mm, (unsigned long)uaddr,
+			     1, 1, 0, NULL, NULL);
+	up_read(&mm->mmap_sem);
+
 	return ret < 0 ? ret : 0;
 }
 
@@ -397,9 +403,9 @@ static void free_pi_state(struct futex_pi_state *pi_state)
 	 * and has cleaned up the pi_state already
 	 */
 	if (pi_state->owner) {
-		spin_lock_irq(&pi_state->owner->pi_lock);
+		raw_spin_lock_irq(&pi_state->owner->pi_lock);
 		list_del_init(&pi_state->list);
-		spin_unlock_irq(&pi_state->owner->pi_lock);
+		raw_spin_unlock_irq(&pi_state->owner->pi_lock);
 
 		rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner);
 	}
@@ -464,18 +470,18 @@ void exit_pi_state_list(struct task_struct *curr)
 	 * pi_state_list anymore, but we have to be careful
 	 * versus waiters unqueueing themselves:
 	 */
-	spin_lock_irq(&curr->pi_lock);
+	raw_spin_lock_irq(&curr->pi_lock);
 	while (!list_empty(head)) {
 
 		next = head->next;
 		pi_state = list_entry(next, struct futex_pi_state, list);
 		key = pi_state->key;
 		hb = hash_futex(&key);
-		spin_unlock_irq(&curr->pi_lock);
+		raw_spin_unlock_irq(&curr->pi_lock);
 
 		spin_lock(&hb->lock);
 
-		spin_lock_irq(&curr->pi_lock);
+		raw_spin_lock_irq(&curr->pi_lock);
 		/*
 		 * We dropped the pi-lock, so re-check whether this
 		 * task still owns the PI-state:
@@ -489,15 +495,15 @@ void exit_pi_state_list(struct task_struct *curr)
 		WARN_ON(list_empty(&pi_state->list));
 		list_del_init(&pi_state->list);
 		pi_state->owner = NULL;
-		spin_unlock_irq(&curr->pi_lock);
+		raw_spin_unlock_irq(&curr->pi_lock);
 
 		rt_mutex_unlock(&pi_state->pi_mutex);
 
 		spin_unlock(&hb->lock);
 
-		spin_lock_irq(&curr->pi_lock);
+		raw_spin_lock_irq(&curr->pi_lock);
 	}
-	spin_unlock_irq(&curr->pi_lock);
+	raw_spin_unlock_irq(&curr->pi_lock);
 }
 
 static int
@@ -552,7 +558,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
 	 * change of the task flags, we do this protected by
 	 * p->pi_lock:
 	 */
-	spin_lock_irq(&p->pi_lock);
+	raw_spin_lock_irq(&p->pi_lock);
 	if (unlikely(p->flags & PF_EXITING)) {
 		/*
 		 * The task is on the way out. When PF_EXITPIDONE is
@@ -561,7 +567,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
 		 */
 		int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;
 
-		spin_unlock_irq(&p->pi_lock);
+		raw_spin_unlock_irq(&p->pi_lock);
 		put_task_struct(p);
 		return ret;
 	}
@@ -580,7 +586,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
 	WARN_ON(!list_empty(&pi_state->list));
 	list_add(&pi_state->list, &p->pi_state_list);
 	pi_state->owner = p;
-	spin_unlock_irq(&p->pi_lock);
+	raw_spin_unlock_irq(&p->pi_lock);
 
 	put_task_struct(p);
 
@@ -754,7 +760,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
 	if (!pi_state)
 		return -EINVAL;
 
-	spin_lock(&pi_state->pi_mutex.wait_lock);
+	raw_spin_lock(&pi_state->pi_mutex.wait_lock);
 	new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
 
 	/*
@@ -783,23 +789,23 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
 		else if (curval != uval)
 			ret = -EINVAL;
 		if (ret) {
-			spin_unlock(&pi_state->pi_mutex.wait_lock);
+			raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
 			return ret;
 		}
 	}
 
-	spin_lock_irq(&pi_state->owner->pi_lock);
+	raw_spin_lock_irq(&pi_state->owner->pi_lock);
 	WARN_ON(list_empty(&pi_state->list));
 	list_del_init(&pi_state->list);
-	spin_unlock_irq(&pi_state->owner->pi_lock);
+	raw_spin_unlock_irq(&pi_state->owner->pi_lock);
 
-	spin_lock_irq(&new_owner->pi_lock);
+	raw_spin_lock_irq(&new_owner->pi_lock);
 	WARN_ON(!list_empty(&pi_state->list));
 	list_add(&pi_state->list, &new_owner->pi_state_list);
 	pi_state->owner = new_owner;
-	spin_unlock_irq(&new_owner->pi_lock);
+	raw_spin_unlock_irq(&new_owner->pi_lock);
 
-	spin_unlock(&pi_state->pi_mutex.wait_lock);
+	raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
 	rt_mutex_unlock(&pi_state->pi_mutex);
 
 	return 0;
@@ -1004,7 +1010,7 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
 		plist_add(&q->list, &hb2->chain);
 		q->lock_ptr = &hb2->lock;
 #ifdef CONFIG_DEBUG_PI_LIST
-		q->list.plist.lock = &hb2->lock;
+		q->list.plist.spinlock = &hb2->lock;
 #endif
 	}
 	get_futex_key_refs(key2);
@@ -1040,7 +1046,7 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
 
 	q->lock_ptr = &hb->lock;
 #ifdef CONFIG_DEBUG_PI_LIST
-	q->list.plist.lock = &hb->lock;
+	q->list.plist.spinlock = &hb->lock;
 #endif
 
 	wake_up_state(q->task, TASK_NORMAL);
@@ -1388,7 +1394,7 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
 
 	plist_node_init(&q->list, prio);
 #ifdef CONFIG_DEBUG_PI_LIST
-	q->list.plist.lock = &hb->lock;
+	q->list.plist.spinlock = &hb->lock;
 #endif
 	plist_add(&q->list, &hb->chain);
 	q->task = current;
@@ -1523,18 +1529,18 @@ retry:
 	 * itself.
 	 */
 	if (pi_state->owner != NULL) {
-		spin_lock_irq(&pi_state->owner->pi_lock);
+		raw_spin_lock_irq(&pi_state->owner->pi_lock);
 		WARN_ON(list_empty(&pi_state->list));
 		list_del_init(&pi_state->list);
-		spin_unlock_irq(&pi_state->owner->pi_lock);
+		raw_spin_unlock_irq(&pi_state->owner->pi_lock);
 	}
 
 	pi_state->owner = newowner;
 
-	spin_lock_irq(&newowner->pi_lock);
+	raw_spin_lock_irq(&newowner->pi_lock);
 	WARN_ON(!list_empty(&pi_state->list));
 	list_add(&pi_state->list, &newowner->pi_state_list);
-	spin_unlock_irq(&newowner->pi_lock);
+	raw_spin_unlock_irq(&newowner->pi_lock);
 	return 0;
 
 	/*
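
Besides the pi_lock conversions to the raw_spin_* API, the first futex.c hunk fixes fault_in_user_writeable() to take mm->mmap_sem for read around get_user_pages(), which expects its caller to hold that lock. A user-space analogue of the same "take the lock the callee assumes is held" pattern, with illustrative names:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_rwlock_t map_lock = PTHREAD_RWLOCK_INITIALIZER;

    /* Documented as "call with map_lock held for read", the way
     * get_user_pages() documents mmap_sem. */
    static int fault_in_page(unsigned long addr)
    {
        return addr ? 1 : -14;           /* pretend -EFAULT on NULL */
    }

    static int fault_in_writeable(unsigned long addr)
    {
        int ret;

        pthread_rwlock_rdlock(&map_lock);   /* like down_read(&mm->mmap_sem) */
        ret = fault_in_page(addr);
        pthread_rwlock_unlock(&map_lock);   /* like up_read(&mm->mmap_sem) */

        return ret < 0 ? ret : 0;
    }

    int main(void)
    {
        printf("ret=%d\n", fault_in_writeable(0x1000));
        return 0;
    }
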
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index ede527708123..0086628b6e97 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -127,11 +127,11 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
 	for (;;) {
 		base = timer->base;
 		if (likely(base != NULL)) {
-			spin_lock_irqsave(&base->cpu_base->lock, *flags);
+			raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
 			if (likely(base == timer->base))
 				return base;
 			/* The timer has migrated to another CPU: */
-			spin_unlock_irqrestore(&base->cpu_base->lock, *flags);
+			raw_spin_unlock_irqrestore(&base->cpu_base->lock, *flags);
 		}
 		cpu_relax();
 	}
@@ -208,13 +208,13 @@ again:
 
 		/* See the comment in lock_timer_base() */
 		timer->base = NULL;
-		spin_unlock(&base->cpu_base->lock);
-		spin_lock(&new_base->cpu_base->lock);
+		raw_spin_unlock(&base->cpu_base->lock);
+		raw_spin_lock(&new_base->cpu_base->lock);
 
 		if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) {
 			cpu = this_cpu;
-			spin_unlock(&new_base->cpu_base->lock);
-			spin_lock(&base->cpu_base->lock);
+			raw_spin_unlock(&new_base->cpu_base->lock);
+			raw_spin_lock(&base->cpu_base->lock);
 			timer->base = base;
 			goto again;
 		}
@@ -230,7 +230,7 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
 {
 	struct hrtimer_clock_base *base = timer->base;
 
-	spin_lock_irqsave(&base->cpu_base->lock, *flags);
+	raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
 
 	return base;
 }
@@ -557,7 +557,7 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
 static int hrtimer_reprogram(struct hrtimer *timer,
 			     struct hrtimer_clock_base *base)
 {
-	ktime_t *expires_next = &__get_cpu_var(hrtimer_bases).expires_next;
+	struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
 	ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
 	int res;
 
@@ -582,7 +582,16 @@ static int hrtimer_reprogram(struct hrtimer *timer,
 	if (expires.tv64 < 0)
 		return -ETIME;
 
-	if (expires.tv64 >= expires_next->tv64)
+	if (expires.tv64 >= cpu_base->expires_next.tv64)
+		return 0;
+
+	/*
+	 * If a hang was detected in the last timer interrupt then we
+	 * do not schedule a timer which is earlier than the expiry
+	 * which we enforced in the hang detection. We want the system
+	 * to make progress.
+	 */
+	if (cpu_base->hang_detected)
 		return 0;
 
 	/*
@@ -590,7 +599,7 @@ static int hrtimer_reprogram(struct hrtimer *timer,
 	 */
 	res = tick_program_event(expires, 0);
 	if (!IS_ERR_VALUE(res))
-		*expires_next = expires;
+		cpu_base->expires_next = expires;
 	return res;
 }
 
@@ -619,12 +628,12 @@ static void retrigger_next_event(void *arg)
 	base = &__get_cpu_var(hrtimer_bases);
 
 	/* Adjust CLOCK_REALTIME offset */
-	spin_lock(&base->lock);
+	raw_spin_lock(&base->lock);
 	base->clock_base[CLOCK_REALTIME].offset =
 		timespec_to_ktime(realtime_offset);
 
 	hrtimer_force_reprogram(base, 0);
-	spin_unlock(&base->lock);
+	raw_spin_unlock(&base->lock);
 }
 
 /*
@@ -685,9 +694,9 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
 {
 	if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) {
 		if (wakeup) {
-			spin_unlock(&base->cpu_base->lock);
+			raw_spin_unlock(&base->cpu_base->lock);
 			raise_softirq_irqoff(HRTIMER_SOFTIRQ);
-			spin_lock(&base->cpu_base->lock);
+			raw_spin_lock(&base->cpu_base->lock);
 		} else
 			__raise_softirq_irqoff(HRTIMER_SOFTIRQ);
 
@@ -747,17 +756,33 @@ static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { }
 
 #endif /* CONFIG_HIGH_RES_TIMERS */
 
-#ifdef CONFIG_TIMER_STATS
-void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer, void *addr)
+static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer)
 {
+#ifdef CONFIG_TIMER_STATS
 	if (timer->start_site)
 		return;
-
-	timer->start_site = addr;
+	timer->start_site = __builtin_return_address(0);
 	memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
 	timer->start_pid = current->pid;
+#endif
 }
+
+static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer)
+{
+#ifdef CONFIG_TIMER_STATS
+	timer->start_site = NULL;
+#endif
+}
+
+static inline void timer_stats_account_hrtimer(struct hrtimer *timer)
+{
+#ifdef CONFIG_TIMER_STATS
+	if (likely(!timer_stats_active))
+		return;
+	timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
+				 timer->function, timer->start_comm, 0);
 #endif
+}
 
 /*
  * Counterpart to lock_hrtimer_base above:
@@ -765,7 +790,7 @@ void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer, void *addr)
 static inline
 void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
 {
-	spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags);
+	raw_spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags);
 }
 
 /**
@@ -1098,7 +1123,7 @@ ktime_t hrtimer_get_next_event(void)
 	unsigned long flags;
 	int i;
 
-	spin_lock_irqsave(&cpu_base->lock, flags);
+	raw_spin_lock_irqsave(&cpu_base->lock, flags);
 
 	if (!hrtimer_hres_active()) {
 		for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
@@ -1115,7 +1140,7 @@ ktime_t hrtimer_get_next_event(void)
 		}
 	}
 
-	spin_unlock_irqrestore(&cpu_base->lock, flags);
+	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
 
 	if (mindelta.tv64 < 0)
 		mindelta.tv64 = 0;
@@ -1197,11 +1222,11 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now)
 	 * they get migrated to another cpu, therefore its safe to unlock
 	 * the timer base.
 	 */
-	spin_unlock(&cpu_base->lock);
+	raw_spin_unlock(&cpu_base->lock);
 	trace_hrtimer_expire_entry(timer, now);
 	restart = fn(timer);
 	trace_hrtimer_expire_exit(timer);
-	spin_lock(&cpu_base->lock);
+	raw_spin_lock(&cpu_base->lock);
 
 	/*
 	 * Note: We clear the CALLBACK bit after enqueue_hrtimer and
@@ -1217,30 +1242,6 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now)
 
 #ifdef CONFIG_HIGH_RES_TIMERS
 
-static int force_clock_reprogram;
-
-/*
- * After 5 iteration's attempts, we consider that hrtimer_interrupt()
- * is hanging, which could happen with something that slows the interrupt
- * such as the tracing. Then we force the clock reprogramming for each future
- * hrtimer interrupts to avoid infinite loops and use the min_delta_ns
- * threshold that we will overwrite.
- * The next tick event will be scheduled to 3 times we currently spend on
- * hrtimer_interrupt(). This gives a good compromise, the cpus will spend
- * 1/4 of their time to process the hrtimer interrupts. This is enough to
- * let it running without serious starvation.
- */
-
-static inline void
-hrtimer_interrupt_hanging(struct clock_event_device *dev,
-			ktime_t try_time)
-{
-	force_clock_reprogram = 1;
-	dev->min_delta_ns = (unsigned long)try_time.tv64 * 3;
-	printk(KERN_WARNING "hrtimer: interrupt too slow, "
-		"forcing clock min delta to %llu ns\n",
-		(unsigned long long) dev->min_delta_ns);
-}
 /*
  * High resolution timer interrupt
  * Called with interrupts disabled
@@ -1249,24 +1250,18 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 {
 	struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
 	struct hrtimer_clock_base *base;
-	ktime_t expires_next, now;
-	int nr_retries = 0;
-	int i;
+	ktime_t expires_next, now, entry_time, delta;
+	int i, retries = 0;
 
 	BUG_ON(!cpu_base->hres_active);
 	cpu_base->nr_events++;
 	dev->next_event.tv64 = KTIME_MAX;
 
- retry:
-	/* 5 retries is enough to notice a hang */
-	if (!(++nr_retries % 5))
-		hrtimer_interrupt_hanging(dev, ktime_sub(ktime_get(), now));
-
-	now = ktime_get();
-
+	entry_time = now = ktime_get();
+retry:
 	expires_next.tv64 = KTIME_MAX;
 
-	spin_lock(&cpu_base->lock);
+	raw_spin_lock(&cpu_base->lock);
 	/*
 	 * We set expires_next to KTIME_MAX here with cpu_base->lock
 	 * held to prevent that a timer is enqueued in our queue via
@@ -1322,13 +1317,51 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 	 * against it.
 	 */
 	cpu_base->expires_next = expires_next;
-	spin_unlock(&cpu_base->lock);
+	raw_spin_unlock(&cpu_base->lock);
 
 	/* Reprogramming necessary ? */
-	if (expires_next.tv64 != KTIME_MAX) {
-		if (tick_program_event(expires_next, force_clock_reprogram))
-			goto retry;
+	if (expires_next.tv64 == KTIME_MAX ||
+	    !tick_program_event(expires_next, 0)) {
+		cpu_base->hang_detected = 0;
+		return;
 	}
+
+	/*
+	 * The next timer was already expired due to:
+	 * - tracing
+	 * - long lasting callbacks
+	 * - being scheduled away when running in a VM
+	 *
+	 * We need to prevent that we loop forever in the hrtimer
+	 * interrupt routine. We give it 3 attempts to avoid
+	 * overreacting on some spurious event.
+	 */
+	now = ktime_get();
+	cpu_base->nr_retries++;
+	if (++retries < 3)
+		goto retry;
+	/*
+	 * Give the system a chance to do something else than looping
+	 * here. We stored the entry time, so we know exactly how long
+	 * we spent here. We schedule the next event this amount of
+	 * time away.
+	 */
+	cpu_base->nr_hangs++;
+	cpu_base->hang_detected = 1;
+	delta = ktime_sub(now, entry_time);
+	if (delta.tv64 > cpu_base->max_hang_time.tv64)
+		cpu_base->max_hang_time = delta;
+	/*
+	 * Limit it to a sensible value as we enforce a longer
+	 * delay. Give the CPU at least 100ms to catch up.
+	 */
+	if (delta.tv64 > 100 * NSEC_PER_MSEC)
+		expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC);
+	else
+		expires_next = ktime_add(now, delta);
+	tick_program_event(expires_next, 1);
+	printk_once(KERN_WARNING "hrtimer: interrupt took %llu ns\n",
+		    ktime_to_ns(delta));
 }
 
 /*
@@ -1424,7 +1457,7 @@ void hrtimer_run_queues(void)
 			gettime = 0;
 		}
 
-		spin_lock(&cpu_base->lock);
+		raw_spin_lock(&cpu_base->lock);
 
 		while ((node = base->first)) {
 			struct hrtimer *timer;
@@ -1436,7 +1469,7 @@ void hrtimer_run_queues(void)
 
 			__run_hrtimer(timer, &base->softirq_time);
 		}
-		spin_unlock(&cpu_base->lock);
+		raw_spin_unlock(&cpu_base->lock);
 	}
 }
 
@@ -1592,7 +1625,7 @@ static void __cpuinit init_hrtimers_cpu(int cpu)
 	struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
 	int i;
 
-	spin_lock_init(&cpu_base->lock);
+	raw_spin_lock_init(&cpu_base->lock);
 
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
 		cpu_base->clock_base[i].cpu_base = cpu_base;
@@ -1650,16 +1683,16 @@ static void migrate_hrtimers(int scpu)
 	 * The caller is globally serialized and nobody else
 	 * takes two locks at once, deadlock is not possible.
 	 */
-	spin_lock(&new_base->lock);
-	spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
+	raw_spin_lock(&new_base->lock);
+	raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
 
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
 		migrate_hrtimer_list(&old_base->clock_base[i],
 				     &new_base->clock_base[i]);
 	}
 
-	spin_unlock(&old_base->lock);
-	spin_unlock(&new_base->lock);
+	raw_spin_unlock(&old_base->lock);
+	raw_spin_unlock(&new_base->lock);
 
 	/* Check, if we got expired work to do */
 	__hrtimer_peek_ahead_timers();
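
The new hrtimer_interrupt() hang logic boils down to: tolerate up to three passes over the expired timers, and once a hang is declared, program the next event as far in the future as the time already spent, capped at 100ms. A standalone sketch of that arithmetic with a simulated clock (nothing here is kernel API):

    #include <stdio.h>

    #define NSEC_PER_MSEC 1000000LL

    static long long clock_ns;                  /* simulated clock */
    static long long ktime_get_ns(void) { return clock_ns; }

    /* Pretend each pass over the expired timers burns 60ms and always
     * finds more work, the way a hanging interrupt would. */
    static int expire_timers(void) { clock_ns += 60 * NSEC_PER_MSEC; return 1; }

    int main(void)
    {
        long long entry_time = ktime_get_ns(), delta, next_delay;
        int retries = 0;

        while (expire_timers()) {
            if (++retries < 3)              /* tolerate a few extra passes */
                continue;
            delta = ktime_get_ns() - entry_time;
            next_delay = delta > 100 * NSEC_PER_MSEC
                    ? 100 * NSEC_PER_MSEC : delta;   /* cap at 100ms */
            printf("hang after %lld ns, next event in %lld ns\n",
                   delta, next_delay);
            break;
        }
        return 0;
    }
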
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index cf5ee1628411..dbcbf6a33a08 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -52,7 +52,7 @@
 static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned);
 
 /* Number of pinned task breakpoints in a cpu */
-static DEFINE_PER_CPU(unsigned int, task_bp_pinned[HBP_NUM]);
+static DEFINE_PER_CPU(unsigned int, nr_task_bp_pinned[HBP_NUM]);
 
 /* Number of non-pinned cpu/task breakpoints in a cpu */
 static DEFINE_PER_CPU(unsigned int, nr_bp_flexible);
@@ -73,7 +73,7 @@ static DEFINE_MUTEX(nr_bp_mutex);
 static unsigned int max_task_bp_pinned(int cpu)
 {
 	int i;
-	unsigned int *tsk_pinned = per_cpu(task_bp_pinned, cpu);
+	unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned, cpu);
 
 	for (i = HBP_NUM -1; i >= 0; i--) {
 		if (tsk_pinned[i] > 0)
@@ -83,15 +83,51 @@ static unsigned int max_task_bp_pinned(int cpu)
 	return 0;
 }
 
+static int task_bp_pinned(struct task_struct *tsk)
+{
+	struct perf_event_context *ctx = tsk->perf_event_ctxp;
+	struct list_head *list;
+	struct perf_event *bp;
+	unsigned long flags;
+	int count = 0;
+
+	if (WARN_ONCE(!ctx, "No perf context for this task"))
+		return 0;
+
+	list = &ctx->event_list;
+
+	raw_spin_lock_irqsave(&ctx->lock, flags);
+
+	/*
+	 * The current breakpoint counter is not included in the list
+	 * at the open() callback time
+	 */
+	list_for_each_entry(bp, list, event_entry) {
+		if (bp->attr.type == PERF_TYPE_BREAKPOINT)
+			count++;
+	}
+
+	raw_spin_unlock_irqrestore(&ctx->lock, flags);
+
+	return count;
+}
+
 /*
  * Report the number of pinned/un-pinned breakpoints we have in
  * a given cpu (cpu > -1) or in all of them (cpu = -1).
 */
-static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu)
+static void
+fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp)
 {
+	int cpu = bp->cpu;
+	struct task_struct *tsk = bp->ctx->task;
+
 	if (cpu >= 0) {
 		slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu);
-		slots->pinned += max_task_bp_pinned(cpu);
+		if (!tsk)
+			slots->pinned += max_task_bp_pinned(cpu);
+		else
+			slots->pinned += task_bp_pinned(tsk);
 		slots->flexible = per_cpu(nr_bp_flexible, cpu);
 
 		return;
@@ -101,7 +137,10 @@ static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu)
 		unsigned int nr;
 
 		nr = per_cpu(nr_cpu_bp_pinned, cpu);
-		nr += max_task_bp_pinned(cpu);
+		if (!tsk)
+			nr += max_task_bp_pinned(cpu);
+		else
+			nr += task_bp_pinned(tsk);
 
 		if (nr > slots->pinned)
 			slots->pinned = nr;
@@ -118,35 +157,12 @@ static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu)
 */
 static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable)
 {
-	int count = 0;
-	struct perf_event *bp;
-	struct perf_event_context *ctx = tsk->perf_event_ctxp;
 	unsigned int *tsk_pinned;
-	struct list_head *list;
-	unsigned long flags;
-
-	if (WARN_ONCE(!ctx, "No perf context for this task"))
-		return;
-
-	list = &ctx->event_list;
-
-	spin_lock_irqsave(&ctx->lock, flags);
-
-	/*
-	 * The current breakpoint counter is not included in the list
-	 * at the open() callback time
-	 */
-	list_for_each_entry(bp, list, event_entry) {
-		if (bp->attr.type == PERF_TYPE_BREAKPOINT)
-			count++;
-	}
-
-	spin_unlock_irqrestore(&ctx->lock, flags);
+	int count = 0;
 
-	if (WARN_ONCE(count < 0, "No breakpoint counter found in the counter list"))
-		return;
+	count = task_bp_pinned(tsk);
 
-	tsk_pinned = per_cpu(task_bp_pinned, cpu);
+	tsk_pinned = per_cpu(nr_task_bp_pinned, cpu);
 	if (enable) {
 		tsk_pinned[count]++;
 		if (count > 0)
@@ -193,7 +209,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
  *   - If attached to a single cpu, check:
  *
  *       (per_cpu(nr_bp_flexible, cpu) || (per_cpu(nr_cpu_bp_pinned, cpu)
- *            + max(per_cpu(task_bp_pinned, cpu)))) < HBP_NUM
+ *            + max(per_cpu(nr_task_bp_pinned, cpu)))) < HBP_NUM
 *
 *   -> If there are already non-pinned counters in this cpu, it means
 *      there is already a free slot for them.
@@ -204,7 +220,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
 *   - If attached to every cpus, check:
 *
 *       (per_cpu(nr_bp_flexible, *) || (max(per_cpu(nr_cpu_bp_pinned, *))
- *            + max(per_cpu(task_bp_pinned, *)))) < HBP_NUM
+ *            + max(per_cpu(nr_task_bp_pinned, *)))) < HBP_NUM
 *
 *   -> This is roughly the same, except we check the number of per cpu
 *      bp for every cpu and we keep the max one. Same for the per tasks
@@ -216,7 +232,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
 *   - If attached to a single cpu, check:
 *
 *       ((per_cpu(nr_bp_flexible, cpu) > 1) + per_cpu(nr_cpu_bp_pinned, cpu)
- *            + max(per_cpu(task_bp_pinned, cpu))) < HBP_NUM
+ *            + max(per_cpu(nr_task_bp_pinned, cpu))) < HBP_NUM
 *
 *   -> Same checks as before. But now the nr_bp_flexible, if any, must keep
 *      one register at least (or they will never be fed).
@@ -224,7 +240,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
 *   - If attached to every cpus, check:
 *
 *       ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *))
- *            + max(per_cpu(task_bp_pinned, *))) < HBP_NUM
+ *            + max(per_cpu(nr_task_bp_pinned, *))) < HBP_NUM
 */
 int reserve_bp_slot(struct perf_event *bp)
 {
@@ -233,7 +249,7 @@ int reserve_bp_slot(struct perf_event *bp)
 
 	mutex_lock(&nr_bp_mutex);
 
-	fetch_bp_busy_slots(&slots, bp->cpu);
+	fetch_bp_busy_slots(&slots, bp);
 
 	/* Flexible counters need to keep at least one slot */
 	if (slots.pinned + (!!slots.flexible) == HBP_NUM) {
@@ -259,7 +275,7 @@ void release_bp_slot(struct perf_event *bp)
 }
 
 
-int __register_perf_hw_breakpoint(struct perf_event *bp)
+int register_perf_hw_breakpoint(struct perf_event *bp)
 {
 	int ret;
 
@@ -276,19 +292,12 @@ int __register_perf_hw_breakpoint(struct perf_event *bp)
 	 * This is a quick hack that will be removed soon, once we remove
 	 * the tmp breakpoints from ptrace
 	 */
-	if (!bp->attr.disabled || bp->callback == perf_bp_event)
+	if (!bp->attr.disabled || !bp->overflow_handler)
 		ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
 
 	return ret;
 }
 
-int register_perf_hw_breakpoint(struct perf_event *bp)
-{
-	bp->callback = perf_bp_event;
-
-	return __register_perf_hw_breakpoint(bp);
-}
-
 /**
 * register_user_hw_breakpoint - register a hardware breakpoint for user space
 * @attr: breakpoint attributes
@@ -297,7 +306,7 @@ int register_perf_hw_breakpoint(struct perf_event *bp)
 */
 struct perf_event *
 register_user_hw_breakpoint(struct perf_event_attr *attr,
-			    perf_callback_t triggered,
+			    perf_overflow_handler_t triggered,
 			    struct task_struct *tsk)
 {
 	return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered);
@@ -311,19 +320,40 @@ EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
 * @triggered: callback to trigger when we hit the breakpoint
 * @tsk: pointer to 'task_struct' of the process to which the address belongs
 */
-struct perf_event *
-modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr,
-			  perf_callback_t triggered,
-			  struct task_struct *tsk)
+int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
 {
-	/*
-	 * FIXME: do it without unregistering
-	 * - We don't want to lose our slot
-	 * - If the new bp is incorrect, don't lose the older one
-	 */
-	unregister_hw_breakpoint(bp);
+	u64 old_addr = bp->attr.bp_addr;
+	int old_type = bp->attr.bp_type;
+	int old_len = bp->attr.bp_len;
+	int err = 0;
+
+	perf_event_disable(bp);
+
+	bp->attr.bp_addr = attr->bp_addr;
+	bp->attr.bp_type = attr->bp_type;
+	bp->attr.bp_len = attr->bp_len;
+
+	if (attr->disabled)
+		goto end;
 
-	return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered);
+	err = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
+	if (!err)
+		perf_event_enable(bp);
+
+	if (err) {
+		bp->attr.bp_addr = old_addr;
+		bp->attr.bp_type = old_type;
+		bp->attr.bp_len = old_len;
+		if (!bp->attr.disabled)
+			perf_event_enable(bp);
+
+		return err;
+	}
+
+end:
+	bp->attr.disabled = attr->disabled;
+
+	return 0;
 }
 EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
 
@@ -348,7 +378,7 @@ EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
 */
 struct perf_event **
 register_wide_hw_breakpoint(struct perf_event_attr *attr,
-			    perf_callback_t triggered)
+			    perf_overflow_handler_t triggered)
 {
 	struct perf_event **cpu_events, **pevent, *bp;
 	long err;
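
modify_user_hw_breakpoint() no longer unregisters and re-registers the breakpoint; it saves the old attributes, applies the new ones, validates, and rolls back on error, so the slot is never lost and a bad request never destroys a working breakpoint. A compact sketch of that save/validate/rollback shape (fields and validator are stand-ins):

    #include <stdio.h>

    struct bp_attr { unsigned long addr; int type; int len; };

    static int validate(const struct bp_attr *a) { return a->len <= 8 ? 0 : -22; }

    static int modify_bp(struct bp_attr *bp, const struct bp_attr *attr)
    {
        struct bp_attr old = *bp;    /* remember the known-good settings */
        int err;

        *bp = *attr;                 /* try the new ones in place */
        err = validate(bp);
        if (err)
            *bp = old;               /* roll back instead of losing the slot */
        return err;
    }

    int main(void)
    {
        struct bp_attr bp = { 0x1000, 0, 4 }, bad = { 0x2000, 0, 16 };
        printf("err=%d len=%d\n", modify_bp(&bp, &bad), bp.len);
        return 0;
    }
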
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index 1de9700f416e..2295a31ef110 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -45,7 +45,7 @@ unsigned long probe_irq_on(void)
45 * flush such a longstanding irq before considering it as spurious. 45 * flush such a longstanding irq before considering it as spurious.
46 */ 46 */
47 for_each_irq_desc_reverse(i, desc) { 47 for_each_irq_desc_reverse(i, desc) {
48 spin_lock_irq(&desc->lock); 48 raw_spin_lock_irq(&desc->lock);
49 if (!desc->action && !(desc->status & IRQ_NOPROBE)) { 49 if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
50 /* 50 /*
51 * An old-style architecture might still have 51 * An old-style architecture might still have
@@ -61,7 +61,7 @@ unsigned long probe_irq_on(void)
61 desc->chip->set_type(i, IRQ_TYPE_PROBE); 61 desc->chip->set_type(i, IRQ_TYPE_PROBE);
62 desc->chip->startup(i); 62 desc->chip->startup(i);
63 } 63 }
64 spin_unlock_irq(&desc->lock); 64 raw_spin_unlock_irq(&desc->lock);
65 } 65 }
66 66
67 /* Wait for longstanding interrupts to trigger. */ 67 /* Wait for longstanding interrupts to trigger. */
@@ -73,13 +73,13 @@ unsigned long probe_irq_on(void)
73 * happened in the previous stage, it may have masked itself) 73 * happened in the previous stage, it may have masked itself)
74 */ 74 */
75 for_each_irq_desc_reverse(i, desc) { 75 for_each_irq_desc_reverse(i, desc) {
76 spin_lock_irq(&desc->lock); 76 raw_spin_lock_irq(&desc->lock);
77 if (!desc->action && !(desc->status & IRQ_NOPROBE)) { 77 if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
78 desc->status |= IRQ_AUTODETECT | IRQ_WAITING; 78 desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
79 if (desc->chip->startup(i)) 79 if (desc->chip->startup(i))
80 desc->status |= IRQ_PENDING; 80 desc->status |= IRQ_PENDING;
81 } 81 }
82 spin_unlock_irq(&desc->lock); 82 raw_spin_unlock_irq(&desc->lock);
83 } 83 }
84 84
85 /* 85 /*
@@ -91,7 +91,7 @@ unsigned long probe_irq_on(void)
91 * Now filter out any obviously spurious interrupts 91 * Now filter out any obviously spurious interrupts
92 */ 92 */
93 for_each_irq_desc(i, desc) { 93 for_each_irq_desc(i, desc) {
94 spin_lock_irq(&desc->lock); 94 raw_spin_lock_irq(&desc->lock);
95 status = desc->status; 95 status = desc->status;
96 96
97 if (status & IRQ_AUTODETECT) { 97 if (status & IRQ_AUTODETECT) {
@@ -103,7 +103,7 @@ unsigned long probe_irq_on(void)
103 if (i < 32) 103 if (i < 32)
104 mask |= 1 << i; 104 mask |= 1 << i;
105 } 105 }
106 spin_unlock_irq(&desc->lock); 106 raw_spin_unlock_irq(&desc->lock);
107 } 107 }
108 108
109 return mask; 109 return mask;
@@ -129,7 +129,7 @@ unsigned int probe_irq_mask(unsigned long val)
129 int i; 129 int i;
130 130
131 for_each_irq_desc(i, desc) { 131 for_each_irq_desc(i, desc) {
132 spin_lock_irq(&desc->lock); 132 raw_spin_lock_irq(&desc->lock);
133 status = desc->status; 133 status = desc->status;
134 134
135 if (status & IRQ_AUTODETECT) { 135 if (status & IRQ_AUTODETECT) {
@@ -139,7 +139,7 @@ unsigned int probe_irq_mask(unsigned long val)
139 desc->status = status & ~IRQ_AUTODETECT; 139 desc->status = status & ~IRQ_AUTODETECT;
140 desc->chip->shutdown(i); 140 desc->chip->shutdown(i);
141 } 141 }
142 spin_unlock_irq(&desc->lock); 142 raw_spin_unlock_irq(&desc->lock);
143 } 143 }
144 mutex_unlock(&probing_active); 144 mutex_unlock(&probing_active);
145 145
@@ -171,7 +171,7 @@ int probe_irq_off(unsigned long val)
171 unsigned int status; 171 unsigned int status;
172 172
173 for_each_irq_desc(i, desc) { 173 for_each_irq_desc(i, desc) {
174 spin_lock_irq(&desc->lock); 174 raw_spin_lock_irq(&desc->lock);
175 status = desc->status; 175 status = desc->status;
176 176
177 if (status & IRQ_AUTODETECT) { 177 if (status & IRQ_AUTODETECT) {
@@ -183,7 +183,7 @@ int probe_irq_off(unsigned long val)
183 desc->status = status & ~IRQ_AUTODETECT; 183 desc->status = status & ~IRQ_AUTODETECT;
184 desc->chip->shutdown(i); 184 desc->chip->shutdown(i);
185 } 185 }
186 spin_unlock_irq(&desc->lock); 186 raw_spin_unlock_irq(&desc->lock);
187 } 187 }
188 mutex_unlock(&probing_active); 188 mutex_unlock(&probing_active);
189 189
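
From here on, every spin_lock*() call on an irq descriptor's lock becomes its raw_spin_lock*() counterpart, and the same conversion repeats verbatim through chip.c, handle.c, manage.c, migration.c, numa_migrate.c, pm.c, proc.c and spurious.c below. The motivation is PREEMPT_RT, where an ordinary spinlock_t becomes a sleeping lock: the low-level interrupt path must stay truly atomic, so irq_desc->lock (and sparse_irq_lock) move to raw_spinlock_t, which always busy-waits. Statically embedded locks are initialized with __RAW_SPIN_LOCK_UNLOCKED(name) rather than __SPIN_LOCK_UNLOCKED(name), as the handle.c hunks show. A minimal kernel-style sketch of the raw API; the lock and function names are illustrative:

    #include <linux/spinlock.h>

    static DEFINE_RAW_SPINLOCK(my_lock);   /* stays a spinning lock even on RT */

    static void touch_hard_atomic_state(void)
    {
            unsigned long flags;

            raw_spin_lock_irqsave(&my_lock, flags);
            /* critical section: interrupts off, no sleeping allowed */
            raw_spin_unlock_irqrestore(&my_lock, flags);
    }
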
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index ba566c261adc..ecc3fa28f666 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -34,7 +34,7 @@ void dynamic_irq_init(unsigned int irq)
34 } 34 }
35 35
36 /* Ensure we don't have left over values from a previous use of this irq */ 36 /* Ensure we don't have left over values from a previous use of this irq */
37 spin_lock_irqsave(&desc->lock, flags); 37 raw_spin_lock_irqsave(&desc->lock, flags);
38 desc->status = IRQ_DISABLED; 38 desc->status = IRQ_DISABLED;
39 desc->chip = &no_irq_chip; 39 desc->chip = &no_irq_chip;
40 desc->handle_irq = handle_bad_irq; 40 desc->handle_irq = handle_bad_irq;
@@ -51,7 +51,7 @@ void dynamic_irq_init(unsigned int irq)
51 cpumask_clear(desc->pending_mask); 51 cpumask_clear(desc->pending_mask);
52#endif 52#endif
53#endif 53#endif
54 spin_unlock_irqrestore(&desc->lock, flags); 54 raw_spin_unlock_irqrestore(&desc->lock, flags);
55} 55}
56 56
57/** 57/**
@@ -68,9 +68,9 @@ void dynamic_irq_cleanup(unsigned int irq)
68 return; 68 return;
69 } 69 }
70 70
71 spin_lock_irqsave(&desc->lock, flags); 71 raw_spin_lock_irqsave(&desc->lock, flags);
72 if (desc->action) { 72 if (desc->action) {
73 spin_unlock_irqrestore(&desc->lock, flags); 73 raw_spin_unlock_irqrestore(&desc->lock, flags);
74 WARN(1, KERN_ERR "Destroying IRQ%d without calling free_irq\n", 74 WARN(1, KERN_ERR "Destroying IRQ%d without calling free_irq\n",
75 irq); 75 irq);
76 return; 76 return;
@@ -82,7 +82,7 @@ void dynamic_irq_cleanup(unsigned int irq)
82 desc->chip = &no_irq_chip; 82 desc->chip = &no_irq_chip;
83 desc->name = NULL; 83 desc->name = NULL;
84 clear_kstat_irqs(desc); 84 clear_kstat_irqs(desc);
85 spin_unlock_irqrestore(&desc->lock, flags); 85 raw_spin_unlock_irqrestore(&desc->lock, flags);
86} 86}
87 87
88 88
@@ -104,10 +104,10 @@ int set_irq_chip(unsigned int irq, struct irq_chip *chip)
104 if (!chip) 104 if (!chip)
105 chip = &no_irq_chip; 105 chip = &no_irq_chip;
106 106
107 spin_lock_irqsave(&desc->lock, flags); 107 raw_spin_lock_irqsave(&desc->lock, flags);
108 irq_chip_set_defaults(chip); 108 irq_chip_set_defaults(chip);
109 desc->chip = chip; 109 desc->chip = chip;
110 spin_unlock_irqrestore(&desc->lock, flags); 110 raw_spin_unlock_irqrestore(&desc->lock, flags);
111 111
112 return 0; 112 return 0;
113} 113}
@@ -133,9 +133,9 @@ int set_irq_type(unsigned int irq, unsigned int type)
133 if (type == IRQ_TYPE_NONE) 133 if (type == IRQ_TYPE_NONE)
134 return 0; 134 return 0;
135 135
136 spin_lock_irqsave(&desc->lock, flags); 136 raw_spin_lock_irqsave(&desc->lock, flags);
137 ret = __irq_set_trigger(desc, irq, type); 137 ret = __irq_set_trigger(desc, irq, type);
138 spin_unlock_irqrestore(&desc->lock, flags); 138 raw_spin_unlock_irqrestore(&desc->lock, flags);
139 return ret; 139 return ret;
140} 140}
141EXPORT_SYMBOL(set_irq_type); 141EXPORT_SYMBOL(set_irq_type);
@@ -158,9 +158,9 @@ int set_irq_data(unsigned int irq, void *data)
158 return -EINVAL; 158 return -EINVAL;
159 } 159 }
160 160
161 spin_lock_irqsave(&desc->lock, flags); 161 raw_spin_lock_irqsave(&desc->lock, flags);
162 desc->handler_data = data; 162 desc->handler_data = data;
163 spin_unlock_irqrestore(&desc->lock, flags); 163 raw_spin_unlock_irqrestore(&desc->lock, flags);
164 return 0; 164 return 0;
165} 165}
166EXPORT_SYMBOL(set_irq_data); 166EXPORT_SYMBOL(set_irq_data);
@@ -183,11 +183,11 @@ int set_irq_msi(unsigned int irq, struct msi_desc *entry)
183 return -EINVAL; 183 return -EINVAL;
184 } 184 }
185 185
186 spin_lock_irqsave(&desc->lock, flags); 186 raw_spin_lock_irqsave(&desc->lock, flags);
187 desc->msi_desc = entry; 187 desc->msi_desc = entry;
188 if (entry) 188 if (entry)
189 entry->irq = irq; 189 entry->irq = irq;
190 spin_unlock_irqrestore(&desc->lock, flags); 190 raw_spin_unlock_irqrestore(&desc->lock, flags);
191 return 0; 191 return 0;
192} 192}
193 193
@@ -214,9 +214,9 @@ int set_irq_chip_data(unsigned int irq, void *data)
214 return -EINVAL; 214 return -EINVAL;
215 } 215 }
216 216
217 spin_lock_irqsave(&desc->lock, flags); 217 raw_spin_lock_irqsave(&desc->lock, flags);
218 desc->chip_data = data; 218 desc->chip_data = data;
219 spin_unlock_irqrestore(&desc->lock, flags); 219 raw_spin_unlock_irqrestore(&desc->lock, flags);
220 220
221 return 0; 221 return 0;
222} 222}
@@ -241,12 +241,12 @@ void set_irq_nested_thread(unsigned int irq, int nest)
241 if (!desc) 241 if (!desc)
242 return; 242 return;
243 243
244 spin_lock_irqsave(&desc->lock, flags); 244 raw_spin_lock_irqsave(&desc->lock, flags);
245 if (nest) 245 if (nest)
246 desc->status |= IRQ_NESTED_THREAD; 246 desc->status |= IRQ_NESTED_THREAD;
247 else 247 else
248 desc->status &= ~IRQ_NESTED_THREAD; 248 desc->status &= ~IRQ_NESTED_THREAD;
249 spin_unlock_irqrestore(&desc->lock, flags); 249 raw_spin_unlock_irqrestore(&desc->lock, flags);
250} 250}
251EXPORT_SYMBOL_GPL(set_irq_nested_thread); 251EXPORT_SYMBOL_GPL(set_irq_nested_thread);
252 252
@@ -343,7 +343,7 @@ void handle_nested_irq(unsigned int irq)
343 343
344 might_sleep(); 344 might_sleep();
345 345
346 spin_lock_irq(&desc->lock); 346 raw_spin_lock_irq(&desc->lock);
347 347
348 kstat_incr_irqs_this_cpu(irq, desc); 348 kstat_incr_irqs_this_cpu(irq, desc);
349 349
@@ -352,17 +352,17 @@ void handle_nested_irq(unsigned int irq)
352 goto out_unlock; 352 goto out_unlock;
353 353
354 desc->status |= IRQ_INPROGRESS; 354 desc->status |= IRQ_INPROGRESS;
355 spin_unlock_irq(&desc->lock); 355 raw_spin_unlock_irq(&desc->lock);
356 356
357 action_ret = action->thread_fn(action->irq, action->dev_id); 357 action_ret = action->thread_fn(action->irq, action->dev_id);
358 if (!noirqdebug) 358 if (!noirqdebug)
359 note_interrupt(irq, desc, action_ret); 359 note_interrupt(irq, desc, action_ret);
360 360
361 spin_lock_irq(&desc->lock); 361 raw_spin_lock_irq(&desc->lock);
362 desc->status &= ~IRQ_INPROGRESS; 362 desc->status &= ~IRQ_INPROGRESS;
363 363
364out_unlock: 364out_unlock:
365 spin_unlock_irq(&desc->lock); 365 raw_spin_unlock_irq(&desc->lock);
366} 366}
367EXPORT_SYMBOL_GPL(handle_nested_irq); 367EXPORT_SYMBOL_GPL(handle_nested_irq);
368 368
@@ -384,7 +384,7 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc)
384 struct irqaction *action; 384 struct irqaction *action;
385 irqreturn_t action_ret; 385 irqreturn_t action_ret;
386 386
387 spin_lock(&desc->lock); 387 raw_spin_lock(&desc->lock);
388 388
389 if (unlikely(desc->status & IRQ_INPROGRESS)) 389 if (unlikely(desc->status & IRQ_INPROGRESS))
390 goto out_unlock; 390 goto out_unlock;
@@ -396,16 +396,16 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc)
396 goto out_unlock; 396 goto out_unlock;
397 397
398 desc->status |= IRQ_INPROGRESS; 398 desc->status |= IRQ_INPROGRESS;
399 spin_unlock(&desc->lock); 399 raw_spin_unlock(&desc->lock);
400 400
401 action_ret = handle_IRQ_event(irq, action); 401 action_ret = handle_IRQ_event(irq, action);
402 if (!noirqdebug) 402 if (!noirqdebug)
403 note_interrupt(irq, desc, action_ret); 403 note_interrupt(irq, desc, action_ret);
404 404
405 spin_lock(&desc->lock); 405 raw_spin_lock(&desc->lock);
406 desc->status &= ~IRQ_INPROGRESS; 406 desc->status &= ~IRQ_INPROGRESS;
407out_unlock: 407out_unlock:
408 spin_unlock(&desc->lock); 408 raw_spin_unlock(&desc->lock);
409} 409}
410 410
411/** 411/**
@@ -424,7 +424,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
424 struct irqaction *action; 424 struct irqaction *action;
425 irqreturn_t action_ret; 425 irqreturn_t action_ret;
426 426
427 spin_lock(&desc->lock); 427 raw_spin_lock(&desc->lock);
428 mask_ack_irq(desc, irq); 428 mask_ack_irq(desc, irq);
429 429
430 if (unlikely(desc->status & IRQ_INPROGRESS)) 430 if (unlikely(desc->status & IRQ_INPROGRESS))
@@ -441,13 +441,13 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
441 goto out_unlock; 441 goto out_unlock;
442 442
443 desc->status |= IRQ_INPROGRESS; 443 desc->status |= IRQ_INPROGRESS;
444 spin_unlock(&desc->lock); 444 raw_spin_unlock(&desc->lock);
445 445
446 action_ret = handle_IRQ_event(irq, action); 446 action_ret = handle_IRQ_event(irq, action);
447 if (!noirqdebug) 447 if (!noirqdebug)
448 note_interrupt(irq, desc, action_ret); 448 note_interrupt(irq, desc, action_ret);
449 449
450 spin_lock(&desc->lock); 450 raw_spin_lock(&desc->lock);
451 desc->status &= ~IRQ_INPROGRESS; 451 desc->status &= ~IRQ_INPROGRESS;
452 452
453 if (unlikely(desc->status & IRQ_ONESHOT)) 453 if (unlikely(desc->status & IRQ_ONESHOT))
@@ -455,7 +455,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
455 else if (!(desc->status & IRQ_DISABLED) && desc->chip->unmask) 455 else if (!(desc->status & IRQ_DISABLED) && desc->chip->unmask)
456 desc->chip->unmask(irq); 456 desc->chip->unmask(irq);
457out_unlock: 457out_unlock:
458 spin_unlock(&desc->lock); 458 raw_spin_unlock(&desc->lock);
459} 459}
460EXPORT_SYMBOL_GPL(handle_level_irq); 460EXPORT_SYMBOL_GPL(handle_level_irq);
461 461
@@ -475,7 +475,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
475 struct irqaction *action; 475 struct irqaction *action;
476 irqreturn_t action_ret; 476 irqreturn_t action_ret;
477 477
478 spin_lock(&desc->lock); 478 raw_spin_lock(&desc->lock);
479 479
480 if (unlikely(desc->status & IRQ_INPROGRESS)) 480 if (unlikely(desc->status & IRQ_INPROGRESS))
481 goto out; 481 goto out;
@@ -497,18 +497,18 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
497 497
498 desc->status |= IRQ_INPROGRESS; 498 desc->status |= IRQ_INPROGRESS;
499 desc->status &= ~IRQ_PENDING; 499 desc->status &= ~IRQ_PENDING;
500 spin_unlock(&desc->lock); 500 raw_spin_unlock(&desc->lock);
501 501
502 action_ret = handle_IRQ_event(irq, action); 502 action_ret = handle_IRQ_event(irq, action);
503 if (!noirqdebug) 503 if (!noirqdebug)
504 note_interrupt(irq, desc, action_ret); 504 note_interrupt(irq, desc, action_ret);
505 505
506 spin_lock(&desc->lock); 506 raw_spin_lock(&desc->lock);
507 desc->status &= ~IRQ_INPROGRESS; 507 desc->status &= ~IRQ_INPROGRESS;
508out: 508out:
509 desc->chip->eoi(irq); 509 desc->chip->eoi(irq);
510 510
511 spin_unlock(&desc->lock); 511 raw_spin_unlock(&desc->lock);
512} 512}
513 513
514/** 514/**
@@ -530,7 +530,7 @@ out:
530void 530void
531handle_edge_irq(unsigned int irq, struct irq_desc *desc) 531handle_edge_irq(unsigned int irq, struct irq_desc *desc)
532{ 532{
533 spin_lock(&desc->lock); 533 raw_spin_lock(&desc->lock);
534 534
535 desc->status &= ~(IRQ_REPLAY | IRQ_WAITING); 535 desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
536 536
@@ -576,17 +576,17 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
576 } 576 }
577 577
578 desc->status &= ~IRQ_PENDING; 578 desc->status &= ~IRQ_PENDING;
579 spin_unlock(&desc->lock); 579 raw_spin_unlock(&desc->lock);
580 action_ret = handle_IRQ_event(irq, action); 580 action_ret = handle_IRQ_event(irq, action);
581 if (!noirqdebug) 581 if (!noirqdebug)
582 note_interrupt(irq, desc, action_ret); 582 note_interrupt(irq, desc, action_ret);
583 spin_lock(&desc->lock); 583 raw_spin_lock(&desc->lock);
584 584
585 } while ((desc->status & (IRQ_PENDING | IRQ_DISABLED)) == IRQ_PENDING); 585 } while ((desc->status & (IRQ_PENDING | IRQ_DISABLED)) == IRQ_PENDING);
586 586
587 desc->status &= ~IRQ_INPROGRESS; 587 desc->status &= ~IRQ_INPROGRESS;
588out_unlock: 588out_unlock:
589 spin_unlock(&desc->lock); 589 raw_spin_unlock(&desc->lock);
590} 590}
591 591
592/** 592/**
@@ -643,7 +643,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
643 } 643 }
644 644
645 chip_bus_lock(irq, desc); 645 chip_bus_lock(irq, desc);
646 spin_lock_irqsave(&desc->lock, flags); 646 raw_spin_lock_irqsave(&desc->lock, flags);
647 647
648 /* Uninstall? */ 648 /* Uninstall? */
649 if (handle == handle_bad_irq) { 649 if (handle == handle_bad_irq) {
@@ -661,7 +661,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
661 desc->depth = 0; 661 desc->depth = 0;
662 desc->chip->startup(irq); 662 desc->chip->startup(irq);
663 } 663 }
664 spin_unlock_irqrestore(&desc->lock, flags); 664 raw_spin_unlock_irqrestore(&desc->lock, flags);
665 chip_bus_sync_unlock(irq, desc); 665 chip_bus_sync_unlock(irq, desc);
666} 666}
667EXPORT_SYMBOL_GPL(__set_irq_handler); 667EXPORT_SYMBOL_GPL(__set_irq_handler);
@@ -692,9 +692,9 @@ void __init set_irq_noprobe(unsigned int irq)
692 return; 692 return;
693 } 693 }
694 694
695 spin_lock_irqsave(&desc->lock, flags); 695 raw_spin_lock_irqsave(&desc->lock, flags);
696 desc->status |= IRQ_NOPROBE; 696 desc->status |= IRQ_NOPROBE;
697 spin_unlock_irqrestore(&desc->lock, flags); 697 raw_spin_unlock_irqrestore(&desc->lock, flags);
698} 698}
699 699
700void __init set_irq_probe(unsigned int irq) 700void __init set_irq_probe(unsigned int irq)
@@ -707,7 +707,7 @@ void __init set_irq_probe(unsigned int irq)
707 return; 707 return;
708 } 708 }
709 709
710 spin_lock_irqsave(&desc->lock, flags); 710 raw_spin_lock_irqsave(&desc->lock, flags);
711 desc->status &= ~IRQ_NOPROBE; 711 desc->status &= ~IRQ_NOPROBE;
712 spin_unlock_irqrestore(&desc->lock, flags); 712 raw_spin_unlock_irqrestore(&desc->lock, flags);
713} 713}
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 17c71bb565c6..814940e7f485 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -80,7 +80,7 @@ static struct irq_desc irq_desc_init = {
80 .chip = &no_irq_chip, 80 .chip = &no_irq_chip,
81 .handle_irq = handle_bad_irq, 81 .handle_irq = handle_bad_irq,
82 .depth = 1, 82 .depth = 1,
83 .lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock), 83 .lock = __RAW_SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
84}; 84};
85 85
86void __ref init_kstat_irqs(struct irq_desc *desc, int node, int nr) 86void __ref init_kstat_irqs(struct irq_desc *desc, int node, int nr)
@@ -108,7 +108,7 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int node)
108{ 108{
109 memcpy(desc, &irq_desc_init, sizeof(struct irq_desc)); 109 memcpy(desc, &irq_desc_init, sizeof(struct irq_desc));
110 110
111 spin_lock_init(&desc->lock); 111 raw_spin_lock_init(&desc->lock);
112 desc->irq = irq; 112 desc->irq = irq;
113#ifdef CONFIG_SMP 113#ifdef CONFIG_SMP
114 desc->node = node; 114 desc->node = node;
@@ -130,7 +130,7 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int node)
130/* 130/*
131 * Protect the sparse_irqs: 131 * Protect the sparse_irqs:
132 */ 132 */
133DEFINE_SPINLOCK(sparse_irq_lock); 133DEFINE_RAW_SPINLOCK(sparse_irq_lock);
134 134
135struct irq_desc **irq_desc_ptrs __read_mostly; 135struct irq_desc **irq_desc_ptrs __read_mostly;
136 136
@@ -141,7 +141,7 @@ static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_sm
141 .chip = &no_irq_chip, 141 .chip = &no_irq_chip,
142 .handle_irq = handle_bad_irq, 142 .handle_irq = handle_bad_irq,
143 .depth = 1, 143 .depth = 1,
144 .lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock), 144 .lock = __RAW_SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
145 } 145 }
146}; 146};
147 147
@@ -212,7 +212,7 @@ struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node)
212 if (desc) 212 if (desc)
213 return desc; 213 return desc;
214 214
215 spin_lock_irqsave(&sparse_irq_lock, flags); 215 raw_spin_lock_irqsave(&sparse_irq_lock, flags);
216 216
217 /* We have to check it to avoid races with another CPU */ 217 /* We have to check it to avoid races with another CPU */
218 desc = irq_desc_ptrs[irq]; 218 desc = irq_desc_ptrs[irq];
@@ -234,7 +234,7 @@ struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node)
234 irq_desc_ptrs[irq] = desc; 234 irq_desc_ptrs[irq] = desc;
235 235
236out_unlock: 236out_unlock:
237 spin_unlock_irqrestore(&sparse_irq_lock, flags); 237 raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
238 238
239 return desc; 239 return desc;
240} 240}
@@ -247,7 +247,7 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
247 .chip = &no_irq_chip, 247 .chip = &no_irq_chip,
248 .handle_irq = handle_bad_irq, 248 .handle_irq = handle_bad_irq,
249 .depth = 1, 249 .depth = 1,
250 .lock = __SPIN_LOCK_UNLOCKED(irq_desc->lock), 250 .lock = __RAW_SPIN_LOCK_UNLOCKED(irq_desc->lock),
251 } 251 }
252}; 252};
253 253
@@ -473,7 +473,7 @@ unsigned int __do_IRQ(unsigned int irq)
473 return 1; 473 return 1;
474 } 474 }
475 475
476 spin_lock(&desc->lock); 476 raw_spin_lock(&desc->lock);
477 if (desc->chip->ack) 477 if (desc->chip->ack)
478 desc->chip->ack(irq); 478 desc->chip->ack(irq);
479 /* 479 /*
@@ -517,13 +517,13 @@ unsigned int __do_IRQ(unsigned int irq)
517 for (;;) { 517 for (;;) {
518 irqreturn_t action_ret; 518 irqreturn_t action_ret;
519 519
520 spin_unlock(&desc->lock); 520 raw_spin_unlock(&desc->lock);
521 521
522 action_ret = handle_IRQ_event(irq, action); 522 action_ret = handle_IRQ_event(irq, action);
523 if (!noirqdebug) 523 if (!noirqdebug)
524 note_interrupt(irq, desc, action_ret); 524 note_interrupt(irq, desc, action_ret);
525 525
526 spin_lock(&desc->lock); 526 raw_spin_lock(&desc->lock);
527 if (likely(!(desc->status & IRQ_PENDING))) 527 if (likely(!(desc->status & IRQ_PENDING)))
528 break; 528 break;
529 desc->status &= ~IRQ_PENDING; 529 desc->status &= ~IRQ_PENDING;
@@ -536,7 +536,7 @@ out:
536 * disabled while the handler was running. 536 * disabled while the handler was running.
537 */ 537 */
538 desc->chip->end(irq); 538 desc->chip->end(irq);
539 spin_unlock(&desc->lock); 539 raw_spin_unlock(&desc->lock);
540 540
541 return 1; 541 return 1;
542} 542}
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 1b5d742c6a77..b2821f070a3d 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -18,7 +18,7 @@ extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume);
18extern struct lock_class_key irq_desc_lock_class; 18extern struct lock_class_key irq_desc_lock_class;
19extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr); 19extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr);
20extern void clear_kstat_irqs(struct irq_desc *desc); 20extern void clear_kstat_irqs(struct irq_desc *desc);
21extern spinlock_t sparse_irq_lock; 21extern raw_spinlock_t sparse_irq_lock;
22 22
23#ifdef CONFIG_SPARSE_IRQ 23#ifdef CONFIG_SPARSE_IRQ
24/* irq_desc_ptrs allocated at boot time */ 24/* irq_desc_ptrs allocated at boot time */
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 7305b297d1eb..eb6078ca60c7 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -46,9 +46,9 @@ void synchronize_irq(unsigned int irq)
46 cpu_relax(); 46 cpu_relax();
47 47
48 /* Ok, that indicated we're done: double-check carefully. */ 48 /* Ok, that indicated we're done: double-check carefully. */
49 spin_lock_irqsave(&desc->lock, flags); 49 raw_spin_lock_irqsave(&desc->lock, flags);
50 status = desc->status; 50 status = desc->status;
51 spin_unlock_irqrestore(&desc->lock, flags); 51 raw_spin_unlock_irqrestore(&desc->lock, flags);
52 52
53 /* Oops, that failed? */ 53 /* Oops, that failed? */
54 } while (status & IRQ_INPROGRESS); 54 } while (status & IRQ_INPROGRESS);
@@ -114,7 +114,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
114 if (!desc->chip->set_affinity) 114 if (!desc->chip->set_affinity)
115 return -EINVAL; 115 return -EINVAL;
116 116
117 spin_lock_irqsave(&desc->lock, flags); 117 raw_spin_lock_irqsave(&desc->lock, flags);
118 118
119#ifdef CONFIG_GENERIC_PENDING_IRQ 119#ifdef CONFIG_GENERIC_PENDING_IRQ
120 if (desc->status & IRQ_MOVE_PCNTXT) { 120 if (desc->status & IRQ_MOVE_PCNTXT) {
@@ -134,7 +134,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
134 } 134 }
135#endif 135#endif
136 desc->status |= IRQ_AFFINITY_SET; 136 desc->status |= IRQ_AFFINITY_SET;
137 spin_unlock_irqrestore(&desc->lock, flags); 137 raw_spin_unlock_irqrestore(&desc->lock, flags);
138 return 0; 138 return 0;
139} 139}
140 140
@@ -181,11 +181,11 @@ int irq_select_affinity_usr(unsigned int irq)
181 unsigned long flags; 181 unsigned long flags;
182 int ret; 182 int ret;
183 183
184 spin_lock_irqsave(&desc->lock, flags); 184 raw_spin_lock_irqsave(&desc->lock, flags);
185 ret = setup_affinity(irq, desc); 185 ret = setup_affinity(irq, desc);
186 if (!ret) 186 if (!ret)
187 irq_set_thread_affinity(desc); 187 irq_set_thread_affinity(desc);
188 spin_unlock_irqrestore(&desc->lock, flags); 188 raw_spin_unlock_irqrestore(&desc->lock, flags);
189 189
190 return ret; 190 return ret;
191} 191}
@@ -231,9 +231,9 @@ void disable_irq_nosync(unsigned int irq)
231 return; 231 return;
232 232
233 chip_bus_lock(irq, desc); 233 chip_bus_lock(irq, desc);
234 spin_lock_irqsave(&desc->lock, flags); 234 raw_spin_lock_irqsave(&desc->lock, flags);
235 __disable_irq(desc, irq, false); 235 __disable_irq(desc, irq, false);
236 spin_unlock_irqrestore(&desc->lock, flags); 236 raw_spin_unlock_irqrestore(&desc->lock, flags);
237 chip_bus_sync_unlock(irq, desc); 237 chip_bus_sync_unlock(irq, desc);
238} 238}
239EXPORT_SYMBOL(disable_irq_nosync); 239EXPORT_SYMBOL(disable_irq_nosync);
@@ -308,9 +308,9 @@ void enable_irq(unsigned int irq)
308 return; 308 return;
309 309
310 chip_bus_lock(irq, desc); 310 chip_bus_lock(irq, desc);
311 spin_lock_irqsave(&desc->lock, flags); 311 raw_spin_lock_irqsave(&desc->lock, flags);
312 __enable_irq(desc, irq, false); 312 __enable_irq(desc, irq, false);
313 spin_unlock_irqrestore(&desc->lock, flags); 313 raw_spin_unlock_irqrestore(&desc->lock, flags);
314 chip_bus_sync_unlock(irq, desc); 314 chip_bus_sync_unlock(irq, desc);
315} 315}
316EXPORT_SYMBOL(enable_irq); 316EXPORT_SYMBOL(enable_irq);
@@ -347,7 +347,7 @@ int set_irq_wake(unsigned int irq, unsigned int on)
347 /* wakeup-capable irqs can be shared between drivers that 347 /* wakeup-capable irqs can be shared between drivers that
348 * don't need to have the same sleep mode behaviors. 348 * don't need to have the same sleep mode behaviors.
349 */ 349 */
350 spin_lock_irqsave(&desc->lock, flags); 350 raw_spin_lock_irqsave(&desc->lock, flags);
351 if (on) { 351 if (on) {
352 if (desc->wake_depth++ == 0) { 352 if (desc->wake_depth++ == 0) {
353 ret = set_irq_wake_real(irq, on); 353 ret = set_irq_wake_real(irq, on);
@@ -368,7 +368,7 @@ int set_irq_wake(unsigned int irq, unsigned int on)
368 } 368 }
369 } 369 }
370 370
371 spin_unlock_irqrestore(&desc->lock, flags); 371 raw_spin_unlock_irqrestore(&desc->lock, flags);
372 return ret; 372 return ret;
373} 373}
374EXPORT_SYMBOL(set_irq_wake); 374EXPORT_SYMBOL(set_irq_wake);
@@ -484,12 +484,12 @@ static int irq_wait_for_interrupt(struct irqaction *action)
484static void irq_finalize_oneshot(unsigned int irq, struct irq_desc *desc) 484static void irq_finalize_oneshot(unsigned int irq, struct irq_desc *desc)
485{ 485{
486 chip_bus_lock(irq, desc); 486 chip_bus_lock(irq, desc);
487 spin_lock_irq(&desc->lock); 487 raw_spin_lock_irq(&desc->lock);
488 if (!(desc->status & IRQ_DISABLED) && (desc->status & IRQ_MASKED)) { 488 if (!(desc->status & IRQ_DISABLED) && (desc->status & IRQ_MASKED)) {
489 desc->status &= ~IRQ_MASKED; 489 desc->status &= ~IRQ_MASKED;
490 desc->chip->unmask(irq); 490 desc->chip->unmask(irq);
491 } 491 }
492 spin_unlock_irq(&desc->lock); 492 raw_spin_unlock_irq(&desc->lock);
493 chip_bus_sync_unlock(irq, desc); 493 chip_bus_sync_unlock(irq, desc);
494} 494}
495 495
@@ -514,9 +514,9 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action)
514 return; 514 return;
515 } 515 }
516 516
517 spin_lock_irq(&desc->lock); 517 raw_spin_lock_irq(&desc->lock);
518 cpumask_copy(mask, desc->affinity); 518 cpumask_copy(mask, desc->affinity);
519 spin_unlock_irq(&desc->lock); 519 raw_spin_unlock_irq(&desc->lock);
520 520
521 set_cpus_allowed_ptr(current, mask); 521 set_cpus_allowed_ptr(current, mask);
522 free_cpumask_var(mask); 522 free_cpumask_var(mask);
@@ -545,7 +545,7 @@ static int irq_thread(void *data)
545 545
546 atomic_inc(&desc->threads_active); 546 atomic_inc(&desc->threads_active);
547 547
548 spin_lock_irq(&desc->lock); 548 raw_spin_lock_irq(&desc->lock);
549 if (unlikely(desc->status & IRQ_DISABLED)) { 549 if (unlikely(desc->status & IRQ_DISABLED)) {
550 /* 550 /*
551 * CHECKME: We might need a dedicated 551 * CHECKME: We might need a dedicated
@@ -555,9 +555,9 @@ static int irq_thread(void *data)
555 * retriggers the interrupt itself --- tglx 555 * retriggers the interrupt itself --- tglx
556 */ 556 */
557 desc->status |= IRQ_PENDING; 557 desc->status |= IRQ_PENDING;
558 spin_unlock_irq(&desc->lock); 558 raw_spin_unlock_irq(&desc->lock);
559 } else { 559 } else {
560 spin_unlock_irq(&desc->lock); 560 raw_spin_unlock_irq(&desc->lock);
561 561
562 action->thread_fn(action->irq, action->dev_id); 562 action->thread_fn(action->irq, action->dev_id);
563 563
@@ -679,7 +679,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
679 /* 679 /*
680 * The following block of code has to be executed atomically 680 * The following block of code has to be executed atomically
681 */ 681 */
682 spin_lock_irqsave(&desc->lock, flags); 682 raw_spin_lock_irqsave(&desc->lock, flags);
683 old_ptr = &desc->action; 683 old_ptr = &desc->action;
684 old = *old_ptr; 684 old = *old_ptr;
685 if (old) { 685 if (old) {
@@ -775,7 +775,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
775 __enable_irq(desc, irq, false); 775 __enable_irq(desc, irq, false);
776 } 776 }
777 777
778 spin_unlock_irqrestore(&desc->lock, flags); 778 raw_spin_unlock_irqrestore(&desc->lock, flags);
779 779
780 /* 780 /*
781 * Strictly no need to wake it up, but hung_task complains 781 * Strictly no need to wake it up, but hung_task complains
@@ -802,7 +802,7 @@ mismatch:
802 ret = -EBUSY; 802 ret = -EBUSY;
803 803
804out_thread: 804out_thread:
805 spin_unlock_irqrestore(&desc->lock, flags); 805 raw_spin_unlock_irqrestore(&desc->lock, flags);
806 if (new->thread) { 806 if (new->thread) {
807 struct task_struct *t = new->thread; 807 struct task_struct *t = new->thread;
808 808
@@ -844,7 +844,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
844 if (!desc) 844 if (!desc)
845 return NULL; 845 return NULL;
846 846
847 spin_lock_irqsave(&desc->lock, flags); 847 raw_spin_lock_irqsave(&desc->lock, flags);
848 848
849 /* 849 /*
850 * There can be multiple actions per IRQ descriptor, find the right 850 * There can be multiple actions per IRQ descriptor, find the right
@@ -856,7 +856,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
856 856
857 if (!action) { 857 if (!action) {
858 WARN(1, "Trying to free already-free IRQ %d\n", irq); 858 WARN(1, "Trying to free already-free IRQ %d\n", irq);
859 spin_unlock_irqrestore(&desc->lock, flags); 859 raw_spin_unlock_irqrestore(&desc->lock, flags);
860 860
861 return NULL; 861 return NULL;
862 } 862 }
@@ -884,7 +884,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
884 desc->chip->disable(irq); 884 desc->chip->disable(irq);
885 } 885 }
886 886
887 spin_unlock_irqrestore(&desc->lock, flags); 887 raw_spin_unlock_irqrestore(&desc->lock, flags);
888 888
889 unregister_handler_proc(irq, action); 889 unregister_handler_proc(irq, action);
890 890
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index fcb6c96f2627..241962280836 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -27,7 +27,7 @@ void move_masked_irq(int irq)
27 if (!desc->chip->set_affinity) 27 if (!desc->chip->set_affinity)
28 return; 28 return;
29 29
30 assert_spin_locked(&desc->lock); 30 assert_raw_spin_locked(&desc->lock);
31 31
32 /* 32 /*
33 * If there was a valid mask to work with, please 33 * If there was a valid mask to work with, please
diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c
index 3fd30197da2e..26bac9d8f860 100644
--- a/kernel/irq/numa_migrate.c
+++ b/kernel/irq/numa_migrate.c
@@ -42,7 +42,7 @@ static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc,
42 "for migration.\n", irq); 42 "for migration.\n", irq);
43 return false; 43 return false;
44 } 44 }
45 spin_lock_init(&desc->lock); 45 raw_spin_lock_init(&desc->lock);
46 desc->node = node; 46 desc->node = node;
47 lockdep_set_class(&desc->lock, &irq_desc_lock_class); 47 lockdep_set_class(&desc->lock, &irq_desc_lock_class);
48 init_copy_kstat_irqs(old_desc, desc, node, nr_cpu_ids); 48 init_copy_kstat_irqs(old_desc, desc, node, nr_cpu_ids);
@@ -67,7 +67,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
67 67
68 irq = old_desc->irq; 68 irq = old_desc->irq;
69 69
70 spin_lock_irqsave(&sparse_irq_lock, flags); 70 raw_spin_lock_irqsave(&sparse_irq_lock, flags);
71 71
72 /* We have to check it to avoid races with another CPU */ 72 /* We have to check it to avoid races with another CPU */
73 desc = irq_desc_ptrs[irq]; 73 desc = irq_desc_ptrs[irq];
@@ -91,7 +91,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
91 } 91 }
92 92
93 irq_desc_ptrs[irq] = desc; 93 irq_desc_ptrs[irq] = desc;
94 spin_unlock_irqrestore(&sparse_irq_lock, flags); 94 raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
95 95
96 /* free the old one */ 96 /* free the old one */
97 free_one_irq_desc(old_desc, desc); 97 free_one_irq_desc(old_desc, desc);
@@ -100,7 +100,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
100 return desc; 100 return desc;
101 101
102out_unlock: 102out_unlock:
103 spin_unlock_irqrestore(&sparse_irq_lock, flags); 103 raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
104 104
105 return desc; 105 return desc;
106} 106}
diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c
index a0bb09e79867..0d4005d85b03 100644
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -28,9 +28,9 @@ void suspend_device_irqs(void)
28 for_each_irq_desc(irq, desc) { 28 for_each_irq_desc(irq, desc) {
29 unsigned long flags; 29 unsigned long flags;
30 30
31 spin_lock_irqsave(&desc->lock, flags); 31 raw_spin_lock_irqsave(&desc->lock, flags);
32 __disable_irq(desc, irq, true); 32 __disable_irq(desc, irq, true);
33 spin_unlock_irqrestore(&desc->lock, flags); 33 raw_spin_unlock_irqrestore(&desc->lock, flags);
34 } 34 }
35 35
36 for_each_irq_desc(irq, desc) 36 for_each_irq_desc(irq, desc)
@@ -56,9 +56,9 @@ void resume_device_irqs(void)
56 if (!(desc->status & IRQ_SUSPENDED)) 56 if (!(desc->status & IRQ_SUSPENDED))
57 continue; 57 continue;
58 58
59 spin_lock_irqsave(&desc->lock, flags); 59 raw_spin_lock_irqsave(&desc->lock, flags);
60 __enable_irq(desc, irq, true); 60 __enable_irq(desc, irq, true);
61 spin_unlock_irqrestore(&desc->lock, flags); 61 raw_spin_unlock_irqrestore(&desc->lock, flags);
62 } 62 }
63} 63}
64EXPORT_SYMBOL_GPL(resume_device_irqs); 64EXPORT_SYMBOL_GPL(resume_device_irqs);
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 0832145fea97..6f50eccc79c0 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -179,7 +179,7 @@ static int name_unique(unsigned int irq, struct irqaction *new_action)
179 unsigned long flags; 179 unsigned long flags;
180 int ret = 1; 180 int ret = 1;
181 181
182 spin_lock_irqsave(&desc->lock, flags); 182 raw_spin_lock_irqsave(&desc->lock, flags);
183 for (action = desc->action ; action; action = action->next) { 183 for (action = desc->action ; action; action = action->next) {
184 if ((action != new_action) && action->name && 184 if ((action != new_action) && action->name &&
185 !strcmp(new_action->name, action->name)) { 185 !strcmp(new_action->name, action->name)) {
@@ -187,7 +187,7 @@ static int name_unique(unsigned int irq, struct irqaction *new_action)
187 break; 187 break;
188 } 188 }
189 } 189 }
190 spin_unlock_irqrestore(&desc->lock, flags); 190 raw_spin_unlock_irqrestore(&desc->lock, flags);
191 return ret; 191 return ret;
192} 192}
193 193
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index e49ea1c5232d..89fb90ae534f 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -28,7 +28,7 @@ static int try_one_irq(int irq, struct irq_desc *desc)
28 struct irqaction *action; 28 struct irqaction *action;
29 int ok = 0, work = 0; 29 int ok = 0, work = 0;
30 30
31 spin_lock(&desc->lock); 31 raw_spin_lock(&desc->lock);
32 /* Already running on another processor */ 32 /* Already running on another processor */
33 if (desc->status & IRQ_INPROGRESS) { 33 if (desc->status & IRQ_INPROGRESS) {
34 /* 34 /*
@@ -37,13 +37,13 @@ static int try_one_irq(int irq, struct irq_desc *desc)
37 */ 37 */
38 if (desc->action && (desc->action->flags & IRQF_SHARED)) 38 if (desc->action && (desc->action->flags & IRQF_SHARED))
39 desc->status |= IRQ_PENDING; 39 desc->status |= IRQ_PENDING;
40 spin_unlock(&desc->lock); 40 raw_spin_unlock(&desc->lock);
41 return ok; 41 return ok;
42 } 42 }
43 /* Honour the normal IRQ locking */ 43 /* Honour the normal IRQ locking */
44 desc->status |= IRQ_INPROGRESS; 44 desc->status |= IRQ_INPROGRESS;
45 action = desc->action; 45 action = desc->action;
46 spin_unlock(&desc->lock); 46 raw_spin_unlock(&desc->lock);
47 47
48 while (action) { 48 while (action) {
49 /* Only shared IRQ handlers are safe to call */ 49 /* Only shared IRQ handlers are safe to call */
@@ -56,7 +56,7 @@ static int try_one_irq(int irq, struct irq_desc *desc)
56 } 56 }
57 local_irq_disable(); 57 local_irq_disable();
58 /* Now clean up the flags */ 58 /* Now clean up the flags */
59 spin_lock(&desc->lock); 59 raw_spin_lock(&desc->lock);
60 action = desc->action; 60 action = desc->action;
61 61
62 /* 62 /*
@@ -68,9 +68,9 @@ static int try_one_irq(int irq, struct irq_desc *desc)
68 * Perform real IRQ processing for the IRQ we deferred 68 * Perform real IRQ processing for the IRQ we deferred
69 */ 69 */
70 work = 1; 70 work = 1;
71 spin_unlock(&desc->lock); 71 raw_spin_unlock(&desc->lock);
72 handle_IRQ_event(irq, action); 72 handle_IRQ_event(irq, action);
73 spin_lock(&desc->lock); 73 raw_spin_lock(&desc->lock);
74 desc->status &= ~IRQ_PENDING; 74 desc->status &= ~IRQ_PENDING;
75 } 75 }
76 desc->status &= ~IRQ_INPROGRESS; 76 desc->status &= ~IRQ_INPROGRESS;
@@ -80,7 +80,7 @@ static int try_one_irq(int irq, struct irq_desc *desc)
80 */ 80 */
81 if (work && desc->chip && desc->chip->end) 81 if (work && desc->chip && desc->chip->end)
82 desc->chip->end(irq); 82 desc->chip->end(irq);
83 spin_unlock(&desc->lock); 83 raw_spin_unlock(&desc->lock);
84 84
85 return ok; 85 return ok;
86} 86}
diff --git a/kernel/kexec.c b/kernel/kexec.c
index f336e2107f98..a9a93d9ee7a7 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -21,7 +21,7 @@
21#include <linux/hardirq.h> 21#include <linux/hardirq.h>
22#include <linux/elf.h> 22#include <linux/elf.h>
23#include <linux/elfcore.h> 23#include <linux/elfcore.h>
24#include <linux/utsrelease.h> 24#include <generated/utsrelease.h>
25#include <linux/utsname.h> 25#include <linux/utsname.h>
26#include <linux/numa.h> 26#include <linux/numa.h>
27#include <linux/suspend.h> 27#include <linux/suspend.h>
@@ -31,6 +31,7 @@
31#include <linux/cpu.h> 31#include <linux/cpu.h>
32#include <linux/console.h> 32#include <linux/console.h>
33#include <linux/vmalloc.h> 33#include <linux/vmalloc.h>
34#include <linux/swap.h>
34 35
35#include <asm/page.h> 36#include <asm/page.h>
36#include <asm/uaccess.h> 37#include <asm/uaccess.h>
@@ -1082,6 +1083,64 @@ void crash_kexec(struct pt_regs *regs)
1082 } 1083 }
1083} 1084}
1084 1085
1086size_t crash_get_memory_size(void)
1087{
1088 size_t size;
1089 mutex_lock(&kexec_mutex);
1090 size = crashk_res.end - crashk_res.start + 1;
1091 mutex_unlock(&kexec_mutex);
1092 return size;
1093}
1094
1095static void free_reserved_phys_range(unsigned long begin, unsigned long end)
1096{
1097 unsigned long addr;
1098
1099 for (addr = begin; addr < end; addr += PAGE_SIZE) {
1100 ClearPageReserved(pfn_to_page(addr >> PAGE_SHIFT));
1101 init_page_count(pfn_to_page(addr >> PAGE_SHIFT));
1102 free_page((unsigned long)__va(addr));
1103 totalram_pages++;
1104 }
1105}
1106
1107int crash_shrink_memory(unsigned long new_size)
1108{
1109 int ret = 0;
1110 unsigned long start, end;
1111
1112 mutex_lock(&kexec_mutex);
1113
1114 if (kexec_crash_image) {
1115 ret = -ENOENT;
1116 goto unlock;
1117 }
1118 start = crashk_res.start;
1119 end = crashk_res.end;
1120
1121 if (new_size >= end - start + 1) {
1122 ret = -EINVAL;
1123 if (new_size == end - start + 1)
1124 ret = 0;
1125 goto unlock;
1126 }
1127
1128 start = roundup(start, PAGE_SIZE);
1129 end = roundup(start + new_size, PAGE_SIZE);
1130
1131 free_reserved_phys_range(end, crashk_res.end);
1132
1133 if (start == end) {
1134 crashk_res.end = end;
1135 release_resource(&crashk_res);
1136 } else
1137 crashk_res.end = end - 1;
1138
1139unlock:
1140 mutex_unlock(&kexec_mutex);
1141 return ret;
1142}
1143
1085static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data, 1144static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
1086 size_t data_len) 1145 size_t data_len)
1087{ 1146{
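
crash_shrink_memory() above frees the tail of the crashkernel reservation back to the page allocator: each page has PG_reserved cleared and its refcount reset before free_page(), and totalram_pages is bumped. Because only whole pages can be returned, the new boundary is first rounded up to PAGE_SIZE. A user-space sketch of that rounding arithmetic, with a simplified stand-in for the kernel's roundup() macro and illustrative numbers:

    #include <stdio.h>

    #define PAGE_SIZE 4096UL
    /* simplified stand-in for the kernel's roundup() */
    #define roundup(x, y) ((((x) + (y) - 1) / (y)) * (y))

    int main(void)
    {
            unsigned long start = 0x1000000UL;      /* illustrative base */
            unsigned long new_size = 10000;         /* requested size in bytes */
            unsigned long end = roundup(start + new_size, PAGE_SIZE);

            /* Only the whole pages past 'end' are freed back to the system. */
            printf("kept [%#lx, %#lx): %lu bytes (%lu pages)\n",
                   start, end, end - start, (end - start) / PAGE_SIZE);
            return 0;
    }
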
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index 7d7014634022..2eb517e23514 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c
@@ -129,6 +129,7 @@ struct task_struct *kgdb_usethread;
129struct task_struct *kgdb_contthread; 129struct task_struct *kgdb_contthread;
130 130
131int kgdb_single_step; 131int kgdb_single_step;
132pid_t kgdb_sstep_pid;
132 133
133/* Our I/O buffers. */ 134/* Our I/O buffers. */
134static char remcom_in_buffer[BUFMAX]; 135static char remcom_in_buffer[BUFMAX];
@@ -541,12 +542,17 @@ static struct task_struct *getthread(struct pt_regs *regs, int tid)
541 */ 542 */
542 if (tid == 0 || tid == -1) 543 if (tid == 0 || tid == -1)
543 tid = -atomic_read(&kgdb_active) - 2; 544 tid = -atomic_read(&kgdb_active) - 2;
544 if (tid < 0) { 545 if (tid < -1 && tid > -NR_CPUS - 2) {
545 if (kgdb_info[-tid - 2].task) 546 if (kgdb_info[-tid - 2].task)
546 return kgdb_info[-tid - 2].task; 547 return kgdb_info[-tid - 2].task;
547 else 548 else
548 return idle_task(-tid - 2); 549 return idle_task(-tid - 2);
549 } 550 }
551 if (tid <= 0) {
552 printk(KERN_ERR "KGDB: Internal thread select error\n");
553 dump_stack();
554 return NULL;
555 }
550 556
551 /* 557 /*
552 * find_task_by_pid_ns() does not take the tasklist lock anymore 558 * find_task_by_pid_ns() does not take the tasklist lock anymore
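
The getthread() rework above tightens the decoding of gdb's negative "shadow" thread ids, which kgdb maps onto CPUs (tid -2 is CPU 0, tid -3 is CPU 1, and so on), and turns anything outside the valid range into a logged internal error instead of an out-of-bounds kgdb_info[] access. An illustrative helper for the mapping, not a kgdb function:

    /* Decode kgdb's shadow thread ids; valid for -NR_CPUS - 1 <= tid <= -2. */
    static inline int shadow_tid_to_cpu(int tid)
    {
            return -tid - 2;
    }
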
@@ -619,7 +625,8 @@ static void kgdb_flush_swbreak_addr(unsigned long addr)
619static int kgdb_activate_sw_breakpoints(void) 625static int kgdb_activate_sw_breakpoints(void)
620{ 626{
621 unsigned long addr; 627 unsigned long addr;
622 int error = 0; 628 int error;
629 int ret = 0;
623 int i; 630 int i;
624 631
625 for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) { 632 for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
@@ -629,13 +636,16 @@ static int kgdb_activate_sw_breakpoints(void)
629 addr = kgdb_break[i].bpt_addr; 636 addr = kgdb_break[i].bpt_addr;
630 error = kgdb_arch_set_breakpoint(addr, 637 error = kgdb_arch_set_breakpoint(addr,
631 kgdb_break[i].saved_instr); 638 kgdb_break[i].saved_instr);
632 if (error) 639 if (error) {
633 return error; 640 ret = error;
641 printk(KERN_INFO "KGDB: BP install failed: %lx\n", addr);
642 continue;
643 }
634 644
635 kgdb_flush_swbreak_addr(addr); 645 kgdb_flush_swbreak_addr(addr);
636 kgdb_break[i].state = BP_ACTIVE; 646 kgdb_break[i].state = BP_ACTIVE;
637 } 647 }
638 return 0; 648 return ret;
639} 649}
640 650
641static int kgdb_set_sw_break(unsigned long addr) 651static int kgdb_set_sw_break(unsigned long addr)
@@ -682,7 +692,8 @@ static int kgdb_set_sw_break(unsigned long addr)
682static int kgdb_deactivate_sw_breakpoints(void) 692static int kgdb_deactivate_sw_breakpoints(void)
683{ 693{
684 unsigned long addr; 694 unsigned long addr;
685 int error = 0; 695 int error;
696 int ret = 0;
686 int i; 697 int i;
687 698
688 for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) { 699 for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
@@ -691,13 +702,15 @@ static int kgdb_deactivate_sw_breakpoints(void)
691 addr = kgdb_break[i].bpt_addr; 702 addr = kgdb_break[i].bpt_addr;
692 error = kgdb_arch_remove_breakpoint(addr, 703 error = kgdb_arch_remove_breakpoint(addr,
693 kgdb_break[i].saved_instr); 704 kgdb_break[i].saved_instr);
694 if (error) 705 if (error) {
695 return error; 706 printk(KERN_INFO "KGDB: BP remove failed: %lx\n", addr);
707 ret = error;
708 }
696 709
697 kgdb_flush_swbreak_addr(addr); 710 kgdb_flush_swbreak_addr(addr);
698 kgdb_break[i].state = BP_SET; 711 kgdb_break[i].state = BP_SET;
699 } 712 }
700 return 0; 713 return ret;
701} 714}
702 715
703static int kgdb_remove_sw_break(unsigned long addr) 716static int kgdb_remove_sw_break(unsigned long addr)
@@ -1204,8 +1217,10 @@ static int gdb_cmd_exception_pass(struct kgdb_state *ks)
1204 return 1; 1217 return 1;
1205 1218
1206 } else { 1219 } else {
1207 error_packet(remcom_out_buffer, -EINVAL); 1220 kgdb_msg_write("KGDB only knows signal 9 (pass)"
1208 return 0; 1221 " and 15 (pass and disconnect)\n"
1222 "Executing a continue without signal passing\n", 0);
1223 remcom_in_buffer[0] = 'c';
1209 } 1224 }
1210 1225
1211 /* Indicate fall through */ 1226 /* Indicate fall through */
@@ -1395,6 +1410,7 @@ kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs)
1395 struct kgdb_state kgdb_var; 1410 struct kgdb_state kgdb_var;
1396 struct kgdb_state *ks = &kgdb_var; 1411 struct kgdb_state *ks = &kgdb_var;
1397 unsigned long flags; 1412 unsigned long flags;
1413 int sstep_tries = 100;
1398 int error = 0; 1414 int error = 0;
1399 int i, cpu; 1415 int i, cpu;
1400 1416
@@ -1425,13 +1441,14 @@ acquirelock:
1425 cpu_relax(); 1441 cpu_relax();
1426 1442
1427 /* 1443 /*
1428 * Do not start the debugger connection on this CPU if the last 1444 * For single stepping, try to only enter on the processor
1429 * instance of the exception handler wanted to come into the 1445 * that was single stepping. To guard against a deadlock, the
1430 * debugger on a different CPU via a single step 1446 * kernel will retry only sstep_tries times before
1447 * giving up and continuing on.
1431 */ 1448 */
1432 if (atomic_read(&kgdb_cpu_doing_single_step) != -1 && 1449 if (atomic_read(&kgdb_cpu_doing_single_step) != -1 &&
1433 atomic_read(&kgdb_cpu_doing_single_step) != cpu) { 1450 (kgdb_info[cpu].task &&
1434 1451 kgdb_info[cpu].task->pid != kgdb_sstep_pid) && --sstep_tries) {
1435 atomic_set(&kgdb_active, -1); 1452 atomic_set(&kgdb_active, -1);
1436 touch_softlockup_watchdog(); 1453 touch_softlockup_watchdog();
1437 clocksource_touch_watchdog(); 1454 clocksource_touch_watchdog();
@@ -1524,6 +1541,13 @@ acquirelock:
1524 } 1541 }
1525 1542
1526kgdb_restore: 1543kgdb_restore:
1544 if (atomic_read(&kgdb_cpu_doing_single_step) != -1) {
1545 int sstep_cpu = atomic_read(&kgdb_cpu_doing_single_step);
1546 if (kgdb_info[sstep_cpu].task)
1547 kgdb_sstep_pid = kgdb_info[sstep_cpu].task->pid;
1548 else
1549 kgdb_sstep_pid = 0;
1550 }
1527 /* Free kgdb_active */ 1551 /* Free kgdb_active */
1528 atomic_set(&kgdb_active, -1); 1552 atomic_set(&kgdb_active, -1);
1529 touch_softlockup_watchdog(); 1553 touch_softlockup_watchdog();
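
Both breakpoint sweeps above change from fail-fast to best-effort: rather than returning on the first kgdb_arch_set_breakpoint()/kgdb_arch_remove_breakpoint() failure and leaving the remaining breakpoints untouched, they log the failure, remember the error, and keep going, returning the last error only after the loop completes. A user-space sketch of that pattern; struct item, apply_all() and fail_odd() are illustrative:

    #include <stdio.h>

    struct item { int id; int active; };

    static int apply_all(struct item *items, int n, int (*op)(struct item *))
    {
            int i, err, ret = 0;

            for (i = 0; i < n; i++) {
                    err = op(&items[i]);
                    if (err) {
                            ret = err;      /* remember, but keep going */
                            fprintf(stderr, "item %d failed: %d\n",
                                    items[i].id, err);
                            continue;
                    }
                    items[i].active = 1;
            }
            return ret;     /* 0 only if every item succeeded */
    }

    static int fail_odd(struct item *it)
    {
            return it->id & 1 ? -1 : 0;
    }

    int main(void)
    {
            struct item v[] = { {0, 0}, {1, 0}, {2, 0} };

            return apply_all(v, 3, fail_odd) ? 1 : 0;
    }
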
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 528dd78e7e7e..3feaf5a74514 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -100,6 +100,26 @@ static ssize_t kexec_crash_loaded_show(struct kobject *kobj,
100} 100}
101KERNEL_ATTR_RO(kexec_crash_loaded); 101KERNEL_ATTR_RO(kexec_crash_loaded);
102 102
103static ssize_t kexec_crash_size_show(struct kobject *kobj,
104 struct kobj_attribute *attr, char *buf)
105{
106 return sprintf(buf, "%zu\n", crash_get_memory_size());
107}
108static ssize_t kexec_crash_size_store(struct kobject *kobj,
109 struct kobj_attribute *attr,
110 const char *buf, size_t count)
111{
112 unsigned long cnt;
113 int ret;
114
115 if (strict_strtoul(buf, 0, &cnt))
116 return -EINVAL;
117
118 ret = crash_shrink_memory(cnt);
119 return ret < 0 ? ret : count;
120}
121KERNEL_ATTR_RW(kexec_crash_size);
122
103static ssize_t vmcoreinfo_show(struct kobject *kobj, 123static ssize_t vmcoreinfo_show(struct kobject *kobj,
104 struct kobj_attribute *attr, char *buf) 124 struct kobj_attribute *attr, char *buf)
105{ 125{
@@ -147,6 +167,7 @@ static struct attribute * kernel_attrs[] = {
147#ifdef CONFIG_KEXEC 167#ifdef CONFIG_KEXEC
148 &kexec_loaded_attr.attr, 168 &kexec_loaded_attr.attr,
149 &kexec_crash_loaded_attr.attr, 169 &kexec_crash_loaded_attr.attr,
170 &kexec_crash_size_attr.attr,
150 &vmcoreinfo_attr.attr, 171 &vmcoreinfo_attr.attr,
151#endif 172#endif
152 NULL 173 NULL
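
This hooks the new crash_get_memory_size()/crash_shrink_memory() pair up as /sys/kernel/kexec_crash_size: reading reports the reservation in bytes, and writing a smaller value shrinks it in place. A user-space sketch that reads the knob (shrinking is left as a comment, since it irreversibly frees the memory):

    #include <stdio.h>

    int main(void)
    {
            unsigned long size;
            FILE *f = fopen("/sys/kernel/kexec_crash_size", "r");

            if (!f || fscanf(f, "%lu", &size) != 1)
                    return 1;
            printf("crashkernel reservation: %lu bytes\n", size);
            fclose(f);
            /* Shrinking would be a write of a smaller byte count to the
             * same file; the kernel rejects a larger one with EINVAL and
             * refuses entirely (ENOENT) once a crash image is loaded. */
            return 0;
    }
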
diff --git a/kernel/kthread.c b/kernel/kthread.c
index ab7ae57773e1..fbb6222fe7e0 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -150,6 +150,29 @@ struct task_struct *kthread_create(int (*threadfn)(void *data),
150EXPORT_SYMBOL(kthread_create); 150EXPORT_SYMBOL(kthread_create);
151 151
152/** 152/**
153 * kthread_bind - bind a just-created kthread to a cpu.
154 * @p: thread created by kthread_create().
155 * @cpu: cpu (might not be online, must be possible) for @p to run on.
156 *
157 * Description: This function is equivalent to set_cpus_allowed(),
158 * except that @cpu doesn't need to be online, and the thread must be
159 * stopped (i.e., just returned from kthread_create()).
160 */
161void kthread_bind(struct task_struct *p, unsigned int cpu)
162{
163 /* Must have done schedule() in kthread() before we set_task_cpu */
164 if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) {
165 WARN_ON(1);
166 return;
167 }
168
169 p->cpus_allowed = cpumask_of_cpu(cpu);
170 p->rt.nr_cpus_allowed = 1;
171 p->flags |= PF_THREAD_BOUND;
172}
173EXPORT_SYMBOL(kthread_bind);
174
175/**
153 * kthread_stop - stop a thread created by kthread_create(). 176 * kthread_stop - stop a thread created by kthread_create().
154 * @k: thread created by kthread_create(). 177 * @k: thread created by kthread_create().
155 * 178 *
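
kthread_bind() is added here as an exported helper: because the thread returned by kthread_create() has never run, its cpumask and PF_THREAD_BOUND flag can be set directly once wait_task_inactive() confirms it is parked. The usual calling pattern is create, bind while still inactive, then wake. A kernel-style usage sketch; worker_fn(), start_pinned_worker() and the thread name are illustrative:

    #include <linux/kthread.h>
    #include <linux/sched.h>
    #include <linux/err.h>

    static int worker_fn(void *data)
    {
            while (!kthread_should_stop()) {
                    set_current_state(TASK_INTERRUPTIBLE);
                    schedule();     /* sleep until woken; real work goes here */
            }
            return 0;
    }

    static struct task_struct *start_pinned_worker(unsigned int cpu)
    {
            struct task_struct *p = kthread_create(worker_fn, NULL,
                                                   "pinned/%u", cpu);

            if (!IS_ERR(p)) {
                    kthread_bind(p, cpu);   /* legal only before first wakeup */
                    wake_up_process(p);
            }
            return p;
    }
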
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index f5dcd36d3151..5feaddcdbe49 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -73,11 +73,11 @@ module_param(lock_stat, int, 0644);
73 * to use a raw spinlock - we really don't want the spinlock 73 * to use a raw spinlock - we really don't want the spinlock
74 * code to recurse back into the lockdep code... 74 * code to recurse back into the lockdep code...
75 */ 75 */
76static raw_spinlock_t lockdep_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 76static arch_spinlock_t lockdep_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
77 77
78static int graph_lock(void) 78static int graph_lock(void)
79{ 79{
80 __raw_spin_lock(&lockdep_lock); 80 arch_spin_lock(&lockdep_lock);
81 /* 81 /*
82 * Make sure that if another CPU detected a bug while 82 * Make sure that if another CPU detected a bug while
83 * walking the graph we dont change it (while the other 83 * walking the graph we dont change it (while the other
@@ -85,7 +85,7 @@ static int graph_lock(void)
85 * dropped already) 85 * dropped already)
86 */ 86 */
87 if (!debug_locks) { 87 if (!debug_locks) {
88 __raw_spin_unlock(&lockdep_lock); 88 arch_spin_unlock(&lockdep_lock);
89 return 0; 89 return 0;
90 } 90 }
91 /* prevent any recursions within lockdep from causing deadlocks */ 91 /* prevent any recursions within lockdep from causing deadlocks */
@@ -95,11 +95,11 @@ static int graph_lock(void)
95 95
96static inline int graph_unlock(void) 96static inline int graph_unlock(void)
97{ 97{
98 if (debug_locks && !__raw_spin_is_locked(&lockdep_lock)) 98 if (debug_locks && !arch_spin_is_locked(&lockdep_lock))
99 return DEBUG_LOCKS_WARN_ON(1); 99 return DEBUG_LOCKS_WARN_ON(1);
100 100
101 current->lockdep_recursion--; 101 current->lockdep_recursion--;
102 __raw_spin_unlock(&lockdep_lock); 102 arch_spin_unlock(&lockdep_lock);
103 return 0; 103 return 0;
104} 104}
105 105
@@ -111,7 +111,7 @@ static inline int debug_locks_off_graph_unlock(void)
111{ 111{
112 int ret = debug_locks_off(); 112 int ret = debug_locks_off();
113 113
114 __raw_spin_unlock(&lockdep_lock); 114 arch_spin_unlock(&lockdep_lock);
115 115
116 return ret; 116 return ret;
117} 117}
@@ -140,7 +140,8 @@ static inline struct lock_class *hlock_class(struct held_lock *hlock)
140} 140}
141 141
142#ifdef CONFIG_LOCK_STAT 142#ifdef CONFIG_LOCK_STAT
143static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], lock_stats); 143static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS],
144 cpu_lock_stats);
144 145
145static inline u64 lockstat_clock(void) 146static inline u64 lockstat_clock(void)
146{ 147{
@@ -168,7 +169,7 @@ static void lock_time_inc(struct lock_time *lt, u64 time)
168 if (time > lt->max) 169 if (time > lt->max)
169 lt->max = time; 170 lt->max = time;
170 171
171 if (time < lt->min || !lt->min) 172 if (time < lt->min || !lt->nr)
172 lt->min = time; 173 lt->min = time;
173 174
174 lt->total += time; 175 lt->total += time;
@@ -177,8 +178,15 @@ static void lock_time_inc(struct lock_time *lt, u64 time)
177 178
178static inline void lock_time_add(struct lock_time *src, struct lock_time *dst) 179static inline void lock_time_add(struct lock_time *src, struct lock_time *dst)
179{ 180{
180 dst->min += src->min; 181 if (!src->nr)
181 dst->max += src->max; 182 return;
183
184 if (src->max > dst->max)
185 dst->max = src->max;
186
187 if (src->min < dst->min || !dst->nr)
188 dst->min = src->min;
189
182 dst->total += src->total; 190 dst->total += src->total;
183 dst->nr += src->nr; 191 dst->nr += src->nr;
184} 192}
@@ -191,7 +199,7 @@ struct lock_class_stats lock_stats(struct lock_class *class)
191 memset(&stats, 0, sizeof(struct lock_class_stats)); 199 memset(&stats, 0, sizeof(struct lock_class_stats));
192 for_each_possible_cpu(cpu) { 200 for_each_possible_cpu(cpu) {
193 struct lock_class_stats *pcs = 201 struct lock_class_stats *pcs =
194 &per_cpu(lock_stats, cpu)[class - lock_classes]; 202 &per_cpu(cpu_lock_stats, cpu)[class - lock_classes];
195 203
196 for (i = 0; i < ARRAY_SIZE(stats.contention_point); i++) 204 for (i = 0; i < ARRAY_SIZE(stats.contention_point); i++)
197 stats.contention_point[i] += pcs->contention_point[i]; 205 stats.contention_point[i] += pcs->contention_point[i];
@@ -218,7 +226,7 @@ void clear_lock_stats(struct lock_class *class)
218 226
219 for_each_possible_cpu(cpu) { 227 for_each_possible_cpu(cpu) {
220 struct lock_class_stats *cpu_stats = 228 struct lock_class_stats *cpu_stats =
221 &per_cpu(lock_stats, cpu)[class - lock_classes]; 229 &per_cpu(cpu_lock_stats, cpu)[class - lock_classes];
222 230
223 memset(cpu_stats, 0, sizeof(struct lock_class_stats)); 231 memset(cpu_stats, 0, sizeof(struct lock_class_stats));
224 } 232 }
@@ -228,12 +236,12 @@ void clear_lock_stats(struct lock_class *class)
228 236
229static struct lock_class_stats *get_lock_stats(struct lock_class *class) 237static struct lock_class_stats *get_lock_stats(struct lock_class *class)
230{ 238{
231 return &get_cpu_var(lock_stats)[class - lock_classes]; 239 return &get_cpu_var(cpu_lock_stats)[class - lock_classes];
232} 240}
233 241
234static void put_lock_stats(struct lock_class_stats *stats) 242static void put_lock_stats(struct lock_class_stats *stats)
235{ 243{
236 put_cpu_var(lock_stats); 244 put_cpu_var(cpu_lock_stats);
237} 245}
238 246
239static void lock_release_holdtime(struct held_lock *hlock) 247static void lock_release_holdtime(struct held_lock *hlock)
@@ -379,7 +387,8 @@ static int save_trace(struct stack_trace *trace)
379 * complete trace that maxes out the entries provided will be reported 387 * complete trace that maxes out the entries provided will be reported
380 * as incomplete, friggin useless </rant> 388 * as incomplete, friggin useless </rant>
381 */ 389 */
382 if (trace->entries[trace->nr_entries-1] == ULONG_MAX) 390 if (trace->nr_entries != 0 &&
391 trace->entries[trace->nr_entries-1] == ULONG_MAX)
383 trace->nr_entries--; 392 trace->nr_entries--;
384 393
385 trace->max_entries = trace->nr_entries; 394 trace->max_entries = trace->nr_entries;
@@ -1161,9 +1170,9 @@ unsigned long lockdep_count_forward_deps(struct lock_class *class)
1161 this.class = class; 1170 this.class = class;
1162 1171
1163 local_irq_save(flags); 1172 local_irq_save(flags);
1164 __raw_spin_lock(&lockdep_lock); 1173 arch_spin_lock(&lockdep_lock);
1165 ret = __lockdep_count_forward_deps(&this); 1174 ret = __lockdep_count_forward_deps(&this);
1166 __raw_spin_unlock(&lockdep_lock); 1175 arch_spin_unlock(&lockdep_lock);
1167 local_irq_restore(flags); 1176 local_irq_restore(flags);
1168 1177
1169 return ret; 1178 return ret;
@@ -1188,9 +1197,9 @@ unsigned long lockdep_count_backward_deps(struct lock_class *class)
1188 this.class = class; 1197 this.class = class;
1189 1198
1190 local_irq_save(flags); 1199 local_irq_save(flags);
1191 __raw_spin_lock(&lockdep_lock); 1200 arch_spin_lock(&lockdep_lock);
1192 ret = __lockdep_count_backward_deps(&this); 1201 ret = __lockdep_count_backward_deps(&this);
1193 __raw_spin_unlock(&lockdep_lock); 1202 arch_spin_unlock(&lockdep_lock);
1194 local_irq_restore(flags); 1203 local_irq_restore(flags);
1195 1204
1196 return ret; 1205 return ret;
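
The lock_time_add() rewrite above fixes a statistics bug: when folding per-CPU hold-time records together, the old code summed the per-CPU min and max, which is meaningless for order statistics. The merge now takes the smallest min and the largest max, and skips empty sources; nr doubles as an "initialized" flag, since min == 0 cannot distinguish "no samples yet" from "a zero-length hold" (lock_time_inc() above gets the matching !lt->nr test). Restated as a self-contained sketch:

    struct lock_time { unsigned long long min, max, total, nr; };

    static void lock_time_merge(const struct lock_time *src,
                                struct lock_time *dst)
    {
            if (!src->nr)           /* empty source: nothing to merge */
                    return;

            if (src->max > dst->max)
                    dst->max = src->max;

            if (src->min < dst->min || !dst->nr)    /* !dst->nr: dst empty */
                    dst->min = src->min;

            dst->total += src->total;
            dst->nr += src->nr;
    }
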
diff --git a/kernel/module.c b/kernel/module.c
index 5842a71cf052..e96b8ed1cb6a 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -370,8 +370,6 @@ EXPORT_SYMBOL_GPL(find_module);
370 370
371#ifdef CONFIG_SMP 371#ifdef CONFIG_SMP
372 372
373#ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA
374
375static void *percpu_modalloc(unsigned long size, unsigned long align, 373static void *percpu_modalloc(unsigned long size, unsigned long align,
376 const char *name) 374 const char *name)
377{ 375{
@@ -395,154 +393,6 @@ static void percpu_modfree(void *freeme)
395 free_percpu(freeme); 393 free_percpu(freeme);
396} 394}
397 395
398#else /* ... CONFIG_HAVE_LEGACY_PER_CPU_AREA */
399
400/* Number of blocks used and allocated. */
401static unsigned int pcpu_num_used, pcpu_num_allocated;
402/* Size of each block. -ve means used. */
403static int *pcpu_size;
404
405static int split_block(unsigned int i, unsigned short size)
406{
407 /* Reallocation required? */
408 if (pcpu_num_used + 1 > pcpu_num_allocated) {
409 int *new;
410
411 new = krealloc(pcpu_size, sizeof(new[0])*pcpu_num_allocated*2,
412 GFP_KERNEL);
413 if (!new)
414 return 0;
415
416 pcpu_num_allocated *= 2;
417 pcpu_size = new;
418 }
419
420 /* Insert a new subblock */
421 memmove(&pcpu_size[i+1], &pcpu_size[i],
422 sizeof(pcpu_size[0]) * (pcpu_num_used - i));
423 pcpu_num_used++;
424
425 pcpu_size[i+1] -= size;
426 pcpu_size[i] = size;
427 return 1;
428}
429
430static inline unsigned int block_size(int val)
431{
432 if (val < 0)
433 return -val;
434 return val;
435}
436
437static void *percpu_modalloc(unsigned long size, unsigned long align,
438 const char *name)
439{
440 unsigned long extra;
441 unsigned int i;
442 void *ptr;
443 int cpu;
444
445 if (align > PAGE_SIZE) {
446 printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n",
447 name, align, PAGE_SIZE);
448 align = PAGE_SIZE;
449 }
450
451 ptr = __per_cpu_start;
452 for (i = 0; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) {
453 /* Extra for alignment requirement. */
454 extra = ALIGN((unsigned long)ptr, align) - (unsigned long)ptr;
455 BUG_ON(i == 0 && extra != 0);
456
457 if (pcpu_size[i] < 0 || pcpu_size[i] < extra + size)
458 continue;
459
460 /* Transfer extra to previous block. */
461 if (pcpu_size[i-1] < 0)
462 pcpu_size[i-1] -= extra;
463 else
464 pcpu_size[i-1] += extra;
465 pcpu_size[i] -= extra;
466 ptr += extra;
467
468 /* Split block if warranted */
469 if (pcpu_size[i] - size > sizeof(unsigned long))
470 if (!split_block(i, size))
471 return NULL;
472
473 /* add the per-cpu scanning areas */
474 for_each_possible_cpu(cpu)
475 kmemleak_alloc(ptr + per_cpu_offset(cpu), size, 0,
476 GFP_KERNEL);
477
478 /* Mark allocated */
479 pcpu_size[i] = -pcpu_size[i];
480 return ptr;
481 }
482
483 printk(KERN_WARNING "Could not allocate %lu bytes percpu data\n",
484 size);
485 return NULL;
486}
487
488static void percpu_modfree(void *freeme)
489{
490 unsigned int i;
491 void *ptr = __per_cpu_start + block_size(pcpu_size[0]);
492 int cpu;
493
494 /* First entry is core kernel percpu data. */
495 for (i = 1; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) {
496 if (ptr == freeme) {
497 pcpu_size[i] = -pcpu_size[i];
498 goto free;
499 }
500 }
501 BUG();
502
503 free:
504 /* remove the per-cpu scanning areas */
505 for_each_possible_cpu(cpu)
506 kmemleak_free(freeme + per_cpu_offset(cpu));
507
508 /* Merge with previous? */
509 if (pcpu_size[i-1] >= 0) {
510 pcpu_size[i-1] += pcpu_size[i];
511 pcpu_num_used--;
512 memmove(&pcpu_size[i], &pcpu_size[i+1],
513 (pcpu_num_used - i) * sizeof(pcpu_size[0]));
514 i--;
515 }
516 /* Merge with next? */
517 if (i+1 < pcpu_num_used && pcpu_size[i+1] >= 0) {
518 pcpu_size[i] += pcpu_size[i+1];
519 pcpu_num_used--;
520 memmove(&pcpu_size[i+1], &pcpu_size[i+2],
521 (pcpu_num_used - (i+1)) * sizeof(pcpu_size[0]));
522 }
523}
524
525static int percpu_modinit(void)
526{
527 pcpu_num_used = 2;
528 pcpu_num_allocated = 2;
529 pcpu_size = kmalloc(sizeof(pcpu_size[0]) * pcpu_num_allocated,
530 GFP_KERNEL);
531 /* Static in-kernel percpu data (used). */
532 pcpu_size[0] = -(__per_cpu_end-__per_cpu_start);
533 /* Free room. */
534 pcpu_size[1] = PERCPU_ENOUGH_ROOM + pcpu_size[0];
535 if (pcpu_size[1] < 0) {
536 printk(KERN_ERR "No per-cpu room for modules.\n");
537 pcpu_num_used = 1;
538 }
539
540 return 0;
541}
542__initcall(percpu_modinit);
543
544#endif /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */
545
546static unsigned int find_pcpusec(Elf_Ehdr *hdr, 396static unsigned int find_pcpusec(Elf_Ehdr *hdr,
547 Elf_Shdr *sechdrs, 397 Elf_Shdr *sechdrs,
548 const char *secstrings) 398 const char *secstrings)
@@ -1030,11 +880,23 @@ static int try_to_force_load(struct module *mod, const char *reason)
1030} 880}
1031 881
1032#ifdef CONFIG_MODVERSIONS 882#ifdef CONFIG_MODVERSIONS
 883/* If the arch applies (non-zero) relocations to kernel kcrctab, unapply them. */
884static unsigned long maybe_relocated(unsigned long crc,
885 const struct module *crc_owner)
886{
887#ifdef ARCH_RELOCATES_KCRCTAB
888 if (crc_owner == NULL)
889 return crc - (unsigned long)reloc_start;
890#endif
891 return crc;
892}
893
1033static int check_version(Elf_Shdr *sechdrs, 894static int check_version(Elf_Shdr *sechdrs,
1034 unsigned int versindex, 895 unsigned int versindex,
1035 const char *symname, 896 const char *symname,
1036 struct module *mod, 897 struct module *mod,
1037 const unsigned long *crc) 898 const unsigned long *crc,
899 const struct module *crc_owner)
1038{ 900{
1039 unsigned int i, num_versions; 901 unsigned int i, num_versions;
1040 struct modversion_info *versions; 902 struct modversion_info *versions;
@@ -1055,10 +917,10 @@ static int check_version(Elf_Shdr *sechdrs,
1055 if (strcmp(versions[i].name, symname) != 0) 917 if (strcmp(versions[i].name, symname) != 0)
1056 continue; 918 continue;
1057 919
1058 if (versions[i].crc == *crc) 920 if (versions[i].crc == maybe_relocated(*crc, crc_owner))
1059 return 1; 921 return 1;
1060 DEBUGP("Found checksum %lX vs module %lX\n", 922 DEBUGP("Found checksum %lX vs module %lX\n",
1061 *crc, versions[i].crc); 923 maybe_relocated(*crc, crc_owner), versions[i].crc);
1062 goto bad_version; 924 goto bad_version;
1063 } 925 }
1064 926
@@ -1081,7 +943,8 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs,
1081 if (!find_symbol(MODULE_SYMBOL_PREFIX "module_layout", NULL, 943 if (!find_symbol(MODULE_SYMBOL_PREFIX "module_layout", NULL,
1082 &crc, true, false)) 944 &crc, true, false))
1083 BUG(); 945 BUG();
1084 return check_version(sechdrs, versindex, "module_layout", mod, crc); 946 return check_version(sechdrs, versindex, "module_layout", mod, crc,
947 NULL);
1085} 948}
1086 949
1087/* First part is kernel version, which we ignore if module has crcs. */ 950/* First part is kernel version, which we ignore if module has crcs. */
@@ -1099,7 +962,8 @@ static inline int check_version(Elf_Shdr *sechdrs,
1099 unsigned int versindex, 962 unsigned int versindex,
1100 const char *symname, 963 const char *symname,
1101 struct module *mod, 964 struct module *mod,
1102 const unsigned long *crc) 965 const unsigned long *crc,
966 const struct module *crc_owner)
1103{ 967{
1104 return 1; 968 return 1;
1105} 969}
@@ -1134,8 +998,8 @@ static const struct kernel_symbol *resolve_symbol(Elf_Shdr *sechdrs,
1134 /* use_module can fail due to OOM, 998 /* use_module can fail due to OOM,
1135 or module initialization or unloading */ 999 or module initialization or unloading */
1136 if (sym) { 1000 if (sym) {
1137 if (!check_version(sechdrs, versindex, name, mod, crc) || 1001 if (!check_version(sechdrs, versindex, name, mod, crc, owner)
1138 !use_module(mod, owner)) 1002 || !use_module(mod, owner))
1139 sym = NULL; 1003 sym = NULL;
1140 } 1004 }
1141 return sym; 1005 return sym;
@@ -2046,9 +1910,7 @@ static void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr,
2046 unsigned int i; 1910 unsigned int i;
2047 1911
2048 /* only scan the sections containing data */ 1912 /* only scan the sections containing data */
2049 kmemleak_scan_area(mod->module_core, (unsigned long)mod - 1913 kmemleak_scan_area(mod, sizeof(struct module), GFP_KERNEL);
2050 (unsigned long)mod->module_core,
2051 sizeof(struct module), GFP_KERNEL);
2052 1914
2053 for (i = 1; i < hdr->e_shnum; i++) { 1915 for (i = 1; i < hdr->e_shnum; i++) {
2054 if (!(sechdrs[i].sh_flags & SHF_ALLOC)) 1916 if (!(sechdrs[i].sh_flags & SHF_ALLOC))
@@ -2057,8 +1919,7 @@ static void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr,
2057 && strncmp(secstrings + sechdrs[i].sh_name, ".bss", 4) != 0) 1919 && strncmp(secstrings + sechdrs[i].sh_name, ".bss", 4) != 0)
2058 continue; 1920 continue;
2059 1921
2060 kmemleak_scan_area(mod->module_core, sechdrs[i].sh_addr - 1922 kmemleak_scan_area((void *)sechdrs[i].sh_addr,
2061 (unsigned long)mod->module_core,
2062 sechdrs[i].sh_size, GFP_KERNEL); 1923 sechdrs[i].sh_size, GFP_KERNEL);
2063 } 1924 }
2064} 1925}
@@ -2386,6 +2247,12 @@ static noinline struct module *load_module(void __user *umod,
2386 "_ftrace_events", 2247 "_ftrace_events",
2387 sizeof(*mod->trace_events), 2248 sizeof(*mod->trace_events),
2388 &mod->num_trace_events); 2249 &mod->num_trace_events);
2250 /*
2251 * This section contains pointers to allocated objects in the trace
2252 * code and not scanning it leads to false positives.
2253 */
2254 kmemleak_scan_area(mod->trace_events, sizeof(*mod->trace_events) *
2255 mod->num_trace_events, GFP_KERNEL);
2389#endif 2256#endif
2390#ifdef CONFIG_FTRACE_MCOUNT_RECORD 2257#ifdef CONFIG_FTRACE_MCOUNT_RECORD
2391 /* sechdrs[0].sh_size is always zero */ 2258 /* sechdrs[0].sh_size is always zero */
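With the CONFIG_HAVE_LEGACY_PER_CPU_AREA block allocator deleted above, module per-cpu memory always comes from the generic percpu allocator. A hedged sketch of that allocator's calling convention (demo names, not module code):

#include <linux/percpu.h>
#include <linux/errno.h>

static int demo_percpu(void)
{
	int cpu;
	long *counters = __alloc_percpu(sizeof(long), __alignof__(long));

	if (!counters)
		return -ENOMEM;

	/* per_cpu_ptr() resolves the percpu cookie to one CPU's copy */
	for_each_possible_cpu(cpu)
		*per_cpu_ptr(counters, cpu) = 0;

	free_percpu(counters);
	return 0;
}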
diff --git a/kernel/mutex-debug.h b/kernel/mutex-debug.h
index 6b2d735846a5..57d527a16f9d 100644
--- a/kernel/mutex-debug.h
+++ b/kernel/mutex-debug.h
@@ -43,13 +43,13 @@ static inline void mutex_clear_owner(struct mutex *lock)
43 \ 43 \
44 DEBUG_LOCKS_WARN_ON(in_interrupt()); \ 44 DEBUG_LOCKS_WARN_ON(in_interrupt()); \
45 local_irq_save(flags); \ 45 local_irq_save(flags); \
46 __raw_spin_lock(&(lock)->raw_lock); \ 46 arch_spin_lock(&(lock)->rlock.raw_lock);\
47 DEBUG_LOCKS_WARN_ON(l->magic != l); \ 47 DEBUG_LOCKS_WARN_ON(l->magic != l); \
48 } while (0) 48 } while (0)
49 49
50#define spin_unlock_mutex(lock, flags) \ 50#define spin_unlock_mutex(lock, flags) \
51 do { \ 51 do { \
52 __raw_spin_unlock(&(lock)->raw_lock); \ 52 arch_spin_unlock(&(lock)->rlock.raw_lock); \
53 local_irq_restore(flags); \ 53 local_irq_restore(flags); \
54 preempt_check_resched(); \ 54 preempt_check_resched(); \
55 } while (0) 55 } while (0)
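The new (lock)->rlock.raw_lock expression reflects the lock-type layering introduced by this series: spinlock_t embeds a struct raw_spinlock named rlock, which in turn embeds the arch-level lock. A rough sketch of the nesting (not the real header definitions, details elided):

typedef struct {
	volatile unsigned int slock;	/* arch-specific lock word */
} arch_spinlock_t;

typedef struct raw_spinlock {
	arch_spinlock_t raw_lock;
	/* debug/lockdep fields elided */
} raw_spinlock_t;

typedef struct spinlock {
	struct raw_spinlock rlock;	/* hence (lock)->rlock.raw_lock above */
} spinlock_t;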
diff --git a/kernel/panic.c b/kernel/panic.c
index 96b45d0b4ba5..5827f7b97254 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -10,6 +10,7 @@
10 */ 10 */
11#include <linux/debug_locks.h> 11#include <linux/debug_locks.h>
12#include <linux/interrupt.h> 12#include <linux/interrupt.h>
13#include <linux/kmsg_dump.h>
13#include <linux/kallsyms.h> 14#include <linux/kallsyms.h>
14#include <linux/notifier.h> 15#include <linux/notifier.h>
15#include <linux/module.h> 16#include <linux/module.h>
@@ -74,6 +75,7 @@ NORET_TYPE void panic(const char * fmt, ...)
74 dump_stack(); 75 dump_stack();
75#endif 76#endif
76 77
78 kmsg_dump(KMSG_DUMP_PANIC);
77 /* 79 /*
78 * If we have crashed and we have a crash kernel loaded let it handle 80 * If we have crashed and we have a crash kernel loaded let it handle
79 * everything else. 81 * everything else.
@@ -339,6 +341,7 @@ void oops_exit(void)
339{ 341{
340 do_oops_enter_exit(); 342 do_oops_enter_exit();
341 print_oops_end_marker(); 343 print_oops_end_marker();
344 kmsg_dump(KMSG_DUMP_OOPS);
342} 345}
343 346
344#ifdef WANT_WARN_ON_SLOWPATH 347#ifdef WANT_WARN_ON_SLOWPATH
diff --git a/kernel/params.c b/kernel/params.c
index d656c276508d..cf1b69183127 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -24,6 +24,7 @@
24#include <linux/err.h> 24#include <linux/err.h>
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/ctype.h> 26#include <linux/ctype.h>
27#include <linux/string.h>
27 28
28#if 0 29#if 0
29#define DEBUGP printk 30#define DEBUGP printk
@@ -122,9 +123,7 @@ static char *next_arg(char *args, char **param, char **val)
122 next = args + i; 123 next = args + i;
123 124
124 /* Chew up trailing spaces. */ 125 /* Chew up trailing spaces. */
125 while (isspace(*next)) 126 return skip_spaces(next);
126 next++;
127 return next;
128} 127}
129 128
130/* Args looks like "foo=bar,bar2 baz=fuz wiz". */ 129/* Args looks like "foo=bar,bar2 baz=fuz wiz". */
@@ -139,8 +138,7 @@ int parse_args(const char *name,
139 DEBUGP("Parsing ARGS: %s\n", args); 138 DEBUGP("Parsing ARGS: %s\n", args);
140 139
141 /* Chew leading spaces */ 140 /* Chew leading spaces */
142 while (isspace(*args)) 141 args = skip_spaces(args);
143 args++;
144 142
145 while (*args) { 143 while (*args) {
146 int ret; 144 int ret;
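skip_spaces() from lib/string.c is a drop-in replacement for the open-coded loops removed above. A small sketch of the equivalence (demo names assumed):

#include <linux/ctype.h>
#include <linux/string.h>

/* The pattern the patch removes: */
static char *demo_skip_open_coded(char *s)
{
	while (isspace(*s))
		s++;
	return s;
}

/* Equivalent form using the lib helper the patch switches to: */
static char *demo_skip_helper(char *s)
{
	return skip_spaces(s);
}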
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 40a996ec39fa..97d1a3dd7a59 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -36,7 +36,7 @@
36/* 36/*
37 * Each CPU has a list of per CPU events: 37 * Each CPU has a list of per CPU events:
38 */ 38 */
39DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context); 39static DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context);
40 40
41int perf_max_events __read_mostly = 1; 41int perf_max_events __read_mostly = 1;
42static int perf_reserved_percpu __read_mostly; 42static int perf_reserved_percpu __read_mostly;
@@ -203,14 +203,14 @@ perf_lock_task_context(struct task_struct *task, unsigned long *flags)
203 * if so. If we locked the right context, then it 203 * if so. If we locked the right context, then it
204 * can't get swapped on us any more. 204 * can't get swapped on us any more.
205 */ 205 */
206 spin_lock_irqsave(&ctx->lock, *flags); 206 raw_spin_lock_irqsave(&ctx->lock, *flags);
207 if (ctx != rcu_dereference(task->perf_event_ctxp)) { 207 if (ctx != rcu_dereference(task->perf_event_ctxp)) {
208 spin_unlock_irqrestore(&ctx->lock, *flags); 208 raw_spin_unlock_irqrestore(&ctx->lock, *flags);
209 goto retry; 209 goto retry;
210 } 210 }
211 211
212 if (!atomic_inc_not_zero(&ctx->refcount)) { 212 if (!atomic_inc_not_zero(&ctx->refcount)) {
213 spin_unlock_irqrestore(&ctx->lock, *flags); 213 raw_spin_unlock_irqrestore(&ctx->lock, *flags);
214 ctx = NULL; 214 ctx = NULL;
215 } 215 }
216 } 216 }
@@ -231,7 +231,7 @@ static struct perf_event_context *perf_pin_task_context(struct task_struct *task
231 ctx = perf_lock_task_context(task, &flags); 231 ctx = perf_lock_task_context(task, &flags);
232 if (ctx) { 232 if (ctx) {
233 ++ctx->pin_count; 233 ++ctx->pin_count;
234 spin_unlock_irqrestore(&ctx->lock, flags); 234 raw_spin_unlock_irqrestore(&ctx->lock, flags);
235 } 235 }
236 return ctx; 236 return ctx;
237} 237}
@@ -240,9 +240,9 @@ static void perf_unpin_context(struct perf_event_context *ctx)
240{ 240{
241 unsigned long flags; 241 unsigned long flags;
242 242
243 spin_lock_irqsave(&ctx->lock, flags); 243 raw_spin_lock_irqsave(&ctx->lock, flags);
244 --ctx->pin_count; 244 --ctx->pin_count;
245 spin_unlock_irqrestore(&ctx->lock, flags); 245 raw_spin_unlock_irqrestore(&ctx->lock, flags);
246 put_ctx(ctx); 246 put_ctx(ctx);
247} 247}
248 248
@@ -427,7 +427,7 @@ static void __perf_event_remove_from_context(void *info)
427 if (ctx->task && cpuctx->task_ctx != ctx) 427 if (ctx->task && cpuctx->task_ctx != ctx)
428 return; 428 return;
429 429
430 spin_lock(&ctx->lock); 430 raw_spin_lock(&ctx->lock);
431 /* 431 /*
432 * Protect the list operation against NMI by disabling the 432 * Protect the list operation against NMI by disabling the
433 * events on a global level. 433 * events on a global level.
@@ -449,7 +449,7 @@ static void __perf_event_remove_from_context(void *info)
449 } 449 }
450 450
451 perf_enable(); 451 perf_enable();
452 spin_unlock(&ctx->lock); 452 raw_spin_unlock(&ctx->lock);
453} 453}
454 454
455 455
@@ -488,12 +488,12 @@ retry:
488 task_oncpu_function_call(task, __perf_event_remove_from_context, 488 task_oncpu_function_call(task, __perf_event_remove_from_context,
489 event); 489 event);
490 490
491 spin_lock_irq(&ctx->lock); 491 raw_spin_lock_irq(&ctx->lock);
492 /* 492 /*
493 * If the context is active we need to retry the smp call. 493 * If the context is active we need to retry the smp call.
494 */ 494 */
495 if (ctx->nr_active && !list_empty(&event->group_entry)) { 495 if (ctx->nr_active && !list_empty(&event->group_entry)) {
496 spin_unlock_irq(&ctx->lock); 496 raw_spin_unlock_irq(&ctx->lock);
497 goto retry; 497 goto retry;
498 } 498 }
499 499
@@ -504,7 +504,7 @@ retry:
504 */ 504 */
505 if (!list_empty(&event->group_entry)) 505 if (!list_empty(&event->group_entry))
506 list_del_event(event, ctx); 506 list_del_event(event, ctx);
507 spin_unlock_irq(&ctx->lock); 507 raw_spin_unlock_irq(&ctx->lock);
508} 508}
509 509
510/* 510/*
@@ -535,7 +535,7 @@ static void __perf_event_disable(void *info)
535 if (ctx->task && cpuctx->task_ctx != ctx) 535 if (ctx->task && cpuctx->task_ctx != ctx)
536 return; 536 return;
537 537
538 spin_lock(&ctx->lock); 538 raw_spin_lock(&ctx->lock);
539 539
540 /* 540 /*
541 * If the event is on, turn it off. 541 * If the event is on, turn it off.
@@ -551,7 +551,7 @@ static void __perf_event_disable(void *info)
551 event->state = PERF_EVENT_STATE_OFF; 551 event->state = PERF_EVENT_STATE_OFF;
552 } 552 }
553 553
554 spin_unlock(&ctx->lock); 554 raw_spin_unlock(&ctx->lock);
555} 555}
556 556
557/* 557/*
@@ -567,7 +567,7 @@ static void __perf_event_disable(void *info)
567 * is the current context on this CPU and preemption is disabled, 567 * is the current context on this CPU and preemption is disabled,
568 * hence we can't get into perf_event_task_sched_out for this context. 568 * hence we can't get into perf_event_task_sched_out for this context.
569 */ 569 */
570static void perf_event_disable(struct perf_event *event) 570void perf_event_disable(struct perf_event *event)
571{ 571{
572 struct perf_event_context *ctx = event->ctx; 572 struct perf_event_context *ctx = event->ctx;
573 struct task_struct *task = ctx->task; 573 struct task_struct *task = ctx->task;
@@ -584,12 +584,12 @@ static void perf_event_disable(struct perf_event *event)
584 retry: 584 retry:
585 task_oncpu_function_call(task, __perf_event_disable, event); 585 task_oncpu_function_call(task, __perf_event_disable, event);
586 586
587 spin_lock_irq(&ctx->lock); 587 raw_spin_lock_irq(&ctx->lock);
588 /* 588 /*
589 * If the event is still active, we need to retry the cross-call. 589 * If the event is still active, we need to retry the cross-call.
590 */ 590 */
591 if (event->state == PERF_EVENT_STATE_ACTIVE) { 591 if (event->state == PERF_EVENT_STATE_ACTIVE) {
592 spin_unlock_irq(&ctx->lock); 592 raw_spin_unlock_irq(&ctx->lock);
593 goto retry; 593 goto retry;
594 } 594 }
595 595
@@ -602,7 +602,7 @@ static void perf_event_disable(struct perf_event *event)
602 event->state = PERF_EVENT_STATE_OFF; 602 event->state = PERF_EVENT_STATE_OFF;
603 } 603 }
604 604
605 spin_unlock_irq(&ctx->lock); 605 raw_spin_unlock_irq(&ctx->lock);
606} 606}
607 607
608static int 608static int
@@ -770,7 +770,7 @@ static void __perf_install_in_context(void *info)
770 cpuctx->task_ctx = ctx; 770 cpuctx->task_ctx = ctx;
771 } 771 }
772 772
773 spin_lock(&ctx->lock); 773 raw_spin_lock(&ctx->lock);
774 ctx->is_active = 1; 774 ctx->is_active = 1;
775 update_context_time(ctx); 775 update_context_time(ctx);
776 776
@@ -782,6 +782,9 @@ static void __perf_install_in_context(void *info)
782 782
783 add_event_to_ctx(event, ctx); 783 add_event_to_ctx(event, ctx);
784 784
785 if (event->cpu != -1 && event->cpu != smp_processor_id())
786 goto unlock;
787
785 /* 788 /*
786 * Don't put the event on if it is disabled or if 789 * Don't put the event on if it is disabled or if
787 * it is in a group and the group isn't on. 790 * it is in a group and the group isn't on.
@@ -820,7 +823,7 @@ static void __perf_install_in_context(void *info)
820 unlock: 823 unlock:
821 perf_enable(); 824 perf_enable();
822 825
823 spin_unlock(&ctx->lock); 826 raw_spin_unlock(&ctx->lock);
824} 827}
825 828
826/* 829/*
@@ -856,12 +859,12 @@ retry:
856 task_oncpu_function_call(task, __perf_install_in_context, 859 task_oncpu_function_call(task, __perf_install_in_context,
857 event); 860 event);
858 861
859 spin_lock_irq(&ctx->lock); 862 raw_spin_lock_irq(&ctx->lock);
860 /* 863 /*
861 * we need to retry the smp call. 864 * we need to retry the smp call.
862 */ 865 */
863 if (ctx->is_active && list_empty(&event->group_entry)) { 866 if (ctx->is_active && list_empty(&event->group_entry)) {
864 spin_unlock_irq(&ctx->lock); 867 raw_spin_unlock_irq(&ctx->lock);
865 goto retry; 868 goto retry;
866 } 869 }
867 870
@@ -872,7 +875,7 @@ retry:
872 */ 875 */
873 if (list_empty(&event->group_entry)) 876 if (list_empty(&event->group_entry))
874 add_event_to_ctx(event, ctx); 877 add_event_to_ctx(event, ctx);
875 spin_unlock_irq(&ctx->lock); 878 raw_spin_unlock_irq(&ctx->lock);
876} 879}
877 880
878/* 881/*
@@ -917,7 +920,7 @@ static void __perf_event_enable(void *info)
917 cpuctx->task_ctx = ctx; 920 cpuctx->task_ctx = ctx;
918 } 921 }
919 922
920 spin_lock(&ctx->lock); 923 raw_spin_lock(&ctx->lock);
921 ctx->is_active = 1; 924 ctx->is_active = 1;
922 update_context_time(ctx); 925 update_context_time(ctx);
923 926
@@ -925,6 +928,9 @@ static void __perf_event_enable(void *info)
925 goto unlock; 928 goto unlock;
926 __perf_event_mark_enabled(event, ctx); 929 __perf_event_mark_enabled(event, ctx);
927 930
931 if (event->cpu != -1 && event->cpu != smp_processor_id())
932 goto unlock;
933
928 /* 934 /*
929 * If the event is in a group and isn't the group leader, 935 * If the event is in a group and isn't the group leader,
930 * then don't put it on unless the group is on. 936 * then don't put it on unless the group is on.
@@ -959,7 +965,7 @@ static void __perf_event_enable(void *info)
959 } 965 }
960 966
961 unlock: 967 unlock:
962 spin_unlock(&ctx->lock); 968 raw_spin_unlock(&ctx->lock);
963} 969}
964 970
965/* 971/*
@@ -971,7 +977,7 @@ static void __perf_event_enable(void *info)
971 * perf_event_for_each_child or perf_event_for_each as described 977 * perf_event_for_each_child or perf_event_for_each as described
972 * for perf_event_disable. 978 * for perf_event_disable.
973 */ 979 */
974static void perf_event_enable(struct perf_event *event) 980void perf_event_enable(struct perf_event *event)
975{ 981{
976 struct perf_event_context *ctx = event->ctx; 982 struct perf_event_context *ctx = event->ctx;
977 struct task_struct *task = ctx->task; 983 struct task_struct *task = ctx->task;
@@ -985,7 +991,7 @@ static void perf_event_enable(struct perf_event *event)
985 return; 991 return;
986 } 992 }
987 993
988 spin_lock_irq(&ctx->lock); 994 raw_spin_lock_irq(&ctx->lock);
989 if (event->state >= PERF_EVENT_STATE_INACTIVE) 995 if (event->state >= PERF_EVENT_STATE_INACTIVE)
990 goto out; 996 goto out;
991 997
@@ -1000,10 +1006,10 @@ static void perf_event_enable(struct perf_event *event)
1000 event->state = PERF_EVENT_STATE_OFF; 1006 event->state = PERF_EVENT_STATE_OFF;
1001 1007
1002 retry: 1008 retry:
1003 spin_unlock_irq(&ctx->lock); 1009 raw_spin_unlock_irq(&ctx->lock);
1004 task_oncpu_function_call(task, __perf_event_enable, event); 1010 task_oncpu_function_call(task, __perf_event_enable, event);
1005 1011
1006 spin_lock_irq(&ctx->lock); 1012 raw_spin_lock_irq(&ctx->lock);
1007 1013
1008 /* 1014 /*
1009 * If the context is active and the event is still off, 1015 * If the context is active and the event is still off,
@@ -1020,7 +1026,7 @@ static void perf_event_enable(struct perf_event *event)
1020 __perf_event_mark_enabled(event, ctx); 1026 __perf_event_mark_enabled(event, ctx);
1021 1027
1022 out: 1028 out:
1023 spin_unlock_irq(&ctx->lock); 1029 raw_spin_unlock_irq(&ctx->lock);
1024} 1030}
1025 1031
1026static int perf_event_refresh(struct perf_event *event, int refresh) 1032static int perf_event_refresh(struct perf_event *event, int refresh)
@@ -1042,7 +1048,7 @@ void __perf_event_sched_out(struct perf_event_context *ctx,
1042{ 1048{
1043 struct perf_event *event; 1049 struct perf_event *event;
1044 1050
1045 spin_lock(&ctx->lock); 1051 raw_spin_lock(&ctx->lock);
1046 ctx->is_active = 0; 1052 ctx->is_active = 0;
1047 if (likely(!ctx->nr_events)) 1053 if (likely(!ctx->nr_events))
1048 goto out; 1054 goto out;
@@ -1055,7 +1061,7 @@ void __perf_event_sched_out(struct perf_event_context *ctx,
1055 } 1061 }
1056 perf_enable(); 1062 perf_enable();
1057 out: 1063 out:
1058 spin_unlock(&ctx->lock); 1064 raw_spin_unlock(&ctx->lock);
1059} 1065}
1060 1066
1061/* 1067/*
@@ -1193,8 +1199,8 @@ void perf_event_task_sched_out(struct task_struct *task,
1193 * order we take the locks because no other cpu could 1199 * order we take the locks because no other cpu could
1194 * be trying to lock both of these tasks. 1200 * be trying to lock both of these tasks.
1195 */ 1201 */
1196 spin_lock(&ctx->lock); 1202 raw_spin_lock(&ctx->lock);
1197 spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING); 1203 raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
1198 if (context_equiv(ctx, next_ctx)) { 1204 if (context_equiv(ctx, next_ctx)) {
1199 /* 1205 /*
1200 * XXX do we need a memory barrier of sorts 1206 * XXX do we need a memory barrier of sorts
@@ -1208,8 +1214,8 @@ void perf_event_task_sched_out(struct task_struct *task,
1208 1214
1209 perf_event_sync_stat(ctx, next_ctx); 1215 perf_event_sync_stat(ctx, next_ctx);
1210 } 1216 }
1211 spin_unlock(&next_ctx->lock); 1217 raw_spin_unlock(&next_ctx->lock);
1212 spin_unlock(&ctx->lock); 1218 raw_spin_unlock(&ctx->lock);
1213 } 1219 }
1214 rcu_read_unlock(); 1220 rcu_read_unlock();
1215 1221
@@ -1251,7 +1257,7 @@ __perf_event_sched_in(struct perf_event_context *ctx,
1251 struct perf_event *event; 1257 struct perf_event *event;
1252 int can_add_hw = 1; 1258 int can_add_hw = 1;
1253 1259
1254 spin_lock(&ctx->lock); 1260 raw_spin_lock(&ctx->lock);
1255 ctx->is_active = 1; 1261 ctx->is_active = 1;
1256 if (likely(!ctx->nr_events)) 1262 if (likely(!ctx->nr_events))
1257 goto out; 1263 goto out;
@@ -1306,7 +1312,7 @@ __perf_event_sched_in(struct perf_event_context *ctx,
1306 } 1312 }
1307 perf_enable(); 1313 perf_enable();
1308 out: 1314 out:
1309 spin_unlock(&ctx->lock); 1315 raw_spin_unlock(&ctx->lock);
1310} 1316}
1311 1317
1312/* 1318/*
@@ -1370,7 +1376,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
1370 struct hw_perf_event *hwc; 1376 struct hw_perf_event *hwc;
1371 u64 interrupts, freq; 1377 u64 interrupts, freq;
1372 1378
1373 spin_lock(&ctx->lock); 1379 raw_spin_lock(&ctx->lock);
1374 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 1380 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
1375 if (event->state != PERF_EVENT_STATE_ACTIVE) 1381 if (event->state != PERF_EVENT_STATE_ACTIVE)
1376 continue; 1382 continue;
@@ -1425,7 +1431,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
1425 perf_enable(); 1431 perf_enable();
1426 } 1432 }
1427 } 1433 }
1428 spin_unlock(&ctx->lock); 1434 raw_spin_unlock(&ctx->lock);
1429} 1435}
1430 1436
1431/* 1437/*
@@ -1438,7 +1444,7 @@ static void rotate_ctx(struct perf_event_context *ctx)
1438 if (!ctx->nr_events) 1444 if (!ctx->nr_events)
1439 return; 1445 return;
1440 1446
1441 spin_lock(&ctx->lock); 1447 raw_spin_lock(&ctx->lock);
1442 /* 1448 /*
1443 * Rotate the first entry last (works just fine for group events too): 1449 * Rotate the first entry last (works just fine for group events too):
1444 */ 1450 */
@@ -1449,7 +1455,7 @@ static void rotate_ctx(struct perf_event_context *ctx)
1449 } 1455 }
1450 perf_enable(); 1456 perf_enable();
1451 1457
1452 spin_unlock(&ctx->lock); 1458 raw_spin_unlock(&ctx->lock);
1453} 1459}
1454 1460
1455void perf_event_task_tick(struct task_struct *curr, int cpu) 1461void perf_event_task_tick(struct task_struct *curr, int cpu)
@@ -1498,7 +1504,7 @@ static void perf_event_enable_on_exec(struct task_struct *task)
1498 1504
1499 __perf_event_task_sched_out(ctx); 1505 __perf_event_task_sched_out(ctx);
1500 1506
1501 spin_lock(&ctx->lock); 1507 raw_spin_lock(&ctx->lock);
1502 1508
1503 list_for_each_entry(event, &ctx->group_list, group_entry) { 1509 list_for_each_entry(event, &ctx->group_list, group_entry) {
1504 if (!event->attr.enable_on_exec) 1510 if (!event->attr.enable_on_exec)
@@ -1516,7 +1522,7 @@ static void perf_event_enable_on_exec(struct task_struct *task)
1516 if (enabled) 1522 if (enabled)
1517 unclone_ctx(ctx); 1523 unclone_ctx(ctx);
1518 1524
1519 spin_unlock(&ctx->lock); 1525 raw_spin_unlock(&ctx->lock);
1520 1526
1521 perf_event_task_sched_in(task, smp_processor_id()); 1527 perf_event_task_sched_in(task, smp_processor_id());
1522 out: 1528 out:
@@ -1542,10 +1548,10 @@ static void __perf_event_read(void *info)
1542 if (ctx->task && cpuctx->task_ctx != ctx) 1548 if (ctx->task && cpuctx->task_ctx != ctx)
1543 return; 1549 return;
1544 1550
1545 spin_lock(&ctx->lock); 1551 raw_spin_lock(&ctx->lock);
1546 update_context_time(ctx); 1552 update_context_time(ctx);
1547 update_event_times(event); 1553 update_event_times(event);
1548 spin_unlock(&ctx->lock); 1554 raw_spin_unlock(&ctx->lock);
1549 1555
1550 event->pmu->read(event); 1556 event->pmu->read(event);
1551} 1557}
@@ -1563,10 +1569,10 @@ static u64 perf_event_read(struct perf_event *event)
1563 struct perf_event_context *ctx = event->ctx; 1569 struct perf_event_context *ctx = event->ctx;
1564 unsigned long flags; 1570 unsigned long flags;
1565 1571
1566 spin_lock_irqsave(&ctx->lock, flags); 1572 raw_spin_lock_irqsave(&ctx->lock, flags);
1567 update_context_time(ctx); 1573 update_context_time(ctx);
1568 update_event_times(event); 1574 update_event_times(event);
1569 spin_unlock_irqrestore(&ctx->lock, flags); 1575 raw_spin_unlock_irqrestore(&ctx->lock, flags);
1570 } 1576 }
1571 1577
1572 return atomic64_read(&event->count); 1578 return atomic64_read(&event->count);
@@ -1579,8 +1585,7 @@ static void
1579__perf_event_init_context(struct perf_event_context *ctx, 1585__perf_event_init_context(struct perf_event_context *ctx,
1580 struct task_struct *task) 1586 struct task_struct *task)
1581{ 1587{
1582 memset(ctx, 0, sizeof(*ctx)); 1588 raw_spin_lock_init(&ctx->lock);
1583 spin_lock_init(&ctx->lock);
1584 mutex_init(&ctx->mutex); 1589 mutex_init(&ctx->mutex);
1585 INIT_LIST_HEAD(&ctx->group_list); 1590 INIT_LIST_HEAD(&ctx->group_list);
1586 INIT_LIST_HEAD(&ctx->event_list); 1591 INIT_LIST_HEAD(&ctx->event_list);
@@ -1596,15 +1601,12 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu)
1596 unsigned long flags; 1601 unsigned long flags;
1597 int err; 1602 int err;
1598 1603
1599 /* 1604 if (pid == -1 && cpu != -1) {
1600 * If cpu is not a wildcard then this is a percpu event:
1601 */
1602 if (cpu != -1) {
1603 /* Must be root to operate on a CPU event: */ 1605 /* Must be root to operate on a CPU event: */
1604 if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) 1606 if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
1605 return ERR_PTR(-EACCES); 1607 return ERR_PTR(-EACCES);
1606 1608
1607 if (cpu < 0 || cpu > num_possible_cpus()) 1609 if (cpu < 0 || cpu >= nr_cpumask_bits)
1608 return ERR_PTR(-EINVAL); 1610 return ERR_PTR(-EINVAL);
1609 1611
1610 /* 1612 /*
@@ -1612,7 +1614,7 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu)
1612 * offline CPU and activate it when the CPU comes up, but 1614 * offline CPU and activate it when the CPU comes up, but
1613 * that's for later. 1615 * that's for later.
1614 */ 1616 */
1615 if (!cpu_isset(cpu, cpu_online_map)) 1617 if (!cpu_online(cpu))
1616 return ERR_PTR(-ENODEV); 1618 return ERR_PTR(-ENODEV);
1617 1619
1618 cpuctx = &per_cpu(perf_cpu_context, cpu); 1620 cpuctx = &per_cpu(perf_cpu_context, cpu);
@@ -1650,11 +1652,11 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu)
1650 ctx = perf_lock_task_context(task, &flags); 1652 ctx = perf_lock_task_context(task, &flags);
1651 if (ctx) { 1653 if (ctx) {
1652 unclone_ctx(ctx); 1654 unclone_ctx(ctx);
1653 spin_unlock_irqrestore(&ctx->lock, flags); 1655 raw_spin_unlock_irqrestore(&ctx->lock, flags);
1654 } 1656 }
1655 1657
1656 if (!ctx) { 1658 if (!ctx) {
1657 ctx = kmalloc(sizeof(struct perf_event_context), GFP_KERNEL); 1659 ctx = kzalloc(sizeof(struct perf_event_context), GFP_KERNEL);
1658 err = -ENOMEM; 1660 err = -ENOMEM;
1659 if (!ctx) 1661 if (!ctx)
1660 goto errout; 1662 goto errout;
@@ -1988,7 +1990,7 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
1988 if (!value) 1990 if (!value)
1989 return -EINVAL; 1991 return -EINVAL;
1990 1992
1991 spin_lock_irq(&ctx->lock); 1993 raw_spin_lock_irq(&ctx->lock);
1992 if (event->attr.freq) { 1994 if (event->attr.freq) {
1993 if (value > sysctl_perf_event_sample_rate) { 1995 if (value > sysctl_perf_event_sample_rate) {
1994 ret = -EINVAL; 1996 ret = -EINVAL;
@@ -2001,7 +2003,7 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
2001 event->hw.sample_period = value; 2003 event->hw.sample_period = value;
2002 } 2004 }
2003unlock: 2005unlock:
2004 spin_unlock_irq(&ctx->lock); 2006 raw_spin_unlock_irq(&ctx->lock);
2005 2007
2006 return ret; 2008 return ret;
2007} 2009}
@@ -4011,6 +4013,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
4011 event->pmu->read(event); 4013 event->pmu->read(event);
4012 4014
4013 data.addr = 0; 4015 data.addr = 0;
4016 data.raw = NULL;
4014 data.period = event->hw.last_period; 4017 data.period = event->hw.last_period;
4015 regs = get_irq_regs(); 4018 regs = get_irq_regs();
4016 /* 4019 /*
@@ -4080,8 +4083,7 @@ static void cpu_clock_perf_event_update(struct perf_event *event)
4080 u64 now; 4083 u64 now;
4081 4084
4082 now = cpu_clock(cpu); 4085 now = cpu_clock(cpu);
4083 prev = atomic64_read(&event->hw.prev_count); 4086 prev = atomic64_xchg(&event->hw.prev_count, now);
4084 atomic64_set(&event->hw.prev_count, now);
4085 atomic64_add(now - prev, &event->count); 4087 atomic64_add(now - prev, &event->count);
4086} 4088}
4087 4089
@@ -4286,15 +4288,8 @@ static void bp_perf_event_destroy(struct perf_event *event)
4286static const struct pmu *bp_perf_event_init(struct perf_event *bp) 4288static const struct pmu *bp_perf_event_init(struct perf_event *bp)
4287{ 4289{
4288 int err; 4290 int err;
4289 /* 4291
4290 * The breakpoint is already filled if we haven't created the counter 4292 err = register_perf_hw_breakpoint(bp);
4291 * through perf syscall
4292 * FIXME: manage to get trigerred to NULL if it comes from syscalls
4293 */
4294 if (!bp->callback)
4295 err = register_perf_hw_breakpoint(bp);
4296 else
4297 err = __register_perf_hw_breakpoint(bp);
4298 if (err) 4293 if (err)
4299 return ERR_PTR(err); 4294 return ERR_PTR(err);
4300 4295
@@ -4308,6 +4303,7 @@ void perf_bp_event(struct perf_event *bp, void *data)
4308 struct perf_sample_data sample; 4303 struct perf_sample_data sample;
4309 struct pt_regs *regs = data; 4304 struct pt_regs *regs = data;
4310 4305
4306 sample.raw = NULL;
4311 sample.addr = bp->attr.bp_addr; 4307 sample.addr = bp->attr.bp_addr;
4312 4308
4313 if (!perf_exclude_event(bp, regs)) 4309 if (!perf_exclude_event(bp, regs))
@@ -4390,7 +4386,7 @@ perf_event_alloc(struct perf_event_attr *attr,
4390 struct perf_event_context *ctx, 4386 struct perf_event_context *ctx,
4391 struct perf_event *group_leader, 4387 struct perf_event *group_leader,
4392 struct perf_event *parent_event, 4388 struct perf_event *parent_event,
4393 perf_callback_t callback, 4389 perf_overflow_handler_t overflow_handler,
4394 gfp_t gfpflags) 4390 gfp_t gfpflags)
4395{ 4391{
4396 const struct pmu *pmu; 4392 const struct pmu *pmu;
@@ -4433,10 +4429,10 @@ perf_event_alloc(struct perf_event_attr *attr,
4433 4429
4434 event->state = PERF_EVENT_STATE_INACTIVE; 4430 event->state = PERF_EVENT_STATE_INACTIVE;
4435 4431
4436 if (!callback && parent_event) 4432 if (!overflow_handler && parent_event)
4437 callback = parent_event->callback; 4433 overflow_handler = parent_event->overflow_handler;
4438 4434
4439 event->callback = callback; 4435 event->overflow_handler = overflow_handler;
4440 4436
4441 if (attr->disabled) 4437 if (attr->disabled)
4442 event->state = PERF_EVENT_STATE_OFF; 4438 event->state = PERF_EVENT_STATE_OFF;
@@ -4571,7 +4567,7 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
4571 if (attr->type >= PERF_TYPE_MAX) 4567 if (attr->type >= PERF_TYPE_MAX)
4572 return -EINVAL; 4568 return -EINVAL;
4573 4569
4574 if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3) 4570 if (attr->__reserved_1 || attr->__reserved_2)
4575 return -EINVAL; 4571 return -EINVAL;
4576 4572
4577 if (attr->sample_type & ~(PERF_SAMPLE_MAX-1)) 4573 if (attr->sample_type & ~(PERF_SAMPLE_MAX-1))
@@ -4776,7 +4772,8 @@ err_put_context:
4776 */ 4772 */
4777struct perf_event * 4773struct perf_event *
4778perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, 4774perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
4779 pid_t pid, perf_callback_t callback) 4775 pid_t pid,
4776 perf_overflow_handler_t overflow_handler)
4780{ 4777{
4781 struct perf_event *event; 4778 struct perf_event *event;
4782 struct perf_event_context *ctx; 4779 struct perf_event_context *ctx;
@@ -4793,7 +4790,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
4793 } 4790 }
4794 4791
4795 event = perf_event_alloc(attr, cpu, ctx, NULL, 4792 event = perf_event_alloc(attr, cpu, ctx, NULL,
4796 NULL, callback, GFP_KERNEL); 4793 NULL, overflow_handler, GFP_KERNEL);
4797 if (IS_ERR(event)) { 4794 if (IS_ERR(event)) {
4798 err = PTR_ERR(event); 4795 err = PTR_ERR(event);
4799 goto err_put_context; 4796 goto err_put_context;
@@ -4998,7 +4995,7 @@ void perf_event_exit_task(struct task_struct *child)
4998 * reading child->perf_event_ctxp, we wait until it has 4995 * reading child->perf_event_ctxp, we wait until it has
4999 * incremented the context's refcount before we do put_ctx below. 4996 * incremented the context's refcount before we do put_ctx below.
5000 */ 4997 */
5001 spin_lock(&child_ctx->lock); 4998 raw_spin_lock(&child_ctx->lock);
5002 child->perf_event_ctxp = NULL; 4999 child->perf_event_ctxp = NULL;
5003 /* 5000 /*
5004 * If this context is a clone; unclone it so it can't get 5001 * If this context is a clone; unclone it so it can't get
@@ -5007,7 +5004,7 @@ void perf_event_exit_task(struct task_struct *child)
5007 */ 5004 */
5008 unclone_ctx(child_ctx); 5005 unclone_ctx(child_ctx);
5009 update_context_time(child_ctx); 5006 update_context_time(child_ctx);
5010 spin_unlock_irqrestore(&child_ctx->lock, flags); 5007 raw_spin_unlock_irqrestore(&child_ctx->lock, flags);
5011 5008
5012 /* 5009 /*
5013 * Report the task dead after unscheduling the events so that we 5010 * Report the task dead after unscheduling the events so that we
@@ -5090,7 +5087,7 @@ again:
5090 */ 5087 */
5091int perf_event_init_task(struct task_struct *child) 5088int perf_event_init_task(struct task_struct *child)
5092{ 5089{
5093 struct perf_event_context *child_ctx, *parent_ctx; 5090 struct perf_event_context *child_ctx = NULL, *parent_ctx;
5094 struct perf_event_context *cloned_ctx; 5091 struct perf_event_context *cloned_ctx;
5095 struct perf_event *event; 5092 struct perf_event *event;
5096 struct task_struct *parent = current; 5093 struct task_struct *parent = current;
@@ -5106,20 +5103,6 @@ int perf_event_init_task(struct task_struct *child)
5106 return 0; 5103 return 0;
5107 5104
5108 /* 5105 /*
5109 * This is executed from the parent task context, so inherit
5110 * events that have been marked for cloning.
5111 * First allocate and initialize a context for the child.
5112 */
5113
5114 child_ctx = kmalloc(sizeof(struct perf_event_context), GFP_KERNEL);
5115 if (!child_ctx)
5116 return -ENOMEM;
5117
5118 __perf_event_init_context(child_ctx, child);
5119 child->perf_event_ctxp = child_ctx;
5120 get_task_struct(child);
5121
5122 /*
5123 * If the parent's context is a clone, pin it so it won't get 5106 * If the parent's context is a clone, pin it so it won't get
5124 * swapped under us. 5107 * swapped under us.
5125 */ 5108 */
@@ -5149,6 +5132,26 @@ int perf_event_init_task(struct task_struct *child)
5149 continue; 5132 continue;
5150 } 5133 }
5151 5134
5135 if (!child->perf_event_ctxp) {
5136 /*
5137 * This is executed from the parent task context, so
5138 * inherit events that have been marked for cloning.
5139 * First allocate and initialize a context for the
5140 * child.
5141 */
5142
5143 child_ctx = kzalloc(sizeof(struct perf_event_context),
5144 GFP_KERNEL);
5145 if (!child_ctx) {
5146 ret = -ENOMEM;
5147 goto exit;
5148 }
5149
5150 __perf_event_init_context(child_ctx, child);
5151 child->perf_event_ctxp = child_ctx;
5152 get_task_struct(child);
5153 }
5154
5152 ret = inherit_group(event, parent, parent_ctx, 5155 ret = inherit_group(event, parent, parent_ctx,
5153 child, child_ctx); 5156 child, child_ctx);
5154 if (ret) { 5157 if (ret) {
@@ -5177,6 +5180,7 @@ int perf_event_init_task(struct task_struct *child)
5177 get_ctx(child_ctx->parent_ctx); 5180 get_ctx(child_ctx->parent_ctx);
5178 } 5181 }
5179 5182
5183exit:
5180 mutex_unlock(&parent_ctx->mutex); 5184 mutex_unlock(&parent_ctx->mutex);
5181 5185
5182 perf_unpin_context(parent_ctx); 5186 perf_unpin_context(parent_ctx);
@@ -5291,11 +5295,11 @@ perf_set_reserve_percpu(struct sysdev_class *class,
5291 perf_reserved_percpu = val; 5295 perf_reserved_percpu = val;
5292 for_each_online_cpu(cpu) { 5296 for_each_online_cpu(cpu) {
5293 cpuctx = &per_cpu(perf_cpu_context, cpu); 5297 cpuctx = &per_cpu(perf_cpu_context, cpu);
5294 spin_lock_irq(&cpuctx->ctx.lock); 5298 raw_spin_lock_irq(&cpuctx->ctx.lock);
5295 mpt = min(perf_max_events - cpuctx->ctx.nr_events, 5299 mpt = min(perf_max_events - cpuctx->ctx.nr_events,
5296 perf_max_events - perf_reserved_percpu); 5300 perf_max_events - perf_reserved_percpu);
5297 cpuctx->max_pertask = mpt; 5301 cpuctx->max_pertask = mpt;
5298 spin_unlock_irq(&cpuctx->ctx.lock); 5302 raw_spin_unlock_irq(&cpuctx->ctx.lock);
5299 } 5303 }
5300 spin_unlock(&perf_resource_lock); 5304 spin_unlock(&perf_resource_lock);
5301 5305
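The perf hunks convert ctx->lock from spinlock_t to raw_spinlock_t, so it stays a true busy-waiting lock even on configurations where spinlock_t may become sleepable (e.g. PREEMPT_RT). A hedged sketch of the converted pattern, with hypothetical names:

#include <linux/spinlock.h>

struct demo_ctx {
	raw_spinlock_t lock;
};

static void demo_ctx_init(struct demo_ctx *ctx)
{
	raw_spin_lock_init(&ctx->lock);
}

static void demo_ctx_touch(struct demo_ctx *ctx)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&ctx->lock, flags);
	/* ... update context state ... */
	raw_spin_unlock_irqrestore(&ctx->lock, flags);
}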
diff --git a/kernel/pid.c b/kernel/pid.c
index d3f722d20f9c..2e17c9c92cbe 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -141,11 +141,12 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
141 * installing it: 141 * installing it:
142 */ 142 */
143 spin_lock_irq(&pidmap_lock); 143 spin_lock_irq(&pidmap_lock);
144 if (map->page) 144 if (!map->page) {
145 kfree(page);
146 else
147 map->page = page; 145 map->page = page;
146 page = NULL;
147 }
148 spin_unlock_irq(&pidmap_lock); 148 spin_unlock_irq(&pidmap_lock);
149 kfree(page);
149 if (unlikely(!map->page)) 150 if (unlikely(!map->page))
150 break; 151 break;
151 } 152 }
@@ -268,12 +269,11 @@ struct pid *alloc_pid(struct pid_namespace *ns)
268 for (type = 0; type < PIDTYPE_MAX; ++type) 269 for (type = 0; type < PIDTYPE_MAX; ++type)
269 INIT_HLIST_HEAD(&pid->tasks[type]); 270 INIT_HLIST_HEAD(&pid->tasks[type]);
270 271
272 upid = pid->numbers + ns->level;
271 spin_lock_irq(&pidmap_lock); 273 spin_lock_irq(&pidmap_lock);
272 for (i = ns->level; i >= 0; i--) { 274 for ( ; upid >= pid->numbers; --upid)
273 upid = &pid->numbers[i];
274 hlist_add_head_rcu(&upid->pid_chain, 275 hlist_add_head_rcu(&upid->pid_chain,
275 &pid_hash[pid_hashfn(upid->nr, upid->ns)]); 276 &pid_hash[pid_hashfn(upid->nr, upid->ns)]);
276 }
277 spin_unlock_irq(&pidmap_lock); 277 spin_unlock_irq(&pidmap_lock);
278 278
279out: 279out:
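The alloc_pidmap() change above follows a common publish-under-lock idiom: allocate outside the lock, install only if nobody beat us to it, then free the loser unconditionally after unlocking (kfree(NULL) is a no-op). A hedged sketch with hypothetical names:

#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/errno.h>

static DEFINE_SPINLOCK(demo_lock);
static void *demo_slot;

static int demo_install(void)
{
	void *page = kzalloc(PAGE_SIZE, GFP_KERNEL);

	if (!page)
		return -ENOMEM;

	spin_lock_irq(&demo_lock);
	if (!demo_slot) {
		demo_slot = page;	/* won the race: publish */
		page = NULL;		/* ownership transferred */
	}
	spin_unlock_irq(&demo_lock);

	kfree(page);			/* frees only the losing copy */
	return 0;
}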
diff --git a/kernel/power/console.c b/kernel/power/console.c
index 5187136fe1de..218e5af90156 100644
--- a/kernel/power/console.c
+++ b/kernel/power/console.c
@@ -6,7 +6,7 @@
6 6
7#include <linux/vt_kern.h> 7#include <linux/vt_kern.h>
8#include <linux/kbd_kern.h> 8#include <linux/kbd_kern.h>
9#include <linux/console.h> 9#include <linux/vt.h>
10#include <linux/module.h> 10#include <linux/module.h>
11#include "power.h" 11#include "power.h"
12 12
@@ -21,8 +21,7 @@ int pm_prepare_console(void)
21 if (orig_fgconsole < 0) 21 if (orig_fgconsole < 0)
22 return 1; 22 return 1;
23 23
24 orig_kmsg = kmsg_redirect; 24 orig_kmsg = vt_kmsg_redirect(SUSPEND_CONSOLE);
25 kmsg_redirect = SUSPEND_CONSOLE;
26 return 0; 25 return 0;
27} 26}
28 27
@@ -30,7 +29,7 @@ void pm_restore_console(void)
30{ 29{
31 if (orig_fgconsole >= 0) { 30 if (orig_fgconsole >= 0) {
32 vt_move_to_console(orig_fgconsole, 0); 31 vt_move_to_console(orig_fgconsole, 0);
33 kmsg_redirect = orig_kmsg; 32 vt_kmsg_redirect(orig_kmsg);
34 } 33 }
35} 34}
36#endif 35#endif
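vt_kmsg_redirect() both installs a new redirect target and returns the previous one, which is what lets the suspend path above save and restore with one call each. A hedged sketch of that pairing (SUSPEND_CONSOLE is the suspend code's own constant; declaration assumed to come via <linux/vt.h> as in the hunk above):

#include <linux/vt.h>

static int demo_saved_kmsg;

static void demo_suspend_console(void)
{
	/* install the new target and remember the old one */
	demo_saved_kmsg = vt_kmsg_redirect(SUSPEND_CONSOLE);
}

static void demo_resume_console(void)
{
	vt_kmsg_redirect(demo_saved_kmsg);	/* put it back */
}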
diff --git a/kernel/printk.c b/kernel/printk.c
index b5ac4d99c667..17463ca2e229 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -34,6 +34,7 @@
34#include <linux/syscalls.h> 34#include <linux/syscalls.h>
35#include <linux/kexec.h> 35#include <linux/kexec.h>
36#include <linux/ratelimit.h> 36#include <linux/ratelimit.h>
37#include <linux/kmsg_dump.h>
37 38
38#include <asm/uaccess.h> 39#include <asm/uaccess.h>
39 40
@@ -1405,4 +1406,122 @@ bool printk_timed_ratelimit(unsigned long *caller_jiffies,
1405 return false; 1406 return false;
1406} 1407}
1407EXPORT_SYMBOL(printk_timed_ratelimit); 1408EXPORT_SYMBOL(printk_timed_ratelimit);
1409
1410static DEFINE_SPINLOCK(dump_list_lock);
1411static LIST_HEAD(dump_list);
1412
1413/**
1414 * kmsg_dump_register - register a kernel log dumper.
1415 * @dumper: pointer to the kmsg_dumper structure
1416 *
1417 * Adds a kernel log dumper to the system. The dump callback in the
1418 * structure must be set; it is called when the kernel oopses or
1419 * panics. Returns zero on success and %-EINVAL or %-EBUSY otherwise.
1420 */
1421int kmsg_dump_register(struct kmsg_dumper *dumper)
1422{
1423 unsigned long flags;
1424 int err = -EBUSY;
1425
1426 /* The dump callback needs to be set */
1427 if (!dumper->dump)
1428 return -EINVAL;
1429
1430 spin_lock_irqsave(&dump_list_lock, flags);
1431 /* Don't allow registering multiple times */
1432 if (!dumper->registered) {
1433 dumper->registered = 1;
1434 list_add_tail(&dumper->list, &dump_list);
1435 err = 0;
1436 }
1437 spin_unlock_irqrestore(&dump_list_lock, flags);
1438
1439 return err;
1440}
1441EXPORT_SYMBOL_GPL(kmsg_dump_register);
1442
1443/**
1444 * kmsg_dump_unregister - unregister a kmsg dumper.
1445 * @dumper: pointer to the kmsg_dumper structure
1446 *
1447 * Removes a dump device from the system. Returns zero on success and
1448 * %-EINVAL otherwise.
1449 */
1450int kmsg_dump_unregister(struct kmsg_dumper *dumper)
1451{
1452 unsigned long flags;
1453 int err = -EINVAL;
1454
1455 spin_lock_irqsave(&dump_list_lock, flags);
1456 if (dumper->registered) {
1457 dumper->registered = 0;
1458 list_del(&dumper->list);
1459 err = 0;
1460 }
1461 spin_unlock_irqrestore(&dump_list_lock, flags);
1462
1463 return err;
1464}
1465EXPORT_SYMBOL_GPL(kmsg_dump_unregister);
1466
1467static const char * const kmsg_reasons[] = {
1468 [KMSG_DUMP_OOPS] = "oops",
1469 [KMSG_DUMP_PANIC] = "panic",
1470};
1471
1472static const char *kmsg_to_str(enum kmsg_dump_reason reason)
1473{
1474 if (reason >= ARRAY_SIZE(kmsg_reasons) || reason < 0)
1475 return "unknown";
1476
1477 return kmsg_reasons[reason];
1478}
1479
1480/**
1481 * kmsg_dump - dump kernel log to kernel message dumpers.
1482 * @reason: the reason (oops, panic etc) for dumping
1483 *
1484 * Iterate through each of the dump devices and call the oops/panic
1485 * callbacks with the log buffer.
1486 */
1487void kmsg_dump(enum kmsg_dump_reason reason)
1488{
1489 unsigned long end;
1490 unsigned chars;
1491 struct kmsg_dumper *dumper;
1492 const char *s1, *s2;
1493 unsigned long l1, l2;
1494 unsigned long flags;
1495
1496	/* Theoretically, the log could move on after we do this, but
1497	 * there's not a lot we can do about that. The new messages
1498	 * will overwrite the start of what we dump. */
1499 spin_lock_irqsave(&logbuf_lock, flags);
1500 end = log_end & LOG_BUF_MASK;
1501 chars = logged_chars;
1502 spin_unlock_irqrestore(&logbuf_lock, flags);
1503
1504	if (chars > end) {
1505		s1 = log_buf + log_buf_len - chars + end;
1506		l1 = chars - end;
1507
1508 s2 = log_buf;
1509 l2 = end;
1510 } else {
1511 s1 = "";
1512 l1 = 0;
1513
1514		s2 = log_buf + end - chars;
1515		l2 = chars;
1516 }
1517
1518 if (!spin_trylock_irqsave(&dump_list_lock, flags)) {
1519		printk(KERN_ERR "kmsg_dump: dump list lock is held during %s, skipping dump\n",
1520 kmsg_to_str(reason));
1521 return;
1522 }
1523 list_for_each_entry(dumper, &dump_list, list)
1524 dumper->dump(dumper, reason, s1, l1, s2, l2);
1525 spin_unlock_irqrestore(&dump_list_lock, flags);
1526}
1408#endif 1527#endif
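The kernel-doc above describes the new dumper API end to end. A minimal sketch of a client module (names hypothetical, callback signature taken from the kmsg_dump() loop above):

#include <linux/kmsg_dump.h>
#include <linux/module.h>

/* Hypothetical client: stash the tail of the log somewhere persistent. */
static void demo_dump(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason,
		      const char *s1, unsigned long l1,
		      const char *s2, unsigned long l2)
{
	/* s1/l1 is the older chunk of the ring buffer, s2/l2 the newer;
	 * a real dumper would write both to its backing store here. */
}

static struct kmsg_dumper demo_dumper = {
	.dump = demo_dump,
};

static int __init demo_init(void)
{
	/* fails with -EINVAL if .dump is unset, -EBUSY if already registered */
	return kmsg_dump_register(&demo_dumper);
}

static void __exit demo_exit(void)
{
	kmsg_dump_unregister(&demo_dumper);
}

module_init(demo_init);
module_exit(demo_exit);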
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index a621a67ef4e3..9bb52177af02 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -763,13 +763,13 @@ static void rcu_torture_timer(unsigned long unused)
763 /* Should not happen, but... */ 763 /* Should not happen, but... */
764 pipe_count = RCU_TORTURE_PIPE_LEN; 764 pipe_count = RCU_TORTURE_PIPE_LEN;
765 } 765 }
766 ++__get_cpu_var(rcu_torture_count)[pipe_count]; 766 __this_cpu_inc(per_cpu_var(rcu_torture_count)[pipe_count]);
767 completed = cur_ops->completed() - completed; 767 completed = cur_ops->completed() - completed;
768 if (completed > RCU_TORTURE_PIPE_LEN) { 768 if (completed > RCU_TORTURE_PIPE_LEN) {
769 /* Should not happen, but... */ 769 /* Should not happen, but... */
770 completed = RCU_TORTURE_PIPE_LEN; 770 completed = RCU_TORTURE_PIPE_LEN;
771 } 771 }
772 ++__get_cpu_var(rcu_torture_batch)[completed]; 772 __this_cpu_inc(per_cpu_var(rcu_torture_batch)[completed]);
773 preempt_enable(); 773 preempt_enable();
774 cur_ops->readunlock(idx); 774 cur_ops->readunlock(idx);
775} 775}
@@ -818,13 +818,13 @@ rcu_torture_reader(void *arg)
818 /* Should not happen, but... */ 818 /* Should not happen, but... */
819 pipe_count = RCU_TORTURE_PIPE_LEN; 819 pipe_count = RCU_TORTURE_PIPE_LEN;
820 } 820 }
821 ++__get_cpu_var(rcu_torture_count)[pipe_count]; 821 __this_cpu_inc(per_cpu_var(rcu_torture_count)[pipe_count]);
822 completed = cur_ops->completed() - completed; 822 completed = cur_ops->completed() - completed;
823 if (completed > RCU_TORTURE_PIPE_LEN) { 823 if (completed > RCU_TORTURE_PIPE_LEN) {
824 /* Should not happen, but... */ 824 /* Should not happen, but... */
825 completed = RCU_TORTURE_PIPE_LEN; 825 completed = RCU_TORTURE_PIPE_LEN;
826 } 826 }
827 ++__get_cpu_var(rcu_torture_batch)[completed]; 827 __this_cpu_inc(per_cpu_var(rcu_torture_batch)[completed]);
828 preempt_enable(); 828 preempt_enable();
829 cur_ops->readunlock(idx); 829 cur_ops->readunlock(idx);
830 schedule(); 830 schedule();
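__this_cpu_inc() replaces the ++__get_cpu_var() form above; where the architecture has an interrupt-safe increment instruction, it compiles down to a single op on this CPU's copy. A hedged sketch (per_cpu_var() is this era's per-cpu name-mangling helper):

#include <linux/percpu.h>

static DEFINE_PER_CPU(unsigned long, demo_count);

static void demo_hit(void)
{
	/* same effect as ++__get_cpu_var(demo_count), but lets the
	 * arch use one RMW instruction on the local copy */
	__this_cpu_inc(per_cpu_var(demo_count));
}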
diff --git a/kernel/relay.c b/kernel/relay.c
index 760c26209a3c..c705a41b4ba3 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -1198,7 +1198,7 @@ static void relay_pipe_buf_release(struct pipe_inode_info *pipe,
1198 relay_consume_bytes(rbuf, buf->private); 1198 relay_consume_bytes(rbuf, buf->private);
1199} 1199}
1200 1200
1201static struct pipe_buf_operations relay_pipe_buf_ops = { 1201static const struct pipe_buf_operations relay_pipe_buf_ops = {
1202 .can_merge = 0, 1202 .can_merge = 0,
1203 .map = generic_pipe_buf_map, 1203 .map = generic_pipe_buf_map,
1204 .unmap = generic_pipe_buf_unmap, 1204 .unmap = generic_pipe_buf_unmap,
diff --git a/kernel/resource.c b/kernel/resource.c
index fb11a58b9594..dc15686b7a77 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -308,35 +308,37 @@ static int find_resource(struct resource *root, struct resource *new,
308 void *alignf_data) 308 void *alignf_data)
309{ 309{
310 struct resource *this = root->child; 310 struct resource *this = root->child;
311 resource_size_t start, end;
311 312
312 new->start = root->start; 313 start = root->start;
313 /* 314 /*
314 * Skip past an allocated resource that starts at 0, since the assignment 315 * Skip past an allocated resource that starts at 0, since the assignment
315 * of this->start - 1 to new->end below would cause an underflow. 316 * of this->start - 1 to new->end below would cause an underflow.
316 */ 317 */
317 if (this && this->start == 0) { 318 if (this && this->start == 0) {
318 new->start = this->end + 1; 319 start = this->end + 1;
319 this = this->sibling; 320 this = this->sibling;
320 } 321 }
321 for(;;) { 322 for(;;) {
322 if (this) 323 if (this)
323 new->end = this->start - 1; 324 end = this->start - 1;
324 else 325 else
325 new->end = root->end; 326 end = root->end;
326 if (new->start < min) 327 if (start < min)
327 new->start = min; 328 start = min;
328 if (new->end > max) 329 if (end > max)
329 new->end = max; 330 end = max;
330 new->start = ALIGN(new->start, align); 331 start = ALIGN(start, align);
331 if (alignf) 332 if (alignf)
332 alignf(alignf_data, new, size, align); 333 alignf(alignf_data, new, size, align);
333 if (new->start < new->end && new->end - new->start >= size - 1) { 334 if (start < end && end - start >= size - 1) {
334 new->end = new->start + size - 1; 335 new->start = start;
336 new->end = start + size - 1;
335 return 0; 337 return 0;
336 } 338 }
337 if (!this) 339 if (!this)
338 break; 340 break;
339 new->start = this->end + 1; 341 start = this->end + 1;
340 this = this->sibling; 342 this = this->sibling;
341 } 343 }
342 return -EBUSY; 344 return -EBUSY;
diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c
index 5fcb4fe645e2..ddabb54bb5c8 100644
--- a/kernel/rtmutex-debug.c
+++ b/kernel/rtmutex-debug.c
@@ -37,8 +37,8 @@ do { \
37 if (rt_trace_on) { \ 37 if (rt_trace_on) { \
38 rt_trace_on = 0; \ 38 rt_trace_on = 0; \
39 console_verbose(); \ 39 console_verbose(); \
40 if (spin_is_locked(&current->pi_lock)) \ 40 if (raw_spin_is_locked(&current->pi_lock)) \
41 spin_unlock(&current->pi_lock); \ 41 raw_spin_unlock(&current->pi_lock); \
42 } \ 42 } \
43} while (0) 43} while (0)
44 44
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 29bd4baf9e75..a9604815786a 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -138,9 +138,9 @@ static void rt_mutex_adjust_prio(struct task_struct *task)
138{ 138{
139 unsigned long flags; 139 unsigned long flags;
140 140
141 spin_lock_irqsave(&task->pi_lock, flags); 141 raw_spin_lock_irqsave(&task->pi_lock, flags);
142 __rt_mutex_adjust_prio(task); 142 __rt_mutex_adjust_prio(task);
143 spin_unlock_irqrestore(&task->pi_lock, flags); 143 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
144} 144}
145 145
146/* 146/*
@@ -195,7 +195,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
195 /* 195 /*
196 * Task can not go away as we did a get_task() before ! 196 * Task can not go away as we did a get_task() before !
197 */ 197 */
198 spin_lock_irqsave(&task->pi_lock, flags); 198 raw_spin_lock_irqsave(&task->pi_lock, flags);
199 199
200 waiter = task->pi_blocked_on; 200 waiter = task->pi_blocked_on;
201 /* 201 /*
@@ -231,8 +231,8 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
231 goto out_unlock_pi; 231 goto out_unlock_pi;
232 232
233 lock = waiter->lock; 233 lock = waiter->lock;
234 if (!spin_trylock(&lock->wait_lock)) { 234 if (!raw_spin_trylock(&lock->wait_lock)) {
235 spin_unlock_irqrestore(&task->pi_lock, flags); 235 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
236 cpu_relax(); 236 cpu_relax();
237 goto retry; 237 goto retry;
238 } 238 }
@@ -240,7 +240,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
240 /* Deadlock detection */ 240 /* Deadlock detection */
241 if (lock == orig_lock || rt_mutex_owner(lock) == top_task) { 241 if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
242 debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock); 242 debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock);
243 spin_unlock(&lock->wait_lock); 243 raw_spin_unlock(&lock->wait_lock);
244 ret = deadlock_detect ? -EDEADLK : 0; 244 ret = deadlock_detect ? -EDEADLK : 0;
245 goto out_unlock_pi; 245 goto out_unlock_pi;
246 } 246 }
@@ -253,13 +253,13 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
253 plist_add(&waiter->list_entry, &lock->wait_list); 253 plist_add(&waiter->list_entry, &lock->wait_list);
254 254
255 /* Release the task */ 255 /* Release the task */
256 spin_unlock_irqrestore(&task->pi_lock, flags); 256 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
257 put_task_struct(task); 257 put_task_struct(task);
258 258
259 /* Grab the next task */ 259 /* Grab the next task */
260 task = rt_mutex_owner(lock); 260 task = rt_mutex_owner(lock);
261 get_task_struct(task); 261 get_task_struct(task);
262 spin_lock_irqsave(&task->pi_lock, flags); 262 raw_spin_lock_irqsave(&task->pi_lock, flags);
263 263
264 if (waiter == rt_mutex_top_waiter(lock)) { 264 if (waiter == rt_mutex_top_waiter(lock)) {
265 /* Boost the owner */ 265 /* Boost the owner */
@@ -277,10 +277,10 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
277 __rt_mutex_adjust_prio(task); 277 __rt_mutex_adjust_prio(task);
278 } 278 }
279 279
280 spin_unlock_irqrestore(&task->pi_lock, flags); 280 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
281 281
282 top_waiter = rt_mutex_top_waiter(lock); 282 top_waiter = rt_mutex_top_waiter(lock);
283 spin_unlock(&lock->wait_lock); 283 raw_spin_unlock(&lock->wait_lock);
284 284
285 if (!detect_deadlock && waiter != top_waiter) 285 if (!detect_deadlock && waiter != top_waiter)
286 goto out_put_task; 286 goto out_put_task;
@@ -288,7 +288,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
288 goto again; 288 goto again;
289 289
290 out_unlock_pi: 290 out_unlock_pi:
291 spin_unlock_irqrestore(&task->pi_lock, flags); 291 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
292 out_put_task: 292 out_put_task:
293 put_task_struct(task); 293 put_task_struct(task);
294 294
@@ -313,9 +313,9 @@ static inline int try_to_steal_lock(struct rt_mutex *lock,
313 if (pendowner == task) 313 if (pendowner == task)
314 return 1; 314 return 1;
315 315
316 spin_lock_irqsave(&pendowner->pi_lock, flags); 316 raw_spin_lock_irqsave(&pendowner->pi_lock, flags);
317 if (task->prio >= pendowner->prio) { 317 if (task->prio >= pendowner->prio) {
318 spin_unlock_irqrestore(&pendowner->pi_lock, flags); 318 raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
319 return 0; 319 return 0;
320 } 320 }
321 321
@@ -325,7 +325,7 @@ static inline int try_to_steal_lock(struct rt_mutex *lock,
325 * priority. 325 * priority.
326 */ 326 */
327 if (likely(!rt_mutex_has_waiters(lock))) { 327 if (likely(!rt_mutex_has_waiters(lock))) {
328 spin_unlock_irqrestore(&pendowner->pi_lock, flags); 328 raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
329 return 1; 329 return 1;
330 } 330 }
331 331
@@ -333,7 +333,7 @@ static inline int try_to_steal_lock(struct rt_mutex *lock,
333 next = rt_mutex_top_waiter(lock); 333 next = rt_mutex_top_waiter(lock);
334 plist_del(&next->pi_list_entry, &pendowner->pi_waiters); 334 plist_del(&next->pi_list_entry, &pendowner->pi_waiters);
335 __rt_mutex_adjust_prio(pendowner); 335 __rt_mutex_adjust_prio(pendowner);
336 spin_unlock_irqrestore(&pendowner->pi_lock, flags); 336 raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
337 337
338 /* 338 /*
339 * We are going to steal the lock and a waiter was 339 * We are going to steal the lock and a waiter was
@@ -350,10 +350,10 @@ static inline int try_to_steal_lock(struct rt_mutex *lock,
350 * might be task: 350 * might be task:
351 */ 351 */
352 if (likely(next->task != task)) { 352 if (likely(next->task != task)) {
353 spin_lock_irqsave(&task->pi_lock, flags); 353 raw_spin_lock_irqsave(&task->pi_lock, flags);
354 plist_add(&next->pi_list_entry, &task->pi_waiters); 354 plist_add(&next->pi_list_entry, &task->pi_waiters);
355 __rt_mutex_adjust_prio(task); 355 __rt_mutex_adjust_prio(task);
356 spin_unlock_irqrestore(&task->pi_lock, flags); 356 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
357 } 357 }
358 return 1; 358 return 1;
359} 359}
@@ -420,7 +420,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
420 unsigned long flags; 420 unsigned long flags;
421 int chain_walk = 0, res; 421 int chain_walk = 0, res;
422 422
423 spin_lock_irqsave(&task->pi_lock, flags); 423 raw_spin_lock_irqsave(&task->pi_lock, flags);
424 __rt_mutex_adjust_prio(task); 424 __rt_mutex_adjust_prio(task);
425 waiter->task = task; 425 waiter->task = task;
426 waiter->lock = lock; 426 waiter->lock = lock;
@@ -434,17 +434,17 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
434 434
435 task->pi_blocked_on = waiter; 435 task->pi_blocked_on = waiter;
436 436
437 spin_unlock_irqrestore(&task->pi_lock, flags); 437 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
438 438
439 if (waiter == rt_mutex_top_waiter(lock)) { 439 if (waiter == rt_mutex_top_waiter(lock)) {
440 spin_lock_irqsave(&owner->pi_lock, flags); 440 raw_spin_lock_irqsave(&owner->pi_lock, flags);
441 plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters); 441 plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters);
442 plist_add(&waiter->pi_list_entry, &owner->pi_waiters); 442 plist_add(&waiter->pi_list_entry, &owner->pi_waiters);
443 443
444 __rt_mutex_adjust_prio(owner); 444 __rt_mutex_adjust_prio(owner);
445 if (owner->pi_blocked_on) 445 if (owner->pi_blocked_on)
446 chain_walk = 1; 446 chain_walk = 1;
447 spin_unlock_irqrestore(&owner->pi_lock, flags); 447 raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
448 } 448 }
449 else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock)) 449 else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock))
450 chain_walk = 1; 450 chain_walk = 1;
@@ -459,12 +459,12 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
459 */ 459 */
460 get_task_struct(owner); 460 get_task_struct(owner);
461 461
462 spin_unlock(&lock->wait_lock); 462 raw_spin_unlock(&lock->wait_lock);
463 463
464 res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter, 464 res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter,
465 task); 465 task);
466 466
467 spin_lock(&lock->wait_lock); 467 raw_spin_lock(&lock->wait_lock);
468 468
469 return res; 469 return res;
470} 470}
@@ -483,7 +483,7 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
483 struct task_struct *pendowner; 483 struct task_struct *pendowner;
484 unsigned long flags; 484 unsigned long flags;
485 485
486 spin_lock_irqsave(&current->pi_lock, flags); 486 raw_spin_lock_irqsave(&current->pi_lock, flags);
487 487
488 waiter = rt_mutex_top_waiter(lock); 488 waiter = rt_mutex_top_waiter(lock);
489 plist_del(&waiter->list_entry, &lock->wait_list); 489 plist_del(&waiter->list_entry, &lock->wait_list);
@@ -500,7 +500,7 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
500 500
501 rt_mutex_set_owner(lock, pendowner, RT_MUTEX_OWNER_PENDING); 501 rt_mutex_set_owner(lock, pendowner, RT_MUTEX_OWNER_PENDING);
502 502
503 spin_unlock_irqrestore(&current->pi_lock, flags); 503 raw_spin_unlock_irqrestore(&current->pi_lock, flags);
504 504
505 /* 505 /*
506 * Clear the pi_blocked_on variable and enqueue a possible 506 * Clear the pi_blocked_on variable and enqueue a possible
@@ -509,7 +509,7 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
509 * waiter with higher priority than pending-owner->normal_prio 509 * waiter with higher priority than pending-owner->normal_prio
510 * is blocked on the unboosted (pending) owner. 510 * is blocked on the unboosted (pending) owner.
511 */ 511 */
512 spin_lock_irqsave(&pendowner->pi_lock, flags); 512 raw_spin_lock_irqsave(&pendowner->pi_lock, flags);
513 513
514 WARN_ON(!pendowner->pi_blocked_on); 514 WARN_ON(!pendowner->pi_blocked_on);
515 WARN_ON(pendowner->pi_blocked_on != waiter); 515 WARN_ON(pendowner->pi_blocked_on != waiter);
@@ -523,7 +523,7 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
523 next = rt_mutex_top_waiter(lock); 523 next = rt_mutex_top_waiter(lock);
524 plist_add(&next->pi_list_entry, &pendowner->pi_waiters); 524 plist_add(&next->pi_list_entry, &pendowner->pi_waiters);
525 } 525 }
526 spin_unlock_irqrestore(&pendowner->pi_lock, flags); 526 raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
527 527
528 wake_up_process(pendowner); 528 wake_up_process(pendowner);
529} 529}
@@ -541,15 +541,15 @@ static void remove_waiter(struct rt_mutex *lock,
541 unsigned long flags; 541 unsigned long flags;
542 int chain_walk = 0; 542 int chain_walk = 0;
543 543
544 spin_lock_irqsave(&current->pi_lock, flags); 544 raw_spin_lock_irqsave(&current->pi_lock, flags);
545 plist_del(&waiter->list_entry, &lock->wait_list); 545 plist_del(&waiter->list_entry, &lock->wait_list);
546 waiter->task = NULL; 546 waiter->task = NULL;
547 current->pi_blocked_on = NULL; 547 current->pi_blocked_on = NULL;
548 spin_unlock_irqrestore(&current->pi_lock, flags); 548 raw_spin_unlock_irqrestore(&current->pi_lock, flags);
549 549
550 if (first && owner != current) { 550 if (first && owner != current) {
551 551
552 spin_lock_irqsave(&owner->pi_lock, flags); 552 raw_spin_lock_irqsave(&owner->pi_lock, flags);
553 553
554 plist_del(&waiter->pi_list_entry, &owner->pi_waiters); 554 plist_del(&waiter->pi_list_entry, &owner->pi_waiters);
555 555
@@ -564,7 +564,7 @@ static void remove_waiter(struct rt_mutex *lock,
564 if (owner->pi_blocked_on) 564 if (owner->pi_blocked_on)
565 chain_walk = 1; 565 chain_walk = 1;
566 566
567 spin_unlock_irqrestore(&owner->pi_lock, flags); 567 raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
568 } 568 }
569 569
570 WARN_ON(!plist_node_empty(&waiter->pi_list_entry)); 570 WARN_ON(!plist_node_empty(&waiter->pi_list_entry));
@@ -575,11 +575,11 @@ static void remove_waiter(struct rt_mutex *lock,
575 /* gets dropped in rt_mutex_adjust_prio_chain()! */ 575 /* gets dropped in rt_mutex_adjust_prio_chain()! */
576 get_task_struct(owner); 576 get_task_struct(owner);
577 577
578 spin_unlock(&lock->wait_lock); 578 raw_spin_unlock(&lock->wait_lock);
579 579
580 rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current); 580 rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current);
581 581
582 spin_lock(&lock->wait_lock); 582 raw_spin_lock(&lock->wait_lock);
583} 583}
584 584
585/* 585/*
@@ -592,15 +592,15 @@ void rt_mutex_adjust_pi(struct task_struct *task)
592 struct rt_mutex_waiter *waiter; 592 struct rt_mutex_waiter *waiter;
593 unsigned long flags; 593 unsigned long flags;
594 594
595 spin_lock_irqsave(&task->pi_lock, flags); 595 raw_spin_lock_irqsave(&task->pi_lock, flags);
596 596
597 waiter = task->pi_blocked_on; 597 waiter = task->pi_blocked_on;
598 if (!waiter || waiter->list_entry.prio == task->prio) { 598 if (!waiter || waiter->list_entry.prio == task->prio) {
599 spin_unlock_irqrestore(&task->pi_lock, flags); 599 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
600 return; 600 return;
601 } 601 }
602 602
603 spin_unlock_irqrestore(&task->pi_lock, flags); 603 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
604 604
605 /* gets dropped in rt_mutex_adjust_prio_chain()! */ 605 /* gets dropped in rt_mutex_adjust_prio_chain()! */
606 get_task_struct(task); 606 get_task_struct(task);
@@ -672,14 +672,14 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
672 break; 672 break;
673 } 673 }
674 674
675 spin_unlock(&lock->wait_lock); 675 raw_spin_unlock(&lock->wait_lock);
676 676
677 debug_rt_mutex_print_deadlock(waiter); 677 debug_rt_mutex_print_deadlock(waiter);
678 678
679 if (waiter->task) 679 if (waiter->task)
680 schedule_rt_mutex(lock); 680 schedule_rt_mutex(lock);
681 681
682 spin_lock(&lock->wait_lock); 682 raw_spin_lock(&lock->wait_lock);
683 set_current_state(state); 683 set_current_state(state);
684 } 684 }
685 685
@@ -700,11 +700,11 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
700 debug_rt_mutex_init_waiter(&waiter); 700 debug_rt_mutex_init_waiter(&waiter);
701 waiter.task = NULL; 701 waiter.task = NULL;
702 702
703 spin_lock(&lock->wait_lock); 703 raw_spin_lock(&lock->wait_lock);
704 704
705 /* Try to acquire the lock again: */ 705 /* Try to acquire the lock again: */
706 if (try_to_take_rt_mutex(lock)) { 706 if (try_to_take_rt_mutex(lock)) {
707 spin_unlock(&lock->wait_lock); 707 raw_spin_unlock(&lock->wait_lock);
708 return 0; 708 return 0;
709 } 709 }
710 710
@@ -731,7 +731,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
731 */ 731 */
732 fixup_rt_mutex_waiters(lock); 732 fixup_rt_mutex_waiters(lock);
733 733
734 spin_unlock(&lock->wait_lock); 734 raw_spin_unlock(&lock->wait_lock);
735 735
736 /* Remove pending timer: */ 736 /* Remove pending timer: */
737 if (unlikely(timeout)) 737 if (unlikely(timeout))
@@ -758,7 +758,7 @@ rt_mutex_slowtrylock(struct rt_mutex *lock)
758{ 758{
759 int ret = 0; 759 int ret = 0;
760 760
761 spin_lock(&lock->wait_lock); 761 raw_spin_lock(&lock->wait_lock);
762 762
763 if (likely(rt_mutex_owner(lock) != current)) { 763 if (likely(rt_mutex_owner(lock) != current)) {
764 764
@@ -770,7 +770,7 @@ rt_mutex_slowtrylock(struct rt_mutex *lock)
770 fixup_rt_mutex_waiters(lock); 770 fixup_rt_mutex_waiters(lock);
771 } 771 }
772 772
773 spin_unlock(&lock->wait_lock); 773 raw_spin_unlock(&lock->wait_lock);
774 774
775 return ret; 775 return ret;
776} 776}
@@ -781,7 +781,7 @@ rt_mutex_slowtrylock(struct rt_mutex *lock)
781static void __sched 781static void __sched
782rt_mutex_slowunlock(struct rt_mutex *lock) 782rt_mutex_slowunlock(struct rt_mutex *lock)
783{ 783{
784 spin_lock(&lock->wait_lock); 784 raw_spin_lock(&lock->wait_lock);
785 785
786 debug_rt_mutex_unlock(lock); 786 debug_rt_mutex_unlock(lock);
787 787
@@ -789,13 +789,13 @@ rt_mutex_slowunlock(struct rt_mutex *lock)
789 789
790 if (!rt_mutex_has_waiters(lock)) { 790 if (!rt_mutex_has_waiters(lock)) {
791 lock->owner = NULL; 791 lock->owner = NULL;
792 spin_unlock(&lock->wait_lock); 792 raw_spin_unlock(&lock->wait_lock);
793 return; 793 return;
794 } 794 }
795 795
796 wakeup_next_waiter(lock); 796 wakeup_next_waiter(lock);
797 797
798 spin_unlock(&lock->wait_lock); 798 raw_spin_unlock(&lock->wait_lock);
799 799
800 /* Undo pi boosting if necessary: */ 800 /* Undo pi boosting if necessary: */
801 rt_mutex_adjust_prio(current); 801 rt_mutex_adjust_prio(current);
@@ -970,8 +970,8 @@ EXPORT_SYMBOL_GPL(rt_mutex_destroy);
970void __rt_mutex_init(struct rt_mutex *lock, const char *name) 970void __rt_mutex_init(struct rt_mutex *lock, const char *name)
971{ 971{
972 lock->owner = NULL; 972 lock->owner = NULL;
973 spin_lock_init(&lock->wait_lock); 973 raw_spin_lock_init(&lock->wait_lock);
974 plist_head_init(&lock->wait_list, &lock->wait_lock); 974 plist_head_init_raw(&lock->wait_list, &lock->wait_lock);
975 975
976 debug_rt_mutex_init(lock, name); 976 debug_rt_mutex_init(lock, name);
977} 977}
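
Because wait_lock is now raw, the embedded priority list must be initialized against a raw lock as well; plist_head_init_raw() is the raw-lock counterpart of plist_head_init() that accompanies this conversion (treat the pairing as an assumption from this series). Open-coded, initialization now reads:

	static void rt_mutex_init_sketch(struct rt_mutex *lock)
	{
		lock->owner = NULL;
		raw_spin_lock_init(&lock->wait_lock);
		plist_head_init_raw(&lock->wait_list, &lock->wait_lock);
	}
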
@@ -1032,7 +1032,7 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1032{ 1032{
1033 int ret; 1033 int ret;
1034 1034
1035 spin_lock(&lock->wait_lock); 1035 raw_spin_lock(&lock->wait_lock);
1036 1036
1037 mark_rt_mutex_waiters(lock); 1037 mark_rt_mutex_waiters(lock);
1038 1038
@@ -1040,7 +1040,7 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1040 /* We got the lock for task. */ 1040 /* We got the lock for task. */
1041 debug_rt_mutex_lock(lock); 1041 debug_rt_mutex_lock(lock);
1042 rt_mutex_set_owner(lock, task, 0); 1042 rt_mutex_set_owner(lock, task, 0);
1043 spin_unlock(&lock->wait_lock); 1043 raw_spin_unlock(&lock->wait_lock);
1044 rt_mutex_deadlock_account_lock(lock, task); 1044 rt_mutex_deadlock_account_lock(lock, task);
1045 return 1; 1045 return 1;
1046 } 1046 }
@@ -1056,7 +1056,7 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1056 */ 1056 */
1057 ret = 0; 1057 ret = 0;
1058 } 1058 }
1059 spin_unlock(&lock->wait_lock); 1059 raw_spin_unlock(&lock->wait_lock);
1060 1060
1061 debug_rt_mutex_print_deadlock(waiter); 1061 debug_rt_mutex_print_deadlock(waiter);
1062 1062
@@ -1106,7 +1106,7 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
1106{ 1106{
1107 int ret; 1107 int ret;
1108 1108
1109 spin_lock(&lock->wait_lock); 1109 raw_spin_lock(&lock->wait_lock);
1110 1110
1111 set_current_state(TASK_INTERRUPTIBLE); 1111 set_current_state(TASK_INTERRUPTIBLE);
1112 1112
@@ -1124,7 +1124,7 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
1124 */ 1124 */
1125 fixup_rt_mutex_waiters(lock); 1125 fixup_rt_mutex_waiters(lock);
1126 1126
1127 spin_unlock(&lock->wait_lock); 1127 raw_spin_unlock(&lock->wait_lock);
1128 1128
1129 /* 1129 /*
1130 * Readjust priority, when we did not get the lock. We might have been 1130 * Readjust priority, when we did not get the lock. We might have been
diff --git a/kernel/sched.c b/kernel/sched.c
index e7f2cfa6a257..720df108a2d6 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -26,6 +26,8 @@
26 * Thomas Gleixner, Mike Kravetz 26 * Thomas Gleixner, Mike Kravetz
27 */ 27 */
28 28
29#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
30
29#include <linux/mm.h> 31#include <linux/mm.h>
30#include <linux/module.h> 32#include <linux/module.h>
31#include <linux/nmi.h> 33#include <linux/nmi.h>
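
The new pr_fmt define, placed before any include, makes every pr_*() call in sched.c prefix its message with the object name; this is what lets a later hunk shorten printk(KERN_ERR ...) to pr_err(...). Expansion sketch (KBUILD_MODNAME resolves to "sched" for sched.o):

	#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

	/* pr_err("BUG: scheduling while atomic\n") now expands to          */
	/* printk(KERN_ERR "sched" ": " "BUG: scheduling while atomic\n")   */
	/* and appears in the log as:  sched: BUG: scheduling while atomic  */
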
@@ -141,7 +143,7 @@ struct rt_prio_array {
141 143
142struct rt_bandwidth { 144struct rt_bandwidth {
143 /* nests inside the rq lock: */ 145 /* nests inside the rq lock: */
144 spinlock_t rt_runtime_lock; 146 raw_spinlock_t rt_runtime_lock;
145 ktime_t rt_period; 147 ktime_t rt_period;
146 u64 rt_runtime; 148 u64 rt_runtime;
147 struct hrtimer rt_period_timer; 149 struct hrtimer rt_period_timer;
@@ -178,7 +180,7 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
178 rt_b->rt_period = ns_to_ktime(period); 180 rt_b->rt_period = ns_to_ktime(period);
179 rt_b->rt_runtime = runtime; 181 rt_b->rt_runtime = runtime;
180 182
181 spin_lock_init(&rt_b->rt_runtime_lock); 183 raw_spin_lock_init(&rt_b->rt_runtime_lock);
182 184
183 hrtimer_init(&rt_b->rt_period_timer, 185 hrtimer_init(&rt_b->rt_period_timer,
184 CLOCK_MONOTONIC, HRTIMER_MODE_REL); 186 CLOCK_MONOTONIC, HRTIMER_MODE_REL);
@@ -200,7 +202,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
200 if (hrtimer_active(&rt_b->rt_period_timer)) 202 if (hrtimer_active(&rt_b->rt_period_timer))
201 return; 203 return;
202 204
203 spin_lock(&rt_b->rt_runtime_lock); 205 raw_spin_lock(&rt_b->rt_runtime_lock);
204 for (;;) { 206 for (;;) {
205 unsigned long delta; 207 unsigned long delta;
206 ktime_t soft, hard; 208 ktime_t soft, hard;
@@ -217,7 +219,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
217 __hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta, 219 __hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta,
218 HRTIMER_MODE_ABS_PINNED, 0); 220 HRTIMER_MODE_ABS_PINNED, 0);
219 } 221 }
220 spin_unlock(&rt_b->rt_runtime_lock); 222 raw_spin_unlock(&rt_b->rt_runtime_lock);
221} 223}
222 224
223#ifdef CONFIG_RT_GROUP_SCHED 225#ifdef CONFIG_RT_GROUP_SCHED
@@ -298,7 +300,7 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct cfs_rq, init_tg_cfs_rq);
298 300
299#ifdef CONFIG_RT_GROUP_SCHED 301#ifdef CONFIG_RT_GROUP_SCHED
300static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity); 302static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
301static DEFINE_PER_CPU_SHARED_ALIGNED(struct rt_rq, init_rt_rq); 303static DEFINE_PER_CPU_SHARED_ALIGNED(struct rt_rq, init_rt_rq_var);
302#endif /* CONFIG_RT_GROUP_SCHED */ 304#endif /* CONFIG_RT_GROUP_SCHED */
303#else /* !CONFIG_USER_SCHED */ 305#else /* !CONFIG_USER_SCHED */
304#define root_task_group init_task_group 306#define root_task_group init_task_group
@@ -470,7 +472,7 @@ struct rt_rq {
470 u64 rt_time; 472 u64 rt_time;
471 u64 rt_runtime; 473 u64 rt_runtime;
472 /* Nests inside the rq lock: */ 474 /* Nests inside the rq lock: */
473 spinlock_t rt_runtime_lock; 475 raw_spinlock_t rt_runtime_lock;
474 476
475#ifdef CONFIG_RT_GROUP_SCHED 477#ifdef CONFIG_RT_GROUP_SCHED
476 unsigned long rt_nr_boosted; 478 unsigned long rt_nr_boosted;
@@ -525,7 +527,7 @@ static struct root_domain def_root_domain;
525 */ 527 */
526struct rq { 528struct rq {
527 /* runqueue lock: */ 529 /* runqueue lock: */
528 spinlock_t lock; 530 raw_spinlock_t lock;
529 531
530 /* 532 /*
531 * nr_running and cpu_load should be in the same cacheline because 533 * nr_running and cpu_load should be in the same cacheline because
@@ -685,7 +687,7 @@ inline void update_rq_clock(struct rq *rq)
685 */ 687 */
686int runqueue_is_locked(int cpu) 688int runqueue_is_locked(int cpu)
687{ 689{
688 return spin_is_locked(&cpu_rq(cpu)->lock); 690 return raw_spin_is_locked(&cpu_rq(cpu)->lock);
689} 691}
690 692
691/* 693/*
@@ -814,6 +816,7 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32;
814 * default: 0.25ms 816 * default: 0.25ms
815 */ 817 */
816unsigned int sysctl_sched_shares_ratelimit = 250000; 818unsigned int sysctl_sched_shares_ratelimit = 250000;
819unsigned int normalized_sysctl_sched_shares_ratelimit = 250000;
817 820
818/* 821/*
 819 * Inject some fuzziness into changing the per-cpu group shares 822 * Inject some fuzziness into changing the per-cpu group shares
@@ -892,7 +895,7 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
892 */ 895 */
893 spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); 896 spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
894 897
895 spin_unlock_irq(&rq->lock); 898 raw_spin_unlock_irq(&rq->lock);
896} 899}
897 900
898#else /* __ARCH_WANT_UNLOCKED_CTXSW */ 901#else /* __ARCH_WANT_UNLOCKED_CTXSW */
@@ -916,9 +919,9 @@ static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
916 next->oncpu = 1; 919 next->oncpu = 1;
917#endif 920#endif
918#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW 921#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
919 spin_unlock_irq(&rq->lock); 922 raw_spin_unlock_irq(&rq->lock);
920#else 923#else
921 spin_unlock(&rq->lock); 924 raw_spin_unlock(&rq->lock);
922#endif 925#endif
923} 926}
924 927
@@ -948,10 +951,10 @@ static inline struct rq *__task_rq_lock(struct task_struct *p)
948{ 951{
949 for (;;) { 952 for (;;) {
950 struct rq *rq = task_rq(p); 953 struct rq *rq = task_rq(p);
951 spin_lock(&rq->lock); 954 raw_spin_lock(&rq->lock);
952 if (likely(rq == task_rq(p))) 955 if (likely(rq == task_rq(p)))
953 return rq; 956 return rq;
954 spin_unlock(&rq->lock); 957 raw_spin_unlock(&rq->lock);
955 } 958 }
956} 959}
957 960
@@ -968,10 +971,10 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
968 for (;;) { 971 for (;;) {
969 local_irq_save(*flags); 972 local_irq_save(*flags);
970 rq = task_rq(p); 973 rq = task_rq(p);
971 spin_lock(&rq->lock); 974 raw_spin_lock(&rq->lock);
972 if (likely(rq == task_rq(p))) 975 if (likely(rq == task_rq(p)))
973 return rq; 976 return rq;
974 spin_unlock_irqrestore(&rq->lock, *flags); 977 raw_spin_unlock_irqrestore(&rq->lock, *flags);
975 } 978 }
976} 979}
977 980
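
task_rq_lock() and __task_rq_lock() cannot trust a single read of task_rq(p): the task may migrate between reading the rq pointer and acquiring its lock, so the pointer is re-validated under the lock and the loop retried. The idiom, annotated (sketch):

	static struct rq *lock_task_rq_sketch(struct task_struct *p)
	{
		for (;;) {
			struct rq *rq = task_rq(p);	/* unlocked read, may go stale */

			raw_spin_lock(&rq->lock);
			if (likely(rq == task_rq(p)))	/* still this task's rq?       */
				return rq;		/* yes: return with lock held  */
			raw_spin_unlock(&rq->lock);	/* no: raced with migration    */
		}
	}
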
@@ -980,19 +983,19 @@ void task_rq_unlock_wait(struct task_struct *p)
980 struct rq *rq = task_rq(p); 983 struct rq *rq = task_rq(p);
981 984
982 smp_mb(); /* spin-unlock-wait is not a full memory barrier */ 985 smp_mb(); /* spin-unlock-wait is not a full memory barrier */
983 spin_unlock_wait(&rq->lock); 986 raw_spin_unlock_wait(&rq->lock);
984} 987}
985 988
986static void __task_rq_unlock(struct rq *rq) 989static void __task_rq_unlock(struct rq *rq)
987 __releases(rq->lock) 990 __releases(rq->lock)
988{ 991{
989 spin_unlock(&rq->lock); 992 raw_spin_unlock(&rq->lock);
990} 993}
991 994
992static inline void task_rq_unlock(struct rq *rq, unsigned long *flags) 995static inline void task_rq_unlock(struct rq *rq, unsigned long *flags)
993 __releases(rq->lock) 996 __releases(rq->lock)
994{ 997{
995 spin_unlock_irqrestore(&rq->lock, *flags); 998 raw_spin_unlock_irqrestore(&rq->lock, *flags);
996} 999}
997 1000
998/* 1001/*
@@ -1005,7 +1008,7 @@ static struct rq *this_rq_lock(void)
1005 1008
1006 local_irq_disable(); 1009 local_irq_disable();
1007 rq = this_rq(); 1010 rq = this_rq();
1008 spin_lock(&rq->lock); 1011 raw_spin_lock(&rq->lock);
1009 1012
1010 return rq; 1013 return rq;
1011} 1014}
@@ -1052,10 +1055,10 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer)
1052 1055
1053 WARN_ON_ONCE(cpu_of(rq) != smp_processor_id()); 1056 WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
1054 1057
1055 spin_lock(&rq->lock); 1058 raw_spin_lock(&rq->lock);
1056 update_rq_clock(rq); 1059 update_rq_clock(rq);
1057 rq->curr->sched_class->task_tick(rq, rq->curr, 1); 1060 rq->curr->sched_class->task_tick(rq, rq->curr, 1);
1058 spin_unlock(&rq->lock); 1061 raw_spin_unlock(&rq->lock);
1059 1062
1060 return HRTIMER_NORESTART; 1063 return HRTIMER_NORESTART;
1061} 1064}
@@ -1068,10 +1071,10 @@ static void __hrtick_start(void *arg)
1068{ 1071{
1069 struct rq *rq = arg; 1072 struct rq *rq = arg;
1070 1073
1071 spin_lock(&rq->lock); 1074 raw_spin_lock(&rq->lock);
1072 hrtimer_restart(&rq->hrtick_timer); 1075 hrtimer_restart(&rq->hrtick_timer);
1073 rq->hrtick_csd_pending = 0; 1076 rq->hrtick_csd_pending = 0;
1074 spin_unlock(&rq->lock); 1077 raw_spin_unlock(&rq->lock);
1075} 1078}
1076 1079
1077/* 1080/*
@@ -1178,7 +1181,7 @@ static void resched_task(struct task_struct *p)
1178{ 1181{
1179 int cpu; 1182 int cpu;
1180 1183
1181 assert_spin_locked(&task_rq(p)->lock); 1184 assert_raw_spin_locked(&task_rq(p)->lock);
1182 1185
1183 if (test_tsk_need_resched(p)) 1186 if (test_tsk_need_resched(p))
1184 return; 1187 return;
@@ -1200,10 +1203,10 @@ static void resched_cpu(int cpu)
1200 struct rq *rq = cpu_rq(cpu); 1203 struct rq *rq = cpu_rq(cpu);
1201 unsigned long flags; 1204 unsigned long flags;
1202 1205
1203 if (!spin_trylock_irqsave(&rq->lock, flags)) 1206 if (!raw_spin_trylock_irqsave(&rq->lock, flags))
1204 return; 1207 return;
1205 resched_task(cpu_curr(cpu)); 1208 resched_task(cpu_curr(cpu));
1206 spin_unlock_irqrestore(&rq->lock, flags); 1209 raw_spin_unlock_irqrestore(&rq->lock, flags);
1207} 1210}
1208 1211
1209#ifdef CONFIG_NO_HZ 1212#ifdef CONFIG_NO_HZ
@@ -1272,7 +1275,7 @@ static void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
1272#else /* !CONFIG_SMP */ 1275#else /* !CONFIG_SMP */
1273static void resched_task(struct task_struct *p) 1276static void resched_task(struct task_struct *p)
1274{ 1277{
1275 assert_spin_locked(&task_rq(p)->lock); 1278 assert_raw_spin_locked(&task_rq(p)->lock);
1276 set_tsk_need_resched(p); 1279 set_tsk_need_resched(p);
1277} 1280}
1278 1281
@@ -1599,11 +1602,11 @@ static void update_group_shares_cpu(struct task_group *tg, int cpu,
1599 struct rq *rq = cpu_rq(cpu); 1602 struct rq *rq = cpu_rq(cpu);
1600 unsigned long flags; 1603 unsigned long flags;
1601 1604
1602 spin_lock_irqsave(&rq->lock, flags); 1605 raw_spin_lock_irqsave(&rq->lock, flags);
1603 tg->cfs_rq[cpu]->rq_weight = boost ? 0 : rq_weight; 1606 tg->cfs_rq[cpu]->rq_weight = boost ? 0 : rq_weight;
1604 tg->cfs_rq[cpu]->shares = boost ? 0 : shares; 1607 tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
1605 __set_se_shares(tg->se[cpu], shares); 1608 __set_se_shares(tg->se[cpu], shares);
1606 spin_unlock_irqrestore(&rq->lock, flags); 1609 raw_spin_unlock_irqrestore(&rq->lock, flags);
1607 } 1610 }
1608} 1611}
1609 1612
@@ -1614,7 +1617,7 @@ static void update_group_shares_cpu(struct task_group *tg, int cpu,
1614 */ 1617 */
1615static int tg_shares_up(struct task_group *tg, void *data) 1618static int tg_shares_up(struct task_group *tg, void *data)
1616{ 1619{
1617 unsigned long weight, rq_weight = 0, shares = 0; 1620 unsigned long weight, rq_weight = 0, sum_weight = 0, shares = 0;
1618 unsigned long *usd_rq_weight; 1621 unsigned long *usd_rq_weight;
1619 struct sched_domain *sd = data; 1622 struct sched_domain *sd = data;
1620 unsigned long flags; 1623 unsigned long flags;
@@ -1630,6 +1633,7 @@ static int tg_shares_up(struct task_group *tg, void *data)
1630 weight = tg->cfs_rq[i]->load.weight; 1633 weight = tg->cfs_rq[i]->load.weight;
1631 usd_rq_weight[i] = weight; 1634 usd_rq_weight[i] = weight;
1632 1635
1636 rq_weight += weight;
1633 /* 1637 /*
1634 * If there are currently no tasks on the cpu pretend there 1638 * If there are currently no tasks on the cpu pretend there
1635 * is one of average load so that when a new task gets to 1639 * is one of average load so that when a new task gets to
@@ -1638,10 +1642,13 @@ static int tg_shares_up(struct task_group *tg, void *data)
1638 if (!weight) 1642 if (!weight)
1639 weight = NICE_0_LOAD; 1643 weight = NICE_0_LOAD;
1640 1644
1641 rq_weight += weight; 1645 sum_weight += weight;
1642 shares += tg->cfs_rq[i]->shares; 1646 shares += tg->cfs_rq[i]->shares;
1643 } 1647 }
1644 1648
1649 if (!rq_weight)
1650 rq_weight = sum_weight;
1651
1645 if ((!shares && rq_weight) || shares > tg->shares) 1652 if ((!shares && rq_weight) || shares > tg->shares)
1646 shares = tg->shares; 1653 shares = tg->shares;
1647 1654
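
The fix keeps two totals: rq_weight sums the weight actually queued, while sum_weight additionally pads idle CPUs up to NICE_0_LOAD; shares are distributed against the real total, with the padded one used only when the whole group is idle. Per-CPU shares then come out roughly as follows (a simplified model of what update_group_shares_cpu() computes, ignoring boosting and clamping):

	/* shares_i ~ tg->shares * weight_i / rq_weight (sketch) */
	static unsigned long cpu_shares_sketch(unsigned long tg_shares,
					       unsigned long cpu_weight,
					       unsigned long rq_weight)
	{
		if (!rq_weight)
			return 0;
		return tg_shares * cpu_weight / rq_weight;
	}
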
@@ -1701,9 +1708,9 @@ static void update_shares_locked(struct rq *rq, struct sched_domain *sd)
1701 if (root_task_group_empty()) 1708 if (root_task_group_empty())
1702 return; 1709 return;
1703 1710
1704 spin_unlock(&rq->lock); 1711 raw_spin_unlock(&rq->lock);
1705 update_shares(sd); 1712 update_shares(sd);
1706 spin_lock(&rq->lock); 1713 raw_spin_lock(&rq->lock);
1707} 1714}
1708 1715
1709static void update_h_load(long cpu) 1716static void update_h_load(long cpu)
@@ -1743,7 +1750,7 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
1743 __acquires(busiest->lock) 1750 __acquires(busiest->lock)
1744 __acquires(this_rq->lock) 1751 __acquires(this_rq->lock)
1745{ 1752{
1746 spin_unlock(&this_rq->lock); 1753 raw_spin_unlock(&this_rq->lock);
1747 double_rq_lock(this_rq, busiest); 1754 double_rq_lock(this_rq, busiest);
1748 1755
1749 return 1; 1756 return 1;
@@ -1764,14 +1771,16 @@ static int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
1764{ 1771{
1765 int ret = 0; 1772 int ret = 0;
1766 1773
1767 if (unlikely(!spin_trylock(&busiest->lock))) { 1774 if (unlikely(!raw_spin_trylock(&busiest->lock))) {
1768 if (busiest < this_rq) { 1775 if (busiest < this_rq) {
1769 spin_unlock(&this_rq->lock); 1776 raw_spin_unlock(&this_rq->lock);
1770 spin_lock(&busiest->lock); 1777 raw_spin_lock(&busiest->lock);
1771 spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING); 1778 raw_spin_lock_nested(&this_rq->lock,
1779 SINGLE_DEPTH_NESTING);
1772 ret = 1; 1780 ret = 1;
1773 } else 1781 } else
1774 spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING); 1782 raw_spin_lock_nested(&busiest->lock,
1783 SINGLE_DEPTH_NESTING);
1775 } 1784 }
1776 return ret; 1785 return ret;
1777} 1786}
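
When the trylock fails, the fallback imposes a global order: whichever runqueue has the lower address is locked first, so two CPUs taking the same pair in opposite directions can never deadlock; the _nested annotation merely tells lockdep that holding two rq locks is intentional. The invariant in isolation (sketch; assumes the two runqueues differ, as double_rq_lock() handles the equal case separately):

	static void lock_two_rqs_sketch(struct rq *a, struct rq *b)
	{
		if (a < b) {			/* lower address always first */
			raw_spin_lock(&a->lock);
			raw_spin_lock_nested(&b->lock, SINGLE_DEPTH_NESTING);
		} else {
			raw_spin_lock(&b->lock);
			raw_spin_lock_nested(&a->lock, SINGLE_DEPTH_NESTING);
		}
	}
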
@@ -1785,7 +1794,7 @@ static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
1785{ 1794{
1786 if (unlikely(!irqs_disabled())) { 1795 if (unlikely(!irqs_disabled())) {
 1787 /* printk() doesn't work well under rq->lock */ 1796 /* printk() doesn't work well under rq->lock */
1788 spin_unlock(&this_rq->lock); 1797 raw_spin_unlock(&this_rq->lock);
1789 BUG_ON(1); 1798 BUG_ON(1);
1790 } 1799 }
1791 1800
@@ -1795,7 +1804,7 @@ static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
1795static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest) 1804static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
1796 __releases(busiest->lock) 1805 __releases(busiest->lock)
1797{ 1806{
1798 spin_unlock(&busiest->lock); 1807 raw_spin_unlock(&busiest->lock);
1799 lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_); 1808 lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
1800} 1809}
1801#endif 1810#endif
@@ -1810,6 +1819,22 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
1810#endif 1819#endif
1811 1820
1812static void calc_load_account_active(struct rq *this_rq); 1821static void calc_load_account_active(struct rq *this_rq);
1822static void update_sysctl(void);
1823static int get_update_sysctl_factor(void);
1824
1825static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
1826{
1827 set_task_rq(p, cpu);
1828#ifdef CONFIG_SMP
1829 /*
1830 * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
 1831 * successfully executed on another CPU. We must ensure that updates of
1832 * per-task data have been completed by this moment.
1833 */
1834 smp_wmb();
1835 task_thread_info(p)->cpu = cpu;
1836#endif
1837}
1813 1838
1814#include "sched_stats.h" 1839#include "sched_stats.h"
1815#include "sched_idletask.c" 1840#include "sched_idletask.c"
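
__set_task_cpu() itself is unchanged by the move up the file; the detail worth noting is the barrier: the group/runqueue linkage from set_task_rq() must become visible to other CPUs before ->cpu does, because remote code reaches the task through ->cpu (the task_rq_lock() retry loop shown earlier is the read side). Annotated (sketch):

	static inline void publish_task_cpu(struct task_struct *p, unsigned int cpu)
	{
		set_task_rq(p, cpu);		/* 1: update per-class rq links      */
		smp_wmb();			/* 2: order 1 before 3 for remote    */
						/*    readers of ->cpu               */
		task_thread_info(p)->cpu = cpu;	/* 3: publish; task_cpu(p) == cpu    */
	}
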
@@ -1967,20 +1992,6 @@ inline int task_curr(const struct task_struct *p)
1967 return cpu_curr(task_cpu(p)) == p; 1992 return cpu_curr(task_cpu(p)) == p;
1968} 1993}
1969 1994
1970static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
1971{
1972 set_task_rq(p, cpu);
1973#ifdef CONFIG_SMP
1974 /*
1975 * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
 1976 * successfully executed on another CPU. We must ensure that updates of
1977 * per-task data have been completed by this moment.
1978 */
1979 smp_wmb();
1980 task_thread_info(p)->cpu = cpu;
1981#endif
1982}
1983
1984static inline void check_class_changed(struct rq *rq, struct task_struct *p, 1995static inline void check_class_changed(struct rq *rq, struct task_struct *p,
1985 const struct sched_class *prev_class, 1996 const struct sched_class *prev_class,
1986 int oldprio, int running) 1997 int oldprio, int running)
@@ -1993,39 +2004,6 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
1993 p->sched_class->prio_changed(rq, p, oldprio, running); 2004 p->sched_class->prio_changed(rq, p, oldprio, running);
1994} 2005}
1995 2006
1996/**
1997 * kthread_bind - bind a just-created kthread to a cpu.
1998 * @p: thread created by kthread_create().
1999 * @cpu: cpu (might not be online, must be possible) for @k to run on.
2000 *
2001 * Description: This function is equivalent to set_cpus_allowed(),
2002 * except that @cpu doesn't need to be online, and the thread must be
2003 * stopped (i.e., just returned from kthread_create()).
2004 *
2005 * Function lives here instead of kthread.c because it messes with
2006 * scheduler internals which require locking.
2007 */
2008void kthread_bind(struct task_struct *p, unsigned int cpu)
2009{
2010 struct rq *rq = cpu_rq(cpu);
2011 unsigned long flags;
2012
2013 /* Must have done schedule() in kthread() before we set_task_cpu */
2014 if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) {
2015 WARN_ON(1);
2016 return;
2017 }
2018
2019 spin_lock_irqsave(&rq->lock, flags);
2020 update_rq_clock(rq);
2021 set_task_cpu(p, cpu);
2022 p->cpus_allowed = cpumask_of_cpu(cpu);
2023 p->rt.nr_cpus_allowed = 1;
2024 p->flags |= PF_THREAD_BOUND;
2025 spin_unlock_irqrestore(&rq->lock, flags);
2026}
2027EXPORT_SYMBOL(kthread_bind);
2028
2029#ifdef CONFIG_SMP 2007#ifdef CONFIG_SMP
2030/* 2008/*
2031 * Is this task likely cache-hot: 2009 * Is this task likely cache-hot:
@@ -2035,6 +2013,9 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
2035{ 2013{
2036 s64 delta; 2014 s64 delta;
2037 2015
2016 if (p->sched_class != &fair_sched_class)
2017 return 0;
2018
2038 /* 2019 /*
2039 * Buddy candidates are cache hot: 2020 * Buddy candidates are cache hot:
2040 */ 2021 */
@@ -2043,9 +2024,6 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
2043 &p->se == cfs_rq_of(&p->se)->last)) 2024 &p->se == cfs_rq_of(&p->se)->last))
2044 return 1; 2025 return 1;
2045 2026
2046 if (p->sched_class != &fair_sched_class)
2047 return 0;
2048
2049 if (sysctl_sched_migration_cost == -1) 2027 if (sysctl_sched_migration_cost == -1)
2050 return 1; 2028 return 1;
2051 if (sysctl_sched_migration_cost == 0) 2029 if (sysctl_sched_migration_cost == 0)
@@ -2056,38 +2034,24 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
2056 return delta < (s64)sysctl_sched_migration_cost; 2034 return delta < (s64)sysctl_sched_migration_cost;
2057} 2035}
2058 2036
2059
2060void set_task_cpu(struct task_struct *p, unsigned int new_cpu) 2037void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
2061{ 2038{
2062 int old_cpu = task_cpu(p); 2039#ifdef CONFIG_SCHED_DEBUG
2063 struct rq *old_rq = cpu_rq(old_cpu), *new_rq = cpu_rq(new_cpu); 2040 /*
2064 struct cfs_rq *old_cfsrq = task_cfs_rq(p), 2041 * We should never call set_task_cpu() on a blocked task,
2065 *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu); 2042 * ttwu() will sort out the placement.
2066 u64 clock_offset; 2043 */
2067 2044 WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING &&
2068 clock_offset = old_rq->clock - new_rq->clock; 2045 !(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE));
2046#endif
2069 2047
2070 trace_sched_migrate_task(p, new_cpu); 2048 trace_sched_migrate_task(p, new_cpu);
2071 2049
2072#ifdef CONFIG_SCHEDSTATS 2050 if (task_cpu(p) == new_cpu)
2073 if (p->se.wait_start) 2051 return;
2074 p->se.wait_start -= clock_offset; 2052
2075 if (p->se.sleep_start) 2053 p->se.nr_migrations++;
2076 p->se.sleep_start -= clock_offset; 2054 perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 1, NULL, 0);
2077 if (p->se.block_start)
2078 p->se.block_start -= clock_offset;
2079#endif
2080 if (old_cpu != new_cpu) {
2081 p->se.nr_migrations++;
2082#ifdef CONFIG_SCHEDSTATS
2083 if (task_hot(p, old_rq->clock, NULL))
2084 schedstat_inc(p, se.nr_forced2_migrations);
2085#endif
2086 perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS,
2087 1, 1, NULL, 0);
2088 }
2089 p->se.vruntime -= old_cfsrq->min_vruntime -
2090 new_cfsrq->min_vruntime;
2091 2055
2092 __set_task_cpu(p, new_cpu); 2056 __set_task_cpu(p, new_cpu);
2093} 2057}
@@ -2112,13 +2076,10 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
2112 2076
2113 /* 2077 /*
2114 * If the task is not on a runqueue (and not running), then 2078 * If the task is not on a runqueue (and not running), then
2115 * it is sufficient to simply update the task's cpu field. 2079 * the next wake-up will properly place the task.
2116 */ 2080 */
2117 if (!p->se.on_rq && !task_running(rq, p)) { 2081 if (!p->se.on_rq && !task_running(rq, p))
2118 update_rq_clock(rq);
2119 set_task_cpu(p, dest_cpu);
2120 return 0; 2082 return 0;
2121 }
2122 2083
2123 init_completion(&req->done); 2084 init_completion(&req->done);
2124 req->task = p; 2085 req->task = p;
@@ -2323,6 +2284,77 @@ void task_oncpu_function_call(struct task_struct *p,
2323 preempt_enable(); 2284 preempt_enable();
2324} 2285}
2325 2286
2287#ifdef CONFIG_SMP
2288static int select_fallback_rq(int cpu, struct task_struct *p)
2289{
2290 int dest_cpu;
2291 const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu));
2292
2293 /* Look for allowed, online CPU in same node. */
2294 for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask)
2295 if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
2296 return dest_cpu;
2297
2298 /* Any allowed, online CPU? */
2299 dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask);
2300 if (dest_cpu < nr_cpu_ids)
2301 return dest_cpu;
2302
2303 /* No more Mr. Nice Guy. */
2304 if (dest_cpu >= nr_cpu_ids) {
2305 rcu_read_lock();
2306 cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
2307 rcu_read_unlock();
2308 dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
2309
2310 /*
2311 * Don't tell them about moving exiting tasks or
2312 * kernel threads (both mm NULL), since they never
 2313 * leave the kernel.
2314 */
2315 if (p->mm && printk_ratelimit()) {
2316 printk(KERN_INFO "process %d (%s) no "
2317 "longer affine to cpu%d\n",
2318 task_pid_nr(p), p->comm, cpu);
2319 }
2320 }
2321
2322 return dest_cpu;
2323}
2324
2325/*
2326 * Called from:
2327 *
2328 * - fork, @p is stable because it isn't on the tasklist yet
2329 *
2330 * - exec, @p is unstable, retry loop
2331 *
2332 * - wake-up, we serialize ->cpus_allowed against TASK_WAKING so
2333 * we should be good.
2334 */
2335static inline
2336int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
2337{
2338 int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags);
2339
2340 /*
2341 * In order not to call set_task_cpu() on a blocking task we need
2342 * to rely on ttwu() to place the task on a valid ->cpus_allowed
2343 * cpu.
2344 *
2345 * Since this is common to all placement strategies, this lives here.
2346 *
 2347 * [ this allows ->select_task_rq() to simply return task_cpu(p) and
2348 * not worry about this generic constraint ]
2349 */
2350 if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) ||
2351 !cpu_active(cpu)))
2352 cpu = select_fallback_rq(task_cpu(p), p);
2353
2354 return cpu;
2355}
2356#endif
2357
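
select_fallback_rq() degrades gracefully: first an allowed, active CPU in the task's own node, then any allowed active CPU, and only then does it let cpuset rewrite ->cpus_allowed, warning when a user task visibly loses affinity. The cascade in isolation (a sketch of the same three steps; the helper name is hypothetical):

	static int fallback_cpu_sketch(struct task_struct *p, int cpu)
	{
		int dest;

		/* 1) allowed and active, same NUMA node */
		for_each_cpu_and(dest, cpumask_of_node(cpu_to_node(cpu)),
				 cpu_active_mask)
			if (cpumask_test_cpu(dest, &p->cpus_allowed))
				return dest;

		/* 2) allowed and active, anywhere */
		dest = cpumask_any_and(&p->cpus_allowed, cpu_active_mask);
		if (dest < nr_cpu_ids)
			return dest;

		/* 3) mask is hopeless: let cpuset widen it, then pick again */
		cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
		return cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
	}
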
2326/*** 2358/***
2327 * try_to_wake_up - wake up a thread 2359 * try_to_wake_up - wake up a thread
2328 * @p: the to-be-woken-up thread 2360 * @p: the to-be-woken-up thread
@@ -2374,17 +2406,18 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
2374 if (task_contributes_to_load(p)) 2406 if (task_contributes_to_load(p))
2375 rq->nr_uninterruptible--; 2407 rq->nr_uninterruptible--;
2376 p->state = TASK_WAKING; 2408 p->state = TASK_WAKING;
2377 task_rq_unlock(rq, &flags);
2378 2409
2379 cpu = p->sched_class->select_task_rq(p, SD_BALANCE_WAKE, wake_flags); 2410 if (p->sched_class->task_waking)
2380 if (cpu != orig_cpu) { 2411 p->sched_class->task_waking(rq, p);
2381 local_irq_save(flags); 2412
2382 rq = cpu_rq(cpu); 2413 __task_rq_unlock(rq);
2383 update_rq_clock(rq); 2414
2415 cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
2416 if (cpu != orig_cpu)
2384 set_task_cpu(p, cpu); 2417 set_task_cpu(p, cpu);
2385 local_irq_restore(flags); 2418
2386 } 2419 rq = __task_rq_lock(p);
2387 rq = task_rq_lock(p, &flags); 2420 update_rq_clock(rq);
2388 2421
2389 WARN_ON(p->state != TASK_WAKING); 2422 WARN_ON(p->state != TASK_WAKING);
2390 cpu = task_cpu(p); 2423 cpu = task_cpu(p);
@@ -2440,8 +2473,8 @@ out_running:
2440 2473
2441 p->state = TASK_RUNNING; 2474 p->state = TASK_RUNNING;
2442#ifdef CONFIG_SMP 2475#ifdef CONFIG_SMP
2443 if (p->sched_class->task_wake_up) 2476 if (p->sched_class->task_woken)
2444 p->sched_class->task_wake_up(rq, p); 2477 p->sched_class->task_woken(rq, p);
2445 2478
2446 if (unlikely(rq->idle_stamp)) { 2479 if (unlikely(rq->idle_stamp)) {
2447 u64 delta = rq->clock - rq->idle_stamp; 2480 u64 delta = rq->clock - rq->idle_stamp;
@@ -2499,7 +2532,6 @@ static void __sched_fork(struct task_struct *p)
2499 p->se.avg_overlap = 0; 2532 p->se.avg_overlap = 0;
2500 p->se.start_runtime = 0; 2533 p->se.start_runtime = 0;
2501 p->se.avg_wakeup = sysctl_sched_wakeup_granularity; 2534 p->se.avg_wakeup = sysctl_sched_wakeup_granularity;
2502 p->se.avg_running = 0;
2503 2535
2504#ifdef CONFIG_SCHEDSTATS 2536#ifdef CONFIG_SCHEDSTATS
2505 p->se.wait_start = 0; 2537 p->se.wait_start = 0;
@@ -2521,7 +2553,6 @@ static void __sched_fork(struct task_struct *p)
2521 p->se.nr_failed_migrations_running = 0; 2553 p->se.nr_failed_migrations_running = 0;
2522 p->se.nr_failed_migrations_hot = 0; 2554 p->se.nr_failed_migrations_hot = 0;
2523 p->se.nr_forced_migrations = 0; 2555 p->se.nr_forced_migrations = 0;
2524 p->se.nr_forced2_migrations = 0;
2525 2556
2526 p->se.nr_wakeups = 0; 2557 p->se.nr_wakeups = 0;
2527 p->se.nr_wakeups_sync = 0; 2558 p->se.nr_wakeups_sync = 0;
@@ -2542,14 +2573,6 @@ static void __sched_fork(struct task_struct *p)
2542#ifdef CONFIG_PREEMPT_NOTIFIERS 2573#ifdef CONFIG_PREEMPT_NOTIFIERS
2543 INIT_HLIST_HEAD(&p->preempt_notifiers); 2574 INIT_HLIST_HEAD(&p->preempt_notifiers);
2544#endif 2575#endif
2545
2546 /*
2547 * We mark the process as running here, but have not actually
2548 * inserted it onto the runqueue yet. This guarantees that
2549 * nobody will actually run it, and a signal or other external
2550 * event cannot wake it up and insert it on the runqueue either.
2551 */
2552 p->state = TASK_RUNNING;
2553} 2576}
2554 2577
2555/* 2578/*
@@ -2558,9 +2581,14 @@ static void __sched_fork(struct task_struct *p)
2558void sched_fork(struct task_struct *p, int clone_flags) 2581void sched_fork(struct task_struct *p, int clone_flags)
2559{ 2582{
2560 int cpu = get_cpu(); 2583 int cpu = get_cpu();
2561 unsigned long flags;
2562 2584
2563 __sched_fork(p); 2585 __sched_fork(p);
2586 /*
2587 * We mark the process as waking here. This guarantees that
2588 * nobody will actually run it, and a signal or other external
2589 * event cannot wake it up and insert it on the runqueue either.
2590 */
2591 p->state = TASK_WAKING;
2564 2592
2565 /* 2593 /*
2566 * Revert to default priority/policy on fork if requested. 2594 * Revert to default priority/policy on fork if requested.
@@ -2592,13 +2620,13 @@ void sched_fork(struct task_struct *p, int clone_flags)
2592 if (!rt_prio(p->prio)) 2620 if (!rt_prio(p->prio))
2593 p->sched_class = &fair_sched_class; 2621 p->sched_class = &fair_sched_class;
2594 2622
2623 if (p->sched_class->task_fork)
2624 p->sched_class->task_fork(p);
2625
2595#ifdef CONFIG_SMP 2626#ifdef CONFIG_SMP
2596 cpu = p->sched_class->select_task_rq(p, SD_BALANCE_FORK, 0); 2627 cpu = select_task_rq(p, SD_BALANCE_FORK, 0);
2597#endif 2628#endif
2598 local_irq_save(flags);
2599 update_rq_clock(cpu_rq(cpu));
2600 set_task_cpu(p, cpu); 2629 set_task_cpu(p, cpu);
2601 local_irq_restore(flags);
2602 2630
2603#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) 2631#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
2604 if (likely(sched_info_on())) 2632 if (likely(sched_info_on()))
@@ -2629,24 +2657,15 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
2629 struct rq *rq; 2657 struct rq *rq;
2630 2658
2631 rq = task_rq_lock(p, &flags); 2659 rq = task_rq_lock(p, &flags);
2632 BUG_ON(p->state != TASK_RUNNING); 2660 BUG_ON(p->state != TASK_WAKING);
2661 p->state = TASK_RUNNING;
2633 update_rq_clock(rq); 2662 update_rq_clock(rq);
2634 2663 activate_task(rq, p, 0);
2635 if (!p->sched_class->task_new || !current->se.on_rq) {
2636 activate_task(rq, p, 0);
2637 } else {
2638 /*
2639 * Let the scheduling class do new task startup
2640 * management (if any):
2641 */
2642 p->sched_class->task_new(rq, p);
2643 inc_nr_running(rq);
2644 }
2645 trace_sched_wakeup_new(rq, p, 1); 2664 trace_sched_wakeup_new(rq, p, 1);
2646 check_preempt_curr(rq, p, WF_FORK); 2665 check_preempt_curr(rq, p, WF_FORK);
2647#ifdef CONFIG_SMP 2666#ifdef CONFIG_SMP
2648 if (p->sched_class->task_wake_up) 2667 if (p->sched_class->task_woken)
2649 p->sched_class->task_wake_up(rq, p); 2668 p->sched_class->task_woken(rq, p);
2650#endif 2669#endif
2651 task_rq_unlock(rq, &flags); 2670 task_rq_unlock(rq, &flags);
2652} 2671}
@@ -2798,10 +2817,10 @@ static inline void post_schedule(struct rq *rq)
2798 if (rq->post_schedule) { 2817 if (rq->post_schedule) {
2799 unsigned long flags; 2818 unsigned long flags;
2800 2819
2801 spin_lock_irqsave(&rq->lock, flags); 2820 raw_spin_lock_irqsave(&rq->lock, flags);
2802 if (rq->curr->sched_class->post_schedule) 2821 if (rq->curr->sched_class->post_schedule)
2803 rq->curr->sched_class->post_schedule(rq); 2822 rq->curr->sched_class->post_schedule(rq);
2804 spin_unlock_irqrestore(&rq->lock, flags); 2823 raw_spin_unlock_irqrestore(&rq->lock, flags);
2805 2824
2806 rq->post_schedule = 0; 2825 rq->post_schedule = 0;
2807 } 2826 }
@@ -3083,15 +3102,15 @@ static void double_rq_lock(struct rq *rq1, struct rq *rq2)
3083{ 3102{
3084 BUG_ON(!irqs_disabled()); 3103 BUG_ON(!irqs_disabled());
3085 if (rq1 == rq2) { 3104 if (rq1 == rq2) {
3086 spin_lock(&rq1->lock); 3105 raw_spin_lock(&rq1->lock);
3087 __acquire(rq2->lock); /* Fake it out ;) */ 3106 __acquire(rq2->lock); /* Fake it out ;) */
3088 } else { 3107 } else {
3089 if (rq1 < rq2) { 3108 if (rq1 < rq2) {
3090 spin_lock(&rq1->lock); 3109 raw_spin_lock(&rq1->lock);
3091 spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING); 3110 raw_spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING);
3092 } else { 3111 } else {
3093 spin_lock(&rq2->lock); 3112 raw_spin_lock(&rq2->lock);
3094 spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING); 3113 raw_spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING);
3095 } 3114 }
3096 } 3115 }
3097 update_rq_clock(rq1); 3116 update_rq_clock(rq1);
@@ -3108,29 +3127,44 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
3108 __releases(rq1->lock) 3127 __releases(rq1->lock)
3109 __releases(rq2->lock) 3128 __releases(rq2->lock)
3110{ 3129{
3111 spin_unlock(&rq1->lock); 3130 raw_spin_unlock(&rq1->lock);
3112 if (rq1 != rq2) 3131 if (rq1 != rq2)
3113 spin_unlock(&rq2->lock); 3132 raw_spin_unlock(&rq2->lock);
3114 else 3133 else
3115 __release(rq2->lock); 3134 __release(rq2->lock);
3116} 3135}
3117 3136
3118/* 3137/*
3119 * If dest_cpu is allowed for this process, migrate the task to it. 3138 * sched_exec - execve() is a valuable balancing opportunity, because at
3120 * This is accomplished by forcing the cpu_allowed mask to only 3139 * this point the task has the smallest effective memory and cache footprint.
3121 * allow dest_cpu, which will force the cpu onto dest_cpu. Then
3122 * the cpu_allowed mask is restored.
3123 */ 3140 */
3124static void sched_migrate_task(struct task_struct *p, int dest_cpu) 3141void sched_exec(void)
3125{ 3142{
3143 struct task_struct *p = current;
3126 struct migration_req req; 3144 struct migration_req req;
3145 int dest_cpu, this_cpu;
3127 unsigned long flags; 3146 unsigned long flags;
3128 struct rq *rq; 3147 struct rq *rq;
3129 3148
3149again:
3150 this_cpu = get_cpu();
3151 dest_cpu = select_task_rq(p, SD_BALANCE_EXEC, 0);
3152 if (dest_cpu == this_cpu) {
3153 put_cpu();
3154 return;
3155 }
3156
3130 rq = task_rq_lock(p, &flags); 3157 rq = task_rq_lock(p, &flags);
3158 put_cpu();
3159
3160 /*
3161 * select_task_rq() can race against ->cpus_allowed
3162 */
3131 if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed) 3163 if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)
3132 || unlikely(!cpu_active(dest_cpu))) 3164 || unlikely(!cpu_active(dest_cpu))) {
3133 goto out; 3165 task_rq_unlock(rq, &flags);
3166 goto again;
3167 }
3134 3168
3135 /* force the process onto the specified CPU */ 3169 /* force the process onto the specified CPU */
3136 if (migrate_task(p, dest_cpu, &req)) { 3170 if (migrate_task(p, dest_cpu, &req)) {
@@ -3145,24 +3179,10 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu)
3145 3179
3146 return; 3180 return;
3147 } 3181 }
3148out:
3149 task_rq_unlock(rq, &flags); 3182 task_rq_unlock(rq, &flags);
3150} 3183}
3151 3184
3152/* 3185/*
3153 * sched_exec - execve() is a valuable balancing opportunity, because at
3154 * this point the task has the smallest effective memory and cache footprint.
3155 */
3156void sched_exec(void)
3157{
3158 int new_cpu, this_cpu = get_cpu();
3159 new_cpu = current->sched_class->select_task_rq(current, SD_BALANCE_EXEC, 0);
3160 put_cpu();
3161 if (new_cpu != this_cpu)
3162 sched_migrate_task(current, new_cpu);
3163}
3164
3165/*
3166 * pull_task - move a task from a remote runqueue to the local runqueue. 3186 * pull_task - move a task from a remote runqueue to the local runqueue.
3167 * Both runqueues must be locked. 3187 * Both runqueues must be locked.
3168 */ 3188 */
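
Folding sched_migrate_task() into sched_exec() above turns a silent failure into a retry: select_task_rq() runs unlocked, so once the rq lock is held the chosen CPU is re-checked against ->cpus_allowed and cpu_active(); if either changed in between, the whole selection is redone. The control-flow skeleton (sketch; the migration request is elided):

	static void exec_balance_sketch(struct task_struct *p)
	{
		unsigned long flags;
		struct rq *rq;
		int dest_cpu;

	again:
		dest_cpu = select_task_rq(p, SD_BALANCE_EXEC, 0);
		rq = task_rq_lock(p, &flags);
		/* selection ran unlocked: revalidate under rq->lock */
		if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed) ||
		    unlikely(!cpu_active(dest_cpu))) {
			task_rq_unlock(rq, &flags);
			goto again;	/* raced with an affinity change */
		}
		/* dest_cpu is valid under rq->lock: queue the migration here */
		task_rq_unlock(rq, &flags);
	}
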
@@ -3172,10 +3192,6 @@ static void pull_task(struct rq *src_rq, struct task_struct *p,
3172 deactivate_task(src_rq, p, 0); 3192 deactivate_task(src_rq, p, 0);
3173 set_task_cpu(p, this_cpu); 3193 set_task_cpu(p, this_cpu);
3174 activate_task(this_rq, p, 0); 3194 activate_task(this_rq, p, 0);
3175 /*
3176 * Note that idle threads have a prio of MAX_PRIO, for this test
3177 * to be always true for them.
3178 */
3179 check_preempt_curr(this_rq, p, 0); 3195 check_preempt_curr(this_rq, p, 0);
3180} 3196}
3181 3197
@@ -4134,7 +4150,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
4134 unsigned long flags; 4150 unsigned long flags;
4135 struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); 4151 struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
4136 4152
4137 cpumask_copy(cpus, cpu_online_mask); 4153 cpumask_copy(cpus, cpu_active_mask);
4138 4154
4139 /* 4155 /*
4140 * When power savings policy is enabled for the parent domain, idle 4156 * When power savings policy is enabled for the parent domain, idle
@@ -4207,14 +4223,15 @@ redo:
4207 4223
4208 if (unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2)) { 4224 if (unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2)) {
4209 4225
4210 spin_lock_irqsave(&busiest->lock, flags); 4226 raw_spin_lock_irqsave(&busiest->lock, flags);
4211 4227
4212 /* don't kick the migration_thread, if the curr 4228 /* don't kick the migration_thread, if the curr
4213 * task on busiest cpu can't be moved to this_cpu 4229 * task on busiest cpu can't be moved to this_cpu
4214 */ 4230 */
4215 if (!cpumask_test_cpu(this_cpu, 4231 if (!cpumask_test_cpu(this_cpu,
4216 &busiest->curr->cpus_allowed)) { 4232 &busiest->curr->cpus_allowed)) {
4217 spin_unlock_irqrestore(&busiest->lock, flags); 4233 raw_spin_unlock_irqrestore(&busiest->lock,
4234 flags);
4218 all_pinned = 1; 4235 all_pinned = 1;
4219 goto out_one_pinned; 4236 goto out_one_pinned;
4220 } 4237 }
@@ -4224,7 +4241,7 @@ redo:
4224 busiest->push_cpu = this_cpu; 4241 busiest->push_cpu = this_cpu;
4225 active_balance = 1; 4242 active_balance = 1;
4226 } 4243 }
4227 spin_unlock_irqrestore(&busiest->lock, flags); 4244 raw_spin_unlock_irqrestore(&busiest->lock, flags);
4228 if (active_balance) 4245 if (active_balance)
4229 wake_up_process(busiest->migration_thread); 4246 wake_up_process(busiest->migration_thread);
4230 4247
@@ -4297,7 +4314,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
4297 int all_pinned = 0; 4314 int all_pinned = 0;
4298 struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); 4315 struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
4299 4316
4300 cpumask_copy(cpus, cpu_online_mask); 4317 cpumask_copy(cpus, cpu_active_mask);
4301 4318
4302 /* 4319 /*
4303 * When power savings policy is enabled for the parent domain, idle 4320 * When power savings policy is enabled for the parent domain, idle
@@ -4406,10 +4423,10 @@ redo:
4406 /* 4423 /*
4407 * Should not call ttwu while holding a rq->lock 4424 * Should not call ttwu while holding a rq->lock
4408 */ 4425 */
4409 spin_unlock(&this_rq->lock); 4426 raw_spin_unlock(&this_rq->lock);
4410 if (active_balance) 4427 if (active_balance)
4411 wake_up_process(busiest->migration_thread); 4428 wake_up_process(busiest->migration_thread);
4412 spin_lock(&this_rq->lock); 4429 raw_spin_lock(&this_rq->lock);
4413 4430
4414 } else 4431 } else
4415 sd->nr_balance_failed = 0; 4432 sd->nr_balance_failed = 0;
@@ -4694,7 +4711,7 @@ int select_nohz_load_balancer(int stop_tick)
4694 cpumask_set_cpu(cpu, nohz.cpu_mask); 4711 cpumask_set_cpu(cpu, nohz.cpu_mask);
4695 4712
4696 /* time for ilb owner also to sleep */ 4713 /* time for ilb owner also to sleep */
4697 if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) { 4714 if (cpumask_weight(nohz.cpu_mask) == num_active_cpus()) {
4698 if (atomic_read(&nohz.load_balancer) == cpu) 4715 if (atomic_read(&nohz.load_balancer) == cpu)
4699 atomic_set(&nohz.load_balancer, -1); 4716 atomic_set(&nohz.load_balancer, -1);
4700 return 0; 4717 return 0;
@@ -5278,11 +5295,11 @@ void scheduler_tick(void)
5278 5295
5279 sched_clock_tick(); 5296 sched_clock_tick();
5280 5297
5281 spin_lock(&rq->lock); 5298 raw_spin_lock(&rq->lock);
5282 update_rq_clock(rq); 5299 update_rq_clock(rq);
5283 update_cpu_load(rq); 5300 update_cpu_load(rq);
5284 curr->sched_class->task_tick(rq, curr, 0); 5301 curr->sched_class->task_tick(rq, curr, 0);
5285 spin_unlock(&rq->lock); 5302 raw_spin_unlock(&rq->lock);
5286 5303
5287 perf_event_task_tick(curr, cpu); 5304 perf_event_task_tick(curr, cpu);
5288 5305
@@ -5358,8 +5375,8 @@ static noinline void __schedule_bug(struct task_struct *prev)
5358{ 5375{
5359 struct pt_regs *regs = get_irq_regs(); 5376 struct pt_regs *regs = get_irq_regs();
5360 5377
5361 printk(KERN_ERR "BUG: scheduling while atomic: %s/%d/0x%08x\n", 5378 pr_err("BUG: scheduling while atomic: %s/%d/0x%08x\n",
5362 prev->comm, prev->pid, preempt_count()); 5379 prev->comm, prev->pid, preempt_count());
5363 5380
5364 debug_show_held_locks(prev); 5381 debug_show_held_locks(prev);
5365 print_modules(); 5382 print_modules();
@@ -5396,13 +5413,14 @@ static inline void schedule_debug(struct task_struct *prev)
5396#endif 5413#endif
5397} 5414}
5398 5415
5399static void put_prev_task(struct rq *rq, struct task_struct *p) 5416static void put_prev_task(struct rq *rq, struct task_struct *prev)
5400{ 5417{
5401 u64 runtime = p->se.sum_exec_runtime - p->se.prev_sum_exec_runtime; 5418 if (prev->state == TASK_RUNNING) {
5419 u64 runtime = prev->se.sum_exec_runtime;
5402 5420
5403 update_avg(&p->se.avg_running, runtime); 5421 runtime -= prev->se.prev_sum_exec_runtime;
5422 runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost);
5404 5423
5405 if (p->state == TASK_RUNNING) {
5406 /* 5424 /*
5407 * In order to avoid avg_overlap growing stale when we are 5425 * In order to avoid avg_overlap growing stale when we are
5408 * indeed overlapping and hence not getting put to sleep, grow 5426 * indeed overlapping and hence not getting put to sleep, grow
@@ -5412,12 +5430,9 @@ static void put_prev_task(struct rq *rq, struct task_struct *p)
5412 * correlates to the amount of cache footprint a task can 5430 * correlates to the amount of cache footprint a task can
5413 * build up. 5431 * build up.
5414 */ 5432 */
5415 runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost); 5433 update_avg(&prev->se.avg_overlap, runtime);
5416 update_avg(&p->se.avg_overlap, runtime);
5417 } else {
5418 update_avg(&p->se.avg_running, 0);
5419 } 5434 }
5420 p->sched_class->put_prev_task(rq, p); 5435 prev->sched_class->put_prev_task(rq, prev);
5421} 5436}
5422 5437
5423/* 5438/*
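
With avg_running gone, put_prev_task() folds the clamped runtime of a still-runnable task straight into avg_overlap. update_avg() is, in this era of sched.c, a plain 1/8-weight exponential moving average (definition quoted from memory; treat the shift factor as an assumption):

	static void update_avg(u64 *avg, u64 sample)
	{
		s64 diff = sample - *avg;

		*avg += diff >> 3;	/* new = old + (sample - old) / 8 */
	}
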
@@ -5478,7 +5493,7 @@ need_resched_nonpreemptible:
5478 if (sched_feat(HRTICK)) 5493 if (sched_feat(HRTICK))
5479 hrtick_clear(rq); 5494 hrtick_clear(rq);
5480 5495
5481 spin_lock_irq(&rq->lock); 5496 raw_spin_lock_irq(&rq->lock);
5482 update_rq_clock(rq); 5497 update_rq_clock(rq);
5483 clear_tsk_need_resched(prev); 5498 clear_tsk_need_resched(prev);
5484 5499
@@ -5514,7 +5529,7 @@ need_resched_nonpreemptible:
5514 cpu = smp_processor_id(); 5529 cpu = smp_processor_id();
5515 rq = cpu_rq(cpu); 5530 rq = cpu_rq(cpu);
5516 } else 5531 } else
5517 spin_unlock_irq(&rq->lock); 5532 raw_spin_unlock_irq(&rq->lock);
5518 5533
5519 post_schedule(rq); 5534 post_schedule(rq);
5520 5535
@@ -5931,14 +5946,15 @@ EXPORT_SYMBOL(wait_for_completion_killable);
5931 */ 5946 */
5932bool try_wait_for_completion(struct completion *x) 5947bool try_wait_for_completion(struct completion *x)
5933{ 5948{
5949 unsigned long flags;
5934 int ret = 1; 5950 int ret = 1;
5935 5951
5936 spin_lock_irq(&x->wait.lock); 5952 spin_lock_irqsave(&x->wait.lock, flags);
5937 if (!x->done) 5953 if (!x->done)
5938 ret = 0; 5954 ret = 0;
5939 else 5955 else
5940 x->done--; 5956 x->done--;
5941 spin_unlock_irq(&x->wait.lock); 5957 spin_unlock_irqrestore(&x->wait.lock, flags);
5942 return ret; 5958 return ret;
5943} 5959}
5944EXPORT_SYMBOL(try_wait_for_completion); 5960EXPORT_SYMBOL(try_wait_for_completion);
@@ -5953,12 +5969,13 @@ EXPORT_SYMBOL(try_wait_for_completion);
5953 */ 5969 */
5954bool completion_done(struct completion *x) 5970bool completion_done(struct completion *x)
5955{ 5971{
5972 unsigned long flags;
5956 int ret = 1; 5973 int ret = 1;
5957 5974
5958 spin_lock_irq(&x->wait.lock); 5975 spin_lock_irqsave(&x->wait.lock, flags);
5959 if (!x->done) 5976 if (!x->done)
5960 ret = 0; 5977 ret = 0;
5961 spin_unlock_irq(&x->wait.lock); 5978 spin_unlock_irqrestore(&x->wait.lock, flags);
5962 return ret; 5979 return ret;
5963} 5980}
5964EXPORT_SYMBOL(completion_done); 5981EXPORT_SYMBOL(completion_done);
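The irqsave conversion in these two helpers is not cosmetic: spin_unlock_irq() unconditionally re-enables interrupts, which would corrupt the state of a caller that entered with them disabled. A hedged sketch of such a caller (hypothetical driver handler, not part of this patch):

#include <linux/completion.h>
#include <linux/interrupt.h>

/* Hardirq handlers run with interrupts disabled; the irqsave variants
 * above make this non-blocking poll legal from here. */
static irqreturn_t demo_done_irq(int irq, void *data)
{
        struct completion *done = data;

        if (try_wait_for_completion(done))
                return IRQ_HANDLED;

        return IRQ_NONE;
}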
@@ -6343,7 +6360,7 @@ recheck:
6343 * make sure no PI-waiters arrive (or leave) while we are 6360 * make sure no PI-waiters arrive (or leave) while we are
6344 * changing the priority of the task: 6361 * changing the priority of the task:
6345 */ 6362 */
6346 spin_lock_irqsave(&p->pi_lock, flags); 6363 raw_spin_lock_irqsave(&p->pi_lock, flags);
6347 /* 6364 /*
6348 * To be able to change p->policy safely, the appropriate 6365
6349 * runqueue lock must be held. 6366 * runqueue lock must be held.
@@ -6353,7 +6370,7 @@ recheck:
6353 if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { 6370 if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
6354 policy = oldpolicy = -1; 6371 policy = oldpolicy = -1;
6355 __task_rq_unlock(rq); 6372 __task_rq_unlock(rq);
6356 spin_unlock_irqrestore(&p->pi_lock, flags); 6373 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
6357 goto recheck; 6374 goto recheck;
6358 } 6375 }
6359 update_rq_clock(rq); 6376 update_rq_clock(rq);
@@ -6377,7 +6394,7 @@ recheck:
6377 check_class_changed(rq, p, prev_class, oldprio, running); 6394 check_class_changed(rq, p, prev_class, oldprio, running);
6378 } 6395 }
6379 __task_rq_unlock(rq); 6396 __task_rq_unlock(rq);
6380 spin_unlock_irqrestore(&p->pi_lock, flags); 6397 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
6381 6398
6382 rt_mutex_adjust_pi(p); 6399 rt_mutex_adjust_pi(p);
6383 6400
@@ -6477,7 +6494,7 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
6477 return -EINVAL; 6494 return -EINVAL;
6478 6495
6479 retval = -ESRCH; 6496 retval = -ESRCH;
6480 read_lock(&tasklist_lock); 6497 rcu_read_lock();
6481 p = find_process_by_pid(pid); 6498 p = find_process_by_pid(pid);
6482 if (p) { 6499 if (p) {
6483 retval = security_task_getscheduler(p); 6500 retval = security_task_getscheduler(p);
@@ -6485,7 +6502,7 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
6485 retval = p->policy 6502 retval = p->policy
6486 | (p->sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0); 6503 | (p->sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0);
6487 } 6504 }
6488 read_unlock(&tasklist_lock); 6505 rcu_read_unlock();
6489 return retval; 6506 return retval;
6490} 6507}
6491 6508
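These syscalls swap tasklist_lock for RCU because the pid-to-task lookup only needs the task to stay valid for the duration of the read-side section. A sketch of the general pattern, with find_task_by_vpid() standing in for the file-local find_process_by_pid():

#include <linux/rcupdate.h>
#include <linux/sched.h>

static struct task_struct *demo_lookup(pid_t pid)
{
        struct task_struct *p;

        rcu_read_lock();
        p = find_task_by_vpid(pid);     /* only valid inside this section */
        if (p)
                get_task_struct(p);     /* pin for use after rcu_read_unlock() */
        rcu_read_unlock();

        return p;                       /* caller drops with put_task_struct() */
}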
@@ -6503,7 +6520,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
6503 if (!param || pid < 0) 6520 if (!param || pid < 0)
6504 return -EINVAL; 6521 return -EINVAL;
6505 6522
6506 read_lock(&tasklist_lock); 6523 rcu_read_lock();
6507 p = find_process_by_pid(pid); 6524 p = find_process_by_pid(pid);
6508 retval = -ESRCH; 6525 retval = -ESRCH;
6509 if (!p) 6526 if (!p)
@@ -6514,7 +6531,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
6514 goto out_unlock; 6531 goto out_unlock;
6515 6532
6516 lp.sched_priority = p->rt_priority; 6533 lp.sched_priority = p->rt_priority;
6517 read_unlock(&tasklist_lock); 6534 rcu_read_unlock();
6518 6535
6519 /* 6536 /*
6520 * This one might sleep, we cannot do it with a spinlock held ... 6537 * This one might sleep, we cannot do it with a spinlock held ...
@@ -6524,7 +6541,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
6524 return retval; 6541 return retval;
6525 6542
6526out_unlock: 6543out_unlock:
6527 read_unlock(&tasklist_lock); 6544 rcu_read_unlock();
6528 return retval; 6545 return retval;
6529} 6546}
6530 6547
@@ -6535,22 +6552,18 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
6535 int retval; 6552 int retval;
6536 6553
6537 get_online_cpus(); 6554 get_online_cpus();
6538 read_lock(&tasklist_lock); 6555 rcu_read_lock();
6539 6556
6540 p = find_process_by_pid(pid); 6557 p = find_process_by_pid(pid);
6541 if (!p) { 6558 if (!p) {
6542 read_unlock(&tasklist_lock); 6559 rcu_read_unlock();
6543 put_online_cpus(); 6560 put_online_cpus();
6544 return -ESRCH; 6561 return -ESRCH;
6545 } 6562 }
6546 6563
6547 /* 6564 /* Prevent p from going away */
6548 * It is not safe to call set_cpus_allowed with the
6549 * tasklist_lock held. We will bump the task_struct's
6550 * usage count and then drop tasklist_lock.
6551 */
6552 get_task_struct(p); 6565 get_task_struct(p);
6553 read_unlock(&tasklist_lock); 6566 rcu_read_unlock();
6554 6567
6555 if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { 6568 if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
6556 retval = -ENOMEM; 6569 retval = -ENOMEM;
@@ -6631,10 +6644,12 @@ SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len,
6631long sched_getaffinity(pid_t pid, struct cpumask *mask) 6644long sched_getaffinity(pid_t pid, struct cpumask *mask)
6632{ 6645{
6633 struct task_struct *p; 6646 struct task_struct *p;
6647 unsigned long flags;
6648 struct rq *rq;
6634 int retval; 6649 int retval;
6635 6650
6636 get_online_cpus(); 6651 get_online_cpus();
6637 read_lock(&tasklist_lock); 6652 rcu_read_lock();
6638 6653
6639 retval = -ESRCH; 6654 retval = -ESRCH;
6640 p = find_process_by_pid(pid); 6655 p = find_process_by_pid(pid);
@@ -6645,10 +6660,12 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
6645 if (retval) 6660 if (retval)
6646 goto out_unlock; 6661 goto out_unlock;
6647 6662
6663 rq = task_rq_lock(p, &flags);
6648 cpumask_and(mask, &p->cpus_allowed, cpu_online_mask); 6664 cpumask_and(mask, &p->cpus_allowed, cpu_online_mask);
6665 task_rq_unlock(rq, &flags);
6649 6666
6650out_unlock: 6667out_unlock:
6651 read_unlock(&tasklist_lock); 6668 rcu_read_unlock();
6652 put_online_cpus(); 6669 put_online_cpus();
6653 6670
6654 return retval; 6671 return retval;
@@ -6703,7 +6720,7 @@ SYSCALL_DEFINE0(sched_yield)
6703 */ 6720 */
6704 __release(rq->lock); 6721 __release(rq->lock);
6705 spin_release(&rq->lock.dep_map, 1, _THIS_IP_); 6722 spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
6706 _raw_spin_unlock(&rq->lock); 6723 do_raw_spin_unlock(&rq->lock);
6707 preempt_enable_no_resched(); 6724 preempt_enable_no_resched();
6708 6725
6709 schedule(); 6726 schedule();
@@ -6883,6 +6900,8 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
6883{ 6900{
6884 struct task_struct *p; 6901 struct task_struct *p;
6885 unsigned int time_slice; 6902 unsigned int time_slice;
6903 unsigned long flags;
6904 struct rq *rq;
6886 int retval; 6905 int retval;
6887 struct timespec t; 6906 struct timespec t;
6888 6907
@@ -6890,7 +6909,7 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
6890 return -EINVAL; 6909 return -EINVAL;
6891 6910
6892 retval = -ESRCH; 6911 retval = -ESRCH;
6893 read_lock(&tasklist_lock); 6912 rcu_read_lock();
6894 p = find_process_by_pid(pid); 6913 p = find_process_by_pid(pid);
6895 if (!p) 6914 if (!p)
6896 goto out_unlock; 6915 goto out_unlock;
@@ -6899,15 +6918,17 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
6899 if (retval) 6918 if (retval)
6900 goto out_unlock; 6919 goto out_unlock;
6901 6920
6902 time_slice = p->sched_class->get_rr_interval(p); 6921 rq = task_rq_lock(p, &flags);
6922 time_slice = p->sched_class->get_rr_interval(rq, p);
6923 task_rq_unlock(rq, &flags);
6903 6924
6904 read_unlock(&tasklist_lock); 6925 rcu_read_unlock();
6905 jiffies_to_timespec(time_slice, &t); 6926 jiffies_to_timespec(time_slice, &t);
6906 retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0; 6927 retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0;
6907 return retval; 6928 return retval;
6908 6929
6909out_unlock: 6930out_unlock:
6910 read_unlock(&tasklist_lock); 6931 rcu_read_unlock();
6911 return retval; 6932 return retval;
6912} 6933}
6913 6934
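Since get_rr_interval() now takes the runqueue, the caller must pin it first; task_rq_lock() locks whichever rq the task currently sits on, with IRQs disabled. A minimal sketch of the pattern introduced above:

static unsigned int demo_read_timeslice(struct task_struct *p)
{
        unsigned long flags;
        unsigned int slice;
        struct rq *rq;

        rq = task_rq_lock(p, &flags);   /* p cannot migrate while held */
        slice = p->sched_class->get_rr_interval(rq, p);
        task_rq_unlock(rq, &flags);

        return slice;
}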
@@ -6919,23 +6940,23 @@ void sched_show_task(struct task_struct *p)
6919 unsigned state; 6940 unsigned state;
6920 6941
6921 state = p->state ? __ffs(p->state) + 1 : 0; 6942 state = p->state ? __ffs(p->state) + 1 : 0;
6922 printk(KERN_INFO "%-13.13s %c", p->comm, 6943 pr_info("%-13.13s %c", p->comm,
6923 state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?'); 6944 state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?');
6924#if BITS_PER_LONG == 32 6945#if BITS_PER_LONG == 32
6925 if (state == TASK_RUNNING) 6946 if (state == TASK_RUNNING)
6926 printk(KERN_CONT " running "); 6947 pr_cont(" running ");
6927 else 6948 else
6928 printk(KERN_CONT " %08lx ", thread_saved_pc(p)); 6949 pr_cont(" %08lx ", thread_saved_pc(p));
6929#else 6950#else
6930 if (state == TASK_RUNNING) 6951 if (state == TASK_RUNNING)
6931 printk(KERN_CONT " running task "); 6952 pr_cont(" running task ");
6932 else 6953 else
6933 printk(KERN_CONT " %016lx ", thread_saved_pc(p)); 6954 pr_cont(" %016lx ", thread_saved_pc(p));
6934#endif 6955#endif
6935#ifdef CONFIG_DEBUG_STACK_USAGE 6956#ifdef CONFIG_DEBUG_STACK_USAGE
6936 free = stack_not_used(p); 6957 free = stack_not_used(p);
6937#endif 6958#endif
6938 printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free, 6959 pr_cont("%5lu %5d %6d 0x%08lx\n", free,
6939 task_pid_nr(p), task_pid_nr(p->real_parent), 6960 task_pid_nr(p), task_pid_nr(p->real_parent),
6940 (unsigned long)task_thread_info(p)->flags); 6961 (unsigned long)task_thread_info(p)->flags);
6941 6962
@@ -6947,11 +6968,9 @@ void show_state_filter(unsigned long state_filter)
6947 struct task_struct *g, *p; 6968 struct task_struct *g, *p;
6948 6969
6949#if BITS_PER_LONG == 32 6970#if BITS_PER_LONG == 32
6950 printk(KERN_INFO 6971 pr_info(" task PC stack pid father\n");
6951 " task PC stack pid father\n");
6952#else 6972#else
6953 printk(KERN_INFO 6973 pr_info(" task PC stack pid father\n");
6954 " task PC stack pid father\n");
6955#endif 6974#endif
6956 read_lock(&tasklist_lock); 6975 read_lock(&tasklist_lock);
6957 do_each_thread(g, p) { 6976 do_each_thread(g, p) {
@@ -6995,12 +7014,12 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
6995 struct rq *rq = cpu_rq(cpu); 7014 struct rq *rq = cpu_rq(cpu);
6996 unsigned long flags; 7015 unsigned long flags;
6997 7016
6998 spin_lock_irqsave(&rq->lock, flags); 7017 raw_spin_lock_irqsave(&rq->lock, flags);
6999 7018
7000 __sched_fork(idle); 7019 __sched_fork(idle);
7020 idle->state = TASK_RUNNING;
7001 idle->se.exec_start = sched_clock(); 7021 idle->se.exec_start = sched_clock();
7002 7022
7003 idle->prio = idle->normal_prio = MAX_PRIO;
7004 cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu)); 7023 cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu));
7005 __set_task_cpu(idle, cpu); 7024 __set_task_cpu(idle, cpu);
7006 7025
@@ -7008,7 +7027,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
7008#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) 7027#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
7009 idle->oncpu = 1; 7028 idle->oncpu = 1;
7010#endif 7029#endif
7011 spin_unlock_irqrestore(&rq->lock, flags); 7030 raw_spin_unlock_irqrestore(&rq->lock, flags);
7012 7031
7013 /* Set the preempt count _outside_ the spinlocks! */ 7032 /* Set the preempt count _outside_ the spinlocks! */
7014#if defined(CONFIG_PREEMPT) 7033#if defined(CONFIG_PREEMPT)
@@ -7041,22 +7060,43 @@ cpumask_var_t nohz_cpu_mask;
7041 * 7060 *
7042 * This idea comes from the SD scheduler of Con Kolivas: 7061 * This idea comes from the SD scheduler of Con Kolivas:
7043 */ 7062 */
7044static inline void sched_init_granularity(void) 7063static int get_update_sysctl_factor(void)
7045{ 7064{
7046 unsigned int factor = 1 + ilog2(num_online_cpus()); 7065 unsigned int cpus = min_t(int, num_online_cpus(), 8);
7047 const unsigned long limit = 200000000; 7066 unsigned int factor;
7067
7068 switch (sysctl_sched_tunable_scaling) {
7069 case SCHED_TUNABLESCALING_NONE:
7070 factor = 1;
7071 break;
7072 case SCHED_TUNABLESCALING_LINEAR:
7073 factor = cpus;
7074 break;
7075 case SCHED_TUNABLESCALING_LOG:
7076 default:
7077 factor = 1 + ilog2(cpus);
7078 break;
7079 }
7048 7080
7049 sysctl_sched_min_granularity *= factor; 7081 return factor;
7050 if (sysctl_sched_min_granularity > limit) 7082}
7051 sysctl_sched_min_granularity = limit;
7052 7083
7053 sysctl_sched_latency *= factor; 7084static void update_sysctl(void)
7054 if (sysctl_sched_latency > limit) 7085{
7055 sysctl_sched_latency = limit; 7086 unsigned int factor = get_update_sysctl_factor();
7056 7087
7057 sysctl_sched_wakeup_granularity *= factor; 7088#define SET_SYSCTL(name) \
7089 (sysctl_##name = (factor) * normalized_sysctl_##name)
7090 SET_SYSCTL(sched_min_granularity);
7091 SET_SYSCTL(sched_latency);
7092 SET_SYSCTL(sched_wakeup_granularity);
7093 SET_SYSCTL(sched_shares_ratelimit);
7094#undef SET_SYSCTL
7095}
7058 7096
7059 sysctl_sched_shares_ratelimit *= factor; 7097static inline void sched_init_granularity(void)
7098{
7099 update_sysctl();
7060} 7100}
7061 7101
7062#ifdef CONFIG_SMP 7102#ifdef CONFIG_SMP
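A standalone worked example of the factor computation, assuming a box with 16 online CPUs (capped at 8 by the min_t above): NONE yields 1, LINEAR yields 8, and the default LOG yields 1 + ilog2(8) = 4, so a normalized 5 ms latency becomes an effective 20 ms.

#include <stdio.h>

static unsigned int ilog2_u(unsigned int x)     /* floor(log2(x)) */
{
        unsigned int r = 0;

        while (x >>= 1)
                r++;
        return r;
}

int main(void)
{
        unsigned int cpus = 16 < 8 ? 16 : 8;            /* min(ncpus, 8) */
        unsigned int factor = 1 + ilog2_u(cpus);        /* LOG scaling */

        printf("factor = %u, latency = %u ns\n",
               factor, factor * 5000000U);              /* 4, 20000000 */
        return 0;
}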
@@ -7092,8 +7132,24 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
7092 struct rq *rq; 7132 struct rq *rq;
7093 int ret = 0; 7133 int ret = 0;
7094 7134
7135 /*
7136 * Since we rely on wake-ups to migrate sleeping tasks, don't change
7137 * the ->cpus_allowed mask from under waking tasks, which would be
7138 * possible when we change rq->lock in ttwu(), so synchronize against
7139 * TASK_WAKING to avoid that.
7140 */
7141again:
7142 while (p->state == TASK_WAKING)
7143 cpu_relax();
7144
7095 rq = task_rq_lock(p, &flags); 7145 rq = task_rq_lock(p, &flags);
7096 if (!cpumask_intersects(new_mask, cpu_online_mask)) { 7146
7147 if (p->state == TASK_WAKING) {
7148 task_rq_unlock(rq, &flags);
7149 goto again;
7150 }
7151
7152 if (!cpumask_intersects(new_mask, cpu_active_mask)) {
7097 ret = -EINVAL; 7153 ret = -EINVAL;
7098 goto out; 7154 goto out;
7099 } 7155 }
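The comment above describes a cheap lock-free wait followed by a locked recheck; the recheck is required because the task can re-enter TASK_WAKING in the window between the spin and acquiring rq->lock. A generalized sketch of that pattern with a hypothetical busy flag (READ_ONCE() is used as the modern spelling of the era's ACCESS_ONCE()):

struct demo_obj {
        raw_spinlock_t lock;
        bool busy;                      /* analogous to p->state == TASK_WAKING */
};

static void demo_wait_idle(struct demo_obj *o)
{
again:
        while (READ_ONCE(o->busy))      /* wait without the lock */
                cpu_relax();

        raw_spin_lock(&o->lock);
        if (READ_ONCE(o->busy)) {       /* raced: flag re-set before we locked */
                raw_spin_unlock(&o->lock);
                goto again;
        }
        /* o->busy is stably false while the lock is held */
        raw_spin_unlock(&o->lock);
}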
@@ -7115,7 +7171,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
7115 if (cpumask_test_cpu(task_cpu(p), new_mask)) 7171 if (cpumask_test_cpu(task_cpu(p), new_mask))
7116 goto out; 7172 goto out;
7117 7173
7118 if (migrate_task(p, cpumask_any_and(cpu_online_mask, new_mask), &req)) { 7174 if (migrate_task(p, cpumask_any_and(cpu_active_mask, new_mask), &req)) {
7119 /* Need help from migration thread: drop lock and wait. */ 7175 /* Need help from migration thread: drop lock and wait. */
7120 struct task_struct *mt = rq->migration_thread; 7176 struct task_struct *mt = rq->migration_thread;
7121 7177
@@ -7148,7 +7204,7 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
7148static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) 7204static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
7149{ 7205{
7150 struct rq *rq_dest, *rq_src; 7206 struct rq *rq_dest, *rq_src;
7151 int ret = 0, on_rq; 7207 int ret = 0;
7152 7208
7153 if (unlikely(!cpu_active(dest_cpu))) 7209 if (unlikely(!cpu_active(dest_cpu)))
7154 return ret; 7210 return ret;
@@ -7164,12 +7220,13 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
7164 if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) 7220 if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
7165 goto fail; 7221 goto fail;
7166 7222
7167 on_rq = p->se.on_rq; 7223 /*
7168 if (on_rq) 7224 * If we're not on a rq, the next wake-up will ensure we're
7225 * placed properly.
7226 */
7227 if (p->se.on_rq) {
7169 deactivate_task(rq_src, p, 0); 7228 deactivate_task(rq_src, p, 0);
7170 7229 set_task_cpu(p, dest_cpu);
7171 set_task_cpu(p, dest_cpu);
7172 if (on_rq) {
7173 activate_task(rq_dest, p, 0); 7230 activate_task(rq_dest, p, 0);
7174 check_preempt_curr(rq_dest, p, 0); 7231 check_preempt_curr(rq_dest, p, 0);
7175 } 7232 }
@@ -7204,10 +7261,10 @@ static int migration_thread(void *data)
7204 struct migration_req *req; 7261 struct migration_req *req;
7205 struct list_head *head; 7262 struct list_head *head;
7206 7263
7207 spin_lock_irq(&rq->lock); 7264 raw_spin_lock_irq(&rq->lock);
7208 7265
7209 if (cpu_is_offline(cpu)) { 7266 if (cpu_is_offline(cpu)) {
7210 spin_unlock_irq(&rq->lock); 7267 raw_spin_unlock_irq(&rq->lock);
7211 break; 7268 break;
7212 } 7269 }
7213 7270
@@ -7219,7 +7276,7 @@ static int migration_thread(void *data)
7219 head = &rq->migration_queue; 7276 head = &rq->migration_queue;
7220 7277
7221 if (list_empty(head)) { 7278 if (list_empty(head)) {
7222 spin_unlock_irq(&rq->lock); 7279 raw_spin_unlock_irq(&rq->lock);
7223 schedule(); 7280 schedule();
7224 set_current_state(TASK_INTERRUPTIBLE); 7281 set_current_state(TASK_INTERRUPTIBLE);
7225 continue; 7282 continue;
@@ -7228,14 +7285,14 @@ static int migration_thread(void *data)
7228 list_del_init(head->next); 7285 list_del_init(head->next);
7229 7286
7230 if (req->task != NULL) { 7287 if (req->task != NULL) {
7231 spin_unlock(&rq->lock); 7288 raw_spin_unlock(&rq->lock);
7232 __migrate_task(req->task, cpu, req->dest_cpu); 7289 __migrate_task(req->task, cpu, req->dest_cpu);
7233 } else if (likely(cpu == (badcpu = smp_processor_id()))) { 7290 } else if (likely(cpu == (badcpu = smp_processor_id()))) {
7234 req->dest_cpu = RCU_MIGRATION_GOT_QS; 7291 req->dest_cpu = RCU_MIGRATION_GOT_QS;
7235 spin_unlock(&rq->lock); 7292 raw_spin_unlock(&rq->lock);
7236 } else { 7293 } else {
7237 req->dest_cpu = RCU_MIGRATION_MUST_SYNC; 7294 req->dest_cpu = RCU_MIGRATION_MUST_SYNC;
7238 spin_unlock(&rq->lock); 7295 raw_spin_unlock(&rq->lock);
7239 WARN_ONCE(1, "migration_thread() on CPU %d, expected %d\n", badcpu, cpu); 7296 WARN_ONCE(1, "migration_thread() on CPU %d, expected %d\n", badcpu, cpu);
7240 } 7297 }
7241 local_irq_enable(); 7298 local_irq_enable();
@@ -7265,37 +7322,10 @@ static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu)
7265static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) 7322static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
7266{ 7323{
7267 int dest_cpu; 7324 int dest_cpu;
7268 const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(dead_cpu));
7269 7325
7270again: 7326again:
7271 /* Look for allowed, online CPU in same node. */ 7327 dest_cpu = select_fallback_rq(dead_cpu, p);
7272 for_each_cpu_and(dest_cpu, nodemask, cpu_online_mask)
7273 if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
7274 goto move;
7275
7276 /* Any allowed, online CPU? */
7277 dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_online_mask);
7278 if (dest_cpu < nr_cpu_ids)
7279 goto move;
7280 7328
7281 /* No more Mr. Nice Guy. */
7282 if (dest_cpu >= nr_cpu_ids) {
7283 cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
7284 dest_cpu = cpumask_any_and(cpu_online_mask, &p->cpus_allowed);
7285
7286 /*
7287 * Don't tell them about moving exiting tasks or
7288 * kernel threads (both mm NULL), since they never
7289 * leave kernel.
7290 */
7291 if (p->mm && printk_ratelimit()) {
7292 printk(KERN_INFO "process %d (%s) no "
7293 "longer affine to cpu%d\n",
7294 task_pid_nr(p), p->comm, dead_cpu);
7295 }
7296 }
7297
7298move:
7299 /* It can have affinity changed while we were choosing. */ 7329 /* It can have affinity changed while we were choosing. */
7300 if (unlikely(!__migrate_task_irq(p, dead_cpu, dest_cpu))) 7330 if (unlikely(!__migrate_task_irq(p, dead_cpu, dest_cpu)))
7301 goto again; 7331 goto again;
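The fallback cascade deleted above is not gone; it moved into select_fallback_rq() so other callers can share it. A simplified sketch of the cascade, reconstructed from the removed lines (not the exact in-tree body):

static int demo_select_fallback_rq(int dead_cpu, struct task_struct *p)
{
        const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(dead_cpu));
        int dest_cpu;

        /* Allowed, online CPU on the same node: cheapest migration. */
        for_each_cpu_and(dest_cpu, nodemask, cpu_online_mask)
                if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
                        return dest_cpu;

        /* Any allowed, online CPU. */
        dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_online_mask);
        if (dest_cpu < nr_cpu_ids)
                return dest_cpu;

        /* No more Mr. Nice Guy: let cpuset widen the mask and pick again. */
        cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
        return cpumask_any_and(cpu_online_mask, &p->cpus_allowed);
}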
@@ -7310,7 +7340,7 @@ move:
7310 */ 7340 */
7311static void migrate_nr_uninterruptible(struct rq *rq_src) 7341static void migrate_nr_uninterruptible(struct rq *rq_src)
7312{ 7342{
7313 struct rq *rq_dest = cpu_rq(cpumask_any(cpu_online_mask)); 7343 struct rq *rq_dest = cpu_rq(cpumask_any(cpu_active_mask));
7314 unsigned long flags; 7344 unsigned long flags;
7315 7345
7316 local_irq_save(flags); 7346 local_irq_save(flags);
@@ -7358,14 +7388,14 @@ void sched_idle_next(void)
7358 * Strictly not necessary since the rest of the CPUs are stopped by now 7388
7359 * and interrupts disabled on the current cpu. 7389 * and interrupts disabled on the current cpu.
7360 */ 7390 */
7361 spin_lock_irqsave(&rq->lock, flags); 7391 raw_spin_lock_irqsave(&rq->lock, flags);
7362 7392
7363 __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1); 7393 __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1);
7364 7394
7365 update_rq_clock(rq); 7395 update_rq_clock(rq);
7366 activate_task(rq, p, 0); 7396 activate_task(rq, p, 0);
7367 7397
7368 spin_unlock_irqrestore(&rq->lock, flags); 7398 raw_spin_unlock_irqrestore(&rq->lock, flags);
7369} 7399}
7370 7400
7371/* 7401/*
@@ -7401,9 +7431,9 @@ static void migrate_dead(unsigned int dead_cpu, struct task_struct *p)
7401 * that's OK. No task can be added to this CPU, so iteration is 7431 * that's OK. No task can be added to this CPU, so iteration is
7402 * fine. 7432 * fine.
7403 */ 7433 */
7404 spin_unlock_irq(&rq->lock); 7434 raw_spin_unlock_irq(&rq->lock);
7405 move_task_off_dead_cpu(dead_cpu, p); 7435 move_task_off_dead_cpu(dead_cpu, p);
7406 spin_lock_irq(&rq->lock); 7436 raw_spin_lock_irq(&rq->lock);
7407 7437
7408 put_task_struct(p); 7438 put_task_struct(p);
7409} 7439}
@@ -7563,7 +7593,7 @@ static ctl_table *sd_alloc_ctl_cpu_table(int cpu)
7563static struct ctl_table_header *sd_sysctl_header; 7593static struct ctl_table_header *sd_sysctl_header;
7564static void register_sched_domain_sysctl(void) 7594static void register_sched_domain_sysctl(void)
7565{ 7595{
7566 int i, cpu_num = num_online_cpus(); 7596 int i, cpu_num = num_possible_cpus();
7567 struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1); 7597 struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1);
7568 char buf[32]; 7598 char buf[32];
7569 7599
@@ -7573,7 +7603,7 @@ static void register_sched_domain_sysctl(void)
7573 if (entry == NULL) 7603 if (entry == NULL)
7574 return; 7604 return;
7575 7605
7576 for_each_online_cpu(i) { 7606 for_each_possible_cpu(i) {
7577 snprintf(buf, 32, "cpu%d", i); 7607 snprintf(buf, 32, "cpu%d", i);
7578 entry->procname = kstrdup(buf, GFP_KERNEL); 7608 entry->procname = kstrdup(buf, GFP_KERNEL);
7579 entry->mode = 0555; 7609 entry->mode = 0555;
@@ -7669,13 +7699,13 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
7669 7699
7670 /* Update our root-domain */ 7700 /* Update our root-domain */
7671 rq = cpu_rq(cpu); 7701 rq = cpu_rq(cpu);
7672 spin_lock_irqsave(&rq->lock, flags); 7702 raw_spin_lock_irqsave(&rq->lock, flags);
7673 if (rq->rd) { 7703 if (rq->rd) {
7674 BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); 7704 BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
7675 7705
7676 set_rq_online(rq); 7706 set_rq_online(rq);
7677 } 7707 }
7678 spin_unlock_irqrestore(&rq->lock, flags); 7708 raw_spin_unlock_irqrestore(&rq->lock, flags);
7679 break; 7709 break;
7680 7710
7681#ifdef CONFIG_HOTPLUG_CPU 7711#ifdef CONFIG_HOTPLUG_CPU
@@ -7700,14 +7730,13 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
7700 put_task_struct(rq->migration_thread); 7730 put_task_struct(rq->migration_thread);
7701 rq->migration_thread = NULL; 7731 rq->migration_thread = NULL;
7702 /* Idle task back to normal (off runqueue, low prio) */ 7732 /* Idle task back to normal (off runqueue, low prio) */
7703 spin_lock_irq(&rq->lock); 7733 raw_spin_lock_irq(&rq->lock);
7704 update_rq_clock(rq); 7734 update_rq_clock(rq);
7705 deactivate_task(rq, rq->idle, 0); 7735 deactivate_task(rq, rq->idle, 0);
7706 rq->idle->static_prio = MAX_PRIO;
7707 __setscheduler(rq, rq->idle, SCHED_NORMAL, 0); 7736 __setscheduler(rq, rq->idle, SCHED_NORMAL, 0);
7708 rq->idle->sched_class = &idle_sched_class; 7737 rq->idle->sched_class = &idle_sched_class;
7709 migrate_dead_tasks(cpu); 7738 migrate_dead_tasks(cpu);
7710 spin_unlock_irq(&rq->lock); 7739 raw_spin_unlock_irq(&rq->lock);
7711 cpuset_unlock(); 7740 cpuset_unlock();
7712 migrate_nr_uninterruptible(rq); 7741 migrate_nr_uninterruptible(rq);
7713 BUG_ON(rq->nr_running != 0); 7742 BUG_ON(rq->nr_running != 0);
@@ -7717,30 +7746,30 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
7717 * they didn't take sched_hotcpu_mutex. Just wake up 7746 * they didn't take sched_hotcpu_mutex. Just wake up
7718 * the requestors. 7747 * the requestors.
7719 */ 7748 */
7720 spin_lock_irq(&rq->lock); 7749 raw_spin_lock_irq(&rq->lock);
7721 while (!list_empty(&rq->migration_queue)) { 7750 while (!list_empty(&rq->migration_queue)) {
7722 struct migration_req *req; 7751 struct migration_req *req;
7723 7752
7724 req = list_entry(rq->migration_queue.next, 7753 req = list_entry(rq->migration_queue.next,
7725 struct migration_req, list); 7754 struct migration_req, list);
7726 list_del_init(&req->list); 7755 list_del_init(&req->list);
7727 spin_unlock_irq(&rq->lock); 7756 raw_spin_unlock_irq(&rq->lock);
7728 complete(&req->done); 7757 complete(&req->done);
7729 spin_lock_irq(&rq->lock); 7758 raw_spin_lock_irq(&rq->lock);
7730 } 7759 }
7731 spin_unlock_irq(&rq->lock); 7760 raw_spin_unlock_irq(&rq->lock);
7732 break; 7761 break;
7733 7762
7734 case CPU_DYING: 7763 case CPU_DYING:
7735 case CPU_DYING_FROZEN: 7764 case CPU_DYING_FROZEN:
7736 /* Update our root-domain */ 7765 /* Update our root-domain */
7737 rq = cpu_rq(cpu); 7766 rq = cpu_rq(cpu);
7738 spin_lock_irqsave(&rq->lock, flags); 7767 raw_spin_lock_irqsave(&rq->lock, flags);
7739 if (rq->rd) { 7768 if (rq->rd) {
7740 BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); 7769 BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
7741 set_rq_offline(rq); 7770 set_rq_offline(rq);
7742 } 7771 }
7743 spin_unlock_irqrestore(&rq->lock, flags); 7772 raw_spin_unlock_irqrestore(&rq->lock, flags);
7744 break; 7773 break;
7745#endif 7774#endif
7746 } 7775 }
@@ -7799,48 +7828,44 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
7799 printk(KERN_DEBUG "%*s domain %d: ", level, "", level); 7828 printk(KERN_DEBUG "%*s domain %d: ", level, "", level);
7800 7829
7801 if (!(sd->flags & SD_LOAD_BALANCE)) { 7830 if (!(sd->flags & SD_LOAD_BALANCE)) {
7802 printk("does not load-balance\n"); 7831 pr_cont("does not load-balance\n");
7803 if (sd->parent) 7832 if (sd->parent)
7804 printk(KERN_ERR "ERROR: !SD_LOAD_BALANCE domain" 7833 pr_err("ERROR: !SD_LOAD_BALANCE domain has parent\n");
7805 " has parent");
7806 return -1; 7834 return -1;
7807 } 7835 }
7808 7836
7809 printk(KERN_CONT "span %s level %s\n", str, sd->name); 7837 pr_cont("span %s level %s\n", str, sd->name);
7810 7838
7811 if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) { 7839 if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) {
7812 printk(KERN_ERR "ERROR: domain->span does not contain " 7840 pr_err("ERROR: domain->span does not contain CPU%d\n", cpu);
7813 "CPU%d\n", cpu);
7814 } 7841 }
7815 if (!cpumask_test_cpu(cpu, sched_group_cpus(group))) { 7842 if (!cpumask_test_cpu(cpu, sched_group_cpus(group))) {
7816 printk(KERN_ERR "ERROR: domain->groups does not contain" 7843 pr_err("ERROR: domain->groups does not contain CPU%d\n", cpu);
7817 " CPU%d\n", cpu);
7818 } 7844 }
7819 7845
7820 printk(KERN_DEBUG "%*s groups:", level + 1, ""); 7846 printk(KERN_DEBUG "%*s groups:", level + 1, "");
7821 do { 7847 do {
7822 if (!group) { 7848 if (!group) {
7823 printk("\n"); 7849 pr_cont("\n");
7824 printk(KERN_ERR "ERROR: group is NULL\n"); 7850 pr_err("ERROR: group is NULL\n");
7825 break; 7851 break;
7826 } 7852 }
7827 7853
7828 if (!group->cpu_power) { 7854 if (!group->cpu_power) {
7829 printk(KERN_CONT "\n"); 7855 pr_cont("\n");
7830 printk(KERN_ERR "ERROR: domain->cpu_power not " 7856 pr_err("ERROR: domain->cpu_power not set\n");
7831 "set\n");
7832 break; 7857 break;
7833 } 7858 }
7834 7859
7835 if (!cpumask_weight(sched_group_cpus(group))) { 7860 if (!cpumask_weight(sched_group_cpus(group))) {
7836 printk(KERN_CONT "\n"); 7861 pr_cont("\n");
7837 printk(KERN_ERR "ERROR: empty group\n"); 7862 pr_err("ERROR: empty group\n");
7838 break; 7863 break;
7839 } 7864 }
7840 7865
7841 if (cpumask_intersects(groupmask, sched_group_cpus(group))) { 7866 if (cpumask_intersects(groupmask, sched_group_cpus(group))) {
7842 printk(KERN_CONT "\n"); 7867 pr_cont("\n");
7843 printk(KERN_ERR "ERROR: repeated CPUs\n"); 7868 pr_err("ERROR: repeated CPUs\n");
7844 break; 7869 break;
7845 } 7870 }
7846 7871
@@ -7848,23 +7873,21 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
7848 7873
7849 cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); 7874 cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
7850 7875
7851 printk(KERN_CONT " %s", str); 7876 pr_cont(" %s", str);
7852 if (group->cpu_power != SCHED_LOAD_SCALE) { 7877 if (group->cpu_power != SCHED_LOAD_SCALE) {
7853 printk(KERN_CONT " (cpu_power = %d)", 7878 pr_cont(" (cpu_power = %d)", group->cpu_power);
7854 group->cpu_power);
7855 } 7879 }
7856 7880
7857 group = group->next; 7881 group = group->next;
7858 } while (group != sd->groups); 7882 } while (group != sd->groups);
7859 printk(KERN_CONT "\n"); 7883 pr_cont("\n");
7860 7884
7861 if (!cpumask_equal(sched_domain_span(sd), groupmask)) 7885 if (!cpumask_equal(sched_domain_span(sd), groupmask))
7862 printk(KERN_ERR "ERROR: groups don't span domain->span\n"); 7886 pr_err("ERROR: groups don't span domain->span\n");
7863 7887
7864 if (sd->parent && 7888 if (sd->parent &&
7865 !cpumask_subset(groupmask, sched_domain_span(sd->parent))) 7889 !cpumask_subset(groupmask, sched_domain_span(sd->parent)))
7866 printk(KERN_ERR "ERROR: parent span is not a superset " 7890 pr_err("ERROR: parent span is not a superset of domain->span\n");
7867 "of domain->span\n");
7868 return 0; 7891 return 0;
7869} 7892}
7870 7893
@@ -7970,7 +7993,7 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
7970 struct root_domain *old_rd = NULL; 7993 struct root_domain *old_rd = NULL;
7971 unsigned long flags; 7994 unsigned long flags;
7972 7995
7973 spin_lock_irqsave(&rq->lock, flags); 7996 raw_spin_lock_irqsave(&rq->lock, flags);
7974 7997
7975 if (rq->rd) { 7998 if (rq->rd) {
7976 old_rd = rq->rd; 7999 old_rd = rq->rd;
@@ -7996,7 +8019,7 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
7996 if (cpumask_test_cpu(rq->cpu, cpu_active_mask)) 8019 if (cpumask_test_cpu(rq->cpu, cpu_active_mask))
7997 set_rq_online(rq); 8020 set_rq_online(rq);
7998 8021
7999 spin_unlock_irqrestore(&rq->lock, flags); 8022 raw_spin_unlock_irqrestore(&rq->lock, flags);
8000 8023
8001 if (old_rd) 8024 if (old_rd)
8002 free_rootdomain(old_rd); 8025 free_rootdomain(old_rd);
@@ -8282,14 +8305,14 @@ enum s_alloc {
8282 */ 8305 */
8283#ifdef CONFIG_SCHED_SMT 8306#ifdef CONFIG_SCHED_SMT
8284static DEFINE_PER_CPU(struct static_sched_domain, cpu_domains); 8307static DEFINE_PER_CPU(struct static_sched_domain, cpu_domains);
8285static DEFINE_PER_CPU(struct static_sched_group, sched_group_cpus); 8308static DEFINE_PER_CPU(struct static_sched_group, sched_groups);
8286 8309
8287static int 8310static int
8288cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map, 8311cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map,
8289 struct sched_group **sg, struct cpumask *unused) 8312 struct sched_group **sg, struct cpumask *unused)
8290{ 8313{
8291 if (sg) 8314 if (sg)
8292 *sg = &per_cpu(sched_group_cpus, cpu).sg; 8315 *sg = &per_cpu(sched_groups, cpu).sg;
8293 return cpu; 8316 return cpu;
8294} 8317}
8295#endif /* CONFIG_SCHED_SMT */ 8318#endif /* CONFIG_SCHED_SMT */
@@ -8420,8 +8443,7 @@ static int build_numa_sched_groups(struct s_data *d,
8420 sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(), 8443 sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(),
8421 GFP_KERNEL, num); 8444 GFP_KERNEL, num);
8422 if (!sg) { 8445 if (!sg) {
8423 printk(KERN_WARNING "Can not alloc domain group for node %d\n", 8446 pr_warning("Can not alloc domain group for node %d\n", num);
8424 num);
8425 return -ENOMEM; 8447 return -ENOMEM;
8426 } 8448 }
8427 d->sched_group_nodes[num] = sg; 8449 d->sched_group_nodes[num] = sg;
@@ -8450,8 +8472,8 @@ static int build_numa_sched_groups(struct s_data *d,
8450 sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(), 8472 sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(),
8451 GFP_KERNEL, num); 8473 GFP_KERNEL, num);
8452 if (!sg) { 8474 if (!sg) {
8453 printk(KERN_WARNING 8475 pr_warning("Can not alloc domain group for node %d\n",
8454 "Can not alloc domain group for node %d\n", j); 8476 j);
8455 return -ENOMEM; 8477 return -ENOMEM;
8456 } 8478 }
8457 sg->cpu_power = 0; 8479 sg->cpu_power = 0;
@@ -8679,7 +8701,7 @@ static enum s_alloc __visit_domain_allocation_hell(struct s_data *d,
8679 d->sched_group_nodes = kcalloc(nr_node_ids, 8701 d->sched_group_nodes = kcalloc(nr_node_ids,
8680 sizeof(struct sched_group *), GFP_KERNEL); 8702 sizeof(struct sched_group *), GFP_KERNEL);
8681 if (!d->sched_group_nodes) { 8703 if (!d->sched_group_nodes) {
8682 printk(KERN_WARNING "Can not alloc sched group node list\n"); 8704 pr_warning("Can not alloc sched group node list\n");
8683 return sa_notcovered; 8705 return sa_notcovered;
8684 } 8706 }
8685 sched_group_nodes_bycpu[cpumask_first(cpu_map)] = d->sched_group_nodes; 8707 sched_group_nodes_bycpu[cpumask_first(cpu_map)] = d->sched_group_nodes;
@@ -8696,7 +8718,7 @@ static enum s_alloc __visit_domain_allocation_hell(struct s_data *d,
8696 return sa_send_covered; 8718 return sa_send_covered;
8697 d->rd = alloc_rootdomain(); 8719 d->rd = alloc_rootdomain();
8698 if (!d->rd) { 8720 if (!d->rd) {
8699 printk(KERN_WARNING "Cannot alloc root domain\n"); 8721 pr_warning("Cannot alloc root domain\n");
8700 return sa_tmpmask; 8722 return sa_tmpmask;
8701 } 8723 }
8702 return sa_rootdomain; 8724 return sa_rootdomain;
@@ -9099,7 +9121,7 @@ match1:
9099 if (doms_new == NULL) { 9121 if (doms_new == NULL) {
9100 ndoms_cur = 0; 9122 ndoms_cur = 0;
9101 doms_new = &fallback_doms; 9123 doms_new = &fallback_doms;
9102 cpumask_andnot(doms_new[0], cpu_online_mask, cpu_isolated_map); 9124 cpumask_andnot(doms_new[0], cpu_active_mask, cpu_isolated_map);
9103 WARN_ON_ONCE(dattr_new); 9125 WARN_ON_ONCE(dattr_new);
9104 } 9126 }
9105 9127
@@ -9230,8 +9252,10 @@ static int update_sched_domains(struct notifier_block *nfb,
9230 switch (action) { 9252 switch (action) {
9231 case CPU_ONLINE: 9253 case CPU_ONLINE:
9232 case CPU_ONLINE_FROZEN: 9254 case CPU_ONLINE_FROZEN:
9233 case CPU_DEAD: 9255 case CPU_DOWN_PREPARE:
9234 case CPU_DEAD_FROZEN: 9256 case CPU_DOWN_PREPARE_FROZEN:
9257 case CPU_DOWN_FAILED:
9258 case CPU_DOWN_FAILED_FROZEN:
9235 partition_sched_domains(1, NULL, NULL); 9259 partition_sched_domains(1, NULL, NULL);
9236 return NOTIFY_OK; 9260 return NOTIFY_OK;
9237 9261
@@ -9278,7 +9302,7 @@ void __init sched_init_smp(void)
9278#endif 9302#endif
9279 get_online_cpus(); 9303 get_online_cpus();
9280 mutex_lock(&sched_domains_mutex); 9304 mutex_lock(&sched_domains_mutex);
9281 arch_init_sched_domains(cpu_online_mask); 9305 arch_init_sched_domains(cpu_active_mask);
9282 cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map); 9306 cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
9283 if (cpumask_empty(non_isolated_cpus)) 9307 if (cpumask_empty(non_isolated_cpus))
9284 cpumask_set_cpu(smp_processor_id(), non_isolated_cpus); 9308 cpumask_set_cpu(smp_processor_id(), non_isolated_cpus);
@@ -9351,13 +9375,13 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
9351#ifdef CONFIG_SMP 9375#ifdef CONFIG_SMP
9352 rt_rq->rt_nr_migratory = 0; 9376 rt_rq->rt_nr_migratory = 0;
9353 rt_rq->overloaded = 0; 9377 rt_rq->overloaded = 0;
9354 plist_head_init(&rt_rq->pushable_tasks, &rq->lock); 9378 plist_head_init_raw(&rt_rq->pushable_tasks, &rq->lock);
9355#endif 9379#endif
9356 9380
9357 rt_rq->rt_time = 0; 9381 rt_rq->rt_time = 0;
9358 rt_rq->rt_throttled = 0; 9382 rt_rq->rt_throttled = 0;
9359 rt_rq->rt_runtime = 0; 9383 rt_rq->rt_runtime = 0;
9360 spin_lock_init(&rt_rq->rt_runtime_lock); 9384 raw_spin_lock_init(&rt_rq->rt_runtime_lock);
9361 9385
9362#ifdef CONFIG_RT_GROUP_SCHED 9386#ifdef CONFIG_RT_GROUP_SCHED
9363 rt_rq->rt_nr_boosted = 0; 9387 rt_rq->rt_nr_boosted = 0;
@@ -9517,7 +9541,7 @@ void __init sched_init(void)
9517 struct rq *rq; 9541 struct rq *rq;
9518 9542
9519 rq = cpu_rq(i); 9543 rq = cpu_rq(i);
9520 spin_lock_init(&rq->lock); 9544 raw_spin_lock_init(&rq->lock);
9521 rq->nr_running = 0; 9545 rq->nr_running = 0;
9522 rq->calc_load_active = 0; 9546 rq->calc_load_active = 0;
9523 rq->calc_load_update = jiffies + LOAD_FREQ; 9547 rq->calc_load_update = jiffies + LOAD_FREQ;
@@ -9577,7 +9601,7 @@ void __init sched_init(void)
9577#elif defined CONFIG_USER_SCHED 9601#elif defined CONFIG_USER_SCHED
9578 init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, 0, NULL); 9602 init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, 0, NULL);
9579 init_tg_rt_entry(&init_task_group, 9603 init_tg_rt_entry(&init_task_group,
9580 &per_cpu(init_rt_rq, i), 9604 &per_cpu(init_rt_rq_var, i),
9581 &per_cpu(init_sched_rt_entity, i), i, 1, 9605 &per_cpu(init_sched_rt_entity, i), i, 1,
9582 root_task_group.rt_se[i]); 9606 root_task_group.rt_se[i]);
9583#endif 9607#endif
@@ -9615,7 +9639,7 @@ void __init sched_init(void)
9615#endif 9639#endif
9616 9640
9617#ifdef CONFIG_RT_MUTEXES 9641#ifdef CONFIG_RT_MUTEXES
9618 plist_head_init(&init_task.pi_waiters, &init_task.pi_lock); 9642 plist_head_init_raw(&init_task.pi_waiters, &init_task.pi_lock);
9619#endif 9643#endif
9620 9644
9621 /* 9645 /*
@@ -9659,7 +9683,7 @@ void __init sched_init(void)
9659#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP 9683#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
9660static inline int preempt_count_equals(int preempt_offset) 9684static inline int preempt_count_equals(int preempt_offset)
9661{ 9685{
9662 int nested = preempt_count() & ~PREEMPT_ACTIVE; 9686 int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth();
9663 9687
9664 return (nested == PREEMPT_INATOMIC_BASE + preempt_offset); 9688 return (nested == PREEMPT_INATOMIC_BASE + preempt_offset);
9665} 9689}
@@ -9676,13 +9700,11 @@ void __might_sleep(char *file, int line, int preempt_offset)
9676 return; 9700 return;
9677 prev_jiffy = jiffies; 9701 prev_jiffy = jiffies;
9678 9702
9679 printk(KERN_ERR 9703 pr_err("BUG: sleeping function called from invalid context at %s:%d\n",
9680 "BUG: sleeping function called from invalid context at %s:%d\n", 9704 file, line);
9681 file, line); 9705 pr_err("in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n",
9682 printk(KERN_ERR 9706 in_atomic(), irqs_disabled(),
9683 "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n", 9707 current->pid, current->comm);
9684 in_atomic(), irqs_disabled(),
9685 current->pid, current->comm);
9686 9708
9687 debug_show_held_locks(current); 9709 debug_show_held_locks(current);
9688 if (irqs_disabled()) 9710 if (irqs_disabled())
@@ -9740,13 +9762,13 @@ void normalize_rt_tasks(void)
9740 continue; 9762 continue;
9741 } 9763 }
9742 9764
9743 spin_lock(&p->pi_lock); 9765 raw_spin_lock(&p->pi_lock);
9744 rq = __task_rq_lock(p); 9766 rq = __task_rq_lock(p);
9745 9767
9746 normalize_task(rq, p); 9768 normalize_task(rq, p);
9747 9769
9748 __task_rq_unlock(rq); 9770 __task_rq_unlock(rq);
9749 spin_unlock(&p->pi_lock); 9771 raw_spin_unlock(&p->pi_lock);
9750 } while_each_thread(g, p); 9772 } while_each_thread(g, p);
9751 9773
9752 read_unlock_irqrestore(&tasklist_lock, flags); 9774 read_unlock_irqrestore(&tasklist_lock, flags);
@@ -9842,13 +9864,15 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
9842 se = kzalloc_node(sizeof(struct sched_entity), 9864 se = kzalloc_node(sizeof(struct sched_entity),
9843 GFP_KERNEL, cpu_to_node(i)); 9865 GFP_KERNEL, cpu_to_node(i));
9844 if (!se) 9866 if (!se)
9845 goto err; 9867 goto err_free_rq;
9846 9868
9847 init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]); 9869 init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]);
9848 } 9870 }
9849 9871
9850 return 1; 9872 return 1;
9851 9873
9874 err_free_rq:
9875 kfree(cfs_rq);
9852 err: 9876 err:
9853 return 0; 9877 return 0;
9854} 9878}
@@ -9930,13 +9954,15 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
9930 rt_se = kzalloc_node(sizeof(struct sched_rt_entity), 9954 rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
9931 GFP_KERNEL, cpu_to_node(i)); 9955 GFP_KERNEL, cpu_to_node(i));
9932 if (!rt_se) 9956 if (!rt_se)
9933 goto err; 9957 goto err_free_rq;
9934 9958
9935 init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]); 9959 init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]);
9936 } 9960 }
9937 9961
9938 return 1; 9962 return 1;
9939 9963
9964 err_free_rq:
9965 kfree(rt_rq);
9940 err: 9966 err:
9941 return 0; 9967 return 0;
9942} 9968}
@@ -10070,7 +10096,7 @@ void sched_move_task(struct task_struct *tsk)
10070 10096
10071#ifdef CONFIG_FAIR_GROUP_SCHED 10097#ifdef CONFIG_FAIR_GROUP_SCHED
10072 if (tsk->sched_class->moved_group) 10098 if (tsk->sched_class->moved_group)
10073 tsk->sched_class->moved_group(tsk); 10099 tsk->sched_class->moved_group(tsk, on_rq);
10074#endif 10100#endif
10075 10101
10076 if (unlikely(running)) 10102 if (unlikely(running))
@@ -10105,9 +10131,9 @@ static void set_se_shares(struct sched_entity *se, unsigned long shares)
10105 struct rq *rq = cfs_rq->rq; 10131 struct rq *rq = cfs_rq->rq;
10106 unsigned long flags; 10132 unsigned long flags;
10107 10133
10108 spin_lock_irqsave(&rq->lock, flags); 10134 raw_spin_lock_irqsave(&rq->lock, flags);
10109 __set_se_shares(se, shares); 10135 __set_se_shares(se, shares);
10110 spin_unlock_irqrestore(&rq->lock, flags); 10136 raw_spin_unlock_irqrestore(&rq->lock, flags);
10111} 10137}
10112 10138
10113static DEFINE_MUTEX(shares_mutex); 10139static DEFINE_MUTEX(shares_mutex);
@@ -10292,18 +10318,18 @@ static int tg_set_bandwidth(struct task_group *tg,
10292 if (err) 10318 if (err)
10293 goto unlock; 10319 goto unlock;
10294 10320
10295 spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock); 10321 raw_spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock);
10296 tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period); 10322 tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period);
10297 tg->rt_bandwidth.rt_runtime = rt_runtime; 10323 tg->rt_bandwidth.rt_runtime = rt_runtime;
10298 10324
10299 for_each_possible_cpu(i) { 10325 for_each_possible_cpu(i) {
10300 struct rt_rq *rt_rq = tg->rt_rq[i]; 10326 struct rt_rq *rt_rq = tg->rt_rq[i];
10301 10327
10302 spin_lock(&rt_rq->rt_runtime_lock); 10328 raw_spin_lock(&rt_rq->rt_runtime_lock);
10303 rt_rq->rt_runtime = rt_runtime; 10329 rt_rq->rt_runtime = rt_runtime;
10304 spin_unlock(&rt_rq->rt_runtime_lock); 10330 raw_spin_unlock(&rt_rq->rt_runtime_lock);
10305 } 10331 }
10306 spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock); 10332 raw_spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock);
10307 unlock: 10333 unlock:
10308 read_unlock(&tasklist_lock); 10334 read_unlock(&tasklist_lock);
10309 mutex_unlock(&rt_constraints_mutex); 10335 mutex_unlock(&rt_constraints_mutex);
@@ -10408,15 +10434,15 @@ static int sched_rt_global_constraints(void)
10408 if (sysctl_sched_rt_runtime == 0) 10434 if (sysctl_sched_rt_runtime == 0)
10409 return -EBUSY; 10435 return -EBUSY;
10410 10436
10411 spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags); 10437 raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
10412 for_each_possible_cpu(i) { 10438 for_each_possible_cpu(i) {
10413 struct rt_rq *rt_rq = &cpu_rq(i)->rt; 10439 struct rt_rq *rt_rq = &cpu_rq(i)->rt;
10414 10440
10415 spin_lock(&rt_rq->rt_runtime_lock); 10441 raw_spin_lock(&rt_rq->rt_runtime_lock);
10416 rt_rq->rt_runtime = global_rt_runtime(); 10442 rt_rq->rt_runtime = global_rt_runtime();
10417 spin_unlock(&rt_rq->rt_runtime_lock); 10443 raw_spin_unlock(&rt_rq->rt_runtime_lock);
10418 } 10444 }
10419 spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags); 10445 raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);
10420 10446
10421 return 0; 10447 return 0;
10422} 10448}
@@ -10707,9 +10733,9 @@ static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu)
10707 /* 10733 /*
10708 * Take rq->lock to make 64-bit read safe on 32-bit platforms. 10734 * Take rq->lock to make 64-bit read safe on 32-bit platforms.
10709 */ 10735 */
10710 spin_lock_irq(&cpu_rq(cpu)->lock); 10736 raw_spin_lock_irq(&cpu_rq(cpu)->lock);
10711 data = *cpuusage; 10737 data = *cpuusage;
10712 spin_unlock_irq(&cpu_rq(cpu)->lock); 10738 raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
10713#else 10739#else
10714 data = *cpuusage; 10740 data = *cpuusage;
10715#endif 10741#endif
@@ -10725,9 +10751,9 @@ static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
10725 /* 10751 /*
10726 * Take rq->lock to make 64-bit write safe on 32-bit platforms. 10752 * Take rq->lock to make 64-bit write safe on 32-bit platforms.
10727 */ 10753 */
10728 spin_lock_irq(&cpu_rq(cpu)->lock); 10754 raw_spin_lock_irq(&cpu_rq(cpu)->lock);
10729 *cpuusage = val; 10755 *cpuusage = val;
10730 spin_unlock_irq(&cpu_rq(cpu)->lock); 10756 raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
10731#else 10757#else
10732 *cpuusage = val; 10758 *cpuusage = val;
10733#endif 10759#endif
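The "64-bit safe on 32-bit platforms" comments refer to tearing: a u64 access compiles to two 32-bit memory operations there, so an unlocked reader can observe one stale and one fresh half. A sketch of the locked-accessor shape (hypothetical helper):

static u64 demo_read_u64(raw_spinlock_t *lock, u64 *val)
{
        unsigned long flags;
        u64 copy;

        raw_spin_lock_irqsave(lock, flags);
        copy = *val;    /* both 32-bit halves observed consistently */
        raw_spin_unlock_irqrestore(lock, flags);

        return copy;
}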
@@ -10961,9 +10987,9 @@ void synchronize_sched_expedited(void)
10961 init_completion(&req->done); 10987 init_completion(&req->done);
10962 req->task = NULL; 10988 req->task = NULL;
10963 req->dest_cpu = RCU_MIGRATION_NEED_QS; 10989 req->dest_cpu = RCU_MIGRATION_NEED_QS;
10964 spin_lock_irqsave(&rq->lock, flags); 10990 raw_spin_lock_irqsave(&rq->lock, flags);
10965 list_add(&req->list, &rq->migration_queue); 10991 list_add(&req->list, &rq->migration_queue);
10966 spin_unlock_irqrestore(&rq->lock, flags); 10992 raw_spin_unlock_irqrestore(&rq->lock, flags);
10967 wake_up_process(rq->migration_thread); 10993 wake_up_process(rq->migration_thread);
10968 } 10994 }
10969 for_each_online_cpu(cpu) { 10995 for_each_online_cpu(cpu) {
@@ -10971,11 +10997,11 @@ void synchronize_sched_expedited(void)
10971 req = &per_cpu(rcu_migration_req, cpu); 10997 req = &per_cpu(rcu_migration_req, cpu);
10972 rq = cpu_rq(cpu); 10998 rq = cpu_rq(cpu);
10973 wait_for_completion(&req->done); 10999 wait_for_completion(&req->done);
10974 spin_lock_irqsave(&rq->lock, flags); 11000 raw_spin_lock_irqsave(&rq->lock, flags);
10975 if (unlikely(req->dest_cpu == RCU_MIGRATION_MUST_SYNC)) 11001 if (unlikely(req->dest_cpu == RCU_MIGRATION_MUST_SYNC))
10976 need_full_sync = 1; 11002 need_full_sync = 1;
10977 req->dest_cpu = RCU_MIGRATION_IDLE; 11003 req->dest_cpu = RCU_MIGRATION_IDLE;
10978 spin_unlock_irqrestore(&rq->lock, flags); 11004 raw_spin_unlock_irqrestore(&rq->lock, flags);
10979 } 11005 }
10980 rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE; 11006 rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE;
10981 synchronize_sched_expedited_count++; 11007 synchronize_sched_expedited_count++;
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
index 479ce5682d7c..5b496132c28a 100644
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@ -236,6 +236,18 @@ void sched_clock_idle_wakeup_event(u64 delta_ns)
236} 236}
237EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event); 237EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
238 238
239unsigned long long cpu_clock(int cpu)
240{
241 unsigned long long clock;
242 unsigned long flags;
243
244 local_irq_save(flags);
245 clock = sched_clock_cpu(cpu);
246 local_irq_restore(flags);
247
248 return clock;
249}
250
239#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ 251#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
240 252
241void sched_clock_init(void) 253void sched_clock_init(void)
@@ -251,17 +263,12 @@ u64 sched_clock_cpu(int cpu)
251 return sched_clock(); 263 return sched_clock();
252} 264}
253 265
254#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
255 266
256unsigned long long cpu_clock(int cpu) 267unsigned long long cpu_clock(int cpu)
257{ 268{
258 unsigned long long clock; 269 return sched_clock_cpu(cpu);
259 unsigned long flags; 270}
260 271
261 local_irq_save(flags); 272#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
262 clock = sched_clock_cpu(cpu);
263 local_irq_restore(flags);
264 273
265 return clock;
266}
267EXPORT_SYMBOL_GPL(cpu_clock); 274EXPORT_SYMBOL_GPL(cpu_clock);
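After the move, each configuration gets its own cpu_clock(): the unstable-clock variant disables IRQs so the sample cannot be torn by preemption and migration mid-read, while the stable variant collapses to a direct sched_clock_cpu() call. A hedged usage sketch (hypothetical timing code, assuming the caller disabled preemption so both reads hit one CPU):

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/smp.h>

static void demo_time_section(void)
{
        u64 t0, t1;

        t0 = cpu_clock(raw_smp_processor_id());
        /* ... section being timed ... */
        t1 = cpu_clock(raw_smp_processor_id());

        pr_info("section took %llu ns\n", (unsigned long long)(t1 - t0));
}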
diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c
index 0f052fc674d5..597b33099dfa 100644
--- a/kernel/sched_cpupri.c
+++ b/kernel/sched_cpupri.c
@@ -135,26 +135,26 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
135 if (likely(newpri != CPUPRI_INVALID)) { 135 if (likely(newpri != CPUPRI_INVALID)) {
136 struct cpupri_vec *vec = &cp->pri_to_cpu[newpri]; 136 struct cpupri_vec *vec = &cp->pri_to_cpu[newpri];
137 137
138 spin_lock_irqsave(&vec->lock, flags); 138 raw_spin_lock_irqsave(&vec->lock, flags);
139 139
140 cpumask_set_cpu(cpu, vec->mask); 140 cpumask_set_cpu(cpu, vec->mask);
141 vec->count++; 141 vec->count++;
142 if (vec->count == 1) 142 if (vec->count == 1)
143 set_bit(newpri, cp->pri_active); 143 set_bit(newpri, cp->pri_active);
144 144
145 spin_unlock_irqrestore(&vec->lock, flags); 145 raw_spin_unlock_irqrestore(&vec->lock, flags);
146 } 146 }
147 if (likely(oldpri != CPUPRI_INVALID)) { 147 if (likely(oldpri != CPUPRI_INVALID)) {
148 struct cpupri_vec *vec = &cp->pri_to_cpu[oldpri]; 148 struct cpupri_vec *vec = &cp->pri_to_cpu[oldpri];
149 149
150 spin_lock_irqsave(&vec->lock, flags); 150 raw_spin_lock_irqsave(&vec->lock, flags);
151 151
152 vec->count--; 152 vec->count--;
153 if (!vec->count) 153 if (!vec->count)
154 clear_bit(oldpri, cp->pri_active); 154 clear_bit(oldpri, cp->pri_active);
155 cpumask_clear_cpu(cpu, vec->mask); 155 cpumask_clear_cpu(cpu, vec->mask);
156 156
157 spin_unlock_irqrestore(&vec->lock, flags); 157 raw_spin_unlock_irqrestore(&vec->lock, flags);
158 } 158 }
159 159
160 *currpri = newpri; 160 *currpri = newpri;
@@ -180,7 +180,7 @@ int cpupri_init(struct cpupri *cp, bool bootmem)
180 for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) { 180 for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) {
181 struct cpupri_vec *vec = &cp->pri_to_cpu[i]; 181 struct cpupri_vec *vec = &cp->pri_to_cpu[i];
182 182
183 spin_lock_init(&vec->lock); 183 raw_spin_lock_init(&vec->lock);
184 vec->count = 0; 184 vec->count = 0;
185 if (!zalloc_cpumask_var(&vec->mask, gfp)) 185 if (!zalloc_cpumask_var(&vec->mask, gfp))
186 goto cleanup; 186 goto cleanup;
diff --git a/kernel/sched_cpupri.h b/kernel/sched_cpupri.h
index 9a7e859b8fbf..7cb5bb6b95be 100644
--- a/kernel/sched_cpupri.h
+++ b/kernel/sched_cpupri.h
@@ -12,7 +12,7 @@
12/* values 2-101 are RT priorities 0-99 */ 12/* values 2-101 are RT priorities 0-99 */
13 13
14struct cpupri_vec { 14struct cpupri_vec {
15 spinlock_t lock; 15 raw_spinlock_t lock;
16 int count; 16 int count;
17 cpumask_var_t mask; 17 cpumask_var_t mask;
18}; 18};
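The spinlock_t to raw_spinlock_t conversions threaded through this whole patch prepare for PREEMPT_RT, where a plain spinlock_t can become a sleeping lock; paths that run with preemption and interrupts hard-disabled, like the scheduler core and this per-priority vector, need a lock that always spins. A sketch of the resulting idiom:

struct demo_vec {
        raw_spinlock_t lock;    /* always spins, even on PREEMPT_RT */
        int count;
};

static void demo_vec_inc(struct demo_vec *v)
{
        unsigned long flags;

        raw_spin_lock_irqsave(&v->lock, flags);
        v->count++;
        raw_spin_unlock_irqrestore(&v->lock, flags);
}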
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 6988cf08f705..67f95aada4b9 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -184,7 +184,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
184 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock", 184 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock",
185 SPLIT_NS(cfs_rq->exec_clock)); 185 SPLIT_NS(cfs_rq->exec_clock));
186 186
187 spin_lock_irqsave(&rq->lock, flags); 187 raw_spin_lock_irqsave(&rq->lock, flags);
188 if (cfs_rq->rb_leftmost) 188 if (cfs_rq->rb_leftmost)
189 MIN_vruntime = (__pick_next_entity(cfs_rq))->vruntime; 189 MIN_vruntime = (__pick_next_entity(cfs_rq))->vruntime;
190 last = __pick_last_entity(cfs_rq); 190 last = __pick_last_entity(cfs_rq);
@@ -192,7 +192,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
192 max_vruntime = last->vruntime; 192 max_vruntime = last->vruntime;
193 min_vruntime = cfs_rq->min_vruntime; 193 min_vruntime = cfs_rq->min_vruntime;
194 rq0_min_vruntime = cpu_rq(0)->cfs.min_vruntime; 194 rq0_min_vruntime = cpu_rq(0)->cfs.min_vruntime;
195 spin_unlock_irqrestore(&rq->lock, flags); 195 raw_spin_unlock_irqrestore(&rq->lock, flags);
196 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "MIN_vruntime", 196 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "MIN_vruntime",
197 SPLIT_NS(MIN_vruntime)); 197 SPLIT_NS(MIN_vruntime));
198 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "min_vruntime", 198 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "min_vruntime",
@@ -309,6 +309,12 @@ static void print_cpu(struct seq_file *m, int cpu)
309 print_rq(m, rq, cpu); 309 print_rq(m, rq, cpu);
310} 310}
311 311
312static const char *sched_tunable_scaling_names[] = {
313 "none",
314 "logaritmic",
315 "linear"
316};
317
312static int sched_debug_show(struct seq_file *m, void *v) 318static int sched_debug_show(struct seq_file *m, void *v)
313{ 319{
314 u64 now = ktime_to_ns(ktime_get()); 320 u64 now = ktime_to_ns(ktime_get());
@@ -334,6 +340,10 @@ static int sched_debug_show(struct seq_file *m, void *v)
334#undef PN 340#undef PN
335#undef P 341#undef P
336 342
343 SEQ_printf(m, " .%-40s: %d (%s)\n", "sysctl_sched_tunable_scaling",
344 sysctl_sched_tunable_scaling,
345 sched_tunable_scaling_names[sysctl_sched_tunable_scaling]);
346
337 for_each_online_cpu(cpu) 347 for_each_online_cpu(cpu)
338 print_cpu(m, cpu); 348 print_cpu(m, cpu);
339 349
@@ -399,7 +409,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
399 PN(se.sum_exec_runtime); 409 PN(se.sum_exec_runtime);
400 PN(se.avg_overlap); 410 PN(se.avg_overlap);
401 PN(se.avg_wakeup); 411 PN(se.avg_wakeup);
402 PN(se.avg_running);
403 412
404 nr_switches = p->nvcsw + p->nivcsw; 413 nr_switches = p->nvcsw + p->nivcsw;
405 414
@@ -423,7 +432,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
423 P(se.nr_failed_migrations_running); 432 P(se.nr_failed_migrations_running);
424 P(se.nr_failed_migrations_hot); 433 P(se.nr_failed_migrations_hot);
425 P(se.nr_forced_migrations); 434 P(se.nr_forced_migrations);
426 P(se.nr_forced2_migrations);
427 P(se.nr_wakeups); 435 P(se.nr_wakeups);
428 P(se.nr_wakeups_sync); 436 P(se.nr_wakeups_sync);
429 P(se.nr_wakeups_migrate); 437 P(se.nr_wakeups_migrate);
@@ -499,7 +507,6 @@ void proc_sched_set_task(struct task_struct *p)
499 p->se.nr_failed_migrations_running = 0; 507 p->se.nr_failed_migrations_running = 0;
500 p->se.nr_failed_migrations_hot = 0; 508 p->se.nr_failed_migrations_hot = 0;
501 p->se.nr_forced_migrations = 0; 509 p->se.nr_forced_migrations = 0;
502 p->se.nr_forced2_migrations = 0;
503 p->se.nr_wakeups = 0; 510 p->se.nr_wakeups = 0;
504 p->se.nr_wakeups_sync = 0; 511 p->se.nr_wakeups_sync = 0;
505 p->se.nr_wakeups_migrate = 0; 512 p->se.nr_wakeups_migrate = 0;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index f61837ad336d..42ac3c9f66f6 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -21,6 +21,7 @@
21 */ 21 */
22 22
23#include <linux/latencytop.h> 23#include <linux/latencytop.h>
24#include <linux/sched.h>
24 25
25/* 26/*
26 * Targeted preemption latency for CPU-bound tasks: 27 * Targeted preemption latency for CPU-bound tasks:
@@ -35,12 +36,26 @@
35 * run vmstat and monitor the context-switches (cs) field) 36 * run vmstat and monitor the context-switches (cs) field)
36 */ 37 */
37unsigned int sysctl_sched_latency = 5000000ULL; 38unsigned int sysctl_sched_latency = 5000000ULL;
39unsigned int normalized_sysctl_sched_latency = 5000000ULL;
40
41/*
42 * The initial- and re-scaling of tunables is configurable
43 * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))
44 *
45 * Options are:
46 * SCHED_TUNABLESCALING_NONE - unscaled, always *1
47 * SCHED_TUNABLESCALING_LOG - scaled logarithmical, *1+ilog(ncpus)
48 * SCHED_TUNABLESCALING_LINEAR - scaled linear, *ncpus
49 */
50enum sched_tunable_scaling sysctl_sched_tunable_scaling
51 = SCHED_TUNABLESCALING_LOG;
38 52
39/* 53/*
40 * Minimal preemption granularity for CPU-bound tasks: 54 * Minimal preemption granularity for CPU-bound tasks:
41 * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds) 55 * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
42 */ 56 */
43unsigned int sysctl_sched_min_granularity = 1000000ULL; 57unsigned int sysctl_sched_min_granularity = 1000000ULL;
58unsigned int normalized_sysctl_sched_min_granularity = 1000000ULL;
44 59
45/* 60/*
46 * is kept at sysctl_sched_latency / sysctl_sched_min_granularity 61 * is kept at sysctl_sched_latency / sysctl_sched_min_granularity
@@ -70,6 +85,7 @@ unsigned int __read_mostly sysctl_sched_compat_yield;
70 * have immediate wakeup/sleep latencies. 85 * have immediate wakeup/sleep latencies.
71 */ 86 */
72unsigned int sysctl_sched_wakeup_granularity = 1000000UL; 87unsigned int sysctl_sched_wakeup_granularity = 1000000UL;
88unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL;
73 89
74const_debug unsigned int sysctl_sched_migration_cost = 500000UL; 90const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
75 91
@@ -383,11 +399,12 @@ static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
383 */ 399 */
384 400
385#ifdef CONFIG_SCHED_DEBUG 401#ifdef CONFIG_SCHED_DEBUG
386int sched_nr_latency_handler(struct ctl_table *table, int write, 402int sched_proc_update_handler(struct ctl_table *table, int write,
387 void __user *buffer, size_t *lenp, 403 void __user *buffer, size_t *lenp,
388 loff_t *ppos) 404 loff_t *ppos)
389{ 405{
390 int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); 406 int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
407 int factor = get_update_sysctl_factor();
391 408
392 if (ret || !write) 409 if (ret || !write)
393 return ret; 410 return ret;
@@ -395,6 +412,14 @@ int sched_nr_latency_handler(struct ctl_table *table, int write,
395 sched_nr_latency = DIV_ROUND_UP(sysctl_sched_latency, 412 sched_nr_latency = DIV_ROUND_UP(sysctl_sched_latency,
396 sysctl_sched_min_granularity); 413 sysctl_sched_min_granularity);
397 414
415#define WRT_SYSCTL(name) \
416 (normalized_sysctl_##name = sysctl_##name / (factor))
417 WRT_SYSCTL(sched_min_granularity);
418 WRT_SYSCTL(sched_latency);
419 WRT_SYSCTL(sched_wakeup_granularity);
420 WRT_SYSCTL(sched_shares_ratelimit);
421#undef WRT_SYSCTL
422
398 return 0; 423 return 0;
399} 424}
400#endif 425#endif
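
The WRT_SYSCTL() helper keeps the normalized_* shadow in sync whenever an admin writes a tunable through /proc; for example WRT_SYSCTL(sched_latency) expands to:

	(normalized_sysctl_sched_latency = sysctl_sched_latency / (factor));

so the shadow always holds the unscaled base value, which can be re-scaled later when the online CPU count changes.
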
@@ -485,6 +510,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
485 curr->sum_exec_runtime += delta_exec; 510 curr->sum_exec_runtime += delta_exec;
486 schedstat_add(cfs_rq, exec_clock, delta_exec); 511 schedstat_add(cfs_rq, exec_clock, delta_exec);
487 delta_exec_weighted = calc_delta_fair(delta_exec, curr); 512 delta_exec_weighted = calc_delta_fair(delta_exec, curr);
513
488 curr->vruntime += delta_exec_weighted; 514 curr->vruntime += delta_exec_weighted;
489 update_min_vruntime(cfs_rq); 515 update_min_vruntime(cfs_rq);
490} 516}
@@ -740,16 +766,26 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
740 se->vruntime = vruntime; 766 se->vruntime = vruntime;
741} 767}
742 768
769#define ENQUEUE_WAKEUP 1
770#define ENQUEUE_MIGRATE 2
771
743static void 772static void
744enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup) 773enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
745{ 774{
746 /* 775 /*
776 * Update the normalized vruntime before updating min_vruntime
 777 * through calling update_curr().
778 */
779 if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATE))
780 se->vruntime += cfs_rq->min_vruntime;
781
782 /*
747 * Update run-time statistics of the 'current'. 783 * Update run-time statistics of the 'current'.
748 */ 784 */
749 update_curr(cfs_rq); 785 update_curr(cfs_rq);
750 account_entity_enqueue(cfs_rq, se); 786 account_entity_enqueue(cfs_rq, se);
751 787
752 if (wakeup) { 788 if (flags & ENQUEUE_WAKEUP) {
753 place_entity(cfs_rq, se, 0); 789 place_entity(cfs_rq, se, 0);
754 enqueue_sleeper(cfs_rq, se); 790 enqueue_sleeper(cfs_rq, se);
755 } 791 }
@@ -803,6 +839,14 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
803 __dequeue_entity(cfs_rq, se); 839 __dequeue_entity(cfs_rq, se);
804 account_entity_dequeue(cfs_rq, se); 840 account_entity_dequeue(cfs_rq, se);
805 update_min_vruntime(cfs_rq); 841 update_min_vruntime(cfs_rq);
842
843 /*
844 * Normalize the entity after updating the min_vruntime because the
845 * update can refer to the ->curr item and we need to reflect this
846 * movement in our normalized position.
847 */
848 if (!sleep)
849 se->vruntime -= cfs_rq->min_vruntime;
806} 850}
807 851
808/* 852/*
@@ -1013,13 +1057,19 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
1013{ 1057{
1014 struct cfs_rq *cfs_rq; 1058 struct cfs_rq *cfs_rq;
1015 struct sched_entity *se = &p->se; 1059 struct sched_entity *se = &p->se;
1060 int flags = 0;
1061
1062 if (wakeup)
1063 flags |= ENQUEUE_WAKEUP;
1064 if (p->state == TASK_WAKING)
1065 flags |= ENQUEUE_MIGRATE;
1016 1066
1017 for_each_sched_entity(se) { 1067 for_each_sched_entity(se) {
1018 if (se->on_rq) 1068 if (se->on_rq)
1019 break; 1069 break;
1020 cfs_rq = cfs_rq_of(se); 1070 cfs_rq = cfs_rq_of(se);
1021 enqueue_entity(cfs_rq, se, wakeup); 1071 enqueue_entity(cfs_rq, se, flags);
1022 wakeup = 1; 1072 flags = ENQUEUE_WAKEUP;
1023 } 1073 }
1024 1074
1025 hrtick_update(rq); 1075 hrtick_update(rq);
@@ -1095,6 +1145,14 @@ static void yield_task_fair(struct rq *rq)
1095 1145
1096#ifdef CONFIG_SMP 1146#ifdef CONFIG_SMP
1097 1147
1148static void task_waking_fair(struct rq *rq, struct task_struct *p)
1149{
1150 struct sched_entity *se = &p->se;
1151 struct cfs_rq *cfs_rq = cfs_rq_of(se);
1152
1153 se->vruntime -= cfs_rq->min_vruntime;
1154}
1155
1098#ifdef CONFIG_FAIR_GROUP_SCHED 1156#ifdef CONFIG_FAIR_GROUP_SCHED
1099/* 1157/*
1100 * effective_load() calculates the load change as seen from the root_task_group 1158 * effective_load() calculates the load change as seen from the root_task_group
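
Taken together, the enqueue_entity()/dequeue_entity() hunks above and the new task_waking_fair() establish one invariant: while a task is off a runqueue for a migration (dequeue with !sleep, or a wakeup-migration via task_waking_fair()), its vruntime is stored relative to the source cfs_rq's min_vruntime and re-based against the destination on enqueue, so the task's lag survives moving between runqueues whose virtual clocks differ. A toy, self-contained illustration (plain C, hypothetical values, not kernel code):

#include <stdio.h>

int main(void)
{
	long long src_min = 1000000, dst_min = 5000000;	/* hypothetical rq clocks */
	long long vruntime = 1200000;			/* 200us of lag on src */

	vruntime -= src_min;	/* task_waking_fair() on the source CPU */
	vruntime += dst_min;	/* enqueue_entity() with ENQUEUE_MIGRATE */

	printf("lag preserved: %lld\n", vruntime - dst_min);	/* still 200000 */
	return 0;
}
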
@@ -1403,8 +1461,10 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
1403 new_cpu = prev_cpu; 1461 new_cpu = prev_cpu;
1404 } 1462 }
1405 1463
1406 rcu_read_lock();
1407 for_each_domain(cpu, tmp) { 1464 for_each_domain(cpu, tmp) {
1465 if (!(tmp->flags & SD_LOAD_BALANCE))
1466 continue;
1467
1408 /* 1468 /*
1409 * If power savings logic is enabled for a domain, see if we 1469 * If power savings logic is enabled for a domain, see if we
1410 * are not overloaded, if so, don't balance wider. 1470 * are not overloaded, if so, don't balance wider.
@@ -1484,10 +1544,8 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
1484 update_shares(tmp); 1544 update_shares(tmp);
1485 } 1545 }
1486 1546
1487 if (affine_sd && wake_affine(affine_sd, p, sync)) { 1547 if (affine_sd && wake_affine(affine_sd, p, sync))
1488 new_cpu = cpu; 1548 return cpu;
1489 goto out;
1490 }
1491 1549
1492 while (sd) { 1550 while (sd) {
1493 int load_idx = sd->forkexec_idx; 1551 int load_idx = sd->forkexec_idx;
@@ -1528,8 +1586,6 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
1528 /* while loop will break here if sd == NULL */ 1586 /* while loop will break here if sd == NULL */
1529 } 1587 }
1530 1588
1531out:
1532 rcu_read_unlock();
1533 return new_cpu; 1589 return new_cpu;
1534} 1590}
1535#endif /* CONFIG_SMP */ 1591#endif /* CONFIG_SMP */
@@ -1651,12 +1707,8 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
1651 int sync = wake_flags & WF_SYNC; 1707 int sync = wake_flags & WF_SYNC;
1652 int scale = cfs_rq->nr_running >= sched_nr_latency; 1708 int scale = cfs_rq->nr_running >= sched_nr_latency;
1653 1709
1654 update_curr(cfs_rq); 1710 if (unlikely(rt_prio(p->prio)))
1655 1711 goto preempt;
1656 if (unlikely(rt_prio(p->prio))) {
1657 resched_task(curr);
1658 return;
1659 }
1660 1712
1661 if (unlikely(p->sched_class != &fair_sched_class)) 1713 if (unlikely(p->sched_class != &fair_sched_class))
1662 return; 1714 return;
@@ -1682,50 +1734,44 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
1682 return; 1734 return;
1683 1735
1684 /* Idle tasks are by definition preempted by everybody. */ 1736 /* Idle tasks are by definition preempted by everybody. */
1685 if (unlikely(curr->policy == SCHED_IDLE)) { 1737 if (unlikely(curr->policy == SCHED_IDLE))
1686 resched_task(curr); 1738 goto preempt;
1687 return;
1688 }
1689 1739
1690 if ((sched_feat(WAKEUP_SYNC) && sync) || 1740 if (sched_feat(WAKEUP_SYNC) && sync)
1691 (sched_feat(WAKEUP_OVERLAP) && 1741 goto preempt;
1692 (se->avg_overlap < sysctl_sched_migration_cost &&
1693 pse->avg_overlap < sysctl_sched_migration_cost))) {
1694 resched_task(curr);
1695 return;
1696 }
1697 1742
1698 if (sched_feat(WAKEUP_RUNNING)) { 1743 if (sched_feat(WAKEUP_OVERLAP) &&
1699 if (pse->avg_running < se->avg_running) { 1744 se->avg_overlap < sysctl_sched_migration_cost &&
1700 set_next_buddy(pse); 1745 pse->avg_overlap < sysctl_sched_migration_cost)
1701 resched_task(curr); 1746 goto preempt;
1702 return;
1703 }
1704 }
1705 1747
1706 if (!sched_feat(WAKEUP_PREEMPT)) 1748 if (!sched_feat(WAKEUP_PREEMPT))
1707 return; 1749 return;
1708 1750
1751 update_curr(cfs_rq);
1709 find_matching_se(&se, &pse); 1752 find_matching_se(&se, &pse);
1710
1711 BUG_ON(!pse); 1753 BUG_ON(!pse);
1754 if (wakeup_preempt_entity(se, pse) == 1)
1755 goto preempt;
1712 1756
1713 if (wakeup_preempt_entity(se, pse) == 1) { 1757 return;
1714 resched_task(curr); 1758
1715 /* 1759preempt:
1716 * Only set the backward buddy when the current task is still 1760 resched_task(curr);
1717 * on the rq. This can happen when a wakeup gets interleaved 1761 /*
1718 * with schedule on the ->pre_schedule() or idle_balance() 1762 * Only set the backward buddy when the current task is still
1719 * point, either of which can drop the rq lock. 1763 * with schedule on the ->pre_schedule() or idle_balance()
1720 * 1764 * point, either of which can drop the rq lock.
1721 * Also, during early boot the idle thread is in the fair class, 1765 * point, either of which can * drop the rq lock.
1722 * for obvious reasons its a bad idea to schedule back to it. 1766 *
1723 */ 1767 * Also, during early boot the idle thread is in the fair class,
1724 if (unlikely(!se->on_rq || curr == rq->idle)) 1768 * for obvious reasons its a bad idea to schedule back to it.
1725 return; 1769 */
1726 if (sched_feat(LAST_BUDDY) && scale && entity_is_task(se)) 1770 if (unlikely(!se->on_rq || curr == rq->idle))
1727 set_last_buddy(se); 1771 return;
1728 } 1772
1773 if (sched_feat(LAST_BUDDY) && scale && entity_is_task(se))
1774 set_last_buddy(se);
1729} 1775}
1730 1776
1731static struct task_struct *pick_next_task_fair(struct rq *rq) 1777static struct task_struct *pick_next_task_fair(struct rq *rq)
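
For context, the wakeup_preempt_entity(se, pse) call that now feeds "goto preempt" compares the woken entity's vruntime lead against the wakeup granularity; roughly, from the sched_fair.c of this era (wakeup_gran() shown as an opaque helper):

/*
 * Returns -1 when the woken entity (second argument) is not ahead of curr,
 * 0 when it is ahead but within the granularity, and 1 when its lead
 * exceeds the granularity and curr should be preempted.
 */
static int
wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
{
	s64 gran, vdiff = curr->vruntime - se->vruntime;

	if (vdiff <= 0)
		return -1;

	gran = wakeup_gran(curr, se);	/* ~sysctl_sched_wakeup_granularity, weighted */
	if (vdiff > gran)
		return 1;

	return 0;
}
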
@@ -1905,6 +1951,17 @@ move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
1905 1951
1906 return 0; 1952 return 0;
1907} 1953}
1954
1955static void rq_online_fair(struct rq *rq)
1956{
1957 update_sysctl();
1958}
1959
1960static void rq_offline_fair(struct rq *rq)
1961{
1962 update_sysctl();
1963}
1964
1908#endif /* CONFIG_SMP */ 1965#endif /* CONFIG_SMP */
1909 1966
1910/* 1967/*
@@ -1922,28 +1979,30 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
1922} 1979}
1923 1980
1924/* 1981/*
1925 * Share the fairness runtime between parent and child, thus the 1982 * called on fork with the child task as argument from the parent's context
1926 * total amount of pressure for CPU stays equal - new tasks 1983 * - child not yet on the tasklist
1927 * get a chance to run but frequent forkers are not allowed to 1984 * - preemption disabled
1928 * monopolize the CPU. Note: the parent runqueue is locked,
1929 * the child is not running yet.
1930 */ 1985 */
1931static void task_new_fair(struct rq *rq, struct task_struct *p) 1986static void task_fork_fair(struct task_struct *p)
1932{ 1987{
1933 struct cfs_rq *cfs_rq = task_cfs_rq(p); 1988 struct cfs_rq *cfs_rq = task_cfs_rq(current);
1934 struct sched_entity *se = &p->se, *curr = cfs_rq->curr; 1989 struct sched_entity *se = &p->se, *curr = cfs_rq->curr;
1935 int this_cpu = smp_processor_id(); 1990 int this_cpu = smp_processor_id();
1991 struct rq *rq = this_rq();
1992 unsigned long flags;
1936 1993
1937 sched_info_queued(p); 1994 raw_spin_lock_irqsave(&rq->lock, flags);
1995
1996 if (unlikely(task_cpu(p) != this_cpu))
1997 __set_task_cpu(p, this_cpu);
1938 1998
1939 update_curr(cfs_rq); 1999 update_curr(cfs_rq);
2000
1940 if (curr) 2001 if (curr)
1941 se->vruntime = curr->vruntime; 2002 se->vruntime = curr->vruntime;
1942 place_entity(cfs_rq, se, 1); 2003 place_entity(cfs_rq, se, 1);
1943 2004
1944 /* 'curr' will be NULL if the child belongs to a different group */ 2005 if (sysctl_sched_child_runs_first && curr && entity_before(curr, se)) {
1945 if (sysctl_sched_child_runs_first && this_cpu == task_cpu(p) &&
1946 curr && entity_before(curr, se)) {
1947 /* 2006 /*
1948 * Upon rescheduling, sched_class::put_prev_task() will place 2007 * Upon rescheduling, sched_class::put_prev_task() will place
1949 * 'current' within the tree based on its new key value. 2008 * 'current' within the tree based on its new key value.
@@ -1952,7 +2011,9 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
1952 resched_task(rq->curr); 2011 resched_task(rq->curr);
1953 } 2012 }
1954 2013
1955 enqueue_task_fair(rq, p, 0); 2014 se->vruntime -= cfs_rq->min_vruntime;
2015
2016 raw_spin_unlock_irqrestore(&rq->lock, flags);
1956} 2017}
1957 2018
1958/* 2019/*
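
The final `se->vruntime -= cfs_rq->min_vruntime` in task_fork_fair() above is the fork-side half of the same re-basing protocol: unlike the old task_new_fair(), the child is no longer enqueued here, so its vruntime is left relative until wake_up_new_task() enqueues it and the enqueue path re-adds the destination runqueue's min_vruntime. A hypothetical timeline, names as above:

/*
 * task_fork_fair(p):   p->se.vruntime = place_entity(...) - src->min_vruntime
 * (the child may be placed on another CPU before its first wakeup)
 * wake_up_new_task(p): enqueue_entity(): p->se.vruntime += dst->min_vruntime
 */
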
@@ -2005,30 +2066,27 @@ static void set_curr_task_fair(struct rq *rq)
2005} 2066}
2006 2067
2007#ifdef CONFIG_FAIR_GROUP_SCHED 2068#ifdef CONFIG_FAIR_GROUP_SCHED
2008static void moved_group_fair(struct task_struct *p) 2069static void moved_group_fair(struct task_struct *p, int on_rq)
2009{ 2070{
2010 struct cfs_rq *cfs_rq = task_cfs_rq(p); 2071 struct cfs_rq *cfs_rq = task_cfs_rq(p);
2011 2072
2012 update_curr(cfs_rq); 2073 update_curr(cfs_rq);
2013 place_entity(cfs_rq, &p->se, 1); 2074 if (!on_rq)
2075 place_entity(cfs_rq, &p->se, 1);
2014} 2076}
2015#endif 2077#endif
2016 2078
2017unsigned int get_rr_interval_fair(struct task_struct *task) 2079unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task)
2018{ 2080{
2019 struct sched_entity *se = &task->se; 2081 struct sched_entity *se = &task->se;
2020 unsigned long flags;
2021 struct rq *rq;
2022 unsigned int rr_interval = 0; 2082 unsigned int rr_interval = 0;
2023 2083
2024 /* 2084 /*
2025 * Time slice is 0 for SCHED_OTHER tasks that are on an otherwise 2085 * Time slice is 0 for SCHED_OTHER tasks that are on an otherwise
2026 * idle runqueue: 2086 * idle runqueue:
2027 */ 2087 */
2028 rq = task_rq_lock(task, &flags);
2029 if (rq->cfs.load.weight) 2088 if (rq->cfs.load.weight)
2030 rr_interval = NS_TO_JIFFIES(sched_slice(&rq->cfs, se)); 2089 rr_interval = NS_TO_JIFFIES(sched_slice(&rq->cfs, se));
2031 task_rq_unlock(rq, &flags);
2032 2090
2033 return rr_interval; 2091 return rr_interval;
2034} 2092}
@@ -2052,11 +2110,15 @@ static const struct sched_class fair_sched_class = {
2052 2110
2053 .load_balance = load_balance_fair, 2111 .load_balance = load_balance_fair,
2054 .move_one_task = move_one_task_fair, 2112 .move_one_task = move_one_task_fair,
2113 .rq_online = rq_online_fair,
2114 .rq_offline = rq_offline_fair,
2115
2116 .task_waking = task_waking_fair,
2055#endif 2117#endif
2056 2118
2057 .set_curr_task = set_curr_task_fair, 2119 .set_curr_task = set_curr_task_fair,
2058 .task_tick = task_tick_fair, 2120 .task_tick = task_tick_fair,
2059 .task_new = task_new_fair, 2121 .task_fork = task_fork_fair,
2060 2122
2061 .prio_changed = prio_changed_fair, 2123 .prio_changed = prio_changed_fair,
2062 .switched_to = switched_to_fair, 2124 .switched_to = switched_to_fair,
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index 0d94083582c7..d5059fd761d9 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -54,11 +54,6 @@ SCHED_FEAT(WAKEUP_SYNC, 0)
54SCHED_FEAT(WAKEUP_OVERLAP, 0) 54SCHED_FEAT(WAKEUP_OVERLAP, 0)
55 55
56/* 56/*
57 * Wakeup preemption towards tasks that run short
58 */
59SCHED_FEAT(WAKEUP_RUNNING, 0)
60
61/*
62 * Use the SYNC wakeup hint, pipes and the likes use this to indicate 57 * Use the SYNC wakeup hint, pipes and the likes use this to indicate
63 * the remote end is likely to consume the data we just wrote, and 58 * the remote end is likely to consume the data we just wrote, and
64 * therefore has cache benefit from being placed on the same cpu, see 59 * therefore has cache benefit from being placed on the same cpu, see
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index b133a28fcde3..21b969a28725 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -34,10 +34,10 @@ static struct task_struct *pick_next_task_idle(struct rq *rq)
34static void 34static void
35dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep) 35dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep)
36{ 36{
37 spin_unlock_irq(&rq->lock); 37 raw_spin_unlock_irq(&rq->lock);
38 printk(KERN_ERR "bad: scheduling from the idle thread!\n"); 38 pr_err("bad: scheduling from the idle thread!\n");
39 dump_stack(); 39 dump_stack();
40 spin_lock_irq(&rq->lock); 40 raw_spin_lock_irq(&rq->lock);
41} 41}
42 42
43static void put_prev_task_idle(struct rq *rq, struct task_struct *prev) 43static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
@@ -97,7 +97,7 @@ static void prio_changed_idle(struct rq *rq, struct task_struct *p,
97 check_preempt_curr(rq, p, 0); 97 check_preempt_curr(rq, p, 0);
98} 98}
99 99
100unsigned int get_rr_interval_idle(struct task_struct *task) 100unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task)
101{ 101{
102 return 0; 102 return 0;
103} 103}
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 5c5fef378415..f48328ac216f 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -327,7 +327,7 @@ static int do_balance_runtime(struct rt_rq *rt_rq)
327 327
328 weight = cpumask_weight(rd->span); 328 weight = cpumask_weight(rd->span);
329 329
330 spin_lock(&rt_b->rt_runtime_lock); 330 raw_spin_lock(&rt_b->rt_runtime_lock);
331 rt_period = ktime_to_ns(rt_b->rt_period); 331 rt_period = ktime_to_ns(rt_b->rt_period);
332 for_each_cpu(i, rd->span) { 332 for_each_cpu(i, rd->span) {
333 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); 333 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
@@ -336,7 +336,7 @@ static int do_balance_runtime(struct rt_rq *rt_rq)
336 if (iter == rt_rq) 336 if (iter == rt_rq)
337 continue; 337 continue;
338 338
339 spin_lock(&iter->rt_runtime_lock); 339 raw_spin_lock(&iter->rt_runtime_lock);
340 /* 340 /*
341 * Either all rqs have inf runtime and there's nothing to steal 341 * Either all rqs have inf runtime and there's nothing to steal
342 * or __disable_runtime() below sets a specific rq to inf to 342 * or __disable_runtime() below sets a specific rq to inf to
@@ -358,14 +358,14 @@ static int do_balance_runtime(struct rt_rq *rt_rq)
358 rt_rq->rt_runtime += diff; 358 rt_rq->rt_runtime += diff;
359 more = 1; 359 more = 1;
360 if (rt_rq->rt_runtime == rt_period) { 360 if (rt_rq->rt_runtime == rt_period) {
361 spin_unlock(&iter->rt_runtime_lock); 361 raw_spin_unlock(&iter->rt_runtime_lock);
362 break; 362 break;
363 } 363 }
364 } 364 }
365next: 365next:
366 spin_unlock(&iter->rt_runtime_lock); 366 raw_spin_unlock(&iter->rt_runtime_lock);
367 } 367 }
368 spin_unlock(&rt_b->rt_runtime_lock); 368 raw_spin_unlock(&rt_b->rt_runtime_lock);
369 369
370 return more; 370 return more;
371} 371}
@@ -386,8 +386,8 @@ static void __disable_runtime(struct rq *rq)
386 s64 want; 386 s64 want;
387 int i; 387 int i;
388 388
389 spin_lock(&rt_b->rt_runtime_lock); 389 raw_spin_lock(&rt_b->rt_runtime_lock);
390 spin_lock(&rt_rq->rt_runtime_lock); 390 raw_spin_lock(&rt_rq->rt_runtime_lock);
391 /* 391 /*
392 * Either we're all inf and nobody needs to borrow, or we're 392 * Either we're all inf and nobody needs to borrow, or we're
393 * already disabled and thus have nothing to do, or we have 393 * already disabled and thus have nothing to do, or we have
@@ -396,7 +396,7 @@ static void __disable_runtime(struct rq *rq)
396 if (rt_rq->rt_runtime == RUNTIME_INF || 396 if (rt_rq->rt_runtime == RUNTIME_INF ||
397 rt_rq->rt_runtime == rt_b->rt_runtime) 397 rt_rq->rt_runtime == rt_b->rt_runtime)
398 goto balanced; 398 goto balanced;
399 spin_unlock(&rt_rq->rt_runtime_lock); 399 raw_spin_unlock(&rt_rq->rt_runtime_lock);
400 400
401 /* 401 /*
402 * Calculate the difference between what we started out with 402 * Calculate the difference between what we started out with
@@ -418,7 +418,7 @@ static void __disable_runtime(struct rq *rq)
418 if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF) 418 if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF)
419 continue; 419 continue;
420 420
421 spin_lock(&iter->rt_runtime_lock); 421 raw_spin_lock(&iter->rt_runtime_lock);
422 if (want > 0) { 422 if (want > 0) {
423 diff = min_t(s64, iter->rt_runtime, want); 423 diff = min_t(s64, iter->rt_runtime, want);
424 iter->rt_runtime -= diff; 424 iter->rt_runtime -= diff;
@@ -427,13 +427,13 @@ static void __disable_runtime(struct rq *rq)
427 iter->rt_runtime -= want; 427 iter->rt_runtime -= want;
428 want -= want; 428 want -= want;
429 } 429 }
430 spin_unlock(&iter->rt_runtime_lock); 430 raw_spin_unlock(&iter->rt_runtime_lock);
431 431
432 if (!want) 432 if (!want)
433 break; 433 break;
434 } 434 }
435 435
436 spin_lock(&rt_rq->rt_runtime_lock); 436 raw_spin_lock(&rt_rq->rt_runtime_lock);
437 /* 437 /*
438 * We cannot be left wanting - that would mean some runtime 438 * We cannot be left wanting - that would mean some runtime
439 * leaked out of the system. 439 * leaked out of the system.
@@ -445,8 +445,8 @@ balanced:
445 * runtime - in which case borrowing doesn't make sense. 445 * runtime - in which case borrowing doesn't make sense.
446 */ 446 */
447 rt_rq->rt_runtime = RUNTIME_INF; 447 rt_rq->rt_runtime = RUNTIME_INF;
448 spin_unlock(&rt_rq->rt_runtime_lock); 448 raw_spin_unlock(&rt_rq->rt_runtime_lock);
449 spin_unlock(&rt_b->rt_runtime_lock); 449 raw_spin_unlock(&rt_b->rt_runtime_lock);
450 } 450 }
451} 451}
452 452
@@ -454,9 +454,9 @@ static void disable_runtime(struct rq *rq)
454{ 454{
455 unsigned long flags; 455 unsigned long flags;
456 456
457 spin_lock_irqsave(&rq->lock, flags); 457 raw_spin_lock_irqsave(&rq->lock, flags);
458 __disable_runtime(rq); 458 __disable_runtime(rq);
459 spin_unlock_irqrestore(&rq->lock, flags); 459 raw_spin_unlock_irqrestore(&rq->lock, flags);
460} 460}
461 461
462static void __enable_runtime(struct rq *rq) 462static void __enable_runtime(struct rq *rq)
@@ -472,13 +472,13 @@ static void __enable_runtime(struct rq *rq)
472 for_each_leaf_rt_rq(rt_rq, rq) { 472 for_each_leaf_rt_rq(rt_rq, rq) {
473 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); 473 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
474 474
475 spin_lock(&rt_b->rt_runtime_lock); 475 raw_spin_lock(&rt_b->rt_runtime_lock);
476 spin_lock(&rt_rq->rt_runtime_lock); 476 raw_spin_lock(&rt_rq->rt_runtime_lock);
477 rt_rq->rt_runtime = rt_b->rt_runtime; 477 rt_rq->rt_runtime = rt_b->rt_runtime;
478 rt_rq->rt_time = 0; 478 rt_rq->rt_time = 0;
479 rt_rq->rt_throttled = 0; 479 rt_rq->rt_throttled = 0;
480 spin_unlock(&rt_rq->rt_runtime_lock); 480 raw_spin_unlock(&rt_rq->rt_runtime_lock);
481 spin_unlock(&rt_b->rt_runtime_lock); 481 raw_spin_unlock(&rt_b->rt_runtime_lock);
482 } 482 }
483} 483}
484 484
@@ -486,9 +486,9 @@ static void enable_runtime(struct rq *rq)
486{ 486{
487 unsigned long flags; 487 unsigned long flags;
488 488
489 spin_lock_irqsave(&rq->lock, flags); 489 raw_spin_lock_irqsave(&rq->lock, flags);
490 __enable_runtime(rq); 490 __enable_runtime(rq);
491 spin_unlock_irqrestore(&rq->lock, flags); 491 raw_spin_unlock_irqrestore(&rq->lock, flags);
492} 492}
493 493
494static int balance_runtime(struct rt_rq *rt_rq) 494static int balance_runtime(struct rt_rq *rt_rq)
@@ -496,9 +496,9 @@ static int balance_runtime(struct rt_rq *rt_rq)
496 int more = 0; 496 int more = 0;
497 497
498 if (rt_rq->rt_time > rt_rq->rt_runtime) { 498 if (rt_rq->rt_time > rt_rq->rt_runtime) {
499 spin_unlock(&rt_rq->rt_runtime_lock); 499 raw_spin_unlock(&rt_rq->rt_runtime_lock);
500 more = do_balance_runtime(rt_rq); 500 more = do_balance_runtime(rt_rq);
501 spin_lock(&rt_rq->rt_runtime_lock); 501 raw_spin_lock(&rt_rq->rt_runtime_lock);
502 } 502 }
503 503
504 return more; 504 return more;
@@ -524,11 +524,11 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
524 struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i); 524 struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
525 struct rq *rq = rq_of_rt_rq(rt_rq); 525 struct rq *rq = rq_of_rt_rq(rt_rq);
526 526
527 spin_lock(&rq->lock); 527 raw_spin_lock(&rq->lock);
528 if (rt_rq->rt_time) { 528 if (rt_rq->rt_time) {
529 u64 runtime; 529 u64 runtime;
530 530
531 spin_lock(&rt_rq->rt_runtime_lock); 531 raw_spin_lock(&rt_rq->rt_runtime_lock);
532 if (rt_rq->rt_throttled) 532 if (rt_rq->rt_throttled)
533 balance_runtime(rt_rq); 533 balance_runtime(rt_rq);
534 runtime = rt_rq->rt_runtime; 534 runtime = rt_rq->rt_runtime;
@@ -539,13 +539,13 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
539 } 539 }
540 if (rt_rq->rt_time || rt_rq->rt_nr_running) 540 if (rt_rq->rt_time || rt_rq->rt_nr_running)
541 idle = 0; 541 idle = 0;
542 spin_unlock(&rt_rq->rt_runtime_lock); 542 raw_spin_unlock(&rt_rq->rt_runtime_lock);
543 } else if (rt_rq->rt_nr_running) 543 } else if (rt_rq->rt_nr_running)
544 idle = 0; 544 idle = 0;
545 545
546 if (enqueue) 546 if (enqueue)
547 sched_rt_rq_enqueue(rt_rq); 547 sched_rt_rq_enqueue(rt_rq);
548 spin_unlock(&rq->lock); 548 raw_spin_unlock(&rq->lock);
549 } 549 }
550 550
551 return idle; 551 return idle;
@@ -624,11 +624,11 @@ static void update_curr_rt(struct rq *rq)
624 rt_rq = rt_rq_of_se(rt_se); 624 rt_rq = rt_rq_of_se(rt_se);
625 625
626 if (sched_rt_runtime(rt_rq) != RUNTIME_INF) { 626 if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
627 spin_lock(&rt_rq->rt_runtime_lock); 627 raw_spin_lock(&rt_rq->rt_runtime_lock);
628 rt_rq->rt_time += delta_exec; 628 rt_rq->rt_time += delta_exec;
629 if (sched_rt_runtime_exceeded(rt_rq)) 629 if (sched_rt_runtime_exceeded(rt_rq))
630 resched_task(curr); 630 resched_task(curr);
631 spin_unlock(&rt_rq->rt_runtime_lock); 631 raw_spin_unlock(&rt_rq->rt_runtime_lock);
632 } 632 }
633 } 633 }
634} 634}
@@ -1246,7 +1246,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
1246 task_running(rq, task) || 1246 task_running(rq, task) ||
1247 !task->se.on_rq)) { 1247 !task->se.on_rq)) {
1248 1248
1249 spin_unlock(&lowest_rq->lock); 1249 raw_spin_unlock(&lowest_rq->lock);
1250 lowest_rq = NULL; 1250 lowest_rq = NULL;
1251 break; 1251 break;
1252 } 1252 }
@@ -1472,7 +1472,7 @@ static void post_schedule_rt(struct rq *rq)
1472 * If we are not running and we are not going to reschedule soon, we should 1472 * If we are not running and we are not going to reschedule soon, we should
1473 * try to push tasks away now 1473 * try to push tasks away now
1474 */ 1474 */
1475static void task_wake_up_rt(struct rq *rq, struct task_struct *p) 1475static void task_woken_rt(struct rq *rq, struct task_struct *p)
1476{ 1476{
1477 if (!task_running(rq, p) && 1477 if (!task_running(rq, p) &&
1478 !test_tsk_need_resched(rq->curr) && 1478 !test_tsk_need_resched(rq->curr) &&
@@ -1721,7 +1721,7 @@ static void set_curr_task_rt(struct rq *rq)
1721 dequeue_pushable_task(rq, p); 1721 dequeue_pushable_task(rq, p);
1722} 1722}
1723 1723
1724unsigned int get_rr_interval_rt(struct task_struct *task) 1724unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
1725{ 1725{
1726 /* 1726 /*
1727 * Time slice is 0 for SCHED_FIFO tasks 1727 * Time slice is 0 for SCHED_FIFO tasks
@@ -1753,7 +1753,7 @@ static const struct sched_class rt_sched_class = {
1753 .rq_offline = rq_offline_rt, 1753 .rq_offline = rq_offline_rt,
1754 .pre_schedule = pre_schedule_rt, 1754 .pre_schedule = pre_schedule_rt,
1755 .post_schedule = post_schedule_rt, 1755 .post_schedule = post_schedule_rt,
1756 .task_wake_up = task_wake_up_rt, 1756 .task_woken = task_woken_rt,
1757 .switched_from = switched_from_rt, 1757 .switched_from = switched_from_rt,
1758#endif 1758#endif
1759 1759
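
The spin_lock -> raw_spin_lock churn through sched_rt.c (and kernel/smp.c below) is part of the tree-wide lock split in this series: spinlock_t can become a sleeping lock on PREEMPT_RT, while raw_spinlock_t keeps the always-spinning semantics the scheduler core needs. The call shape is unchanged; a minimal sketch (my_lock is a hypothetical lock for illustration):

static DEFINE_RAW_SPINLOCK(my_lock);

static void critical_section(void)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&my_lock, flags);
	/* must not sleep here, even on PREEMPT_RT */
	raw_spin_unlock_irqrestore(&my_lock, flags);
}
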
diff --git a/kernel/signal.c b/kernel/signal.c
index 6b982f2cf524..d09692b40376 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -218,13 +218,13 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi
218 struct user_struct *user; 218 struct user_struct *user;
219 219
220 /* 220 /*
221 * We won't get problems with the target's UID changing under us 221 * Protect access to @t credentials. This can go away when all
222 * because changing it requires RCU be used, and if t != current, the 222 * callers hold rcu read lock.
223 * caller must be holding the RCU readlock (by way of a spinlock) and
224 * we use RCU protection here
225 */ 223 */
224 rcu_read_lock();
226 user = get_uid(__task_cred(t)->user); 225 user = get_uid(__task_cred(t)->user);
227 atomic_inc(&user->sigpending); 226 atomic_inc(&user->sigpending);
227 rcu_read_unlock();
228 228
229 if (override_rlimit || 229 if (override_rlimit ||
230 atomic_read(&user->sigpending) <= 230 atomic_read(&user->sigpending) <=
@@ -423,7 +423,7 @@ still_pending:
423 */ 423 */
424 info->si_signo = sig; 424 info->si_signo = sig;
425 info->si_errno = 0; 425 info->si_errno = 0;
426 info->si_code = 0; 426 info->si_code = SI_USER;
427 info->si_pid = 0; 427 info->si_pid = 0;
428 info->si_uid = 0; 428 info->si_uid = 0;
429 } 429 }
@@ -607,6 +607,17 @@ static int rm_from_queue(unsigned long mask, struct sigpending *s)
607 return 1; 607 return 1;
608} 608}
609 609
610static inline int is_si_special(const struct siginfo *info)
611{
612 return info <= SEND_SIG_FORCED;
613}
614
615static inline bool si_fromuser(const struct siginfo *info)
616{
617 return info == SEND_SIG_NOINFO ||
618 (!is_si_special(info) && SI_FROMUSER(info));
619}
620
610/* 621/*
611 * Bad permissions for sending the signal 622 * Bad permissions for sending the signal
612 * - the caller must hold at least the RCU read lock 623 * - the caller must hold at least the RCU read lock
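
The `info <= SEND_SIG_FORCED` comparison in is_si_special() works because the special sender tokens are small integers cast to pointers; for reference, their definitions (from include/linux/sched.h of this era, not part of this patch):

#define SEND_SIG_NOINFO	((struct siginfo *) 0)
#define SEND_SIG_PRIV	((struct siginfo *) 1)
#define SEND_SIG_FORCED	((struct siginfo *) 2)
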
@@ -621,7 +632,7 @@ static int check_kill_permission(int sig, struct siginfo *info,
621 if (!valid_signal(sig)) 632 if (!valid_signal(sig))
622 return -EINVAL; 633 return -EINVAL;
623 634
624 if (info != SEND_SIG_NOINFO && (is_si_special(info) || SI_FROMKERNEL(info))) 635 if (!si_fromuser(info))
625 return 0; 636 return 0;
626 637
627 error = audit_signal_info(sig, t); /* Let audit system see the signal */ 638 error = audit_signal_info(sig, t); /* Let audit system see the signal */
@@ -949,9 +960,8 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
949 int from_ancestor_ns = 0; 960 int from_ancestor_ns = 0;
950 961
951#ifdef CONFIG_PID_NS 962#ifdef CONFIG_PID_NS
952 if (!is_si_special(info) && SI_FROMUSER(info) && 963 from_ancestor_ns = si_fromuser(info) &&
953 task_pid_nr_ns(current, task_active_pid_ns(t)) <= 0) 964 !task_pid_nr_ns(current, task_active_pid_ns(t));
954 from_ancestor_ns = 1;
955#endif 965#endif
956 966
957 return __send_signal(sig, info, t, group, from_ancestor_ns); 967 return __send_signal(sig, info, t, group, from_ancestor_ns);
@@ -1052,12 +1062,6 @@ force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
1052 return ret; 1062 return ret;
1053} 1063}
1054 1064
1055void
1056force_sig_specific(int sig, struct task_struct *t)
1057{
1058 force_sig_info(sig, SEND_SIG_FORCED, t);
1059}
1060
1061/* 1065/*
1062 * Nuke all other threads in the group. 1066 * Nuke all other threads in the group.
1063 */ 1067 */
@@ -1175,19 +1179,19 @@ int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid,
1175 int ret = -EINVAL; 1179 int ret = -EINVAL;
1176 struct task_struct *p; 1180 struct task_struct *p;
1177 const struct cred *pcred; 1181 const struct cred *pcred;
1182 unsigned long flags;
1178 1183
1179 if (!valid_signal(sig)) 1184 if (!valid_signal(sig))
1180 return ret; 1185 return ret;
1181 1186
1182 read_lock(&tasklist_lock); 1187 rcu_read_lock();
1183 p = pid_task(pid, PIDTYPE_PID); 1188 p = pid_task(pid, PIDTYPE_PID);
1184 if (!p) { 1189 if (!p) {
1185 ret = -ESRCH; 1190 ret = -ESRCH;
1186 goto out_unlock; 1191 goto out_unlock;
1187 } 1192 }
1188 pcred = __task_cred(p); 1193 pcred = __task_cred(p);
1189 if ((info == SEND_SIG_NOINFO || 1194 if (si_fromuser(info) &&
1190 (!is_si_special(info) && SI_FROMUSER(info))) &&
1191 euid != pcred->suid && euid != pcred->uid && 1195 euid != pcred->suid && euid != pcred->uid &&
1192 uid != pcred->suid && uid != pcred->uid) { 1196 uid != pcred->suid && uid != pcred->uid) {
1193 ret = -EPERM; 1197 ret = -EPERM;
@@ -1196,14 +1200,16 @@ int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid,
1196 ret = security_task_kill(p, info, sig, secid); 1200 ret = security_task_kill(p, info, sig, secid);
1197 if (ret) 1201 if (ret)
1198 goto out_unlock; 1202 goto out_unlock;
1199 if (sig && p->sighand) { 1203
1200 unsigned long flags; 1204 if (sig) {
1201 spin_lock_irqsave(&p->sighand->siglock, flags); 1205 if (lock_task_sighand(p, &flags)) {
1202 ret = __send_signal(sig, info, p, 1, 0); 1206 ret = __send_signal(sig, info, p, 1, 0);
1203 spin_unlock_irqrestore(&p->sighand->siglock, flags); 1207 unlock_task_sighand(p, &flags);
1208 } else
1209 ret = -ESRCH;
1204 } 1210 }
1205out_unlock: 1211out_unlock:
1206 read_unlock(&tasklist_lock); 1212 rcu_read_unlock();
1207 return ret; 1213 return ret;
1208} 1214}
1209EXPORT_SYMBOL_GPL(kill_pid_info_as_uid); 1215EXPORT_SYMBOL_GPL(kill_pid_info_as_uid);
@@ -1837,11 +1843,6 @@ relock:
1837 1843
1838 for (;;) { 1844 for (;;) {
1839 struct k_sigaction *ka; 1845 struct k_sigaction *ka;
1840
1841 if (unlikely(signal->group_stop_count > 0) &&
1842 do_signal_stop(0))
1843 goto relock;
1844
1845 /* 1846 /*
1846 * Tracing can induce an artificial signal and choose sigaction. 1847
1847 * The return value in @signr determines the default action, 1848 * The return value in @signr determines the default action,
@@ -1853,6 +1854,10 @@ relock:
1853 if (unlikely(signr != 0)) 1854 if (unlikely(signr != 0))
1854 ka = return_ka; 1855 ka = return_ka;
1855 else { 1856 else {
1857 if (unlikely(signal->group_stop_count > 0) &&
1858 do_signal_stop(0))
1859 goto relock;
1860
1856 signr = dequeue_signal(current, &current->blocked, 1861 signr = dequeue_signal(current, &current->blocked,
1857 info); 1862 info);
1858 1863
diff --git a/kernel/smp.c b/kernel/smp.c
index a8c76069cf50..de735a6637d0 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -16,11 +16,11 @@ static DEFINE_PER_CPU(struct call_single_queue, call_single_queue);
16 16
17static struct { 17static struct {
18 struct list_head queue; 18 struct list_head queue;
19 spinlock_t lock; 19 raw_spinlock_t lock;
20} call_function __cacheline_aligned_in_smp = 20} call_function __cacheline_aligned_in_smp =
21 { 21 {
22 .queue = LIST_HEAD_INIT(call_function.queue), 22 .queue = LIST_HEAD_INIT(call_function.queue),
23 .lock = __SPIN_LOCK_UNLOCKED(call_function.lock), 23 .lock = __RAW_SPIN_LOCK_UNLOCKED(call_function.lock),
24 }; 24 };
25 25
26enum { 26enum {
@@ -35,7 +35,7 @@ struct call_function_data {
35 35
36struct call_single_queue { 36struct call_single_queue {
37 struct list_head list; 37 struct list_head list;
38 spinlock_t lock; 38 raw_spinlock_t lock;
39}; 39};
40 40
41static DEFINE_PER_CPU(struct call_function_data, cfd_data); 41static DEFINE_PER_CPU(struct call_function_data, cfd_data);
@@ -80,7 +80,7 @@ static int __cpuinit init_call_single_data(void)
80 for_each_possible_cpu(i) { 80 for_each_possible_cpu(i) {
81 struct call_single_queue *q = &per_cpu(call_single_queue, i); 81 struct call_single_queue *q = &per_cpu(call_single_queue, i);
82 82
83 spin_lock_init(&q->lock); 83 raw_spin_lock_init(&q->lock);
84 INIT_LIST_HEAD(&q->list); 84 INIT_LIST_HEAD(&q->list);
85 } 85 }
86 86
@@ -141,10 +141,10 @@ void generic_exec_single(int cpu, struct call_single_data *data, int wait)
141 unsigned long flags; 141 unsigned long flags;
142 int ipi; 142 int ipi;
143 143
144 spin_lock_irqsave(&dst->lock, flags); 144 raw_spin_lock_irqsave(&dst->lock, flags);
145 ipi = list_empty(&dst->list); 145 ipi = list_empty(&dst->list);
146 list_add_tail(&data->list, &dst->list); 146 list_add_tail(&data->list, &dst->list);
147 spin_unlock_irqrestore(&dst->lock, flags); 147 raw_spin_unlock_irqrestore(&dst->lock, flags);
148 148
149 /* 149 /*
150 * The list addition should be visible before sending the IPI 150 * The list addition should be visible before sending the IPI
@@ -171,7 +171,7 @@ void generic_exec_single(int cpu, struct call_single_data *data, int wait)
171void generic_smp_call_function_interrupt(void) 171void generic_smp_call_function_interrupt(void)
172{ 172{
173 struct call_function_data *data; 173 struct call_function_data *data;
174 int cpu = get_cpu(); 174 int cpu = smp_processor_id();
175 175
176 /* 176 /*
177 * Shouldn't receive this interrupt on a cpu that is not yet online. 177 * Shouldn't receive this interrupt on a cpu that is not yet online.
@@ -201,9 +201,9 @@ void generic_smp_call_function_interrupt(void)
201 refs = atomic_dec_return(&data->refs); 201 refs = atomic_dec_return(&data->refs);
202 WARN_ON(refs < 0); 202 WARN_ON(refs < 0);
203 if (!refs) { 203 if (!refs) {
204 spin_lock(&call_function.lock); 204 raw_spin_lock(&call_function.lock);
205 list_del_rcu(&data->csd.list); 205 list_del_rcu(&data->csd.list);
206 spin_unlock(&call_function.lock); 206 raw_spin_unlock(&call_function.lock);
207 } 207 }
208 208
209 if (refs) 209 if (refs)
@@ -212,7 +212,6 @@ void generic_smp_call_function_interrupt(void)
212 csd_unlock(&data->csd); 212 csd_unlock(&data->csd);
213 } 213 }
214 214
215 put_cpu();
216} 215}
217 216
218/* 217/*
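
Dropping the get_cpu()/put_cpu() pair above is safe because generic_smp_call_function_interrupt() only runs from the IPI (hardirq) path, where preemption is already disabled, so a bare smp_processor_id() cannot race with migration. A sketch of the pattern under that assumption (hypothetical handler name):

/* hardirq context: preemption is off, so the CPU id is stable */
void some_ipi_handler(void)
{
	int cpu = smp_processor_id();	/* no get_cpu()/put_cpu() needed */
	/* ... walk the call queue for entries targeting @cpu ... */
}
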
@@ -230,9 +229,9 @@ void generic_smp_call_function_single_interrupt(void)
230 */ 229 */
231 WARN_ON_ONCE(!cpu_online(smp_processor_id())); 230 WARN_ON_ONCE(!cpu_online(smp_processor_id()));
232 231
233 spin_lock(&q->lock); 232 raw_spin_lock(&q->lock);
234 list_replace_init(&q->list, &list); 233 list_replace_init(&q->list, &list);
235 spin_unlock(&q->lock); 234 raw_spin_unlock(&q->lock);
236 235
237 while (!list_empty(&list)) { 236 while (!list_empty(&list)) {
238 struct call_single_data *data; 237 struct call_single_data *data;
@@ -449,14 +448,14 @@ void smp_call_function_many(const struct cpumask *mask,
449 cpumask_clear_cpu(this_cpu, data->cpumask); 448 cpumask_clear_cpu(this_cpu, data->cpumask);
450 atomic_set(&data->refs, cpumask_weight(data->cpumask)); 449 atomic_set(&data->refs, cpumask_weight(data->cpumask));
451 450
452 spin_lock_irqsave(&call_function.lock, flags); 451 raw_spin_lock_irqsave(&call_function.lock, flags);
453 /* 452 /*
454 * Place entry at the _HEAD_ of the list, so that any cpu still 453 * Place entry at the _HEAD_ of the list, so that any cpu still
455 * observing the entry in generic_smp_call_function_interrupt() 454 * observing the entry in generic_smp_call_function_interrupt()
456 * will not miss any other list entries: 455 * will not miss any other list entries:
457 */ 456 */
458 list_add_rcu(&data->csd.list, &call_function.queue); 457 list_add_rcu(&data->csd.list, &call_function.queue);
459 spin_unlock_irqrestore(&call_function.lock, flags); 458 raw_spin_unlock_irqrestore(&call_function.lock, flags);
460 459
461 /* 460 /*
462 * Make the list addition visible before sending the ipi. 461 * Make the list addition visible before sending the ipi.
@@ -501,20 +500,20 @@ EXPORT_SYMBOL(smp_call_function);
501 500
502void ipi_call_lock(void) 501void ipi_call_lock(void)
503{ 502{
504 spin_lock(&call_function.lock); 503 raw_spin_lock(&call_function.lock);
505} 504}
506 505
507void ipi_call_unlock(void) 506void ipi_call_unlock(void)
508{ 507{
509 spin_unlock(&call_function.lock); 508 raw_spin_unlock(&call_function.lock);
510} 509}
511 510
512void ipi_call_lock_irq(void) 511void ipi_call_lock_irq(void)
513{ 512{
514 spin_lock_irq(&call_function.lock); 513 raw_spin_lock_irq(&call_function.lock);
515} 514}
516 515
517void ipi_call_unlock_irq(void) 516void ipi_call_unlock_irq(void)
518{ 517{
519 spin_unlock_irq(&call_function.lock); 518 raw_spin_unlock_irq(&call_function.lock);
520} 519}
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 21939d9e830e..a09502e2ef75 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -697,7 +697,7 @@ void __init softirq_init(void)
697 open_softirq(HI_SOFTIRQ, tasklet_hi_action); 697 open_softirq(HI_SOFTIRQ, tasklet_hi_action);
698} 698}
699 699
700static int ksoftirqd(void * __bind_cpu) 700static int run_ksoftirqd(void * __bind_cpu)
701{ 701{
702 set_current_state(TASK_INTERRUPTIBLE); 702 set_current_state(TASK_INTERRUPTIBLE);
703 703
@@ -810,7 +810,7 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb,
810 switch (action) { 810 switch (action) {
811 case CPU_UP_PREPARE: 811 case CPU_UP_PREPARE:
812 case CPU_UP_PREPARE_FROZEN: 812 case CPU_UP_PREPARE_FROZEN:
813 p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu); 813 p = kthread_create(run_ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
814 if (IS_ERR(p)) { 814 if (IS_ERR(p)) {
815 printk("ksoftirqd for %i failed\n", hotcpu); 815 printk("ksoftirqd for %i failed\n", hotcpu);
816 return NOTIFY_BAD; 816 return NOTIFY_BAD;
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 81324d12eb35..d22579087e27 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -22,9 +22,9 @@
22 22
23static DEFINE_SPINLOCK(print_lock); 23static DEFINE_SPINLOCK(print_lock);
24 24
25static DEFINE_PER_CPU(unsigned long, touch_timestamp); 25static DEFINE_PER_CPU(unsigned long, softlockup_touch_ts); /* touch timestamp */
26static DEFINE_PER_CPU(unsigned long, print_timestamp); 26static DEFINE_PER_CPU(unsigned long, softlockup_print_ts); /* print timestamp */
27static DEFINE_PER_CPU(struct task_struct *, watchdog_task); 27static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
28 28
29static int __read_mostly did_panic; 29static int __read_mostly did_panic;
30int __read_mostly softlockup_thresh = 60; 30int __read_mostly softlockup_thresh = 60;
@@ -70,12 +70,12 @@ static void __touch_softlockup_watchdog(void)
70{ 70{
71 int this_cpu = raw_smp_processor_id(); 71 int this_cpu = raw_smp_processor_id();
72 72
73 __raw_get_cpu_var(touch_timestamp) = get_timestamp(this_cpu); 73 __raw_get_cpu_var(softlockup_touch_ts) = get_timestamp(this_cpu);
74} 74}
75 75
76void touch_softlockup_watchdog(void) 76void touch_softlockup_watchdog(void)
77{ 77{
78 __raw_get_cpu_var(touch_timestamp) = 0; 78 __raw_get_cpu_var(softlockup_touch_ts) = 0;
79} 79}
80EXPORT_SYMBOL(touch_softlockup_watchdog); 80EXPORT_SYMBOL(touch_softlockup_watchdog);
81 81
@@ -85,7 +85,7 @@ void touch_all_softlockup_watchdogs(void)
85 85
86 /* Cause each CPU to re-update its timestamp rather than complain */ 86 /* Cause each CPU to re-update its timestamp rather than complain */
87 for_each_online_cpu(cpu) 87 for_each_online_cpu(cpu)
88 per_cpu(touch_timestamp, cpu) = 0; 88 per_cpu(softlockup_touch_ts, cpu) = 0;
89} 89}
90EXPORT_SYMBOL(touch_all_softlockup_watchdogs); 90EXPORT_SYMBOL(touch_all_softlockup_watchdogs);
91 91
@@ -104,28 +104,28 @@ int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
104void softlockup_tick(void) 104void softlockup_tick(void)
105{ 105{
106 int this_cpu = smp_processor_id(); 106 int this_cpu = smp_processor_id();
107 unsigned long touch_timestamp = per_cpu(touch_timestamp, this_cpu); 107 unsigned long touch_ts = per_cpu(softlockup_touch_ts, this_cpu);
108 unsigned long print_timestamp; 108 unsigned long print_ts;
109 struct pt_regs *regs = get_irq_regs(); 109 struct pt_regs *regs = get_irq_regs();
110 unsigned long now; 110 unsigned long now;
111 111
112 /* Is detection switched off? */ 112 /* Is detection switched off? */
113 if (!per_cpu(watchdog_task, this_cpu) || softlockup_thresh <= 0) { 113 if (!per_cpu(softlockup_watchdog, this_cpu) || softlockup_thresh <= 0) {
114 /* Be sure we don't false trigger if switched back on */ 114 /* Be sure we don't false trigger if switched back on */
115 if (touch_timestamp) 115 if (touch_ts)
116 per_cpu(touch_timestamp, this_cpu) = 0; 116 per_cpu(softlockup_touch_ts, this_cpu) = 0;
117 return; 117 return;
118 } 118 }
119 119
120 if (touch_timestamp == 0) { 120 if (touch_ts == 0) {
121 __touch_softlockup_watchdog(); 121 __touch_softlockup_watchdog();
122 return; 122 return;
123 } 123 }
124 124
125 print_timestamp = per_cpu(print_timestamp, this_cpu); 125 print_ts = per_cpu(softlockup_print_ts, this_cpu);
126 126
127 /* report at most once a second */ 127 /* report at most once a second */
128 if (print_timestamp == touch_timestamp || did_panic) 128 if (print_ts == touch_ts || did_panic)
129 return; 129 return;
130 130
131 /* do not print during early bootup: */ 131 /* do not print during early bootup: */
@@ -140,18 +140,18 @@ void softlockup_tick(void)
140 * Wake up the high-prio watchdog task twice per 140 * Wake up the high-prio watchdog task twice per
141 * threshold timespan. 141 * threshold timespan.
142 */ 142 */
143 if (now > touch_timestamp + softlockup_thresh/2) 143 if (now > touch_ts + softlockup_thresh/2)
144 wake_up_process(per_cpu(watchdog_task, this_cpu)); 144 wake_up_process(per_cpu(softlockup_watchdog, this_cpu));
145 145
146 /* Warn about unreasonable delays: */ 146 /* Warn about unreasonable delays: */
147 if (now <= (touch_timestamp + softlockup_thresh)) 147 if (now <= (touch_ts + softlockup_thresh))
148 return; 148 return;
149 149
150 per_cpu(print_timestamp, this_cpu) = touch_timestamp; 150 per_cpu(softlockup_print_ts, this_cpu) = touch_ts;
151 151
152 spin_lock(&print_lock); 152 spin_lock(&print_lock);
153 printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %lus! [%s:%d]\n", 153 printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %lus! [%s:%d]\n",
154 this_cpu, now - touch_timestamp, 154 this_cpu, now - touch_ts,
155 current->comm, task_pid_nr(current)); 155 current->comm, task_pid_nr(current));
156 print_modules(); 156 print_modules();
157 print_irqtrace_events(current); 157 print_irqtrace_events(current);
@@ -209,32 +209,32 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
209 switch (action) { 209 switch (action) {
210 case CPU_UP_PREPARE: 210 case CPU_UP_PREPARE:
211 case CPU_UP_PREPARE_FROZEN: 211 case CPU_UP_PREPARE_FROZEN:
212 BUG_ON(per_cpu(watchdog_task, hotcpu)); 212 BUG_ON(per_cpu(softlockup_watchdog, hotcpu));
213 p = kthread_create(watchdog, hcpu, "watchdog/%d", hotcpu); 213 p = kthread_create(watchdog, hcpu, "watchdog/%d", hotcpu);
214 if (IS_ERR(p)) { 214 if (IS_ERR(p)) {
215 printk(KERN_ERR "watchdog for %i failed\n", hotcpu); 215 printk(KERN_ERR "watchdog for %i failed\n", hotcpu);
216 return NOTIFY_BAD; 216 return NOTIFY_BAD;
217 } 217 }
218 per_cpu(touch_timestamp, hotcpu) = 0; 218 per_cpu(softlockup_touch_ts, hotcpu) = 0;
219 per_cpu(watchdog_task, hotcpu) = p; 219 per_cpu(softlockup_watchdog, hotcpu) = p;
220 kthread_bind(p, hotcpu); 220 kthread_bind(p, hotcpu);
221 break; 221 break;
222 case CPU_ONLINE: 222 case CPU_ONLINE:
223 case CPU_ONLINE_FROZEN: 223 case CPU_ONLINE_FROZEN:
224 wake_up_process(per_cpu(watchdog_task, hotcpu)); 224 wake_up_process(per_cpu(softlockup_watchdog, hotcpu));
225 break; 225 break;
226#ifdef CONFIG_HOTPLUG_CPU 226#ifdef CONFIG_HOTPLUG_CPU
227 case CPU_UP_CANCELED: 227 case CPU_UP_CANCELED:
228 case CPU_UP_CANCELED_FROZEN: 228 case CPU_UP_CANCELED_FROZEN:
229 if (!per_cpu(watchdog_task, hotcpu)) 229 if (!per_cpu(softlockup_watchdog, hotcpu))
230 break; 230 break;
231 /* Unbind so it can run. Fall thru. */ 231 /* Unbind so it can run. Fall thru. */
232 kthread_bind(per_cpu(watchdog_task, hotcpu), 232 kthread_bind(per_cpu(softlockup_watchdog, hotcpu),
233 cpumask_any(cpu_online_mask)); 233 cpumask_any(cpu_online_mask));
234 case CPU_DEAD: 234 case CPU_DEAD:
235 case CPU_DEAD_FROZEN: 235 case CPU_DEAD_FROZEN:
236 p = per_cpu(watchdog_task, hotcpu); 236 p = per_cpu(softlockup_watchdog, hotcpu);
237 per_cpu(watchdog_task, hotcpu) = NULL; 237 per_cpu(softlockup_watchdog, hotcpu) = NULL;
238 kthread_stop(p); 238 kthread_stop(p);
239 break; 239 break;
240#endif /* CONFIG_HOTPLUG_CPU */ 240#endif /* CONFIG_HOTPLUG_CPU */
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index 41e042219ff6..be6517fb9c14 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -32,6 +32,8 @@
32 * include/linux/spinlock_api_smp.h 32 * include/linux/spinlock_api_smp.h
33 */ 33 */
34#else 34#else
35#define raw_read_can_lock(l) read_can_lock(l)
36#define raw_write_can_lock(l) write_can_lock(l)
35/* 37/*
36 * We build the __lock_function inlines here. They are too large for 38 * We build the __lock_function inlines here. They are too large for
37 * inlining all over the place, but here is only one user per function 39 * inlining all over the place, but here is only one user per function
@@ -42,49 +44,49 @@
42 * towards that other CPU that it should break the lock ASAP. 44 * towards that other CPU that it should break the lock ASAP.
43 */ 45 */
44#define BUILD_LOCK_OPS(op, locktype) \ 46#define BUILD_LOCK_OPS(op, locktype) \
45void __lockfunc __##op##_lock(locktype##_t *lock) \ 47void __lockfunc __raw_##op##_lock(locktype##_t *lock) \
46{ \ 48{ \
47 for (;;) { \ 49 for (;;) { \
48 preempt_disable(); \ 50 preempt_disable(); \
49 if (likely(_raw_##op##_trylock(lock))) \ 51 if (likely(do_raw_##op##_trylock(lock))) \
50 break; \ 52 break; \
51 preempt_enable(); \ 53 preempt_enable(); \
52 \ 54 \
53 if (!(lock)->break_lock) \ 55 if (!(lock)->break_lock) \
54 (lock)->break_lock = 1; \ 56 (lock)->break_lock = 1; \
55 while (!op##_can_lock(lock) && (lock)->break_lock) \ 57 while (!raw_##op##_can_lock(lock) && (lock)->break_lock)\
56 _raw_##op##_relax(&lock->raw_lock); \ 58 arch_##op##_relax(&lock->raw_lock); \
57 } \ 59 } \
58 (lock)->break_lock = 0; \ 60 (lock)->break_lock = 0; \
59} \ 61} \
60 \ 62 \
61unsigned long __lockfunc __##op##_lock_irqsave(locktype##_t *lock) \ 63unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock) \
62{ \ 64{ \
63 unsigned long flags; \ 65 unsigned long flags; \
64 \ 66 \
65 for (;;) { \ 67 for (;;) { \
66 preempt_disable(); \ 68 preempt_disable(); \
67 local_irq_save(flags); \ 69 local_irq_save(flags); \
68 if (likely(_raw_##op##_trylock(lock))) \ 70 if (likely(do_raw_##op##_trylock(lock))) \
69 break; \ 71 break; \
70 local_irq_restore(flags); \ 72 local_irq_restore(flags); \
71 preempt_enable(); \ 73 preempt_enable(); \
72 \ 74 \
73 if (!(lock)->break_lock) \ 75 if (!(lock)->break_lock) \
74 (lock)->break_lock = 1; \ 76 (lock)->break_lock = 1; \
75 while (!op##_can_lock(lock) && (lock)->break_lock) \ 77 while (!raw_##op##_can_lock(lock) && (lock)->break_lock)\
76 _raw_##op##_relax(&lock->raw_lock); \ 78 arch_##op##_relax(&lock->raw_lock); \
77 } \ 79 } \
78 (lock)->break_lock = 0; \ 80 (lock)->break_lock = 0; \
79 return flags; \ 81 return flags; \
80} \ 82} \
81 \ 83 \
82void __lockfunc __##op##_lock_irq(locktype##_t *lock) \ 84void __lockfunc __raw_##op##_lock_irq(locktype##_t *lock) \
83{ \ 85{ \
84 _##op##_lock_irqsave(lock); \ 86 _raw_##op##_lock_irqsave(lock); \
85} \ 87} \
86 \ 88 \
87void __lockfunc __##op##_lock_bh(locktype##_t *lock) \ 89void __lockfunc __raw_##op##_lock_bh(locktype##_t *lock) \
88{ \ 90{ \
89 unsigned long flags; \ 91 unsigned long flags; \
90 \ 92 \
@@ -93,7 +95,7 @@ void __lockfunc __##op##_lock_bh(locktype##_t *lock) \
93 /* irq-disabling. We use the generic preemption-aware */ \ 95 /* irq-disabling. We use the generic preemption-aware */ \
94 /* function: */ \ 96 /* function: */ \
95 /**/ \ 97 /**/ \
96 flags = _##op##_lock_irqsave(lock); \ 98 flags = _raw_##op##_lock_irqsave(lock); \
97 local_bh_disable(); \ 99 local_bh_disable(); \
98 local_irq_restore(flags); \ 100 local_irq_restore(flags); \
99} \ 101} \
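
For orientation, the BUILD_LOCK_OPS(spin, raw_spinlock) instantiation in the next hunk stamps out these contention-aware slow paths under their new __raw_ names; the first definition in the macro expands to:

void __lockfunc __raw_spin_lock(raw_spinlock_t *lock)
{
	for (;;) {
		preempt_disable();
		if (likely(do_raw_spin_trylock(lock)))
			break;
		preempt_enable();

		if (!(lock)->break_lock)
			(lock)->break_lock = 1;
		while (!raw_spin_can_lock(lock) && (lock)->break_lock)
			arch_spin_relax(&lock->raw_lock);
	}
	(lock)->break_lock = 0;
}
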
@@ -107,269 +109,269 @@ void __lockfunc __##op##_lock_bh(locktype##_t *lock) \
107 * __[spin|read|write]_lock_irqsave() 109 * __[spin|read|write]_lock_irqsave()
108 * __[spin|read|write]_lock_bh() 110 * __[spin|read|write]_lock_bh()
109 */ 111 */
110BUILD_LOCK_OPS(spin, spinlock); 112BUILD_LOCK_OPS(spin, raw_spinlock);
111BUILD_LOCK_OPS(read, rwlock); 113BUILD_LOCK_OPS(read, rwlock);
112BUILD_LOCK_OPS(write, rwlock); 114BUILD_LOCK_OPS(write, rwlock);
113 115
114#endif 116#endif
115 117
116#ifdef CONFIG_DEBUG_LOCK_ALLOC 118#ifndef CONFIG_INLINE_SPIN_TRYLOCK
117 119int __lockfunc _raw_spin_trylock(raw_spinlock_t *lock)
118void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass)
119{ 120{
120 preempt_disable(); 121 return __raw_spin_trylock(lock);
121 spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
122 LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
123} 122}
124EXPORT_SYMBOL(_spin_lock_nested); 123EXPORT_SYMBOL(_raw_spin_trylock);
124#endif
125 125
126unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, 126#ifndef CONFIG_INLINE_SPIN_TRYLOCK_BH
127 int subclass) 127int __lockfunc _raw_spin_trylock_bh(raw_spinlock_t *lock)
128{ 128{
129 unsigned long flags; 129 return __raw_spin_trylock_bh(lock);
130
131 local_irq_save(flags);
132 preempt_disable();
133 spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
134 LOCK_CONTENDED_FLAGS(lock, _raw_spin_trylock, _raw_spin_lock,
135 _raw_spin_lock_flags, &flags);
136 return flags;
137} 130}
138EXPORT_SYMBOL(_spin_lock_irqsave_nested); 131EXPORT_SYMBOL(_raw_spin_trylock_bh);
132#endif
139 133
140void __lockfunc _spin_lock_nest_lock(spinlock_t *lock, 134#ifndef CONFIG_INLINE_SPIN_LOCK
141 struct lockdep_map *nest_lock) 135void __lockfunc _raw_spin_lock(raw_spinlock_t *lock)
142{ 136{
143 preempt_disable(); 137 __raw_spin_lock(lock);
144 spin_acquire_nest(&lock->dep_map, 0, 0, nest_lock, _RET_IP_);
145 LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
146} 138}
147EXPORT_SYMBOL(_spin_lock_nest_lock); 139EXPORT_SYMBOL(_raw_spin_lock);
148
149#endif 140#endif
150 141
151#ifndef CONFIG_INLINE_SPIN_TRYLOCK 142#ifndef CONFIG_INLINE_SPIN_LOCK_IRQSAVE
152int __lockfunc _spin_trylock(spinlock_t *lock) 143unsigned long __lockfunc _raw_spin_lock_irqsave(raw_spinlock_t *lock)
153{ 144{
154 return __spin_trylock(lock); 145 return __raw_spin_lock_irqsave(lock);
155} 146}
156EXPORT_SYMBOL(_spin_trylock); 147EXPORT_SYMBOL(_raw_spin_lock_irqsave);
157#endif 148#endif
158 149
159#ifndef CONFIG_INLINE_READ_TRYLOCK 150#ifndef CONFIG_INLINE_SPIN_LOCK_IRQ
160int __lockfunc _read_trylock(rwlock_t *lock) 151void __lockfunc _raw_spin_lock_irq(raw_spinlock_t *lock)
161{ 152{
162 return __read_trylock(lock); 153 __raw_spin_lock_irq(lock);
163} 154}
164EXPORT_SYMBOL(_read_trylock); 155EXPORT_SYMBOL(_raw_spin_lock_irq);
165#endif 156#endif
166 157
167#ifndef CONFIG_INLINE_WRITE_TRYLOCK 158#ifndef CONFIG_INLINE_SPIN_LOCK_BH
168int __lockfunc _write_trylock(rwlock_t *lock) 159void __lockfunc _raw_spin_lock_bh(raw_spinlock_t *lock)
169{ 160{
170 return __write_trylock(lock); 161 __raw_spin_lock_bh(lock);
171} 162}
172EXPORT_SYMBOL(_write_trylock); 163EXPORT_SYMBOL(_raw_spin_lock_bh);
173#endif 164#endif
174 165
175#ifndef CONFIG_INLINE_READ_LOCK 166#ifndef CONFIG_INLINE_SPIN_UNLOCK
176void __lockfunc _read_lock(rwlock_t *lock) 167void __lockfunc _raw_spin_unlock(raw_spinlock_t *lock)
177{ 168{
178 __read_lock(lock); 169 __raw_spin_unlock(lock);
179} 170}
180EXPORT_SYMBOL(_read_lock); 171EXPORT_SYMBOL(_raw_spin_unlock);
181#endif 172#endif
182 173
183#ifndef CONFIG_INLINE_SPIN_LOCK_IRQSAVE 174#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE
184unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock) 175void __lockfunc _raw_spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long flags)
185{ 176{
186 return __spin_lock_irqsave(lock); 177 __raw_spin_unlock_irqrestore(lock, flags);
187} 178}
188EXPORT_SYMBOL(_spin_lock_irqsave); 179EXPORT_SYMBOL(_raw_spin_unlock_irqrestore);
189#endif 180#endif
190 181
191#ifndef CONFIG_INLINE_SPIN_LOCK_IRQ 182#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQ
192void __lockfunc _spin_lock_irq(spinlock_t *lock) 183void __lockfunc _raw_spin_unlock_irq(raw_spinlock_t *lock)
193{ 184{
194 __spin_lock_irq(lock); 185 __raw_spin_unlock_irq(lock);
195} 186}
196EXPORT_SYMBOL(_spin_lock_irq); 187EXPORT_SYMBOL(_raw_spin_unlock_irq);
197#endif 188#endif
198 189
199#ifndef CONFIG_INLINE_SPIN_LOCK_BH 190#ifndef CONFIG_INLINE_SPIN_UNLOCK_BH
200void __lockfunc _spin_lock_bh(spinlock_t *lock) 191void __lockfunc _raw_spin_unlock_bh(raw_spinlock_t *lock)
201{ 192{
202 __spin_lock_bh(lock); 193 __raw_spin_unlock_bh(lock);
203} 194}
204EXPORT_SYMBOL(_spin_lock_bh); 195EXPORT_SYMBOL(_raw_spin_unlock_bh);
205#endif 196#endif
206 197
207#ifndef CONFIG_INLINE_READ_LOCK_IRQSAVE 198#ifndef CONFIG_INLINE_READ_TRYLOCK
208unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock) 199int __lockfunc _raw_read_trylock(rwlock_t *lock)
209{ 200{
210 return __read_lock_irqsave(lock); 201 return __raw_read_trylock(lock);
211} 202}
212EXPORT_SYMBOL(_read_lock_irqsave); 203EXPORT_SYMBOL(_raw_read_trylock);
213#endif 204#endif
214 205
215#ifndef CONFIG_INLINE_READ_LOCK_IRQ 206#ifndef CONFIG_INLINE_READ_LOCK
216void __lockfunc _read_lock_irq(rwlock_t *lock) 207void __lockfunc _raw_read_lock(rwlock_t *lock)
217{ 208{
218 __read_lock_irq(lock); 209 __raw_read_lock(lock);
219} 210}
220EXPORT_SYMBOL(_read_lock_irq); 211EXPORT_SYMBOL(_raw_read_lock);
221#endif 212#endif
222 213
223#ifndef CONFIG_INLINE_READ_LOCK_BH 214#ifndef CONFIG_INLINE_READ_LOCK_IRQSAVE
224void __lockfunc _read_lock_bh(rwlock_t *lock) 215unsigned long __lockfunc _raw_read_lock_irqsave(rwlock_t *lock)
225{ 216{
226 __read_lock_bh(lock); 217 return __raw_read_lock_irqsave(lock);
227} 218}
228EXPORT_SYMBOL(_read_lock_bh); 219EXPORT_SYMBOL(_raw_read_lock_irqsave);
229#endif 220#endif
230 221
231#ifndef CONFIG_INLINE_WRITE_LOCK_IRQSAVE 222#ifndef CONFIG_INLINE_READ_LOCK_IRQ
232unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock) 223void __lockfunc _raw_read_lock_irq(rwlock_t *lock)
233{ 224{
234 return __write_lock_irqsave(lock); 225 __raw_read_lock_irq(lock);
235} 226}
236EXPORT_SYMBOL(_write_lock_irqsave); 227EXPORT_SYMBOL(_raw_read_lock_irq);
237#endif 228#endif
238 229
239#ifndef CONFIG_INLINE_WRITE_LOCK_IRQ 230#ifndef CONFIG_INLINE_READ_LOCK_BH
240void __lockfunc _write_lock_irq(rwlock_t *lock) 231void __lockfunc _raw_read_lock_bh(rwlock_t *lock)
241{ 232{
242 __write_lock_irq(lock); 233 __raw_read_lock_bh(lock);
243} 234}
244EXPORT_SYMBOL(_write_lock_irq); 235EXPORT_SYMBOL(_raw_read_lock_bh);
245#endif 236#endif
246 237
247#ifndef CONFIG_INLINE_WRITE_LOCK_BH 238#ifndef CONFIG_INLINE_READ_UNLOCK
248void __lockfunc _write_lock_bh(rwlock_t *lock) 239void __lockfunc _raw_read_unlock(rwlock_t *lock)
249{ 240{
250 __write_lock_bh(lock); 241 __raw_read_unlock(lock);
251} 242}
252EXPORT_SYMBOL(_write_lock_bh); 243EXPORT_SYMBOL(_raw_read_unlock);
253#endif 244#endif
254 245
255#ifndef CONFIG_INLINE_SPIN_LOCK 246#ifndef CONFIG_INLINE_READ_UNLOCK_IRQRESTORE
256void __lockfunc _spin_lock(spinlock_t *lock) 247void __lockfunc _raw_read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
257{ 248{
258 __spin_lock(lock); 249 __raw_read_unlock_irqrestore(lock, flags);
259} 250}
260EXPORT_SYMBOL(_spin_lock); 251EXPORT_SYMBOL(_raw_read_unlock_irqrestore);
261#endif 252#endif
262 253
263#ifndef CONFIG_INLINE_WRITE_LOCK 254#ifndef CONFIG_INLINE_READ_UNLOCK_IRQ
264void __lockfunc _write_lock(rwlock_t *lock) 255void __lockfunc _raw_read_unlock_irq(rwlock_t *lock)
265{ 256{
266 __write_lock(lock); 257 __raw_read_unlock_irq(lock);
267} 258}
268EXPORT_SYMBOL(_write_lock); 259EXPORT_SYMBOL(_raw_read_unlock_irq);
269#endif 260#endif
270 261
271#ifndef CONFIG_INLINE_SPIN_UNLOCK 262#ifndef CONFIG_INLINE_READ_UNLOCK_BH
272void __lockfunc _spin_unlock(spinlock_t *lock) 263void __lockfunc _raw_read_unlock_bh(rwlock_t *lock)
273{ 264{
274 __spin_unlock(lock); 265 __raw_read_unlock_bh(lock);
275} 266}
276EXPORT_SYMBOL(_spin_unlock); 267EXPORT_SYMBOL(_raw_read_unlock_bh);
277#endif 268#endif
278 269
279#ifndef CONFIG_INLINE_WRITE_UNLOCK 270#ifndef CONFIG_INLINE_WRITE_TRYLOCK
280void __lockfunc _write_unlock(rwlock_t *lock) 271int __lockfunc _raw_write_trylock(rwlock_t *lock)
281{ 272{
282 __write_unlock(lock); 273 return __raw_write_trylock(lock);
283} 274}
284EXPORT_SYMBOL(_write_unlock); 275EXPORT_SYMBOL(_raw_write_trylock);
285#endif 276#endif
286 277
287#ifndef CONFIG_INLINE_READ_UNLOCK 278#ifndef CONFIG_INLINE_WRITE_LOCK
288void __lockfunc _read_unlock(rwlock_t *lock) 279void __lockfunc _raw_write_lock(rwlock_t *lock)
289{ 280{
290 __read_unlock(lock); 281 __raw_write_lock(lock);
291} 282}
292EXPORT_SYMBOL(_read_unlock); 283EXPORT_SYMBOL(_raw_write_lock);
293#endif 284#endif
294 285
295#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE 286#ifndef CONFIG_INLINE_WRITE_LOCK_IRQSAVE
296void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) 287unsigned long __lockfunc _raw_write_lock_irqsave(rwlock_t *lock)
297{ 288{
298 __spin_unlock_irqrestore(lock, flags); 289 return __raw_write_lock_irqsave(lock);
299} 290}
300EXPORT_SYMBOL(_spin_unlock_irqrestore); 291EXPORT_SYMBOL(_raw_write_lock_irqsave);
301#endif 292#endif
302 293
303#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQ 294#ifndef CONFIG_INLINE_WRITE_LOCK_IRQ
304void __lockfunc _spin_unlock_irq(spinlock_t *lock) 295void __lockfunc _raw_write_lock_irq(rwlock_t *lock)
305{ 296{
306 __spin_unlock_irq(lock); 297 __raw_write_lock_irq(lock);
307} 298}
308EXPORT_SYMBOL(_spin_unlock_irq); 299EXPORT_SYMBOL(_raw_write_lock_irq);
309#endif 300#endif
310 301
311#ifndef CONFIG_INLINE_SPIN_UNLOCK_BH 302#ifndef CONFIG_INLINE_WRITE_LOCK_BH
312void __lockfunc _spin_unlock_bh(spinlock_t *lock) 303void __lockfunc _raw_write_lock_bh(rwlock_t *lock)
313{ 304{
314 __spin_unlock_bh(lock); 305 __raw_write_lock_bh(lock);
315} 306}
316EXPORT_SYMBOL(_spin_unlock_bh); 307EXPORT_SYMBOL(_raw_write_lock_bh);
317#endif 308#endif
318 309
319#ifndef CONFIG_INLINE_READ_UNLOCK_IRQRESTORE 310#ifndef CONFIG_INLINE_WRITE_UNLOCK
320void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) 311void __lockfunc _raw_write_unlock(rwlock_t *lock)
321{ 312{
322 __read_unlock_irqrestore(lock, flags); 313 __raw_write_unlock(lock);
323} 314}
324EXPORT_SYMBOL(_read_unlock_irqrestore); 315EXPORT_SYMBOL(_raw_write_unlock);
325#endif 316#endif
326 317
327#ifndef CONFIG_INLINE_READ_UNLOCK_IRQ 318#ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE
328void __lockfunc _read_unlock_irq(rwlock_t *lock) 319void __lockfunc _raw_write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
329{ 320{
330 __read_unlock_irq(lock); 321 __raw_write_unlock_irqrestore(lock, flags);
331} 322}
332EXPORT_SYMBOL(_read_unlock_irq); 323EXPORT_SYMBOL(_raw_write_unlock_irqrestore);
333#endif 324#endif
334 325
335#ifndef CONFIG_INLINE_READ_UNLOCK_BH 326#ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQ
336void __lockfunc _read_unlock_bh(rwlock_t *lock) 327void __lockfunc _raw_write_unlock_irq(rwlock_t *lock)
337{ 328{
338 __read_unlock_bh(lock); 329 __raw_write_unlock_irq(lock);
339} 330}
340EXPORT_SYMBOL(_read_unlock_bh); 331EXPORT_SYMBOL(_raw_write_unlock_irq);
341#endif 332#endif
342 333
343#ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE 334#ifndef CONFIG_INLINE_WRITE_UNLOCK_BH
344void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) 335void __lockfunc _raw_write_unlock_bh(rwlock_t *lock)
345{ 336{
346 __write_unlock_irqrestore(lock, flags); 337 __raw_write_unlock_bh(lock);
347} 338}
348EXPORT_SYMBOL(_write_unlock_irqrestore); 339EXPORT_SYMBOL(_raw_write_unlock_bh);
349#endif 340#endif
350 341
351#ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQ 342#ifdef CONFIG_DEBUG_LOCK_ALLOC
352void __lockfunc _write_unlock_irq(rwlock_t *lock) 343
344void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass)
353{ 345{
354 __write_unlock_irq(lock); 346 preempt_disable();
347 spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
348 LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
355} 349}
356EXPORT_SYMBOL(_write_unlock_irq); 350EXPORT_SYMBOL(_raw_spin_lock_nested);
357#endif
358 351
359#ifndef CONFIG_INLINE_WRITE_UNLOCK_BH 352unsigned long __lockfunc _raw_spin_lock_irqsave_nested(raw_spinlock_t *lock,
360void __lockfunc _write_unlock_bh(rwlock_t *lock) 353 int subclass)
361{ 354{
362 __write_unlock_bh(lock); 355 unsigned long flags;
356
357 local_irq_save(flags);
358 preempt_disable();
359 spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
360 LOCK_CONTENDED_FLAGS(lock, do_raw_spin_trylock, do_raw_spin_lock,
361 do_raw_spin_lock_flags, &flags);
362 return flags;
363} 363}
364EXPORT_SYMBOL(_write_unlock_bh); 364EXPORT_SYMBOL(_raw_spin_lock_irqsave_nested);
365#endif
366 365
367#ifndef CONFIG_INLINE_SPIN_TRYLOCK_BH 366void __lockfunc _raw_spin_lock_nest_lock(raw_spinlock_t *lock,
368int __lockfunc _spin_trylock_bh(spinlock_t *lock) 367 struct lockdep_map *nest_lock)
369{ 368{
370 return __spin_trylock_bh(lock); 369 preempt_disable();
370 spin_acquire_nest(&lock->dep_map, 0, 0, nest_lock, _RET_IP_);
371 LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
371} 372}
372EXPORT_SYMBOL(_spin_trylock_bh); 373EXPORT_SYMBOL(_raw_spin_lock_nest_lock);
374
373#endif 375#endif
374 376
375notrace int in_lock_functions(unsigned long addr) 377notrace int in_lock_functions(unsigned long addr)
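The hunks above rename the out-of-line lock functions from _spin_* to _raw_spin_* and retype them to take raw_spinlock_t, while the lockdep-aware nested variants move under CONFIG_DEBUG_LOCK_ALLOC. A minimal sketch of the intended layering, assuming spinlock_t now simply embeds a raw_spinlock_t member named rlock (simplified from the real headers):

typedef struct spinlock {
	struct raw_spinlock rlock;	/* the only member in the non-RT case */
} spinlock_t;

static inline void spin_lock(spinlock_t *lock)
{
	/* forwards to the out-of-line _raw_spin_lock() above, unless
	 * CONFIG_INLINE_SPIN_LOCK selects the inlined version */
	_raw_spin_lock(&lock->rlock);
}

static inline void spin_unlock(spinlock_t *lock)
{
	_raw_spin_unlock(&lock->rlock);
}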
diff --git a/kernel/sys.c b/kernel/sys.c
index 585d6cd10040..26a6b73a6b85 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -162,6 +162,7 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
162 if (niceval > 19) 162 if (niceval > 19)
163 niceval = 19; 163 niceval = 19;
164 164
165 rcu_read_lock();
165 read_lock(&tasklist_lock); 166 read_lock(&tasklist_lock);
166 switch (which) { 167 switch (which) {
167 case PRIO_PROCESS: 168 case PRIO_PROCESS:
@@ -189,16 +190,17 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
189 !(user = find_user(who))) 190 !(user = find_user(who)))
190 goto out_unlock; /* No processes for this user */ 191 goto out_unlock; /* No processes for this user */
191 192
192 do_each_thread(g, p) 193 do_each_thread(g, p) {
193 if (__task_cred(p)->uid == who) 194 if (__task_cred(p)->uid == who)
194 error = set_one_prio(p, niceval, error); 195 error = set_one_prio(p, niceval, error);
195 while_each_thread(g, p); 196 } while_each_thread(g, p);
196 if (who != cred->uid) 197 if (who != cred->uid)
197 free_uid(user); /* For find_user() */ 198 free_uid(user); /* For find_user() */
198 break; 199 break;
199 } 200 }
200out_unlock: 201out_unlock:
201 read_unlock(&tasklist_lock); 202 read_unlock(&tasklist_lock);
203 rcu_read_unlock();
202out: 204out:
203 return error; 205 return error;
204} 206}
@@ -252,13 +254,13 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who)
252 !(user = find_user(who))) 254 !(user = find_user(who)))
253 goto out_unlock; /* No processes for this user */ 255 goto out_unlock; /* No processes for this user */
254 256
255 do_each_thread(g, p) 257 do_each_thread(g, p) {
256 if (__task_cred(p)->uid == who) { 258 if (__task_cred(p)->uid == who) {
257 niceval = 20 - task_nice(p); 259 niceval = 20 - task_nice(p);
258 if (niceval > retval) 260 if (niceval > retval)
259 retval = niceval; 261 retval = niceval;
260 } 262 }
261 while_each_thread(g, p); 263 } while_each_thread(g, p);
262 if (who != cred->uid) 264 if (who != cred->uid)
263 free_uid(user); /* for find_user() */ 265 free_uid(user); /* for find_user() */
264 break; 266 break;
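Both priority loops above gain an rcu_read_lock()/rcu_read_unlock() pair because __task_cred() dereferences RCU-protected credentials, and do_each_thread picks up explicit braces so its body reads as an ordinary block. The resulting pattern, condensed from the hunks:

	rcu_read_lock();			/* required by __task_cred() */
	read_lock(&tasklist_lock);
	do_each_thread(g, p) {
		if (__task_cred(p)->uid == who)
			error = set_one_prio(p, niceval, error);
	} while_each_thread(g, p);
	read_unlock(&tasklist_lock);
	rcu_read_unlock();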
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 9327a26765c5..8a68b2448468 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -244,6 +244,10 @@ static int min_sched_granularity_ns = 100000; /* 100 usecs */
244static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */ 244static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */
245static int min_wakeup_granularity_ns; /* 0 usecs */ 245static int min_wakeup_granularity_ns; /* 0 usecs */
246static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */ 246static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */
247static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
248static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
249static int min_sched_shares_ratelimit = 100000; /* 100 usec */
250static int max_sched_shares_ratelimit = NSEC_PER_SEC; /* 1 second */
247#endif 251#endif
248 252
249static struct ctl_table kern_table[] = { 253static struct ctl_table kern_table[] = {
@@ -260,7 +264,7 @@ static struct ctl_table kern_table[] = {
260 .data = &sysctl_sched_min_granularity, 264 .data = &sysctl_sched_min_granularity,
261 .maxlen = sizeof(unsigned int), 265 .maxlen = sizeof(unsigned int),
262 .mode = 0644, 266 .mode = 0644,
263 .proc_handler = sched_nr_latency_handler, 267 .proc_handler = sched_proc_update_handler,
264 .extra1 = &min_sched_granularity_ns, 268 .extra1 = &min_sched_granularity_ns,
265 .extra2 = &max_sched_granularity_ns, 269 .extra2 = &max_sched_granularity_ns,
266 }, 270 },
@@ -269,7 +273,7 @@ static struct ctl_table kern_table[] = {
269 .data = &sysctl_sched_latency, 273 .data = &sysctl_sched_latency,
270 .maxlen = sizeof(unsigned int), 274 .maxlen = sizeof(unsigned int),
271 .mode = 0644, 275 .mode = 0644,
272 .proc_handler = sched_nr_latency_handler, 276 .proc_handler = sched_proc_update_handler,
273 .extra1 = &min_sched_granularity_ns, 277 .extra1 = &min_sched_granularity_ns,
274 .extra2 = &max_sched_granularity_ns, 278 .extra2 = &max_sched_granularity_ns,
275 }, 279 },
@@ -278,7 +282,7 @@ static struct ctl_table kern_table[] = {
278 .data = &sysctl_sched_wakeup_granularity, 282 .data = &sysctl_sched_wakeup_granularity,
279 .maxlen = sizeof(unsigned int), 283 .maxlen = sizeof(unsigned int),
280 .mode = 0644, 284 .mode = 0644,
281 .proc_handler = proc_dointvec_minmax, 285 .proc_handler = sched_proc_update_handler,
282 .extra1 = &min_wakeup_granularity_ns, 286 .extra1 = &min_wakeup_granularity_ns,
283 .extra2 = &max_wakeup_granularity_ns, 287 .extra2 = &max_wakeup_granularity_ns,
284 }, 288 },
@@ -287,7 +291,18 @@ static struct ctl_table kern_table[] = {
287 .data = &sysctl_sched_shares_ratelimit, 291 .data = &sysctl_sched_shares_ratelimit,
288 .maxlen = sizeof(unsigned int), 292 .maxlen = sizeof(unsigned int),
289 .mode = 0644, 293 .mode = 0644,
290 .proc_handler = proc_dointvec, 294 .proc_handler = sched_proc_update_handler,
295 .extra1 = &min_sched_shares_ratelimit,
296 .extra2 = &max_sched_shares_ratelimit,
297 },
298 {
299 .procname = "sched_tunable_scaling",
300 .data = &sysctl_sched_tunable_scaling,
301 .maxlen = sizeof(enum sched_tunable_scaling),
302 .mode = 0644,
303 .proc_handler = sched_proc_update_handler,
304 .extra1 = &min_sched_tunable_scaling,
305 .extra2 = &max_sched_tunable_scaling,
291 }, 306 },
292 { 307 {
293 .procname = "sched_shares_thresh", 308 .procname = "sched_shares_thresh",
@@ -298,13 +313,6 @@ static struct ctl_table kern_table[] = {
298 .extra1 = &zero, 313 .extra1 = &zero,
299 }, 314 },
300 { 315 {
301 .procname = "sched_features",
302 .data = &sysctl_sched_features,
303 .maxlen = sizeof(unsigned int),
304 .mode = 0644,
305 .proc_handler = proc_dointvec,
306 },
307 {
308 .procname = "sched_migration_cost", 316 .procname = "sched_migration_cost",
309 .data = &sysctl_sched_migration_cost, 317 .data = &sysctl_sched_migration_cost,
310 .maxlen = sizeof(unsigned int), 318 .maxlen = sizeof(unsigned int),
@@ -1043,7 +1051,7 @@ static struct ctl_table vm_table[] = {
1043 .extra2 = &one_hundred, 1051 .extra2 = &one_hundred,
1044 }, 1052 },
1045#ifdef CONFIG_HUGETLB_PAGE 1053#ifdef CONFIG_HUGETLB_PAGE
1046 { 1054 {
1047 .procname = "nr_hugepages", 1055 .procname = "nr_hugepages",
1048 .data = NULL, 1056 .data = NULL,
1049 .maxlen = sizeof(unsigned long), 1057 .maxlen = sizeof(unsigned long),
@@ -1051,7 +1059,18 @@ static struct ctl_table vm_table[] = {
1051 .proc_handler = hugetlb_sysctl_handler, 1059 .proc_handler = hugetlb_sysctl_handler,
1052 .extra1 = (void *)&hugetlb_zero, 1060 .extra1 = (void *)&hugetlb_zero,
1053 .extra2 = (void *)&hugetlb_infinity, 1061 .extra2 = (void *)&hugetlb_infinity,
1054 }, 1062 },
1063#ifdef CONFIG_NUMA
1064 {
1065 .procname = "nr_hugepages_mempolicy",
1066 .data = NULL,
1067 .maxlen = sizeof(unsigned long),
1068 .mode = 0644,
1069 .proc_handler = &hugetlb_mempolicy_sysctl_handler,
1070 .extra1 = (void *)&hugetlb_zero,
1071 .extra2 = (void *)&hugetlb_infinity,
1072 },
1073#endif
1055 { 1074 {
1056 .procname = "hugetlb_shm_group", 1075 .procname = "hugetlb_shm_group",
1057 .data = &sysctl_hugetlb_shm_group, 1076 .data = &sysctl_hugetlb_shm_group,
@@ -1112,7 +1131,8 @@ static struct ctl_table vm_table[] = {
1112 .data = &sysctl_max_map_count, 1131 .data = &sysctl_max_map_count,
1113 .maxlen = sizeof(sysctl_max_map_count), 1132 .maxlen = sizeof(sysctl_max_map_count),
1114 .mode = 0644, 1133 .mode = 0644,
1115 .proc_handler = proc_dointvec 1134 .proc_handler = proc_dointvec_minmax,
1135 .extra1 = &zero,
1116 }, 1136 },
1117#else 1137#else
1118 { 1138 {
@@ -1194,6 +1214,7 @@ static struct ctl_table vm_table[] = {
1194 .proc_handler = proc_dointvec_jiffies, 1214 .proc_handler = proc_dointvec_jiffies,
1195 }, 1215 },
1196#endif 1216#endif
1217#ifdef CONFIG_MMU
1197 { 1218 {
1198 .procname = "mmap_min_addr", 1219 .procname = "mmap_min_addr",
1199 .data = &dac_mmap_min_addr, 1220 .data = &dac_mmap_min_addr,
@@ -1201,6 +1222,7 @@ static struct ctl_table vm_table[] = {
1201 .mode = 0644, 1222 .mode = 0644,
1202 .proc_handler = mmap_min_addr_handler, 1223 .proc_handler = mmap_min_addr_handler,
1203 }, 1224 },
1225#endif
1204#ifdef CONFIG_NUMA 1226#ifdef CONFIG_NUMA
1205 { 1227 {
1206 .procname = "numa_zonelist_order", 1228 .procname = "numa_zonelist_order",
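The scheduler entries above converge on a single handler, sched_proc_update_handler, and the previously unbounded integers gain explicit clamps through extra1/extra2. A generic sketch of the clamped-sysctl pattern; demo_val and friends are hypothetical names for illustration, not real tunables:

static int demo_min = 1;
static int demo_max = 100;
static int demo_val = 10;

static struct ctl_table demo_table[] = {
	{
		.procname	= "demo_val",
		.data		= &demo_val,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		/* rejects writes outside [demo_min, demo_max] */
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &demo_min,
		.extra2		= &demo_max,
	},
	{ }
};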
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index b75dbf40f573..112533d5fc08 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -1399,6 +1399,13 @@ static void deprecated_sysctl_warning(const int *name, int nlen)
1399{ 1399{
1400 int i; 1400 int i;
1401 1401
1402 /*
1403 * CTL_KERN/KERN_VERSION is used by older glibc and cannot
1404 * ever go away.
1405 */
1406 if (name[0] == CTL_KERN && name[1] == KERN_VERSION)
1407 return;
1408
1402 if (printk_ratelimit()) { 1409 if (printk_ratelimit()) {
1403 printk(KERN_INFO 1410 printk(KERN_INFO
1404 "warning: process `%s' used the deprecated sysctl " 1411 "warning: process `%s' used the deprecated sysctl "
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 20a8920029ee..6f740d9f0948 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -30,7 +30,7 @@ static LIST_HEAD(clockevents_released);
30static RAW_NOTIFIER_HEAD(clockevents_chain); 30static RAW_NOTIFIER_HEAD(clockevents_chain);
31 31
32/* Protection for the above */ 32/* Protection for the above */
33static DEFINE_SPINLOCK(clockevents_lock); 33static DEFINE_RAW_SPINLOCK(clockevents_lock);
34 34
35/** 35/**
36 * clockevents_delta2ns - Convert a latch value (device ticks) to nanoseconds 36 * clockevents_delta2ns - Convert a latch value (device ticks) to nanoseconds
@@ -141,9 +141,9 @@ int clockevents_register_notifier(struct notifier_block *nb)
141 unsigned long flags; 141 unsigned long flags;
142 int ret; 142 int ret;
143 143
144 spin_lock_irqsave(&clockevents_lock, flags); 144 raw_spin_lock_irqsave(&clockevents_lock, flags);
145 ret = raw_notifier_chain_register(&clockevents_chain, nb); 145 ret = raw_notifier_chain_register(&clockevents_chain, nb);
146 spin_unlock_irqrestore(&clockevents_lock, flags); 146 raw_spin_unlock_irqrestore(&clockevents_lock, flags);
147 147
148 return ret; 148 return ret;
149} 149}
@@ -185,13 +185,13 @@ void clockevents_register_device(struct clock_event_device *dev)
185 BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); 185 BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
186 BUG_ON(!dev->cpumask); 186 BUG_ON(!dev->cpumask);
187 187
188 spin_lock_irqsave(&clockevents_lock, flags); 188 raw_spin_lock_irqsave(&clockevents_lock, flags);
189 189
190 list_add(&dev->list, &clockevent_devices); 190 list_add(&dev->list, &clockevent_devices);
191 clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev); 191 clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev);
192 clockevents_notify_released(); 192 clockevents_notify_released();
193 193
194 spin_unlock_irqrestore(&clockevents_lock, flags); 194 raw_spin_unlock_irqrestore(&clockevents_lock, flags);
195} 195}
196EXPORT_SYMBOL_GPL(clockevents_register_device); 196EXPORT_SYMBOL_GPL(clockevents_register_device);
197 197
@@ -238,10 +238,11 @@ void clockevents_exchange_device(struct clock_event_device *old,
238 */ 238 */
239void clockevents_notify(unsigned long reason, void *arg) 239void clockevents_notify(unsigned long reason, void *arg)
240{ 240{
241 struct list_head *node, *tmp; 241 struct clock_event_device *dev, *tmp;
242 unsigned long flags; 242 unsigned long flags;
243 int cpu;
243 244
244 spin_lock_irqsave(&clockevents_lock, flags); 245 raw_spin_lock_irqsave(&clockevents_lock, flags);
245 clockevents_do_notify(reason, arg); 246 clockevents_do_notify(reason, arg);
246 247
247 switch (reason) { 248 switch (reason) {
@@ -250,13 +251,24 @@ void clockevents_notify(unsigned long reason, void *arg)
250 * Unregister the clock event devices which were 251 * Unregister the clock event devices which were
251 * released from the users in the notify chain. 252 * released from the users in the notify chain.
252 */ 253 */
253 list_for_each_safe(node, tmp, &clockevents_released) 254 list_for_each_entry_safe(dev, tmp, &clockevents_released, list)
254 list_del(node); 255 list_del(&dev->list);
256 /*
257 * Now check whether the CPU has left unused per cpu devices
258 */
259 cpu = *((int *)arg);
260 list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) {
261 if (cpumask_test_cpu(cpu, dev->cpumask) &&
262 cpumask_weight(dev->cpumask) == 1) {
263 BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
264 list_del(&dev->list);
265 }
266 }
255 break; 267 break;
256 default: 268 default:
257 break; 269 break;
258 } 270 }
259 spin_unlock_irqrestore(&clockevents_lock, flags); 271 raw_spin_unlock_irqrestore(&clockevents_lock, flags);
260} 272}
261EXPORT_SYMBOL_GPL(clockevents_notify); 273EXPORT_SYMBOL_GPL(clockevents_notify);
262#endif 274#endif
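Besides the raw-spinlock conversion, the notify path above switches from list_for_each_safe() plus manual container math to list_for_each_entry_safe(), which yields the containing structure directly and stays safe when the current entry is deleted mid-walk. A condensed sketch of the per-cpu cleanup it enables:

	struct clock_event_device *dev, *tmp;

	list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) {
		/* drop devices that belonged only to the departing CPU */
		if (cpumask_test_cpu(cpu, dev->cpumask) &&
		    cpumask_weight(dev->cpumask) == 1)
			list_del(&dev->list);
	}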
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index c2ec25087a35..b3bafd5fc66d 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -31,7 +31,7 @@ static struct tick_device tick_broadcast_device;
31/* FIXME: Use cpumask_var_t. */ 31/* FIXME: Use cpumask_var_t. */
32static DECLARE_BITMAP(tick_broadcast_mask, NR_CPUS); 32static DECLARE_BITMAP(tick_broadcast_mask, NR_CPUS);
33static DECLARE_BITMAP(tmpmask, NR_CPUS); 33static DECLARE_BITMAP(tmpmask, NR_CPUS);
34static DEFINE_SPINLOCK(tick_broadcast_lock); 34static DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
35static int tick_broadcast_force; 35static int tick_broadcast_force;
36 36
37#ifdef CONFIG_TICK_ONESHOT 37#ifdef CONFIG_TICK_ONESHOT
@@ -96,7 +96,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
96 unsigned long flags; 96 unsigned long flags;
97 int ret = 0; 97 int ret = 0;
98 98
99 spin_lock_irqsave(&tick_broadcast_lock, flags); 99 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
100 100
101 /* 101 /*
102 * Devices might be registered with both periodic and oneshot 102 * Devices might be registered with both periodic and oneshot
@@ -122,7 +122,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
122 tick_broadcast_clear_oneshot(cpu); 122 tick_broadcast_clear_oneshot(cpu);
123 } 123 }
124 } 124 }
125 spin_unlock_irqrestore(&tick_broadcast_lock, flags); 125 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
126 return ret; 126 return ret;
127} 127}
128 128
@@ -161,13 +161,13 @@ static void tick_do_broadcast(struct cpumask *mask)
161 */ 161 */
162static void tick_do_periodic_broadcast(void) 162static void tick_do_periodic_broadcast(void)
163{ 163{
164 spin_lock(&tick_broadcast_lock); 164 raw_spin_lock(&tick_broadcast_lock);
165 165
166 cpumask_and(to_cpumask(tmpmask), 166 cpumask_and(to_cpumask(tmpmask),
167 cpu_online_mask, tick_get_broadcast_mask()); 167 cpu_online_mask, tick_get_broadcast_mask());
168 tick_do_broadcast(to_cpumask(tmpmask)); 168 tick_do_broadcast(to_cpumask(tmpmask));
169 169
170 spin_unlock(&tick_broadcast_lock); 170 raw_spin_unlock(&tick_broadcast_lock);
171} 171}
172 172
173/* 173/*
@@ -212,7 +212,7 @@ static void tick_do_broadcast_on_off(unsigned long *reason)
212 unsigned long flags; 212 unsigned long flags;
213 int cpu, bc_stopped; 213 int cpu, bc_stopped;
214 214
215 spin_lock_irqsave(&tick_broadcast_lock, flags); 215 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
216 216
217 cpu = smp_processor_id(); 217 cpu = smp_processor_id();
218 td = &per_cpu(tick_cpu_device, cpu); 218 td = &per_cpu(tick_cpu_device, cpu);
@@ -263,7 +263,7 @@ static void tick_do_broadcast_on_off(unsigned long *reason)
263 tick_broadcast_setup_oneshot(bc); 263 tick_broadcast_setup_oneshot(bc);
264 } 264 }
265out: 265out:
266 spin_unlock_irqrestore(&tick_broadcast_lock, flags); 266 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
267} 267}
268 268
269/* 269/*
@@ -299,7 +299,7 @@ void tick_shutdown_broadcast(unsigned int *cpup)
299 unsigned long flags; 299 unsigned long flags;
300 unsigned int cpu = *cpup; 300 unsigned int cpu = *cpup;
301 301
302 spin_lock_irqsave(&tick_broadcast_lock, flags); 302 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
303 303
304 bc = tick_broadcast_device.evtdev; 304 bc = tick_broadcast_device.evtdev;
305 cpumask_clear_cpu(cpu, tick_get_broadcast_mask()); 305 cpumask_clear_cpu(cpu, tick_get_broadcast_mask());
@@ -309,7 +309,7 @@ void tick_shutdown_broadcast(unsigned int *cpup)
309 clockevents_shutdown(bc); 309 clockevents_shutdown(bc);
310 } 310 }
311 311
312 spin_unlock_irqrestore(&tick_broadcast_lock, flags); 312 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
313} 313}
314 314
315void tick_suspend_broadcast(void) 315void tick_suspend_broadcast(void)
@@ -317,13 +317,13 @@ void tick_suspend_broadcast(void)
317 struct clock_event_device *bc; 317 struct clock_event_device *bc;
318 unsigned long flags; 318 unsigned long flags;
319 319
320 spin_lock_irqsave(&tick_broadcast_lock, flags); 320 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
321 321
322 bc = tick_broadcast_device.evtdev; 322 bc = tick_broadcast_device.evtdev;
323 if (bc) 323 if (bc)
324 clockevents_shutdown(bc); 324 clockevents_shutdown(bc);
325 325
326 spin_unlock_irqrestore(&tick_broadcast_lock, flags); 326 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
327} 327}
328 328
329int tick_resume_broadcast(void) 329int tick_resume_broadcast(void)
@@ -332,7 +332,7 @@ int tick_resume_broadcast(void)
332 unsigned long flags; 332 unsigned long flags;
333 int broadcast = 0; 333 int broadcast = 0;
334 334
335 spin_lock_irqsave(&tick_broadcast_lock, flags); 335 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
336 336
337 bc = tick_broadcast_device.evtdev; 337 bc = tick_broadcast_device.evtdev;
338 338
@@ -351,7 +351,7 @@ int tick_resume_broadcast(void)
351 break; 351 break;
352 } 352 }
353 } 353 }
354 spin_unlock_irqrestore(&tick_broadcast_lock, flags); 354 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
355 355
356 return broadcast; 356 return broadcast;
357} 357}
@@ -405,7 +405,7 @@ static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
405 ktime_t now, next_event; 405 ktime_t now, next_event;
406 int cpu; 406 int cpu;
407 407
408 spin_lock(&tick_broadcast_lock); 408 raw_spin_lock(&tick_broadcast_lock);
409again: 409again:
410 dev->next_event.tv64 = KTIME_MAX; 410 dev->next_event.tv64 = KTIME_MAX;
411 next_event.tv64 = KTIME_MAX; 411 next_event.tv64 = KTIME_MAX;
@@ -443,7 +443,7 @@ again:
443 if (tick_broadcast_set_event(next_event, 0)) 443 if (tick_broadcast_set_event(next_event, 0))
444 goto again; 444 goto again;
445 } 445 }
446 spin_unlock(&tick_broadcast_lock); 446 raw_spin_unlock(&tick_broadcast_lock);
447} 447}
448 448
449/* 449/*
@@ -457,7 +457,7 @@ void tick_broadcast_oneshot_control(unsigned long reason)
457 unsigned long flags; 457 unsigned long flags;
458 int cpu; 458 int cpu;
459 459
460 spin_lock_irqsave(&tick_broadcast_lock, flags); 460 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
461 461
462 /* 462 /*
463 * Periodic mode does not care about the enter/exit of power 463 * Periodic mode does not care about the enter/exit of power
@@ -492,7 +492,7 @@ void tick_broadcast_oneshot_control(unsigned long reason)
492 } 492 }
493 493
494out: 494out:
495 spin_unlock_irqrestore(&tick_broadcast_lock, flags); 495 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
496} 496}
497 497
498/* 498/*
@@ -563,13 +563,13 @@ void tick_broadcast_switch_to_oneshot(void)
563 struct clock_event_device *bc; 563 struct clock_event_device *bc;
564 unsigned long flags; 564 unsigned long flags;
565 565
566 spin_lock_irqsave(&tick_broadcast_lock, flags); 566 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
567 567
568 tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT; 568 tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
569 bc = tick_broadcast_device.evtdev; 569 bc = tick_broadcast_device.evtdev;
570 if (bc) 570 if (bc)
571 tick_broadcast_setup_oneshot(bc); 571 tick_broadcast_setup_oneshot(bc);
572 spin_unlock_irqrestore(&tick_broadcast_lock, flags); 572 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
573} 573}
574 574
575 575
@@ -581,7 +581,7 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
581 unsigned long flags; 581 unsigned long flags;
582 unsigned int cpu = *cpup; 582 unsigned int cpu = *cpup;
583 583
584 spin_lock_irqsave(&tick_broadcast_lock, flags); 584 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
585 585
586 /* 586 /*
587 * Clear the broadcast mask flag for the dead cpu, but do not 587 * Clear the broadcast mask flag for the dead cpu, but do not
@@ -589,7 +589,7 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
589 */ 589 */
590 cpumask_clear_cpu(cpu, tick_get_broadcast_oneshot_mask()); 590 cpumask_clear_cpu(cpu, tick_get_broadcast_oneshot_mask());
591 591
592 spin_unlock_irqrestore(&tick_broadcast_lock, flags); 592 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
593} 593}
594 594
595/* 595/*
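Every lock site in this file follows the same conversion: spin_* on tick_broadcast_lock becomes raw_spin_*. A raw_spinlock_t always busy-waits, even on configurations (such as the -rt patch set) where an ordinary spinlock_t may become sleepable, which is what low-level tick code reached from hard interrupt context needs. The recurring shape, sketched with a hypothetical lock:

static DEFINE_RAW_SPINLOCK(demo_lock);	/* hypothetical lock */

static void demo_touch_tick_state(void)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&demo_lock, flags);
	/* state that is also reachable from hard-irq context */
	raw_spin_unlock_irqrestore(&demo_lock, flags);
}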
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 83c4417b6a3c..b6b898d2eeef 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -34,7 +34,7 @@ DEFINE_PER_CPU(struct tick_device, tick_cpu_device);
34ktime_t tick_next_period; 34ktime_t tick_next_period;
35ktime_t tick_period; 35ktime_t tick_period;
36int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT; 36int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT;
37DEFINE_SPINLOCK(tick_device_lock); 37static DEFINE_RAW_SPINLOCK(tick_device_lock);
38 38
39/* 39/*
40 * Debugging: see timer_list.c 40 * Debugging: see timer_list.c
@@ -209,7 +209,7 @@ static int tick_check_new_device(struct clock_event_device *newdev)
209 int cpu, ret = NOTIFY_OK; 209 int cpu, ret = NOTIFY_OK;
210 unsigned long flags; 210 unsigned long flags;
211 211
212 spin_lock_irqsave(&tick_device_lock, flags); 212 raw_spin_lock_irqsave(&tick_device_lock, flags);
213 213
214 cpu = smp_processor_id(); 214 cpu = smp_processor_id();
215 if (!cpumask_test_cpu(cpu, newdev->cpumask)) 215 if (!cpumask_test_cpu(cpu, newdev->cpumask))
@@ -268,7 +268,7 @@ static int tick_check_new_device(struct clock_event_device *newdev)
268 if (newdev->features & CLOCK_EVT_FEAT_ONESHOT) 268 if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
269 tick_oneshot_notify(); 269 tick_oneshot_notify();
270 270
271 spin_unlock_irqrestore(&tick_device_lock, flags); 271 raw_spin_unlock_irqrestore(&tick_device_lock, flags);
272 return NOTIFY_STOP; 272 return NOTIFY_STOP;
273 273
274out_bc: 274out_bc:
@@ -278,7 +278,7 @@ out_bc:
278 if (tick_check_broadcast_device(newdev)) 278 if (tick_check_broadcast_device(newdev))
279 ret = NOTIFY_STOP; 279 ret = NOTIFY_STOP;
280 280
281 spin_unlock_irqrestore(&tick_device_lock, flags); 281 raw_spin_unlock_irqrestore(&tick_device_lock, flags);
282 282
283 return ret; 283 return ret;
284} 284}
@@ -311,7 +311,7 @@ static void tick_shutdown(unsigned int *cpup)
311 struct clock_event_device *dev = td->evtdev; 311 struct clock_event_device *dev = td->evtdev;
312 unsigned long flags; 312 unsigned long flags;
313 313
314 spin_lock_irqsave(&tick_device_lock, flags); 314 raw_spin_lock_irqsave(&tick_device_lock, flags);
315 td->mode = TICKDEV_MODE_PERIODIC; 315 td->mode = TICKDEV_MODE_PERIODIC;
316 if (dev) { 316 if (dev) {
317 /* 317 /*
@@ -322,7 +322,7 @@ static void tick_shutdown(unsigned int *cpup)
322 clockevents_exchange_device(dev, NULL); 322 clockevents_exchange_device(dev, NULL);
323 td->evtdev = NULL; 323 td->evtdev = NULL;
324 } 324 }
325 spin_unlock_irqrestore(&tick_device_lock, flags); 325 raw_spin_unlock_irqrestore(&tick_device_lock, flags);
326} 326}
327 327
328static void tick_suspend(void) 328static void tick_suspend(void)
@@ -330,9 +330,9 @@ static void tick_suspend(void)
330 struct tick_device *td = &__get_cpu_var(tick_cpu_device); 330 struct tick_device *td = &__get_cpu_var(tick_cpu_device);
331 unsigned long flags; 331 unsigned long flags;
332 332
333 spin_lock_irqsave(&tick_device_lock, flags); 333 raw_spin_lock_irqsave(&tick_device_lock, flags);
334 clockevents_shutdown(td->evtdev); 334 clockevents_shutdown(td->evtdev);
335 spin_unlock_irqrestore(&tick_device_lock, flags); 335 raw_spin_unlock_irqrestore(&tick_device_lock, flags);
336} 336}
337 337
338static void tick_resume(void) 338static void tick_resume(void)
@@ -341,7 +341,7 @@ static void tick_resume(void)
341 unsigned long flags; 341 unsigned long flags;
342 int broadcast = tick_resume_broadcast(); 342 int broadcast = tick_resume_broadcast();
343 343
344 spin_lock_irqsave(&tick_device_lock, flags); 344 raw_spin_lock_irqsave(&tick_device_lock, flags);
345 clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_RESUME); 345 clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_RESUME);
346 346
347 if (!broadcast) { 347 if (!broadcast) {
@@ -350,7 +350,7 @@ static void tick_resume(void)
350 else 350 else
351 tick_resume_oneshot(); 351 tick_resume_oneshot();
352 } 352 }
353 spin_unlock_irqrestore(&tick_device_lock, flags); 353 raw_spin_unlock_irqrestore(&tick_device_lock, flags);
354} 354}
355 355
356/* 356/*
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index b1c05bf75ee0..290eefbc1f60 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -6,7 +6,6 @@
6#define TICK_DO_TIMER_BOOT -2 6#define TICK_DO_TIMER_BOOT -2
7 7
8DECLARE_PER_CPU(struct tick_device, tick_cpu_device); 8DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
9extern spinlock_t tick_device_lock;
10extern ktime_t tick_next_period; 9extern ktime_t tick_next_period;
11extern ktime_t tick_period; 10extern ktime_t tick_period;
12extern int tick_do_timer_cpu __read_mostly; 11extern int tick_do_timer_cpu __read_mostly;
diff --git a/kernel/time/timecompare.c b/kernel/time/timecompare.c
index 96ff643a5a59..12f5c55090be 100644
--- a/kernel/time/timecompare.c
+++ b/kernel/time/timecompare.c
@@ -89,7 +89,7 @@ int timecompare_offset(struct timecompare *sync,
89 * source time 89 * source time
90 */ 90 */
91 sample.offset = 91 sample.offset =
92 ktime_to_ns(ktime_add(end, start)) / 2 - 92 (ktime_to_ns(end) + ktime_to_ns(start)) / 2 -
93 ts; 93 ts;
94 94
95 /* simple insertion sort based on duration */ 95 /* simple insertion sort based on duration */
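The offset calculation above now averages plain s64 nanosecond values instead of adding two ktime_t values first. Where overflow of the sum is a concern, the usual midpoint form is start + (end - start) / 2; a worked sketch of that alternative formulation (not the exact code above):

static s64 midpoint_ns(ktime_t start, ktime_t end)
{
	s64 a = ktime_to_ns(start);
	s64 b = ktime_to_ns(end);

	/* cannot overflow for valid timestamps, unlike (a + b) / 2 */
	return a + (b - a) / 2;
}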
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 665c76edbf17..bdfb8dd1050c 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -84,7 +84,7 @@ print_active_timers(struct seq_file *m, struct hrtimer_clock_base *base,
84 84
85next_one: 85next_one:
86 i = 0; 86 i = 0;
87 spin_lock_irqsave(&base->cpu_base->lock, flags); 87 raw_spin_lock_irqsave(&base->cpu_base->lock, flags);
88 88
89 curr = base->first; 89 curr = base->first;
90 /* 90 /*
@@ -100,13 +100,13 @@ next_one:
100 100
101 timer = rb_entry(curr, struct hrtimer, node); 101 timer = rb_entry(curr, struct hrtimer, node);
102 tmp = *timer; 102 tmp = *timer;
103 spin_unlock_irqrestore(&base->cpu_base->lock, flags); 103 raw_spin_unlock_irqrestore(&base->cpu_base->lock, flags);
104 104
105 print_timer(m, timer, &tmp, i, now); 105 print_timer(m, timer, &tmp, i, now);
106 next++; 106 next++;
107 goto next_one; 107 goto next_one;
108 } 108 }
109 spin_unlock_irqrestore(&base->cpu_base->lock, flags); 109 raw_spin_unlock_irqrestore(&base->cpu_base->lock, flags);
110} 110}
111 111
112static void 112static void
@@ -150,6 +150,9 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now)
150 P_ns(expires_next); 150 P_ns(expires_next);
151 P(hres_active); 151 P(hres_active);
152 P(nr_events); 152 P(nr_events);
153 P(nr_retries);
154 P(nr_hangs);
155 P_ns(max_hang_time);
153#endif 156#endif
154#undef P 157#undef P
155#undef P_ns 158#undef P_ns
@@ -234,10 +237,10 @@ static void timer_list_show_tickdevices(struct seq_file *m)
234#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST 237#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
235 print_tickdevice(m, tick_get_broadcast_device(), -1); 238 print_tickdevice(m, tick_get_broadcast_device(), -1);
236 SEQ_printf(m, "tick_broadcast_mask: %08lx\n", 239 SEQ_printf(m, "tick_broadcast_mask: %08lx\n",
237 tick_get_broadcast_mask()->bits[0]); 240 cpumask_bits(tick_get_broadcast_mask())[0]);
238#ifdef CONFIG_TICK_ONESHOT 241#ifdef CONFIG_TICK_ONESHOT
239 SEQ_printf(m, "tick_broadcast_oneshot_mask: %08lx\n", 242 SEQ_printf(m, "tick_broadcast_oneshot_mask: %08lx\n",
240 tick_get_broadcast_oneshot_mask()->bits[0]); 243 cpumask_bits(tick_get_broadcast_oneshot_mask())[0]);
241#endif 244#endif
242 SEQ_printf(m, "\n"); 245 SEQ_printf(m, "\n");
243#endif 246#endif
@@ -254,7 +257,7 @@ static int timer_list_show(struct seq_file *m, void *v)
254 u64 now = ktime_to_ns(ktime_get()); 257 u64 now = ktime_to_ns(ktime_get());
255 int cpu; 258 int cpu;
256 259
257 SEQ_printf(m, "Timer List Version: v0.4\n"); 260 SEQ_printf(m, "Timer List Version: v0.5\n");
258 SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES); 261 SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
259 SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now); 262 SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
260 263
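print_active_timers() above copies each hrtimer into a local while holding the now-raw cpu_base lock, drops the lock, and prints from the copy: seq_file output may sleep, so it must never run under a raw spinlock. The snapshot pattern, condensed from the hunk:

	raw_spin_lock_irqsave(&base->cpu_base->lock, flags);
	tmp = *timer;				/* stable snapshot */
	raw_spin_unlock_irqrestore(&base->cpu_base->lock, flags);

	print_timer(m, timer, &tmp, i, now);	/* may sleep; reads the snapshot */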
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c
index ee5681f8d7ec..2f3b585b8d7d 100644
--- a/kernel/time/timer_stats.c
+++ b/kernel/time/timer_stats.c
@@ -86,7 +86,7 @@ static DEFINE_SPINLOCK(table_lock);
86/* 86/*
87 * Per-CPU lookup locks for fast hash lookup: 87 * Per-CPU lookup locks for fast hash lookup:
88 */ 88 */
89static DEFINE_PER_CPU(spinlock_t, lookup_lock); 89static DEFINE_PER_CPU(raw_spinlock_t, tstats_lookup_lock);
90 90
91/* 91/*
92 * Mutex to serialize state changes with show-stats activities: 92 * Mutex to serialize state changes with show-stats activities:
@@ -238,14 +238,14 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
238 /* 238 /*
239 * It doesn't matter which lock we take: 239 * It doesn't matter which lock we take:
240 */ 240 */
241 spinlock_t *lock; 241 raw_spinlock_t *lock;
242 struct entry *entry, input; 242 struct entry *entry, input;
243 unsigned long flags; 243 unsigned long flags;
244 244
245 if (likely(!timer_stats_active)) 245 if (likely(!timer_stats_active))
246 return; 246 return;
247 247
248 lock = &per_cpu(lookup_lock, raw_smp_processor_id()); 248 lock = &per_cpu(tstats_lookup_lock, raw_smp_processor_id());
249 249
250 input.timer = timer; 250 input.timer = timer;
251 input.start_func = startf; 251 input.start_func = startf;
@@ -253,7 +253,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
253 input.pid = pid; 253 input.pid = pid;
254 input.timer_flag = timer_flag; 254 input.timer_flag = timer_flag;
255 255
256 spin_lock_irqsave(lock, flags); 256 raw_spin_lock_irqsave(lock, flags);
257 if (!timer_stats_active) 257 if (!timer_stats_active)
258 goto out_unlock; 258 goto out_unlock;
259 259
@@ -264,7 +264,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
264 atomic_inc(&overflow_count); 264 atomic_inc(&overflow_count);
265 265
266 out_unlock: 266 out_unlock:
267 spin_unlock_irqrestore(lock, flags); 267 raw_spin_unlock_irqrestore(lock, flags);
268} 268}
269 269
270static void print_name_offset(struct seq_file *m, unsigned long addr) 270static void print_name_offset(struct seq_file *m, unsigned long addr)
@@ -348,9 +348,11 @@ static void sync_access(void)
348 int cpu; 348 int cpu;
349 349
350 for_each_online_cpu(cpu) { 350 for_each_online_cpu(cpu) {
351 spin_lock_irqsave(&per_cpu(lookup_lock, cpu), flags); 351 raw_spinlock_t *lock = &per_cpu(tstats_lookup_lock, cpu);
352
353 raw_spin_lock_irqsave(lock, flags);
352 /* nothing */ 354 /* nothing */
353 spin_unlock_irqrestore(&per_cpu(lookup_lock, cpu), flags); 355 raw_spin_unlock_irqrestore(lock, flags);
354 } 356 }
355} 357}
356 358
@@ -408,7 +410,7 @@ void __init init_timer_stats(void)
408 int cpu; 410 int cpu;
409 411
410 for_each_possible_cpu(cpu) 412 for_each_possible_cpu(cpu)
411 spin_lock_init(&per_cpu(lookup_lock, cpu)); 413 raw_spin_lock_init(&per_cpu(tstats_lookup_lock, cpu));
412} 414}
413 415
414static int __init init_tstats_procfs(void) 416static int __init init_tstats_procfs(void)
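The lookup locks become per-CPU raw spinlocks under the less collision-prone name tstats_lookup_lock. Each CPU normally takes only its own lock; sync_access() sweeps all of them to drain any updater still inside a critical section. A sketch of the per-CPU lock setup, with a hypothetical name:

static DEFINE_PER_CPU(raw_spinlock_t, demo_lock);

static void __init demo_init(void)
{
	int cpu;

	/* every possible CPU gets its own initialized lock */
	for_each_possible_cpu(cpu)
		raw_spin_lock_init(&per_cpu(demo_lock, cpu));
}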
diff --git a/kernel/timer.c b/kernel/timer.c
index 5db5a8d26811..15533b792397 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -656,8 +656,6 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
656 656
657 debug_activate(timer, expires); 657 debug_activate(timer, expires);
658 658
659 new_base = __get_cpu_var(tvec_bases);
660
661 cpu = smp_processor_id(); 659 cpu = smp_processor_id();
662 660
663#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) 661#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index e51a1bcb7bed..7968762c8167 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1724,7 +1724,7 @@ ftrace_match_record(struct dyn_ftrace *rec, char *regex, int len, int type)
1724 return ftrace_match(str, regex, len, type); 1724 return ftrace_match(str, regex, len, type);
1725} 1725}
1726 1726
1727static void ftrace_match_records(char *buff, int len, int enable) 1727static int ftrace_match_records(char *buff, int len, int enable)
1728{ 1728{
1729 unsigned int search_len; 1729 unsigned int search_len;
1730 struct ftrace_page *pg; 1730 struct ftrace_page *pg;
@@ -1733,6 +1733,7 @@ static void ftrace_match_records(char *buff, int len, int enable)
1733 char *search; 1733 char *search;
1734 int type; 1734 int type;
1735 int not; 1735 int not;
1736 int found = 0;
1736 1737
1737 flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; 1738 flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
1738 type = filter_parse_regex(buff, len, &search, &not); 1739 type = filter_parse_regex(buff, len, &search, &not);
@@ -1750,6 +1751,7 @@ static void ftrace_match_records(char *buff, int len, int enable)
1750 rec->flags &= ~flag; 1751 rec->flags &= ~flag;
1751 else 1752 else
1752 rec->flags |= flag; 1753 rec->flags |= flag;
1754 found = 1;
1753 } 1755 }
1754 /* 1756 /*
1755 * Only enable filtering if we have a function that 1757 * Only enable filtering if we have a function that
@@ -1759,6 +1761,8 @@ static void ftrace_match_records(char *buff, int len, int enable)
1759 ftrace_filtered = 1; 1761 ftrace_filtered = 1;
1760 } while_for_each_ftrace_rec(); 1762 } while_for_each_ftrace_rec();
1761 mutex_unlock(&ftrace_lock); 1763 mutex_unlock(&ftrace_lock);
1764
1765 return found;
1762} 1766}
1763 1767
1764static int 1768static int
@@ -1780,7 +1784,7 @@ ftrace_match_module_record(struct dyn_ftrace *rec, char *mod,
1780 return 1; 1784 return 1;
1781} 1785}
1782 1786
1783static void ftrace_match_module_records(char *buff, char *mod, int enable) 1787static int ftrace_match_module_records(char *buff, char *mod, int enable)
1784{ 1788{
1785 unsigned search_len = 0; 1789 unsigned search_len = 0;
1786 struct ftrace_page *pg; 1790 struct ftrace_page *pg;
@@ -1789,6 +1793,7 @@ static void ftrace_match_module_records(char *buff, char *mod, int enable)
1789 char *search = buff; 1793 char *search = buff;
1790 unsigned long flag; 1794 unsigned long flag;
1791 int not = 0; 1795 int not = 0;
1796 int found = 0;
1792 1797
1793 flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; 1798 flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
1794 1799
@@ -1819,12 +1824,15 @@ static void ftrace_match_module_records(char *buff, char *mod, int enable)
1819 rec->flags &= ~flag; 1824 rec->flags &= ~flag;
1820 else 1825 else
1821 rec->flags |= flag; 1826 rec->flags |= flag;
1827 found = 1;
1822 } 1828 }
1823 if (enable && (rec->flags & FTRACE_FL_FILTER)) 1829 if (enable && (rec->flags & FTRACE_FL_FILTER))
1824 ftrace_filtered = 1; 1830 ftrace_filtered = 1;
1825 1831
1826 } while_for_each_ftrace_rec(); 1832 } while_for_each_ftrace_rec();
1827 mutex_unlock(&ftrace_lock); 1833 mutex_unlock(&ftrace_lock);
1834
1835 return found;
1828} 1836}
1829 1837
1830/* 1838/*
@@ -1853,8 +1861,9 @@ ftrace_mod_callback(char *func, char *cmd, char *param, int enable)
1853 if (!strlen(mod)) 1861 if (!strlen(mod))
1854 return -EINVAL; 1862 return -EINVAL;
1855 1863
1856 ftrace_match_module_records(func, mod, enable); 1864 if (ftrace_match_module_records(func, mod, enable))
1857 return 0; 1865 return 0;
1866 return -EINVAL;
1858} 1867}
1859 1868
1860static struct ftrace_func_command ftrace_mod_cmd = { 1869static struct ftrace_func_command ftrace_mod_cmd = {
@@ -2151,8 +2160,9 @@ static int ftrace_process_regex(char *buff, int len, int enable)
2151 func = strsep(&next, ":"); 2160 func = strsep(&next, ":");
2152 2161
2153 if (!next) { 2162 if (!next) {
2154 ftrace_match_records(func, len, enable); 2163 if (ftrace_match_records(func, len, enable))
2155 return 0; 2164 return 0;
2165 return ret;
2156 } 2166 }
2157 2167
2158 /* command found */ 2168 /* command found */
@@ -2198,10 +2208,9 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
2198 !trace_parser_cont(parser)) { 2208 !trace_parser_cont(parser)) {
2199 ret = ftrace_process_regex(parser->buffer, 2209 ret = ftrace_process_regex(parser->buffer,
2200 parser->idx, enable); 2210 parser->idx, enable);
2211 trace_parser_clear(parser);
2201 if (ret) 2212 if (ret)
2202 goto out_unlock; 2213 goto out_unlock;
2203
2204 trace_parser_clear(parser);
2205 } 2214 }
2206 2215
2207 ret = read; 2216 ret = read;
@@ -2543,10 +2552,9 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
2543 exists = true; 2552 exists = true;
2544 break; 2553 break;
2545 } 2554 }
2546 if (!exists) { 2555 if (!exists)
2547 array[(*idx)++] = rec->ip; 2556 array[(*idx)++] = rec->ip;
2548 found = 1; 2557 found = 1;
2549 }
2550 } 2558 }
2551 } while_for_each_ftrace_rec(); 2559 } while_for_each_ftrace_rec();
2552 2560
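The filter matchers above change from void to int so callers can distinguish "pattern matched nothing" from success; ftrace_process_regex() and ftrace_mod_callback() then turn an empty match into -EINVAL instead of silently accepting a useless filter. The shape of the change, reduced to a toy matcher with hypothetical names:

static const char *records[] = { "func_a", "func_b" };

static int match_records(const char *pattern)
{
	int found = 0;
	size_t i;

	for (i = 0; i < sizeof(records) / sizeof(records[0]); i++) {
		if (strcmp(records[i], pattern) == 0)
			found = 1;	/* remember that something matched */
	}
	return found;
}

static int process_pattern(const char *buf)
{
	if (!match_records(buf))
		return -EINVAL;	/* a filter that matches nothing is an error */
	return 0;
}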
diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c
index e06c6e3d56a3..9f4f565b01e6 100644
--- a/kernel/trace/power-traces.c
+++ b/kernel/trace/power-traces.c
@@ -14,7 +14,5 @@
14#define CREATE_TRACE_POINTS 14#define CREATE_TRACE_POINTS
15#include <trace/events/power.h> 15#include <trace/events/power.h>
16 16
17EXPORT_TRACEPOINT_SYMBOL_GPL(power_start);
18EXPORT_TRACEPOINT_SYMBOL_GPL(power_end);
19EXPORT_TRACEPOINT_SYMBOL_GPL(power_frequency); 17EXPORT_TRACEPOINT_SYMBOL_GPL(power_frequency);
20 18
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index a1ca4956ab5e..2326b04c95c4 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -423,7 +423,7 @@ struct ring_buffer_per_cpu {
423 int cpu; 423 int cpu;
424 struct ring_buffer *buffer; 424 struct ring_buffer *buffer;
425 spinlock_t reader_lock; /* serialize readers */ 425 spinlock_t reader_lock; /* serialize readers */
426 raw_spinlock_t lock; 426 arch_spinlock_t lock;
427 struct lock_class_key lock_key; 427 struct lock_class_key lock_key;
428 struct list_head *pages; 428 struct list_head *pages;
429 struct buffer_page *head_page; /* read from head */ 429 struct buffer_page *head_page; /* read from head */
@@ -998,7 +998,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
998 cpu_buffer->buffer = buffer; 998 cpu_buffer->buffer = buffer;
999 spin_lock_init(&cpu_buffer->reader_lock); 999 spin_lock_init(&cpu_buffer->reader_lock);
1000 lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key); 1000 lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
1001 cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 1001 cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
1002 1002
1003 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), 1003 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
1004 GFP_KERNEL, cpu_to_node(cpu)); 1004 GFP_KERNEL, cpu_to_node(cpu));
@@ -1193,9 +1193,6 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
1193 struct list_head *p; 1193 struct list_head *p;
1194 unsigned i; 1194 unsigned i;
1195 1195
1196 atomic_inc(&cpu_buffer->record_disabled);
1197 synchronize_sched();
1198
1199 spin_lock_irq(&cpu_buffer->reader_lock); 1196 spin_lock_irq(&cpu_buffer->reader_lock);
1200 rb_head_page_deactivate(cpu_buffer); 1197 rb_head_page_deactivate(cpu_buffer);
1201 1198
@@ -1211,12 +1208,9 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
1211 return; 1208 return;
1212 1209
1213 rb_reset_cpu(cpu_buffer); 1210 rb_reset_cpu(cpu_buffer);
1214 spin_unlock_irq(&cpu_buffer->reader_lock);
1215
1216 rb_check_pages(cpu_buffer); 1211 rb_check_pages(cpu_buffer);
1217 1212
1218 atomic_dec(&cpu_buffer->record_disabled); 1213 spin_unlock_irq(&cpu_buffer->reader_lock);
1219
1220} 1214}
1221 1215
1222static void 1216static void
@@ -1227,9 +1221,6 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
1227 struct list_head *p; 1221 struct list_head *p;
1228 unsigned i; 1222 unsigned i;
1229 1223
1230 atomic_inc(&cpu_buffer->record_disabled);
1231 synchronize_sched();
1232
1233 spin_lock_irq(&cpu_buffer->reader_lock); 1224 spin_lock_irq(&cpu_buffer->reader_lock);
1234 rb_head_page_deactivate(cpu_buffer); 1225 rb_head_page_deactivate(cpu_buffer);
1235 1226
@@ -1242,11 +1233,9 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
1242 list_add_tail(&bpage->list, cpu_buffer->pages); 1233 list_add_tail(&bpage->list, cpu_buffer->pages);
1243 } 1234 }
1244 rb_reset_cpu(cpu_buffer); 1235 rb_reset_cpu(cpu_buffer);
1245 spin_unlock_irq(&cpu_buffer->reader_lock);
1246
1247 rb_check_pages(cpu_buffer); 1236 rb_check_pages(cpu_buffer);
1248 1237
1249 atomic_dec(&cpu_buffer->record_disabled); 1238 spin_unlock_irq(&cpu_buffer->reader_lock);
1250} 1239}
1251 1240
1252/** 1241/**
@@ -1254,11 +1243,6 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
1254 * @buffer: the buffer to resize. 1243 * @buffer: the buffer to resize.
1255 * @size: the new size. 1244 * @size: the new size.
1256 * 1245 *
1257 * The tracer is responsible for making sure that the buffer is
1258 * not being used while changing the size.
1259 * Note: We may be able to change the above requirement by using
1260 * RCU synchronizations.
1261 *
1262 * Minimum size is 2 * BUF_PAGE_SIZE. 1246 * Minimum size is 2 * BUF_PAGE_SIZE.
1263 * 1247 *
1264 * Returns -1 on failure. 1248 * Returns -1 on failure.
@@ -1290,6 +1274,11 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1290 if (size == buffer_size) 1274 if (size == buffer_size)
1291 return size; 1275 return size;
1292 1276
1277 atomic_inc(&buffer->record_disabled);
1278
1279 /* Make sure all writers are done with this buffer. */
1280 synchronize_sched();
1281
1293 mutex_lock(&buffer->mutex); 1282 mutex_lock(&buffer->mutex);
1294 get_online_cpus(); 1283 get_online_cpus();
1295 1284
@@ -1352,6 +1341,8 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1352 put_online_cpus(); 1341 put_online_cpus();
1353 mutex_unlock(&buffer->mutex); 1342 mutex_unlock(&buffer->mutex);
1354 1343
1344 atomic_dec(&buffer->record_disabled);
1345
1355 return size; 1346 return size;
1356 1347
1357 free_pages: 1348 free_pages:
@@ -1361,6 +1352,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1361 } 1352 }
1362 put_online_cpus(); 1353 put_online_cpus();
1363 mutex_unlock(&buffer->mutex); 1354 mutex_unlock(&buffer->mutex);
1355 atomic_dec(&buffer->record_disabled);
1364 return -ENOMEM; 1356 return -ENOMEM;
1365 1357
1366 /* 1358 /*
@@ -1370,6 +1362,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1370 out_fail: 1362 out_fail:
1371 put_online_cpus(); 1363 put_online_cpus();
1372 mutex_unlock(&buffer->mutex); 1364 mutex_unlock(&buffer->mutex);
1365 atomic_dec(&buffer->record_disabled);
1373 return -1; 1366 return -1;
1374} 1367}
1375EXPORT_SYMBOL_GPL(ring_buffer_resize); 1368EXPORT_SYMBOL_GPL(ring_buffer_resize);
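The resize path above hoists writer exclusion out of the per-cpu helpers: recording is disabled once for the whole buffer and synchronize_sched() waits out writers already in flight, after which the page lists can be restructured under just the reader lock. The ordering, condensed from the hunks:

	atomic_inc(&buffer->record_disabled);	/* new writes now bounce */
	synchronize_sched();			/* wait for writers already inside */

	/* ... add or remove pages on each per-cpu buffer ... */

	atomic_dec(&buffer->record_disabled);	/* re-enable recording */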
@@ -2834,7 +2827,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2834 int ret; 2827 int ret;
2835 2828
2836 local_irq_save(flags); 2829 local_irq_save(flags);
2837 __raw_spin_lock(&cpu_buffer->lock); 2830 arch_spin_lock(&cpu_buffer->lock);
2838 2831
2839 again: 2832 again:
2840 /* 2833 /*
@@ -2923,7 +2916,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2923 goto again; 2916 goto again;
2924 2917
2925 out: 2918 out:
2926 __raw_spin_unlock(&cpu_buffer->lock); 2919 arch_spin_unlock(&cpu_buffer->lock);
2927 local_irq_restore(flags); 2920 local_irq_restore(flags);
2928 2921
2929 return reader; 2922 return reader;
@@ -3286,9 +3279,9 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
3286 synchronize_sched(); 3279 synchronize_sched();
3287 3280
3288 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 3281 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3289 __raw_spin_lock(&cpu_buffer->lock); 3282 arch_spin_lock(&cpu_buffer->lock);
3290 rb_iter_reset(iter); 3283 rb_iter_reset(iter);
3291 __raw_spin_unlock(&cpu_buffer->lock); 3284 arch_spin_unlock(&cpu_buffer->lock);
3292 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3285 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3293 3286
3294 return iter; 3287 return iter;
@@ -3408,11 +3401,11 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
3408 if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing))) 3401 if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
3409 goto out; 3402 goto out;
3410 3403
3411 __raw_spin_lock(&cpu_buffer->lock); 3404 arch_spin_lock(&cpu_buffer->lock);
3412 3405
3413 rb_reset_cpu(cpu_buffer); 3406 rb_reset_cpu(cpu_buffer);
3414 3407
3415 __raw_spin_unlock(&cpu_buffer->lock); 3408 arch_spin_unlock(&cpu_buffer->lock);
3416 3409
3417 out: 3410 out:
3418 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3411 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
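The remaining hunks are the raw_spinlock_t to arch_spinlock_t rename: the arch lock is the bare architecture primitive with no lockdep or debug state, so the caller manages interrupts and preemption itself, exactly as rb_get_reader_page() does above. The usage contract, sketched with a hypothetical lock:

static arch_spinlock_t demo_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

static void demo(void)
{
	unsigned long flags;

	local_irq_save(flags);		/* arch locks never do this for us */
	arch_spin_lock(&demo_lock);
	/* critical section */
	arch_spin_unlock(&demo_lock);
	local_irq_restore(flags);
}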
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 874f2893cff0..8b9f20ab8eed 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -12,7 +12,7 @@
12 * Copyright (C) 2004 William Lee Irwin III 12 * Copyright (C) 2004 William Lee Irwin III
13 */ 13 */
14#include <linux/ring_buffer.h> 14#include <linux/ring_buffer.h>
15#include <linux/utsrelease.h> 15#include <generated/utsrelease.h>
16#include <linux/stacktrace.h> 16#include <linux/stacktrace.h>
17#include <linux/writeback.h> 17#include <linux/writeback.h>
18#include <linux/kallsyms.h> 18#include <linux/kallsyms.h>
@@ -86,17 +86,17 @@ static int dummy_set_flag(u32 old_flags, u32 bit, int set)
86 */ 86 */
87static int tracing_disabled = 1; 87static int tracing_disabled = 1;
88 88
89DEFINE_PER_CPU(local_t, ftrace_cpu_disabled); 89DEFINE_PER_CPU(int, ftrace_cpu_disabled);
90 90
91static inline void ftrace_disable_cpu(void) 91static inline void ftrace_disable_cpu(void)
92{ 92{
93 preempt_disable(); 93 preempt_disable();
94 local_inc(&__get_cpu_var(ftrace_cpu_disabled)); 94 __this_cpu_inc(per_cpu_var(ftrace_cpu_disabled));
95} 95}
96 96
97static inline void ftrace_enable_cpu(void) 97static inline void ftrace_enable_cpu(void)
98{ 98{
99 local_dec(&__get_cpu_var(ftrace_cpu_disabled)); 99 __this_cpu_dec(per_cpu_var(ftrace_cpu_disabled));
100 preempt_enable(); 100 preempt_enable();
101} 101}
102 102
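ftrace_cpu_disabled drops from local_t to a plain per-CPU int: with preemption already disabled around every access, __this_cpu_inc()/__this_cpu_dec() are sufficient and cheaper than the local_t atomics. The idiom, sketched with a hypothetical counter (per_cpu_var() being this kernel generation's spelling):

DEFINE_PER_CPU(int, demo_disabled);

static inline void demo_disable(void)
{
	preempt_disable();			/* pin to this CPU first */
	__this_cpu_inc(per_cpu_var(demo_disabled));
}

static inline void demo_enable(void)
{
	__this_cpu_dec(per_cpu_var(demo_disabled));
	preempt_enable();
}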
@@ -203,7 +203,7 @@ cycle_t ftrace_now(int cpu)
203 */ 203 */
204static struct trace_array max_tr; 204static struct trace_array max_tr;
205 205
206static DEFINE_PER_CPU(struct trace_array_cpu, max_data); 206static DEFINE_PER_CPU(struct trace_array_cpu, max_tr_data);
207 207
208/* tracer_enabled is used to toggle activation of a tracer */ 208/* tracer_enabled is used to toggle activation of a tracer */
209static int tracer_enabled = 1; 209static int tracer_enabled = 1;
@@ -313,7 +313,6 @@ static const char *trace_options[] = {
313 "bin", 313 "bin",
314 "block", 314 "block",
315 "stacktrace", 315 "stacktrace",
316 "sched-tree",
317 "trace_printk", 316 "trace_printk",
318 "ftrace_preempt", 317 "ftrace_preempt",
319 "branch", 318 "branch",
@@ -493,15 +492,15 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
493 * protected by per_cpu spinlocks. But the action of the swap 492 * protected by per_cpu spinlocks. But the action of the swap
494 * needs its own lock. 493 * needs its own lock.
495 * 494 *
496 * This is defined as a raw_spinlock_t in order to help 495 * This is defined as an arch_spinlock_t in order to help
497 * with performance when lockdep debugging is enabled. 496 * with performance when lockdep debugging is enabled.
498 * 497 *
499 * It is also used in other places outside the update_max_tr 498 * It is also used in other places outside the update_max_tr
500 * so it needs to be defined outside of the 499 * so it needs to be defined outside of the
501 * CONFIG_TRACER_MAX_TRACE. 500 * CONFIG_TRACER_MAX_TRACE.
502 */ 501 */
503static raw_spinlock_t ftrace_max_lock = 502static arch_spinlock_t ftrace_max_lock =
504 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 503 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
505 504
506#ifdef CONFIG_TRACER_MAX_TRACE 505#ifdef CONFIG_TRACER_MAX_TRACE
507unsigned long __read_mostly tracing_max_latency; 506unsigned long __read_mostly tracing_max_latency;
@@ -555,13 +554,13 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
555 return; 554 return;
556 555
557 WARN_ON_ONCE(!irqs_disabled()); 556 WARN_ON_ONCE(!irqs_disabled());
558 __raw_spin_lock(&ftrace_max_lock); 557 arch_spin_lock(&ftrace_max_lock);
559 558
560 tr->buffer = max_tr.buffer; 559 tr->buffer = max_tr.buffer;
561 max_tr.buffer = buf; 560 max_tr.buffer = buf;
562 561
563 __update_max_tr(tr, tsk, cpu); 562 __update_max_tr(tr, tsk, cpu);
564 __raw_spin_unlock(&ftrace_max_lock); 563 arch_spin_unlock(&ftrace_max_lock);
565} 564}
566 565
567/** 566/**
@@ -581,7 +580,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
581 return; 580 return;
582 581
583 WARN_ON_ONCE(!irqs_disabled()); 582 WARN_ON_ONCE(!irqs_disabled());
584 __raw_spin_lock(&ftrace_max_lock); 583 arch_spin_lock(&ftrace_max_lock);
585 584
586 ftrace_disable_cpu(); 585 ftrace_disable_cpu();
587 586
@@ -603,7 +602,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
603 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY); 602 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
604 603
605 __update_max_tr(tr, tsk, cpu); 604 __update_max_tr(tr, tsk, cpu);
606 __raw_spin_unlock(&ftrace_max_lock); 605 arch_spin_unlock(&ftrace_max_lock);
607} 606}
608#endif /* CONFIG_TRACER_MAX_TRACE */ 607#endif /* CONFIG_TRACER_MAX_TRACE */
609 608
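
This is the 2.6.33-era rename of the lowest-level lock type: raw_spinlock_t becomes arch_spinlock_t and __raw_spin_lock() becomes arch_spin_lock(). The arch_spin_*() calls bypass lockdep and preempt accounting entirely, so the caller must fence off interrupts or preemption itself, which is exactly what the WARN_ON_ONCE(!irqs_disabled()) checks above enforce. A hedged sketch of the calling convention, with hypothetical names:

    #include <linux/spinlock.h>
    #include <linux/irqflags.h>

    /* Hypothetical bottom-level lock; lockdep never sees it. */
    static arch_spinlock_t my_arch_lock =
            (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

    static void my_critical_section(void)
    {
            unsigned long flags;

            local_irq_save(flags);          /* arch_spin_lock() won't do this */
            arch_spin_lock(&my_arch_lock);

            /* ... strictly non-sleeping work ... */

            arch_spin_unlock(&my_arch_lock);
            local_irq_restore(flags);
    }
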
@@ -802,7 +801,7 @@ static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
802static unsigned map_cmdline_to_pid[SAVED_CMDLINES]; 801static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
803static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN]; 802static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
804static int cmdline_idx; 803static int cmdline_idx;
805static raw_spinlock_t trace_cmdline_lock = __RAW_SPIN_LOCK_UNLOCKED; 804static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
806 805
807/* temporary disable recording */ 806/* temporary disable recording */
808static atomic_t trace_record_cmdline_disabled __read_mostly; 807static atomic_t trace_record_cmdline_disabled __read_mostly;
@@ -915,7 +914,7 @@ static void trace_save_cmdline(struct task_struct *tsk)
915 * nor do we want to disable interrupts, 914 * nor do we want to disable interrupts,
916 * so if we miss here, then better luck next time. 915 * so if we miss here, then better luck next time.
917 */ 916 */
918 if (!__raw_spin_trylock(&trace_cmdline_lock)) 917 if (!arch_spin_trylock(&trace_cmdline_lock))
919 return; 918 return;
920 919
921 idx = map_pid_to_cmdline[tsk->pid]; 920 idx = map_pid_to_cmdline[tsk->pid];
@@ -940,7 +939,7 @@ static void trace_save_cmdline(struct task_struct *tsk)
940 939
941 memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN); 940 memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
942 941
943 __raw_spin_unlock(&trace_cmdline_lock); 942 arch_spin_unlock(&trace_cmdline_lock);
944} 943}
945 944
946void trace_find_cmdline(int pid, char comm[]) 945void trace_find_cmdline(int pid, char comm[])
@@ -958,14 +957,14 @@ void trace_find_cmdline(int pid, char comm[])
958 } 957 }
959 958
960 preempt_disable(); 959 preempt_disable();
961 __raw_spin_lock(&trace_cmdline_lock); 960 arch_spin_lock(&trace_cmdline_lock);
962 map = map_pid_to_cmdline[pid]; 961 map = map_pid_to_cmdline[pid];
963 if (map != NO_CMDLINE_MAP) 962 if (map != NO_CMDLINE_MAP)
964 strcpy(comm, saved_cmdlines[map]); 963 strcpy(comm, saved_cmdlines[map]);
965 else 964 else
966 strcpy(comm, "<...>"); 965 strcpy(comm, "<...>");
967 966
968 __raw_spin_unlock(&trace_cmdline_lock); 967 arch_spin_unlock(&trace_cmdline_lock);
969 preempt_enable(); 968 preempt_enable();
970} 969}
971 970
@@ -1085,7 +1084,7 @@ trace_function(struct trace_array *tr,
1085 struct ftrace_entry *entry; 1084 struct ftrace_entry *entry;
1086 1085
1087 /* If we are reading the ring buffer, don't trace */ 1086 /* If we are reading the ring buffer, don't trace */
1088 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) 1087 if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled))))
1089 return; 1088 return;
1090 1089
1091 event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry), 1090 event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
@@ -1151,6 +1150,22 @@ void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1151 __ftrace_trace_stack(tr->buffer, flags, skip, pc); 1150 __ftrace_trace_stack(tr->buffer, flags, skip, pc);
1152} 1151}
1153 1152
1153/**
1154 * trace_dump_stack - record a stack back trace in the trace buffer
1155 */
1156void trace_dump_stack(void)
1157{
1158 unsigned long flags;
1159
1160 if (tracing_disabled || tracing_selftest_running)
1161 return;
1162
1163 local_save_flags(flags);
1164
1165 /* skipping 3 frames seems to get us to the caller of this function */
1166 __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count());
1167}
1168
1154void 1169void
1155ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) 1170ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1156{ 1171{
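
trace_dump_stack(), added above, records a backtrace into the ring buffer instead of the console, so it can be sprinkled into hot paths where a dmesg-bound dump_stack() would be too slow or too noisy. A minimal, hypothetical call site (the declaration lives with the other tracing helpers in linux/kernel.h, assuming CONFIG_TRACING):

    #include <linux/kernel.h>

    static void my_suspect_path(void)
    {
            /* Lands in the ftrace ring buffer, not in dmesg. */
            trace_dump_stack();
    }
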
@@ -1251,8 +1266,8 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
1251 */ 1266 */
1252int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) 1267int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1253{ 1268{
1254 static raw_spinlock_t trace_buf_lock = 1269 static arch_spinlock_t trace_buf_lock =
1255 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 1270 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
1256 static u32 trace_buf[TRACE_BUF_SIZE]; 1271 static u32 trace_buf[TRACE_BUF_SIZE];
1257 1272
1258 struct ftrace_event_call *call = &event_bprint; 1273 struct ftrace_event_call *call = &event_bprint;
@@ -1283,7 +1298,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1283 1298
1284 /* Lockdep uses trace_printk for lock tracing */ 1299 /* Lockdep uses trace_printk for lock tracing */
1285 local_irq_save(flags); 1300 local_irq_save(flags);
1286 __raw_spin_lock(&trace_buf_lock); 1301 arch_spin_lock(&trace_buf_lock);
1287 len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args); 1302 len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
1288 1303
1289 if (len > TRACE_BUF_SIZE || len < 0) 1304 if (len > TRACE_BUF_SIZE || len < 0)
@@ -1304,7 +1319,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1304 ring_buffer_unlock_commit(buffer, event); 1319 ring_buffer_unlock_commit(buffer, event);
1305 1320
1306out_unlock: 1321out_unlock:
1307 __raw_spin_unlock(&trace_buf_lock); 1322 arch_spin_unlock(&trace_buf_lock);
1308 local_irq_restore(flags); 1323 local_irq_restore(flags);
1309 1324
1310out: 1325out:
@@ -1334,7 +1349,7 @@ int trace_array_printk(struct trace_array *tr,
1334int trace_array_vprintk(struct trace_array *tr, 1349int trace_array_vprintk(struct trace_array *tr,
1335 unsigned long ip, const char *fmt, va_list args) 1350 unsigned long ip, const char *fmt, va_list args)
1336{ 1351{
1337 static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED; 1352 static arch_spinlock_t trace_buf_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1338 static char trace_buf[TRACE_BUF_SIZE]; 1353 static char trace_buf[TRACE_BUF_SIZE];
1339 1354
1340 struct ftrace_event_call *call = &event_print; 1355 struct ftrace_event_call *call = &event_print;
@@ -1360,12 +1375,8 @@ int trace_array_vprintk(struct trace_array *tr,
1360 1375
1361 pause_graph_tracing(); 1376 pause_graph_tracing();
1362 raw_local_irq_save(irq_flags); 1377 raw_local_irq_save(irq_flags);
1363 __raw_spin_lock(&trace_buf_lock); 1378 arch_spin_lock(&trace_buf_lock);
1364 if (args == NULL) { 1379 len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
1365 strncpy(trace_buf, fmt, TRACE_BUF_SIZE);
1366 len = strlen(trace_buf);
1367 } else
1368 len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
1369 1380
1370 size = sizeof(*entry) + len + 1; 1381 size = sizeof(*entry) + len + 1;
1371 buffer = tr->buffer; 1382 buffer = tr->buffer;
@@ -1382,7 +1393,7 @@ int trace_array_vprintk(struct trace_array *tr,
1382 ring_buffer_unlock_commit(buffer, event); 1393 ring_buffer_unlock_commit(buffer, event);
1383 1394
1384 out_unlock: 1395 out_unlock:
1385 __raw_spin_unlock(&trace_buf_lock); 1396 arch_spin_unlock(&trace_buf_lock);
1386 raw_local_irq_restore(irq_flags); 1397 raw_local_irq_restore(irq_flags);
1387 unpause_graph_tracing(); 1398 unpause_graph_tracing();
1388 out: 1399 out:
@@ -1516,6 +1527,8 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos)
1516 int i = (int)*pos; 1527 int i = (int)*pos;
1517 void *ent; 1528 void *ent;
1518 1529
1530 WARN_ON_ONCE(iter->leftover);
1531
1519 (*pos)++; 1532 (*pos)++;
1520 1533
1521 /* can't go backwards */ 1534 /* can't go backwards */
@@ -1614,8 +1627,16 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1614 ; 1627 ;
1615 1628
1616 } else { 1629 } else {
1617 l = *pos - 1; 1630 /*
1618 p = s_next(m, p, &l); 1631 * If we overflowed the seq_file before, then we want
1632 * to just reuse the trace_seq buffer again.
1633 */
1634 if (iter->leftover)
1635 p = iter;
1636 else {
1637 l = *pos - 1;
1638 p = s_next(m, p, &l);
1639 }
1619 } 1640 }
1620 1641
1621 trace_event_read_lock(); 1642 trace_event_read_lock();
@@ -1923,6 +1944,7 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter)
1923static int s_show(struct seq_file *m, void *v) 1944static int s_show(struct seq_file *m, void *v)
1924{ 1945{
1925 struct trace_iterator *iter = v; 1946 struct trace_iterator *iter = v;
1947 int ret;
1926 1948
1927 if (iter->ent == NULL) { 1949 if (iter->ent == NULL) {
1928 if (iter->tr) { 1950 if (iter->tr) {
@@ -1942,9 +1964,27 @@ static int s_show(struct seq_file *m, void *v)
1942 if (!(trace_flags & TRACE_ITER_VERBOSE)) 1964 if (!(trace_flags & TRACE_ITER_VERBOSE))
1943 print_func_help_header(m); 1965 print_func_help_header(m);
1944 } 1966 }
1967 } else if (iter->leftover) {
1968 /*
1969 * If we filled the seq_file buffer earlier, we
1970 * want to just show it now.
1971 */
1972 ret = trace_print_seq(m, &iter->seq);
1973
1974 /* ret should be zero this time, but you never know */
1975 iter->leftover = ret;
1976
1945 } else { 1977 } else {
1946 print_trace_line(iter); 1978 print_trace_line(iter);
1947 trace_print_seq(m, &iter->seq); 1979 ret = trace_print_seq(m, &iter->seq);
1980 /*
1981 * If we overflow the seq_file buffer, then it will
1982 * ask us for this data again at start up.
1983 * Use that instead.
1984 * ret is 0 if seq_file write succeeded.
1985 * -1 otherwise.
1986 */
1987 iter->leftover = ret;
1948 } 1988 }
1949 1989
1950 return 0; 1990 return 0;
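
These s_start()/s_next()/s_show() hunks close a silent-truncation hole: when a formatted entry overflows the seq_file buffer, trace_print_seq() now reports it and the text is parked in iter->leftover, so the next ->show() pass replays it instead of dropping it (the s_start() change above reuses the iterator in that case, and the new WARN_ON_ONCE() in s_next() guards the invariant). Reduced to a skeleton under hypothetical names:

    #include <linux/seq_file.h>

    struct my_iter {
            char buf[256];          /* formatted text waiting to go out */
            int leftover;           /* set when the last write overflowed */
    };

    static void my_format_entry(struct my_iter *iter) { /* fill iter->buf */ }

    static int my_show(struct seq_file *m, void *v)
    {
            struct my_iter *iter = v;

            if (!iter->leftover)
                    my_format_entry(iter);

            /* seq_printf() of this era returns -1 on overflow, 0 otherwise;
             * remember the failure and replay the same text next pass. */
            iter->leftover = (seq_printf(m, "%s", iter->buf) < 0);
            return 0;
    }
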
@@ -2254,7 +2294,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2254 mutex_lock(&tracing_cpumask_update_lock); 2294 mutex_lock(&tracing_cpumask_update_lock);
2255 2295
2256 local_irq_disable(); 2296 local_irq_disable();
2257 __raw_spin_lock(&ftrace_max_lock); 2297 arch_spin_lock(&ftrace_max_lock);
2258 for_each_tracing_cpu(cpu) { 2298 for_each_tracing_cpu(cpu) {
2259 /* 2299 /*
2260 * Increase/decrease the disabled counter if we are 2300 * Increase/decrease the disabled counter if we are
@@ -2269,7 +2309,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2269 atomic_dec(&global_trace.data[cpu]->disabled); 2309 atomic_dec(&global_trace.data[cpu]->disabled);
2270 } 2310 }
2271 } 2311 }
2272 __raw_spin_unlock(&ftrace_max_lock); 2312 arch_spin_unlock(&ftrace_max_lock);
2273 local_irq_enable(); 2313 local_irq_enable();
2274 2314
2275 cpumask_copy(tracing_cpumask, tracing_cpumask_new); 2315 cpumask_copy(tracing_cpumask, tracing_cpumask_new);
@@ -2291,67 +2331,49 @@ static const struct file_operations tracing_cpumask_fops = {
2291 .write = tracing_cpumask_write, 2331 .write = tracing_cpumask_write,
2292}; 2332};
2293 2333
2294static ssize_t 2334static int tracing_trace_options_show(struct seq_file *m, void *v)
2295tracing_trace_options_read(struct file *filp, char __user *ubuf,
2296 size_t cnt, loff_t *ppos)
2297{ 2335{
2298 struct tracer_opt *trace_opts; 2336 struct tracer_opt *trace_opts;
2299 u32 tracer_flags; 2337 u32 tracer_flags;
2300 int len = 0;
2301 char *buf;
2302 int r = 0;
2303 int i; 2338 int i;
2304 2339
2305
2306 /* calculate max size */
2307 for (i = 0; trace_options[i]; i++) {
2308 len += strlen(trace_options[i]);
2309 len += 3; /* "no" and newline */
2310 }
2311
2312 mutex_lock(&trace_types_lock); 2340 mutex_lock(&trace_types_lock);
2313 tracer_flags = current_trace->flags->val; 2341 tracer_flags = current_trace->flags->val;
2314 trace_opts = current_trace->flags->opts; 2342 trace_opts = current_trace->flags->opts;
2315 2343
2316 /*
2317 * Increase the size with names of options specific
2318 * of the current tracer.
2319 */
2320 for (i = 0; trace_opts[i].name; i++) {
2321 len += strlen(trace_opts[i].name);
2322 len += 3; /* "no" and newline */
2323 }
2324
2325 /* +1 for \0 */
2326 buf = kmalloc(len + 1, GFP_KERNEL);
2327 if (!buf) {
2328 mutex_unlock(&trace_types_lock);
2329 return -ENOMEM;
2330 }
2331
2332 for (i = 0; trace_options[i]; i++) { 2344 for (i = 0; trace_options[i]; i++) {
2333 if (trace_flags & (1 << i)) 2345 if (trace_flags & (1 << i))
2334 r += sprintf(buf + r, "%s\n", trace_options[i]); 2346 seq_printf(m, "%s\n", trace_options[i]);
2335 else 2347 else
2336 r += sprintf(buf + r, "no%s\n", trace_options[i]); 2348 seq_printf(m, "no%s\n", trace_options[i]);
2337 } 2349 }
2338 2350
2339 for (i = 0; trace_opts[i].name; i++) { 2351 for (i = 0; trace_opts[i].name; i++) {
2340 if (tracer_flags & trace_opts[i].bit) 2352 if (tracer_flags & trace_opts[i].bit)
2341 r += sprintf(buf + r, "%s\n", 2353 seq_printf(m, "%s\n", trace_opts[i].name);
2342 trace_opts[i].name);
2343 else 2354 else
2344 r += sprintf(buf + r, "no%s\n", 2355 seq_printf(m, "no%s\n", trace_opts[i].name);
2345 trace_opts[i].name);
2346 } 2356 }
2347 mutex_unlock(&trace_types_lock); 2357 mutex_unlock(&trace_types_lock);
2348 2358
2349 WARN_ON(r >= len + 1); 2359 return 0;
2360}
2350 2361
2351 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2362static int __set_tracer_option(struct tracer *trace,
2363 struct tracer_flags *tracer_flags,
2364 struct tracer_opt *opts, int neg)
2365{
2366 int ret;
2352 2367
2353 kfree(buf); 2368 ret = trace->set_flag(tracer_flags->val, opts->bit, !neg);
2354 return r; 2369 if (ret)
2370 return ret;
2371
2372 if (neg)
2373 tracer_flags->val &= ~opts->bit;
2374 else
2375 tracer_flags->val |= opts->bit;
2376 return 0;
2355} 2377}
2356 2378
2357/* Try to assign a tracer specific option */ 2379/* Try to assign a tracer specific option */
@@ -2359,33 +2381,17 @@ static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
2359{ 2381{
2360 struct tracer_flags *tracer_flags = trace->flags; 2382 struct tracer_flags *tracer_flags = trace->flags;
2361 struct tracer_opt *opts = NULL; 2383 struct tracer_opt *opts = NULL;
2362 int ret = 0, i = 0; 2384 int i;
2363 int len;
2364 2385
2365 for (i = 0; tracer_flags->opts[i].name; i++) { 2386 for (i = 0; tracer_flags->opts[i].name; i++) {
2366 opts = &tracer_flags->opts[i]; 2387 opts = &tracer_flags->opts[i];
2367 len = strlen(opts->name);
2368 2388
2369 if (strncmp(cmp, opts->name, len) == 0) { 2389 if (strcmp(cmp, opts->name) == 0)
2370 ret = trace->set_flag(tracer_flags->val, 2390 return __set_tracer_option(trace, trace->flags,
2371 opts->bit, !neg); 2391 opts, neg);
2372 break;
2373 }
2374 } 2392 }
2375 /* Not found */
2376 if (!tracer_flags->opts[i].name)
2377 return -EINVAL;
2378
2379 /* Refused to handle */
2380 if (ret)
2381 return ret;
2382
2383 if (neg)
2384 tracer_flags->val &= ~opts->bit;
2385 else
2386 tracer_flags->val |= opts->bit;
2387 2393
2388 return 0; 2394 return -EINVAL;
2389} 2395}
2390 2396
2391static void set_tracer_flags(unsigned int mask, int enabled) 2397static void set_tracer_flags(unsigned int mask, int enabled)
@@ -2405,7 +2411,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2405 size_t cnt, loff_t *ppos) 2411 size_t cnt, loff_t *ppos)
2406{ 2412{
2407 char buf[64]; 2413 char buf[64];
2408 char *cmp = buf; 2414 char *cmp;
2409 int neg = 0; 2415 int neg = 0;
2410 int ret; 2416 int ret;
2411 int i; 2417 int i;
@@ -2417,16 +2423,15 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2417 return -EFAULT; 2423 return -EFAULT;
2418 2424
2419 buf[cnt] = 0; 2425 buf[cnt] = 0;
2426 cmp = strstrip(buf);
2420 2427
2421 if (strncmp(buf, "no", 2) == 0) { 2428 if (strncmp(cmp, "no", 2) == 0) {
2422 neg = 1; 2429 neg = 1;
2423 cmp += 2; 2430 cmp += 2;
2424 } 2431 }
2425 2432
2426 for (i = 0; trace_options[i]; i++) { 2433 for (i = 0; trace_options[i]; i++) {
2427 int len = strlen(trace_options[i]); 2434 if (strcmp(cmp, trace_options[i]) == 0) {
2428
2429 if (strncmp(cmp, trace_options[i], len) == 0) {
2430 set_tracer_flags(1 << i, !neg); 2435 set_tracer_flags(1 << i, !neg);
2431 break; 2436 break;
2432 } 2437 }
@@ -2446,9 +2451,18 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2446 return cnt; 2451 return cnt;
2447} 2452}
2448 2453
2454static int tracing_trace_options_open(struct inode *inode, struct file *file)
2455{
2456 if (tracing_disabled)
2457 return -ENODEV;
2458 return single_open(file, tracing_trace_options_show, NULL);
2459}
2460
2449static const struct file_operations tracing_iter_fops = { 2461static const struct file_operations tracing_iter_fops = {
2450 .open = tracing_open_generic, 2462 .open = tracing_trace_options_open,
2451 .read = tracing_trace_options_read, 2463 .read = seq_read,
2464 .llseek = seq_lseek,
2465 .release = single_release,
2452 .write = tracing_trace_options_write, 2466 .write = tracing_trace_options_write,
2453}; 2467};
2454 2468
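
The trace_options read side moves from a hand-rolled kmalloc/sprintf/simple_read_from_buffer() pipeline to seq_file's single_open() helpers, which is why all of the "calculate max size" arithmetic disappears: seq_printf() sizes its buffer as it goes. The standard shape of such a conversion, under hypothetical names:

    #include <linux/fs.h>
    #include <linux/seq_file.h>

    static int my_options_show(struct seq_file *m, void *v)
    {
            seq_printf(m, "some-option\n"); /* no manual sizing needed */
            return 0;
    }

    static int my_options_open(struct inode *inode, struct file *file)
    {
            return single_open(file, my_options_show, NULL);
    }

    static const struct file_operations my_options_fops = {
            .open    = my_options_open,
            .read    = seq_read,
            .llseek  = seq_lseek,
            .release = single_release,
    };
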
@@ -2898,6 +2912,10 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
2898 else 2912 else
2899 cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask); 2913 cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask);
2900 2914
2915
2916 if (iter->trace->pipe_close)
2917 iter->trace->pipe_close(iter);
2918
2901 mutex_unlock(&trace_types_lock); 2919 mutex_unlock(&trace_types_lock);
2902 2920
2903 free_cpumask_var(iter->started); 2921 free_cpumask_var(iter->started);
@@ -3104,7 +3122,7 @@ static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
3104 __free_page(spd->pages[idx]); 3122 __free_page(spd->pages[idx]);
3105} 3123}
3106 3124
3107static struct pipe_buf_operations tracing_pipe_buf_ops = { 3125static const struct pipe_buf_operations tracing_pipe_buf_ops = {
3108 .can_merge = 0, 3126 .can_merge = 0,
3109 .map = generic_pipe_buf_map, 3127 .map = generic_pipe_buf_map,
3110 .unmap = generic_pipe_buf_unmap, 3128 .unmap = generic_pipe_buf_unmap,
@@ -3320,6 +3338,16 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
3320 return cnt; 3338 return cnt;
3321} 3339}
3322 3340
3341static int mark_printk(const char *fmt, ...)
3342{
3343 int ret;
3344 va_list args;
3345 va_start(args, fmt);
3346 ret = trace_vprintk(0, fmt, args);
3347 va_end(args);
3348 return ret;
3349}
3350
3323static ssize_t 3351static ssize_t
3324tracing_mark_write(struct file *filp, const char __user *ubuf, 3352tracing_mark_write(struct file *filp, const char __user *ubuf,
3325 size_t cnt, loff_t *fpos) 3353 size_t cnt, loff_t *fpos)
@@ -3346,28 +3374,25 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
3346 } else 3374 } else
3347 buf[cnt] = '\0'; 3375 buf[cnt] = '\0';
3348 3376
3349 cnt = trace_vprintk(0, buf, NULL); 3377 cnt = mark_printk("%s", buf);
3350 kfree(buf); 3378 kfree(buf);
3351 *fpos += cnt; 3379 *fpos += cnt;
3352 3380
3353 return cnt; 3381 return cnt;
3354} 3382}
3355 3383
3356static ssize_t tracing_clock_read(struct file *filp, char __user *ubuf, 3384static int tracing_clock_show(struct seq_file *m, void *v)
3357 size_t cnt, loff_t *ppos)
3358{ 3385{
3359 char buf[64];
3360 int bufiter = 0;
3361 int i; 3386 int i;
3362 3387
3363 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) 3388 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
3364 bufiter += snprintf(buf + bufiter, sizeof(buf) - bufiter, 3389 seq_printf(m,
3365 "%s%s%s%s", i ? " " : "", 3390 "%s%s%s%s", i ? " " : "",
3366 i == trace_clock_id ? "[" : "", trace_clocks[i].name, 3391 i == trace_clock_id ? "[" : "", trace_clocks[i].name,
3367 i == trace_clock_id ? "]" : ""); 3392 i == trace_clock_id ? "]" : "");
3368 bufiter += snprintf(buf + bufiter, sizeof(buf) - bufiter, "\n"); 3393 seq_putc(m, '\n');
3369 3394
3370 return simple_read_from_buffer(ubuf, cnt, ppos, buf, bufiter); 3395 return 0;
3371} 3396}
3372 3397
3373static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, 3398static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
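
Two related cleanups meet in the hunks above. trace_array_vprintk() loses its args == NULL special case, so tracing_mark_write() can no longer hand the raw user buffer over as a format string; the new mark_printk() shim forwards it as trace_vprintk(0, "%s", buf) instead, which also guarantees that '%' bytes written by userspace are never interpreted as conversions. The same defensive pattern in plain C:

    #include <stdio.h>

    void log_user_text(const char *user_buf)
    {
            /* BAD:  printf(user_buf);  user-supplied '%' would be parsed. */
            printf("%s", user_buf);     /* user bytes stay data, not format */
    }
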
@@ -3409,6 +3434,13 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
3409 return cnt; 3434 return cnt;
3410} 3435}
3411 3436
3437static int tracing_clock_open(struct inode *inode, struct file *file)
3438{
3439 if (tracing_disabled)
3440 return -ENODEV;
3441 return single_open(file, tracing_clock_show, NULL);
3442}
3443
3412static const struct file_operations tracing_max_lat_fops = { 3444static const struct file_operations tracing_max_lat_fops = {
3413 .open = tracing_open_generic, 3445 .open = tracing_open_generic,
3414 .read = tracing_max_lat_read, 3446 .read = tracing_max_lat_read,
@@ -3447,8 +3479,10 @@ static const struct file_operations tracing_mark_fops = {
3447}; 3479};
3448 3480
3449static const struct file_operations trace_clock_fops = { 3481static const struct file_operations trace_clock_fops = {
3450 .open = tracing_open_generic, 3482 .open = tracing_clock_open,
3451 .read = tracing_clock_read, 3483 .read = seq_read,
3484 .llseek = seq_lseek,
3485 .release = single_release,
3452 .write = tracing_clock_write, 3486 .write = tracing_clock_write,
3453}; 3487};
3454 3488
@@ -3578,7 +3612,7 @@ static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
3578} 3612}
3579 3613
3580/* Pipe buffer operations for a buffer. */ 3614/* Pipe buffer operations for a buffer. */
3581static struct pipe_buf_operations buffer_pipe_buf_ops = { 3615static const struct pipe_buf_operations buffer_pipe_buf_ops = {
3582 .can_merge = 0, 3616 .can_merge = 0,
3583 .map = generic_pipe_buf_map, 3617 .map = generic_pipe_buf_map,
3584 .unmap = generic_pipe_buf_unmap, 3618 .unmap = generic_pipe_buf_unmap,
@@ -3909,39 +3943,16 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
3909 if (ret < 0) 3943 if (ret < 0)
3910 return ret; 3944 return ret;
3911 3945
3912 ret = 0; 3946 if (val != 0 && val != 1)
3913 switch (val) { 3947 return -EINVAL;
3914 case 0:
3915 /* do nothing if already cleared */
3916 if (!(topt->flags->val & topt->opt->bit))
3917 break;
3918
3919 mutex_lock(&trace_types_lock);
3920 if (current_trace->set_flag)
3921 ret = current_trace->set_flag(topt->flags->val,
3922 topt->opt->bit, 0);
3923 mutex_unlock(&trace_types_lock);
3924 if (ret)
3925 return ret;
3926 topt->flags->val &= ~topt->opt->bit;
3927 break;
3928 case 1:
3929 /* do nothing if already set */
3930 if (topt->flags->val & topt->opt->bit)
3931 break;
3932 3948
3949 if (!!(topt->flags->val & topt->opt->bit) != val) {
3933 mutex_lock(&trace_types_lock); 3950 mutex_lock(&trace_types_lock);
3934 if (current_trace->set_flag) 3951 ret = __set_tracer_option(current_trace, topt->flags,
3935 ret = current_trace->set_flag(topt->flags->val, 3952 topt->opt, val);
3936 topt->opt->bit, 1);
3937 mutex_unlock(&trace_types_lock); 3953 mutex_unlock(&trace_types_lock);
3938 if (ret) 3954 if (ret)
3939 return ret; 3955 return ret;
3940 topt->flags->val |= topt->opt->bit;
3941 break;
3942
3943 default:
3944 return -EINVAL;
3945 } 3956 }
3946 3957
3947 *ppos += cnt; 3958 *ppos += cnt;
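
The rewritten trace_options_write() handler folds the old 0/1 switch into a single change-detection test: !!(topt->flags->val & topt->opt->bit) normalizes the masked bit to exactly 0 or 1 before comparing with the requested value, so __set_tracer_option() only runs when the state actually flips. The idiom in isolation:

    #include <stdbool.h>

    /* True when 'bit' must be toggled to reach the requested 0/1 state. */
    static bool flag_needs_update(unsigned int flags, unsigned int bit, int val)
    {
            return !!(flags & bit) != val;  /* !! folds any set bit to 1 */
    }
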
@@ -4268,8 +4279,8 @@ trace_printk_seq(struct trace_seq *s)
4268 4279
4269static void __ftrace_dump(bool disable_tracing) 4280static void __ftrace_dump(bool disable_tracing)
4270{ 4281{
4271 static raw_spinlock_t ftrace_dump_lock = 4282 static arch_spinlock_t ftrace_dump_lock =
4272 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 4283 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
4273 /* use static because iter can be a bit big for the stack */ 4284 /* use static because iter can be a bit big for the stack */
4274 static struct trace_iterator iter; 4285 static struct trace_iterator iter;
4275 unsigned int old_userobj; 4286 unsigned int old_userobj;
@@ -4279,7 +4290,7 @@ static void __ftrace_dump(bool disable_tracing)
4279 4290
4280 /* only one dump */ 4291 /* only one dump */
4281 local_irq_save(flags); 4292 local_irq_save(flags);
4282 __raw_spin_lock(&ftrace_dump_lock); 4293 arch_spin_lock(&ftrace_dump_lock);
4283 if (dump_ran) 4294 if (dump_ran)
4284 goto out; 4295 goto out;
4285 4296
@@ -4354,7 +4365,7 @@ static void __ftrace_dump(bool disable_tracing)
4354 } 4365 }
4355 4366
4356 out: 4367 out:
4357 __raw_spin_unlock(&ftrace_dump_lock); 4368 arch_spin_unlock(&ftrace_dump_lock);
4358 local_irq_restore(flags); 4369 local_irq_restore(flags);
4359} 4370}
4360 4371
@@ -4415,7 +4426,7 @@ __init static int tracer_alloc_buffers(void)
4415 /* Allocate the first page for all buffers */ 4426 /* Allocate the first page for all buffers */
4416 for_each_tracing_cpu(i) { 4427 for_each_tracing_cpu(i) {
4417 global_trace.data[i] = &per_cpu(global_trace_cpu, i); 4428 global_trace.data[i] = &per_cpu(global_trace_cpu, i);
4418 max_tr.data[i] = &per_cpu(max_data, i); 4429 max_tr.data[i] = &per_cpu(max_tr_data, i);
4419 } 4430 }
4420 4431
4421 trace_init_cmdlines(); 4432 trace_init_cmdlines();
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 1d7f4830a80d..4df6a77eb196 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -272,6 +272,7 @@ struct tracer_flags {
272 * @pipe_open: called when the trace_pipe file is opened 272 * @pipe_open: called when the trace_pipe file is opened
273 * @wait_pipe: override how the user waits for traces on trace_pipe 273 * @wait_pipe: override how the user waits for traces on trace_pipe
274 * @close: called when the trace file is released 274 * @close: called when the trace file is released
275 * @pipe_close: called when the trace_pipe file is released
275 * @read: override the default read callback on trace_pipe 276 * @read: override the default read callback on trace_pipe
276 * @splice_read: override the default splice_read callback on trace_pipe 277 * @splice_read: override the default splice_read callback on trace_pipe
277 * @selftest: selftest to run on boot (see trace_selftest.c) 278 * @selftest: selftest to run on boot (see trace_selftest.c)
@@ -290,6 +291,7 @@ struct tracer {
290 void (*pipe_open)(struct trace_iterator *iter); 291 void (*pipe_open)(struct trace_iterator *iter);
291 void (*wait_pipe)(struct trace_iterator *iter); 292 void (*wait_pipe)(struct trace_iterator *iter);
292 void (*close)(struct trace_iterator *iter); 293 void (*close)(struct trace_iterator *iter);
294 void (*pipe_close)(struct trace_iterator *iter);
293 ssize_t (*read)(struct trace_iterator *iter, 295 ssize_t (*read)(struct trace_iterator *iter,
294 struct file *filp, char __user *ubuf, 296 struct file *filp, char __user *ubuf,
295 size_t cnt, loff_t *ppos); 297 size_t cnt, loff_t *ppos);
@@ -441,7 +443,7 @@ extern int DYN_FTRACE_TEST_NAME(void);
441 443
442extern int ring_buffer_expanded; 444extern int ring_buffer_expanded;
443extern bool tracing_selftest_disabled; 445extern bool tracing_selftest_disabled;
444DECLARE_PER_CPU(local_t, ftrace_cpu_disabled); 446DECLARE_PER_CPU(int, ftrace_cpu_disabled);
445 447
446#ifdef CONFIG_FTRACE_STARTUP_TEST 448#ifdef CONFIG_FTRACE_STARTUP_TEST
447extern int trace_selftest_startup_function(struct tracer *trace, 449extern int trace_selftest_startup_function(struct tracer *trace,
@@ -595,18 +597,17 @@ enum trace_iterator_flags {
595 TRACE_ITER_BIN = 0x40, 597 TRACE_ITER_BIN = 0x40,
596 TRACE_ITER_BLOCK = 0x80, 598 TRACE_ITER_BLOCK = 0x80,
597 TRACE_ITER_STACKTRACE = 0x100, 599 TRACE_ITER_STACKTRACE = 0x100,
598 TRACE_ITER_SCHED_TREE = 0x200, 600 TRACE_ITER_PRINTK = 0x200,
599 TRACE_ITER_PRINTK = 0x400, 601 TRACE_ITER_PREEMPTONLY = 0x400,
600 TRACE_ITER_PREEMPTONLY = 0x800, 602 TRACE_ITER_BRANCH = 0x800,
601 TRACE_ITER_BRANCH = 0x1000, 603 TRACE_ITER_ANNOTATE = 0x1000,
602 TRACE_ITER_ANNOTATE = 0x2000, 604 TRACE_ITER_USERSTACKTRACE = 0x2000,
603 TRACE_ITER_USERSTACKTRACE = 0x4000, 605 TRACE_ITER_SYM_USEROBJ = 0x4000,
604 TRACE_ITER_SYM_USEROBJ = 0x8000, 606 TRACE_ITER_PRINTK_MSGONLY = 0x8000,
605 TRACE_ITER_PRINTK_MSGONLY = 0x10000, 607 TRACE_ITER_CONTEXT_INFO = 0x10000, /* Print pid/cpu/time */
606 TRACE_ITER_CONTEXT_INFO = 0x20000, /* Print pid/cpu/time */ 608 TRACE_ITER_LATENCY_FMT = 0x20000,
607 TRACE_ITER_LATENCY_FMT = 0x40000, 609 TRACE_ITER_SLEEP_TIME = 0x40000,
608 TRACE_ITER_SLEEP_TIME = 0x80000, 610 TRACE_ITER_GRAPH_TIME = 0x80000,
609 TRACE_ITER_GRAPH_TIME = 0x100000,
610}; 611};
611 612
612/* 613/*
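
With "sched-tree" removed from trace_options[], every TRACE_ITER_* constant above it shifts down one bit. That is safe only because these bits are always derived at runtime as 1 << i from the option's index in trace_options[], which changed in the same patch; nothing persists the raw values across boots. A trimmed, hypothetical sketch of the coupling that must stay in sync:

    /* Bit i of the flags word corresponds to name i in the table. */
    static const char *my_options[] = { "print-parent", "sym-offset", NULL };

    enum my_iter_flags {
            MY_ITER_PRINT_PARENT = 0x01,    /* "print-parent" */
            MY_ITER_SYM_OFFSET   = 0x02,    /* "sym-offset"   */
    };
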
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 878c03f386ba..84a3a7ba072a 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -71,10 +71,10 @@ u64 notrace trace_clock(void)
71/* keep prev_time and lock in the same cacheline. */ 71/* keep prev_time and lock in the same cacheline. */
72static struct { 72static struct {
73 u64 prev_time; 73 u64 prev_time;
74 raw_spinlock_t lock; 74 arch_spinlock_t lock;
75} trace_clock_struct ____cacheline_aligned_in_smp = 75} trace_clock_struct ____cacheline_aligned_in_smp =
76 { 76 {
77 .lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED, 77 .lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED,
78 }; 78 };
79 79
80u64 notrace trace_clock_global(void) 80u64 notrace trace_clock_global(void)
@@ -94,7 +94,7 @@ u64 notrace trace_clock_global(void)
94 if (unlikely(in_nmi())) 94 if (unlikely(in_nmi()))
95 goto out; 95 goto out;
96 96
97 __raw_spin_lock(&trace_clock_struct.lock); 97 arch_spin_lock(&trace_clock_struct.lock);
98 98
99 /* 99 /*
100 * TODO: if this happens often then maybe we should reset 100 * TODO: if this happens often then maybe we should reset
@@ -106,7 +106,7 @@ u64 notrace trace_clock_global(void)
106 106
107 trace_clock_struct.prev_time = now; 107 trace_clock_struct.prev_time = now;
108 108
109 __raw_spin_unlock(&trace_clock_struct.lock); 109 arch_spin_unlock(&trace_clock_struct.lock);
110 110
111 out: 111 out:
112 raw_local_irq_restore(flags); 112 raw_local_irq_restore(flags);
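
Beyond the type rename, note the layout trick this hunk preserves: prev_time and its lock sit in one ____cacheline_aligned_in_smp struct, so taking the lock and updating the timestamp touch a single cache line on every global-clock read. The pattern, as a hedged sketch with hypothetical names:

    #include <linux/cache.h>
    #include <linux/spinlock.h>
    #include <linux/types.h>

    /* Lock and the datum it guards share a cache line: one miss, not two. */
    static struct {
            u64             prev;
            arch_spinlock_t lock;
    } my_clock ____cacheline_aligned_in_smp = {
            .lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED,
    };
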
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index d9c60f80aa0d..9e25573242cf 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -25,7 +25,7 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event)
25 char *buf; 25 char *buf;
26 int ret = -ENOMEM; 26 int ret = -ENOMEM;
27 27
28 if (atomic_inc_return(&event->profile_count)) 28 if (event->profile_count++ > 0)
29 return 0; 29 return 0;
30 30
31 if (!total_profile_count) { 31 if (!total_profile_count) {
@@ -56,7 +56,7 @@ fail_buf_nmi:
56 perf_trace_buf = NULL; 56 perf_trace_buf = NULL;
57 } 57 }
58fail_buf: 58fail_buf:
59 atomic_dec(&event->profile_count); 59 event->profile_count--;
60 60
61 return ret; 61 return ret;
62} 62}
@@ -83,7 +83,7 @@ static void ftrace_profile_disable_event(struct ftrace_event_call *event)
83{ 83{
84 char *buf, *nmi_buf; 84 char *buf, *nmi_buf;
85 85
86 if (!atomic_add_negative(-1, &event->profile_count)) 86 if (--event->profile_count > 0)
87 return; 87 return;
88 88
89 event->profile_disable(event); 89 event->profile_disable(event);
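
profile_count can drop from atomic_t to a plain int because every caller already serializes on event_mutex; this also retires the awkward -1 pre-biasing that atomic_inc_return()/atomic_add_negative() required (the matching atomic_set(..., -1) is deleted from trace_kprobe.c further down). The zero-based counting idiom only works under an external lock; a sketch with hypothetical names:

    #include <linux/mutex.h>

    static DEFINE_MUTEX(my_mutex);
    static int my_count;                    /* guarded by my_mutex */

    static void my_hw_enable(void)  { /* power on once */ }
    static void my_hw_disable(void) { /* power off once */ }

    static void my_get(void)
    {
            mutex_lock(&my_mutex);
            if (my_count++ == 0)
                    my_hw_enable();         /* first user */
            mutex_unlock(&my_mutex);
    }

    static void my_put(void)
    {
            mutex_lock(&my_mutex);
            if (--my_count == 0)
                    my_hw_disable();        /* last user */
            mutex_unlock(&my_mutex);
    }
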
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 1d18315dc836..189b09baf4fb 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -78,7 +78,7 @@ EXPORT_SYMBOL_GPL(trace_define_field);
78 if (ret) \ 78 if (ret) \
79 return ret; 79 return ret;
80 80
81int trace_define_common_fields(struct ftrace_event_call *call) 81static int trace_define_common_fields(struct ftrace_event_call *call)
82{ 82{
83 int ret; 83 int ret;
84 struct trace_entry ent; 84 struct trace_entry ent;
@@ -91,7 +91,6 @@ int trace_define_common_fields(struct ftrace_event_call *call)
91 91
92 return ret; 92 return ret;
93} 93}
94EXPORT_SYMBOL_GPL(trace_define_common_fields);
95 94
96void trace_destroy_fields(struct ftrace_event_call *call) 95void trace_destroy_fields(struct ftrace_event_call *call)
97{ 96{
@@ -105,9 +104,25 @@ void trace_destroy_fields(struct ftrace_event_call *call)
105 } 104 }
106} 105}
107 106
108static void ftrace_event_enable_disable(struct ftrace_event_call *call, 107int trace_event_raw_init(struct ftrace_event_call *call)
108{
109 int id;
110
111 id = register_ftrace_event(call->event);
112 if (!id)
113 return -ENODEV;
114 call->id = id;
115 INIT_LIST_HEAD(&call->fields);
116
117 return 0;
118}
119EXPORT_SYMBOL_GPL(trace_event_raw_init);
120
121static int ftrace_event_enable_disable(struct ftrace_event_call *call,
109 int enable) 122 int enable)
110{ 123{
124 int ret = 0;
125
111 switch (enable) { 126 switch (enable) {
112 case 0: 127 case 0:
113 if (call->enabled) { 128 if (call->enabled) {
@@ -118,12 +133,20 @@ static void ftrace_event_enable_disable(struct ftrace_event_call *call,
118 break; 133 break;
119 case 1: 134 case 1:
120 if (!call->enabled) { 135 if (!call->enabled) {
121 call->enabled = 1;
122 tracing_start_cmdline_record(); 136 tracing_start_cmdline_record();
123 call->regfunc(call); 137 ret = call->regfunc(call);
138 if (ret) {
139 tracing_stop_cmdline_record();
140 pr_info("event trace: Could not enable event "
141 "%s\n", call->name);
142 break;
143 }
144 call->enabled = 1;
124 } 145 }
125 break; 146 break;
126 } 147 }
148
149 return ret;
127} 150}
128 151
129static void ftrace_clear_events(void) 152static void ftrace_clear_events(void)
@@ -402,7 +425,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
402 case 0: 425 case 0:
403 case 1: 426 case 1:
404 mutex_lock(&event_mutex); 427 mutex_lock(&event_mutex);
405 ftrace_event_enable_disable(call, val); 428 ret = ftrace_event_enable_disable(call, val);
406 mutex_unlock(&event_mutex); 429 mutex_unlock(&event_mutex);
407 break; 430 break;
408 431
@@ -412,7 +435,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
412 435
413 *ppos += cnt; 436 *ppos += cnt;
414 437
415 return cnt; 438 return ret ? ret : cnt;
416} 439}
417 440
418static ssize_t 441static ssize_t
@@ -913,7 +936,9 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
913 id); 936 id);
914 937
915 if (call->define_fields) { 938 if (call->define_fields) {
916 ret = call->define_fields(call); 939 ret = trace_define_common_fields(call);
940 if (!ret)
941 ret = call->define_fields(call);
917 if (ret < 0) { 942 if (ret < 0) {
918 pr_warning("Could not initialize trace point" 943 pr_warning("Could not initialize trace point"
919 " events/%s\n", call->name); 944 " events/%s\n", call->name);
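
Three moves in this file share one theme: the core takes over work the events used to duplicate. trace_event_raw_init() centralizes event registration, event_create_dir() now calls trace_define_common_fields() itself before the per-event define_fields(), and ftrace_event_enable_disable() finally propagates a regfunc() failure, unwinding tracing_start_cmdline_record() and reporting the error back through event_enable_write() instead of marking the event enabled regardless. The rollback shape, sketched under hypothetical names:

    struct my_event {
            int enabled;
            int (*regfunc)(struct my_event *ev);
    };

    static void my_start_bookkeeping(void) { }
    static void my_stop_bookkeeping(void) { }

    static int my_enable(struct my_event *ev)
    {
            int ret;

            my_start_bookkeeping();
            ret = ev->regfunc(ev);
            if (ret) {
                    my_stop_bookkeeping(); /* unwind before failing */
                    return ret;
            }
            ev->enabled = 1;               /* only once everything stuck */
            return 0;
    }
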
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index dff8c84ddf17..458e5bfe26d0 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -184,10 +184,6 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
184 struct struct_name field; \ 184 struct struct_name field; \
185 int ret; \ 185 int ret; \
186 \ 186 \
187 ret = trace_define_common_fields(event_call); \
188 if (ret) \
189 return ret; \
190 \
191 tstruct; \ 187 tstruct; \
192 \ 188 \
193 return ret; \ 189 return ret; \
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 45e6c01b2e4d..b1342c5d37cf 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -14,9 +14,20 @@
14#include "trace.h" 14#include "trace.h"
15#include "trace_output.h" 15#include "trace_output.h"
16 16
17struct fgraph_data { 17struct fgraph_cpu_data {
18 pid_t last_pid; 18 pid_t last_pid;
19 int depth; 19 int depth;
20 int ignore;
21};
22
23struct fgraph_data {
24 struct fgraph_cpu_data *cpu_data;
25
26 /* Place to preserve last processed entry. */
27 struct ftrace_graph_ent_entry ent;
28 struct ftrace_graph_ret_entry ret;
29 int failed;
30 int cpu;
20}; 31};
21 32
22#define TRACE_GRAPH_INDENT 2 33#define TRACE_GRAPH_INDENT 2
@@ -176,7 +187,7 @@ static int __trace_graph_entry(struct trace_array *tr,
176 struct ring_buffer *buffer = tr->buffer; 187 struct ring_buffer *buffer = tr->buffer;
177 struct ftrace_graph_ent_entry *entry; 188 struct ftrace_graph_ent_entry *entry;
178 189
179 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) 190 if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled))))
180 return 0; 191 return 0;
181 192
182 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT, 193 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT,
@@ -240,7 +251,7 @@ static void __trace_graph_return(struct trace_array *tr,
240 struct ring_buffer *buffer = tr->buffer; 251 struct ring_buffer *buffer = tr->buffer;
241 struct ftrace_graph_ret_entry *entry; 252 struct ftrace_graph_ret_entry *entry;
242 253
243 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) 254 if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled))))
244 return; 255 return;
245 256
246 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET, 257 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET,
@@ -384,7 +395,7 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data)
384 if (!data) 395 if (!data)
385 return TRACE_TYPE_HANDLED; 396 return TRACE_TYPE_HANDLED;
386 397
387 last_pid = &(per_cpu_ptr(data, cpu)->last_pid); 398 last_pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
388 399
389 if (*last_pid == pid) 400 if (*last_pid == pid)
390 return TRACE_TYPE_HANDLED; 401 return TRACE_TYPE_HANDLED;
@@ -435,26 +446,49 @@ static struct ftrace_graph_ret_entry *
435get_return_for_leaf(struct trace_iterator *iter, 446get_return_for_leaf(struct trace_iterator *iter,
436 struct ftrace_graph_ent_entry *curr) 447 struct ftrace_graph_ent_entry *curr)
437{ 448{
438 struct ring_buffer_iter *ring_iter; 449 struct fgraph_data *data = iter->private;
450 struct ring_buffer_iter *ring_iter = NULL;
439 struct ring_buffer_event *event; 451 struct ring_buffer_event *event;
440 struct ftrace_graph_ret_entry *next; 452 struct ftrace_graph_ret_entry *next;
441 453
442 ring_iter = iter->buffer_iter[iter->cpu]; 454 /*
455 * If the previous output failed to write to the seq buffer,
456 * then we just reuse the data from before.
457 */
458 if (data && data->failed) {
459 curr = &data->ent;
460 next = &data->ret;
461 } else {
443 462
444 /* First peek to compare current entry and the next one */ 463 ring_iter = iter->buffer_iter[iter->cpu];
445 if (ring_iter) 464
446 event = ring_buffer_iter_peek(ring_iter, NULL); 465 /* First peek to compare current entry and the next one */
447 else { 466 if (ring_iter)
448 /* We need to consume the current entry to see the next one */ 467 event = ring_buffer_iter_peek(ring_iter, NULL);
449 ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL); 468 else {
450 event = ring_buffer_peek(iter->tr->buffer, iter->cpu, 469 /*
451 NULL); 470 * We need to consume the current entry to see
452 } 471 * the next one.
472 */
473 ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
474 event = ring_buffer_peek(iter->tr->buffer, iter->cpu,
475 NULL);
476 }
453 477
454 if (!event) 478 if (!event)
455 return NULL; 479 return NULL;
480
481 next = ring_buffer_event_data(event);
456 482
457 next = ring_buffer_event_data(event); 483 if (data) {
484 /*
485 * Save current and next entries for later reference
486 * if the output fails.
487 */
488 data->ent = *curr;
489 data->ret = *next;
490 }
491 }
458 492
459 if (next->ent.type != TRACE_GRAPH_RET) 493 if (next->ent.type != TRACE_GRAPH_RET)
460 return NULL; 494 return NULL;
@@ -640,7 +674,7 @@ print_graph_entry_leaf(struct trace_iterator *iter,
640 674
641 if (data) { 675 if (data) {
642 int cpu = iter->cpu; 676 int cpu = iter->cpu;
643 int *depth = &(per_cpu_ptr(data, cpu)->depth); 677 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
644 678
645 /* 679 /*
646 * Comments display at + 1 to depth. Since 680 * Comments display at + 1 to depth. Since
@@ -688,7 +722,7 @@ print_graph_entry_nested(struct trace_iterator *iter,
688 722
689 if (data) { 723 if (data) {
690 int cpu = iter->cpu; 724 int cpu = iter->cpu;
691 int *depth = &(per_cpu_ptr(data, cpu)->depth); 725 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
692 726
693 *depth = call->depth; 727 *depth = call->depth;
694 } 728 }
@@ -782,19 +816,34 @@ static enum print_line_t
782print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, 816print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
783 struct trace_iterator *iter) 817 struct trace_iterator *iter)
784{ 818{
785 int cpu = iter->cpu; 819 struct fgraph_data *data = iter->private;
786 struct ftrace_graph_ent *call = &field->graph_ent; 820 struct ftrace_graph_ent *call = &field->graph_ent;
787 struct ftrace_graph_ret_entry *leaf_ret; 821 struct ftrace_graph_ret_entry *leaf_ret;
 822 enum print_line_t ret;
823 int cpu = iter->cpu;
788 824
789 if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func)) 825 if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func))
790 return TRACE_TYPE_PARTIAL_LINE; 826 return TRACE_TYPE_PARTIAL_LINE;
791 827
792 leaf_ret = get_return_for_leaf(iter, field); 828 leaf_ret = get_return_for_leaf(iter, field);
793 if (leaf_ret) 829 if (leaf_ret)
794 return print_graph_entry_leaf(iter, field, leaf_ret, s); 830 ret = print_graph_entry_leaf(iter, field, leaf_ret, s);
795 else 831 else
796 return print_graph_entry_nested(iter, field, s, cpu); 832 ret = print_graph_entry_nested(iter, field, s, cpu);
797 833
834 if (data) {
835 /*
836 * If we failed to write our output, then we need to make
837 * note of it. Because we already consumed our entry.
838 */
839 if (s->full) {
840 data->failed = 1;
841 data->cpu = cpu;
842 } else
843 data->failed = 0;
844 }
845
846 return ret;
798} 847}
799 848
800static enum print_line_t 849static enum print_line_t
@@ -810,7 +859,7 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
810 859
811 if (data) { 860 if (data) {
812 int cpu = iter->cpu; 861 int cpu = iter->cpu;
813 int *depth = &(per_cpu_ptr(data, cpu)->depth); 862 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
814 863
815 /* 864 /*
816 * Comments display at + 1 to depth. This is the 865 * Comments display at + 1 to depth. This is the
@@ -873,7 +922,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
873 int i; 922 int i;
874 923
875 if (data) 924 if (data)
876 depth = per_cpu_ptr(data, iter->cpu)->depth; 925 depth = per_cpu_ptr(data->cpu_data, iter->cpu)->depth;
877 926
878 if (print_graph_prologue(iter, s, 0, 0)) 927 if (print_graph_prologue(iter, s, 0, 0))
879 return TRACE_TYPE_PARTIAL_LINE; 928 return TRACE_TYPE_PARTIAL_LINE;
@@ -941,8 +990,33 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
941enum print_line_t 990enum print_line_t
942print_graph_function(struct trace_iterator *iter) 991print_graph_function(struct trace_iterator *iter)
943{ 992{
993 struct ftrace_graph_ent_entry *field;
994 struct fgraph_data *data = iter->private;
944 struct trace_entry *entry = iter->ent; 995 struct trace_entry *entry = iter->ent;
945 struct trace_seq *s = &iter->seq; 996 struct trace_seq *s = &iter->seq;
997 int cpu = iter->cpu;
998 int ret;
999
1000 if (data && per_cpu_ptr(data->cpu_data, cpu)->ignore) {
1001 per_cpu_ptr(data->cpu_data, cpu)->ignore = 0;
1002 return TRACE_TYPE_HANDLED;
1003 }
1004
1005 /*
1006 * If the last output failed, there's a possibility we need
1007 * to print out the missing entry which would never go out.
1008 */
1009 if (data && data->failed) {
1010 field = &data->ent;
1011 iter->cpu = data->cpu;
1012 ret = print_graph_entry(field, s, iter);
1013 if (ret == TRACE_TYPE_HANDLED && iter->cpu != cpu) {
1014 per_cpu_ptr(data->cpu_data, iter->cpu)->ignore = 1;
1015 ret = TRACE_TYPE_NO_CONSUME;
1016 }
1017 iter->cpu = cpu;
1018 return ret;
1019 }
946 1020
947 switch (entry->type) { 1021 switch (entry->type) {
948 case TRACE_GRAPH_ENT: { 1022 case TRACE_GRAPH_ENT: {
@@ -952,7 +1026,7 @@ print_graph_function(struct trace_iterator *iter)
952 * sizeof(struct ftrace_graph_ent_entry) is very small, 1026 * sizeof(struct ftrace_graph_ent_entry) is very small,
953 * it can be safely saved at the stack. 1027 * it can be safely saved at the stack.
954 */ 1028 */
955 struct ftrace_graph_ent_entry *field, saved; 1029 struct ftrace_graph_ent_entry saved;
956 trace_assign_type(field, entry); 1030 trace_assign_type(field, entry);
957 saved = *field; 1031 saved = *field;
958 return print_graph_entry(&saved, s, iter); 1032 return print_graph_entry(&saved, s, iter);
@@ -1030,31 +1104,54 @@ static void print_graph_headers(struct seq_file *s)
1030static void graph_trace_open(struct trace_iterator *iter) 1104static void graph_trace_open(struct trace_iterator *iter)
1031{ 1105{
1032 /* pid and depth on the last trace processed */ 1106 /* pid and depth on the last trace processed */
1033 struct fgraph_data *data = alloc_percpu(struct fgraph_data); 1107 struct fgraph_data *data;
1034 int cpu; 1108 int cpu;
1035 1109
1110 iter->private = NULL;
1111
1112 data = kzalloc(sizeof(*data), GFP_KERNEL);
1036 if (!data) 1113 if (!data)
1037 pr_warning("function graph tracer: not enough memory\n"); 1114 goto out_err;
1038 else 1115
1039 for_each_possible_cpu(cpu) { 1116 data->cpu_data = alloc_percpu(struct fgraph_cpu_data);
1040 pid_t *pid = &(per_cpu_ptr(data, cpu)->last_pid); 1117 if (!data->cpu_data)
1041 int *depth = &(per_cpu_ptr(data, cpu)->depth); 1118 goto out_err_free;
1042 *pid = -1; 1119
1043 *depth = 0; 1120 for_each_possible_cpu(cpu) {
1044 } 1121 pid_t *pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
1122 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
1123 int *ignore = &(per_cpu_ptr(data->cpu_data, cpu)->ignore);
1124 *pid = -1;
1125 *depth = 0;
1126 *ignore = 0;
1127 }
1045 1128
1046 iter->private = data; 1129 iter->private = data;
1130
1131 return;
1132
1133 out_err_free:
1134 kfree(data);
1135 out_err:
1136 pr_warning("function graph tracer: not enough memory\n");
1047} 1137}
1048 1138
1049static void graph_trace_close(struct trace_iterator *iter) 1139static void graph_trace_close(struct trace_iterator *iter)
1050{ 1140{
1051 free_percpu(iter->private); 1141 struct fgraph_data *data = iter->private;
1142
1143 if (data) {
1144 free_percpu(data->cpu_data);
1145 kfree(data);
1146 }
1052} 1147}
1053 1148
1054static struct tracer graph_trace __read_mostly = { 1149static struct tracer graph_trace __read_mostly = {
1055 .name = "function_graph", 1150 .name = "function_graph",
1056 .open = graph_trace_open, 1151 .open = graph_trace_open,
1152 .pipe_open = graph_trace_open,
1057 .close = graph_trace_close, 1153 .close = graph_trace_close,
1154 .pipe_close = graph_trace_close,
1058 .wait_pipe = poll_wait_pipe, 1155 .wait_pipe = poll_wait_pipe,
1059 .init = graph_trace_init, 1156 .init = graph_trace_init,
1060 .reset = graph_trace_reset, 1157 .reset = graph_trace_reset,
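
The tracer's private data grows from a bare alloc_percpu() area into a kzalloc'd wrapper that carries the per-cpu state plus the shared failed-entry stash, so graph_trace_open()/close() now pair two allocations and unwind in reverse order on failure. The allocation pattern, as a sketch with hypothetical names:

    #include <linux/percpu.h>
    #include <linux/slab.h>

    struct my_cpu_state { int depth; };

    struct my_state {
            struct my_cpu_state *cpu_data;  /* per-cpu part */
            int failed;                     /* shared part  */
    };

    static struct my_state *my_open(void)
    {
            struct my_state *s = kzalloc(sizeof(*s), GFP_KERNEL);

            if (!s)
                    return NULL;
            s->cpu_data = alloc_percpu(struct my_cpu_state);
            if (!s->cpu_data) {
                    kfree(s);               /* unwind in reverse order */
                    return NULL;
            }
            return s;
    }

    static void my_close(struct my_state *s)
    {
            if (s) {
                    free_percpu(s->cpu_data);
                    kfree(s);
            }
    }
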
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
index 69543a905cd5..7b97000745f5 100644
--- a/kernel/trace/trace_hw_branches.c
+++ b/kernel/trace/trace_hw_branches.c
@@ -20,10 +20,10 @@
20 20
21#define BTS_BUFFER_SIZE (1 << 13) 21#define BTS_BUFFER_SIZE (1 << 13)
22 22
23static DEFINE_PER_CPU(struct bts_tracer *, tracer); 23static DEFINE_PER_CPU(struct bts_tracer *, hwb_tracer);
24static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], buffer); 24static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], hwb_buffer);
25 25
26#define this_tracer per_cpu(tracer, smp_processor_id()) 26#define this_tracer per_cpu(hwb_tracer, smp_processor_id())
27 27
28static int trace_hw_branches_enabled __read_mostly; 28static int trace_hw_branches_enabled __read_mostly;
29static int trace_hw_branches_suspended __read_mostly; 29static int trace_hw_branches_suspended __read_mostly;
@@ -32,12 +32,13 @@ static struct trace_array *hw_branch_trace __read_mostly;
32 32
33static void bts_trace_init_cpu(int cpu) 33static void bts_trace_init_cpu(int cpu)
34{ 34{
35 per_cpu(tracer, cpu) = 35 per_cpu(hwb_tracer, cpu) =
36 ds_request_bts_cpu(cpu, per_cpu(buffer, cpu), BTS_BUFFER_SIZE, 36 ds_request_bts_cpu(cpu, per_cpu(hwb_buffer, cpu),
37 NULL, (size_t)-1, BTS_KERNEL); 37 BTS_BUFFER_SIZE, NULL, (size_t)-1,
38 BTS_KERNEL);
38 39
39 if (IS_ERR(per_cpu(tracer, cpu))) 40 if (IS_ERR(per_cpu(hwb_tracer, cpu)))
40 per_cpu(tracer, cpu) = NULL; 41 per_cpu(hwb_tracer, cpu) = NULL;
41} 42}
42 43
43static int bts_trace_init(struct trace_array *tr) 44static int bts_trace_init(struct trace_array *tr)
@@ -51,7 +52,7 @@ static int bts_trace_init(struct trace_array *tr)
51 for_each_online_cpu(cpu) { 52 for_each_online_cpu(cpu) {
52 bts_trace_init_cpu(cpu); 53 bts_trace_init_cpu(cpu);
53 54
54 if (likely(per_cpu(tracer, cpu))) 55 if (likely(per_cpu(hwb_tracer, cpu)))
55 trace_hw_branches_enabled = 1; 56 trace_hw_branches_enabled = 1;
56 } 57 }
57 trace_hw_branches_suspended = 0; 58 trace_hw_branches_suspended = 0;
@@ -67,9 +68,9 @@ static void bts_trace_reset(struct trace_array *tr)
67 68
68 get_online_cpus(); 69 get_online_cpus();
69 for_each_online_cpu(cpu) { 70 for_each_online_cpu(cpu) {
70 if (likely(per_cpu(tracer, cpu))) { 71 if (likely(per_cpu(hwb_tracer, cpu))) {
71 ds_release_bts(per_cpu(tracer, cpu)); 72 ds_release_bts(per_cpu(hwb_tracer, cpu));
72 per_cpu(tracer, cpu) = NULL; 73 per_cpu(hwb_tracer, cpu) = NULL;
73 } 74 }
74 } 75 }
75 trace_hw_branches_enabled = 0; 76 trace_hw_branches_enabled = 0;
@@ -83,8 +84,8 @@ static void bts_trace_start(struct trace_array *tr)
83 84
84 get_online_cpus(); 85 get_online_cpus();
85 for_each_online_cpu(cpu) 86 for_each_online_cpu(cpu)
86 if (likely(per_cpu(tracer, cpu))) 87 if (likely(per_cpu(hwb_tracer, cpu)))
87 ds_resume_bts(per_cpu(tracer, cpu)); 88 ds_resume_bts(per_cpu(hwb_tracer, cpu));
88 trace_hw_branches_suspended = 0; 89 trace_hw_branches_suspended = 0;
89 put_online_cpus(); 90 put_online_cpus();
90} 91}
@@ -95,8 +96,8 @@ static void bts_trace_stop(struct trace_array *tr)
95 96
96 get_online_cpus(); 97 get_online_cpus();
97 for_each_online_cpu(cpu) 98 for_each_online_cpu(cpu)
98 if (likely(per_cpu(tracer, cpu))) 99 if (likely(per_cpu(hwb_tracer, cpu)))
99 ds_suspend_bts(per_cpu(tracer, cpu)); 100 ds_suspend_bts(per_cpu(hwb_tracer, cpu));
100 trace_hw_branches_suspended = 1; 101 trace_hw_branches_suspended = 1;
101 put_online_cpus(); 102 put_online_cpus();
102} 103}
@@ -114,16 +115,16 @@ static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb,
114 bts_trace_init_cpu(cpu); 115 bts_trace_init_cpu(cpu);
115 116
116 if (trace_hw_branches_suspended && 117 if (trace_hw_branches_suspended &&
117 likely(per_cpu(tracer, cpu))) 118 likely(per_cpu(hwb_tracer, cpu)))
118 ds_suspend_bts(per_cpu(tracer, cpu)); 119 ds_suspend_bts(per_cpu(hwb_tracer, cpu));
119 } 120 }
120 break; 121 break;
121 122
122 case CPU_DOWN_PREPARE: 123 case CPU_DOWN_PREPARE:
123 /* The notification is sent with interrupts enabled. */ 124 /* The notification is sent with interrupts enabled. */
124 if (likely(per_cpu(tracer, cpu))) { 125 if (likely(per_cpu(hwb_tracer, cpu))) {
125 ds_release_bts(per_cpu(tracer, cpu)); 126 ds_release_bts(per_cpu(hwb_tracer, cpu));
126 per_cpu(tracer, cpu) = NULL; 127 per_cpu(hwb_tracer, cpu) = NULL;
127 } 128 }
128 } 129 }
129 130
@@ -258,8 +259,8 @@ static void trace_bts_prepare(struct trace_iterator *iter)
258 259
259 get_online_cpus(); 260 get_online_cpus();
260 for_each_online_cpu(cpu) 261 for_each_online_cpu(cpu)
261 if (likely(per_cpu(tracer, cpu))) 262 if (likely(per_cpu(hwb_tracer, cpu)))
262 ds_suspend_bts(per_cpu(tracer, cpu)); 263 ds_suspend_bts(per_cpu(hwb_tracer, cpu));
263 /* 264 /*
264 * We need to collect the trace on the respective cpu since ftrace 265 * We need to collect the trace on the respective cpu since ftrace
265 * implicitly adds the record for the current cpu. 266 * implicitly adds the record for the current cpu.
@@ -268,8 +269,8 @@ static void trace_bts_prepare(struct trace_iterator *iter)
268 on_each_cpu(trace_bts_cpu, iter->tr, 1); 269 on_each_cpu(trace_bts_cpu, iter->tr, 1);
269 270
270 for_each_online_cpu(cpu) 271 for_each_online_cpu(cpu)
271 if (likely(per_cpu(tracer, cpu))) 272 if (likely(per_cpu(hwb_tracer, cpu)))
272 ds_resume_bts(per_cpu(tracer, cpu)); 273 ds_resume_bts(per_cpu(hwb_tracer, cpu));
273 put_online_cpus(); 274 put_online_cpus();
274} 275}
275 276
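
The tracer/buffer to hwb_tracer/hwb_buffer rename is mechanical namespace hygiene: with the per_cpu__ symbol prefix dropped from the per-cpu implementation in this same release, per-cpu variable names now share the ordinary global namespace, and names as generic as "tracer" or "buffer" invite collisions. Hence the subsystem prefix, e.g. (hypothetical driver name):

    #include <linux/percpu.h>

    /* Per-cpu symbols are effectively global now: prefix them like exports. */
    static DEFINE_PER_CPU(char[64], mydrv_buffer); /* not plain 'buffer' */
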
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 3aa7eaa2114c..2974bc7538c7 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -151,6 +151,8 @@ check_critical_timing(struct trace_array *tr,
151 goto out_unlock; 151 goto out_unlock;
152 152
153 trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); 153 trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
154 /* Skip 5 functions to get to the irq/preempt enable function */
155 __trace_stack(tr, flags, 5, pc);
154 156
155 if (data->critical_sequence != max_sequence) 157 if (data->critical_sequence != max_sequence)
156 goto out_unlock; 158 goto out_unlock;
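
The skip argument to the __trace_stack() call added above discards that many innermost frames, so the recorded trace begins at the interesting caller rather than in tracer plumbing; five is an empirical count of the frames between the stack save and the irq/preempt-enable site. The same knob exists in the generic stacktrace API, e.g. (hypothetical helper):

    #include <linux/stacktrace.h>

    static void my_record_stack(void)
    {
            unsigned long entries[16];
            struct stack_trace trace = {
                    .max_entries = 16,
                    .entries     = entries,
                    .skip        = 2,  /* hide this helper and its callee */
            };

            save_stack_trace(&trace);
    }
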
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index aff5f80b59b8..7ecab06547a5 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -606,23 +606,22 @@ static int create_trace_probe(int argc, char **argv)
606 */ 606 */
607 struct trace_probe *tp; 607 struct trace_probe *tp;
608 int i, ret = 0; 608 int i, ret = 0;
609 int is_return = 0; 609 int is_return = 0, is_delete = 0;
610 char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL; 610 char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL;
611 unsigned long offset = 0; 611 unsigned long offset = 0;
612 void *addr = NULL; 612 void *addr = NULL;
613 char buf[MAX_EVENT_NAME_LEN]; 613 char buf[MAX_EVENT_NAME_LEN];
614 614
615 if (argc < 2) { 615 /* argc must be >= 1 */
616 pr_info("Probe point is not specified.\n");
617 return -EINVAL;
618 }
619
620 if (argv[0][0] == 'p') 616 if (argv[0][0] == 'p')
621 is_return = 0; 617 is_return = 0;
622 else if (argv[0][0] == 'r') 618 else if (argv[0][0] == 'r')
623 is_return = 1; 619 is_return = 1;
620 else if (argv[0][0] == '-')
621 is_delete = 1;
624 else { 622 else {
625 pr_info("Probe definition must be started with 'p' or 'r'.\n"); 623 pr_info("Probe definition must be started with 'p', 'r' or"
624 " '-'.\n");
626 return -EINVAL; 625 return -EINVAL;
627 } 626 }
628 627
@@ -642,7 +641,29 @@ static int create_trace_probe(int argc, char **argv)
642 return -EINVAL; 641 return -EINVAL;
643 } 642 }
644 } 643 }
644 if (!group)
645 group = KPROBE_EVENT_SYSTEM;
645 646
647 if (is_delete) {
648 if (!event) {
649 pr_info("Delete command needs an event name.\n");
650 return -EINVAL;
651 }
652 tp = find_probe_event(event, group);
653 if (!tp) {
654 pr_info("Event %s/%s doesn't exist.\n", group, event);
655 return -ENOENT;
656 }
657 /* delete an event */
658 unregister_trace_probe(tp);
659 free_trace_probe(tp);
660 return 0;
661 }
662
663 if (argc < 2) {
664 pr_info("Probe point is not specified.\n");
665 return -EINVAL;
666 }
646 if (isdigit(argv[1][0])) { 667 if (isdigit(argv[1][0])) {
647 if (is_return) { 668 if (is_return) {
648 pr_info("Return probe point must be a symbol.\n"); 669 pr_info("Return probe point must be a symbol.\n");
@@ -671,8 +692,6 @@ static int create_trace_probe(int argc, char **argv)
671 argc -= 2; argv += 2; 692 argc -= 2; argv += 2;
672 693
673 /* setup a probe */ 694 /* setup a probe */
674 if (!group)
675 group = KPROBE_EVENT_SYSTEM;
676 if (!event) { 695 if (!event) {
677 /* Make a new event name */ 696 /* Make a new event name */
678 if (symbol) 697 if (symbol)
@@ -1113,10 +1132,6 @@ static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
1113 struct kprobe_trace_entry field; 1132 struct kprobe_trace_entry field;
1114 struct trace_probe *tp = (struct trace_probe *)event_call->data; 1133 struct trace_probe *tp = (struct trace_probe *)event_call->data;
1115 1134
1116 ret = trace_define_common_fields(event_call);
1117 if (!ret)
1118 return ret;
1119
1120 DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0); 1135 DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
1121 DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1); 1136 DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
1122 /* Set argument names as fields */ 1137 /* Set argument names as fields */
@@ -1131,10 +1146,6 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
1131 struct kretprobe_trace_entry field; 1146 struct kretprobe_trace_entry field;
1132 struct trace_probe *tp = (struct trace_probe *)event_call->data; 1147 struct trace_probe *tp = (struct trace_probe *)event_call->data;
1133 1148
1134 ret = trace_define_common_fields(event_call);
1135 if (!ret)
1136 return ret;
1137
1138 DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0); 1149 DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
1139 DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0); 1150 DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
1140 DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1); 1151 DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
@@ -1434,7 +1445,6 @@ static int register_probe_event(struct trace_probe *tp)
1434 call->unregfunc = probe_event_disable; 1445 call->unregfunc = probe_event_disable;
1435 1446
1436#ifdef CONFIG_EVENT_PROFILE 1447#ifdef CONFIG_EVENT_PROFILE
1437 atomic_set(&call->profile_count, -1);
1438 call->profile_enable = probe_profile_enable; 1448 call->profile_enable = probe_profile_enable;
1439 call->profile_disable = probe_profile_disable; 1449 call->profile_disable = probe_profile_disable;
1440#endif 1450#endif
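
The trace_kprobe.c hunks above add a third command prefix to the kprobe_events interface: besides 'p' (kprobe) and 'r' (kretprobe), a leading '-' now deletes a named event. That is why the argc < 2 check moves below the prefix parsing (a bare "-:event" has only one argument) and why the group default is resolved before the delete path looks the event up. The removed trace_define_common_fields() calls are not lost, either: the core event code now defines the common fields before calling each event's callback, which also retires the inverted `if (!ret) return ret;` check that had been returning early on success. A hedged user-space sketch of the new delete syntax; the probe name "myprobe" and target do_sys_open are only examples, and debugfs is assumed mounted at /sys/kernel/debug:

	/*
	 * Sketch: driving the new '-' delete command from user space.
	 * Assumes debugfs at /sys/kernel/debug; names are illustrative.
	 */
	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		const char *path = "/sys/kernel/debug/tracing/kprobe_events";
		const char *add  = "p:myprobe do_sys_open\n";
		const char *del  = "-:myprobe\n";
		int fd = open(path, O_WRONLY | O_APPEND);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		if (write(fd, add, strlen(add)) < 0)	/* define the event */
			perror("add");
		if (write(fd, del, strlen(del)) < 0)	/* new: remove it again */
			perror("delete");
		close(fd);
		return 0;
	}

From a shell the equivalent is echo 'p:myprobe do_sys_open' >> kprobe_events followed by echo '-:myprobe' >> kprobe_events.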
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
index ddfa0fd43bc0..faf37fa4408c 100644
--- a/kernel/trace/trace_ksym.c
+++ b/kernel/trace/trace_ksym.c
@@ -79,11 +79,12 @@ void ksym_collect_stats(unsigned long hbp_hit_addr)
79} 79}
80#endif /* CONFIG_PROFILE_KSYM_TRACER */ 80#endif /* CONFIG_PROFILE_KSYM_TRACER */
81 81
82void ksym_hbp_handler(struct perf_event *hbp, void *data) 82void ksym_hbp_handler(struct perf_event *hbp, int nmi,
83 struct perf_sample_data *data,
84 struct pt_regs *regs)
83{ 85{
84 struct ring_buffer_event *event; 86 struct ring_buffer_event *event;
85 struct ksym_trace_entry *entry; 87 struct ksym_trace_entry *entry;
86 struct pt_regs *regs = data;
87 struct ring_buffer *buffer; 88 struct ring_buffer *buffer;
88 int pc; 89 int pc;
89 90
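
This hunk changes ksym_hbp_handler() from the old void-pointer callback to the standard perf overflow-handler signature (event, NMI flag, sample data, regs), so pt_regs no longer has to be smuggled through 'data'. A minimal sketch of a breakpoint handler and registration in the new shape, assuming the register_wide_hw_breakpoint()/hw_breakpoint_init() API of this series; the watched symbol pid_max and the message are illustrative:

	/*
	 * Sketch: a hardware-breakpoint callback with the new perf
	 * overflow-handler signature; watched symbol is an example.
	 */
	#include <linux/module.h>
	#include <linux/kallsyms.h>
	#include <linux/ptrace.h>
	#include <linux/perf_event.h>
	#include <linux/hw_breakpoint.h>

	static struct perf_event **sample_hbp;

	static void sample_hbp_handler(struct perf_event *bp, int nmi,
				       struct perf_sample_data *data,
				       struct pt_regs *regs)
	{
		printk(KERN_INFO "pid_max written, ip = %p\n",
		       (void *)instruction_pointer(regs));
	}

	static int __init sample_init(void)
	{
		struct perf_event_attr attr;

		hw_breakpoint_init(&attr);
		attr.bp_addr = kallsyms_lookup_name("pid_max");
		attr.bp_len = HW_BREAKPOINT_LEN_4;
		attr.bp_type = HW_BREAKPOINT_W;

		sample_hbp = register_wide_hw_breakpoint(&attr, sample_hbp_handler);
		return IS_ERR(sample_hbp) ? PTR_ERR(sample_hbp) : 0;
	}

	static void __exit sample_exit(void)
	{
		unregister_wide_hw_breakpoint(sample_hbp);
	}

	module_init(sample_init);
	module_exit(sample_exit);
	MODULE_LICENSE("GPL");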
@@ -235,7 +236,8 @@ static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
235 mutex_lock(&ksym_tracer_mutex); 236 mutex_lock(&ksym_tracer_mutex);
236 237
237 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { 238 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
238 ret = trace_seq_printf(s, "%pS:", (void *)entry->attr.bp_addr); 239 ret = trace_seq_printf(s, "%pS:",
240 (void *)(unsigned long)entry->attr.bp_addr);
239 if (entry->attr.bp_type == HW_BREAKPOINT_R) 241 if (entry->attr.bp_type == HW_BREAKPOINT_R)
240 ret = trace_seq_puts(s, "r--\n"); 242 ret = trace_seq_puts(s, "r--\n");
241 else if (entry->attr.bp_type == HW_BREAKPOINT_W) 243 else if (entry->attr.bp_type == HW_BREAKPOINT_W)
@@ -277,21 +279,20 @@ static ssize_t ksym_trace_filter_write(struct file *file,
277{ 279{
278 struct trace_ksym *entry; 280 struct trace_ksym *entry;
279 struct hlist_node *node; 281 struct hlist_node *node;
280 char *input_string, *ksymname = NULL; 282 char *buf, *input_string, *ksymname = NULL;
281 unsigned long ksym_addr = 0; 283 unsigned long ksym_addr = 0;
282 int ret, op, changed = 0; 284 int ret, op, changed = 0;
283 285
284 input_string = kzalloc(count + 1, GFP_KERNEL); 286 buf = kzalloc(count + 1, GFP_KERNEL);
285 if (!input_string) 287 if (!buf)
286 return -ENOMEM; 288 return -ENOMEM;
287 289
288 if (copy_from_user(input_string, buffer, count)) { 290 ret = -EFAULT;
289 kfree(input_string); 291 if (copy_from_user(buf, buffer, count))
290 return -EFAULT; 292 goto out;
291 }
292 input_string[count] = '\0';
293 293
294 strstrip(input_string); 294 buf[count] = '\0';
295 input_string = strstrip(buf);
295 296
296 /* 297 /*
297 * Clear all breakpoints if: 298 * Clear all breakpoints if:
@@ -299,18 +300,16 @@ static ssize_t ksym_trace_filter_write(struct file *file,
299 * 2: echo 0 > ksym_trace_filter 300 * 2: echo 0 > ksym_trace_filter
300 * 3: echo "*:---" > ksym_trace_filter 301 * 3: echo "*:---" > ksym_trace_filter
301 */ 302 */
302 if (!input_string[0] || !strcmp(input_string, "0") || 303 if (!buf[0] || !strcmp(buf, "0") ||
303 !strcmp(input_string, "*:---")) { 304 !strcmp(buf, "*:---")) {
304 __ksym_trace_reset(); 305 __ksym_trace_reset();
305 kfree(input_string); 306 ret = 0;
306 return count; 307 goto out;
307 } 308 }
308 309
309 ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr); 310 ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
310 if (ret < 0) { 311 if (ret < 0)
311 kfree(input_string); 312 goto out;
312 return ret;
313 }
314 313
315 mutex_lock(&ksym_tracer_mutex); 314 mutex_lock(&ksym_tracer_mutex);
316 315
@@ -321,7 +320,7 @@ static ssize_t ksym_trace_filter_write(struct file *file,
321 if (entry->attr.bp_type != op) 320 if (entry->attr.bp_type != op)
322 changed = 1; 321 changed = 1;
323 else 322 else
324 goto out; 323 goto out_unlock;
325 break; 324 break;
326 } 325 }
327 } 326 }
@@ -336,28 +335,24 @@ static ssize_t ksym_trace_filter_write(struct file *file,
336 if (IS_ERR(entry->ksym_hbp)) 335 if (IS_ERR(entry->ksym_hbp))
337 ret = PTR_ERR(entry->ksym_hbp); 336 ret = PTR_ERR(entry->ksym_hbp);
338 else 337 else
339 goto out; 338 goto out_unlock;
340 } 339 }
341 /* Error or "symbol:---" case: drop it */ 340 /* Error or "symbol:---" case: drop it */
342 ksym_filter_entry_count--; 341 ksym_filter_entry_count--;
343 hlist_del_rcu(&(entry->ksym_hlist)); 342 hlist_del_rcu(&(entry->ksym_hlist));
344 synchronize_rcu(); 343 synchronize_rcu();
345 kfree(entry); 344 kfree(entry);
346 goto out; 345 goto out_unlock;
347 } else { 346 } else {
348 /* Check for malformed request: (4) */ 347 /* Check for malformed request: (4) */
349 if (op == 0) 348 if (op)
350 goto out; 349 ret = process_new_ksym_entry(ksymname, op, ksym_addr);
351 ret = process_new_ksym_entry(ksymname, op, ksym_addr);
352 } 350 }
353out: 351out_unlock:
354 mutex_unlock(&ksym_tracer_mutex); 352 mutex_unlock(&ksym_tracer_mutex);
355 353out:
356 kfree(input_string); 354 kfree(buf);
357 355 return !ret ? count : ret;
358 if (!ret)
359 ret = count;
360 return ret;
361} 356}
362 357
363static const struct file_operations ksym_tracing_fops = { 358static const struct file_operations ksym_tracing_fops = {
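
The ksym_trace_filter_write() rewrite above fixes two things at once: the old code discarded strstrip()'s return value (so leading whitespace was never actually stripped, and strstrip() is __must_check), and it duplicated kfree() in every error branch. A condensed sketch of the resulting idiom, with parse_cmd() as a hypothetical stand-in for the real parser:

	/*
	 * Sketch of the copy_from_user + strstrip + single-exit idiom;
	 * parse_cmd() is a hypothetical stand-in.
	 */
	static ssize_t example_write(struct file *file, const char __user *ubuf,
				     size_t count, loff_t *ppos)
	{
		char *buf, *cmd;
		int ret;

		buf = kzalloc(count + 1, GFP_KERNEL);
		if (!buf)
			return -ENOMEM;

		ret = -EFAULT;
		if (copy_from_user(buf, ubuf, count))
			goto out;

		buf[count] = '\0';
		cmd = strstrip(buf);	/* keep the returned pointer */

		ret = parse_cmd(cmd);	/* hypothetical parser */
	out:
		kfree(buf);		/* free the original allocation, not cmd */
		return ret < 0 ? ret : count;
	}

Keeping buf and the strstrip() result separate matters: strstrip() may return a pointer past the start of the allocation, so kfree() must get the original pointer. That is exactly why the patch carries both buf and input_string.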
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index b6c12c6a1bcd..8e46b3323cdc 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -23,13 +23,21 @@ static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
23 23
24static int next_event_type = __TRACE_LAST_TYPE + 1; 24static int next_event_type = __TRACE_LAST_TYPE + 1;
25 25
26void trace_print_seq(struct seq_file *m, struct trace_seq *s) 26int trace_print_seq(struct seq_file *m, struct trace_seq *s)
27{ 27{
28 int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len; 28 int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
29 int ret;
30
31 ret = seq_write(m, s->buffer, len);
29 32
30 seq_write(m, s->buffer, len); 33 /*
34 * Only reset this buffer if we successfully wrote to the
35 * seq_file buffer.
36 */
37 if (!ret)
38 trace_seq_init(s);
31 39
32 trace_seq_init(s); 40 return ret;
33} 41}
34 42
35enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter) 43enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter)
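
trace_print_seq() now reports seq_write()'s result and resets the trace_seq only when the copy into the seq_file buffer succeeded; before this, an overflowing seq_file silently discarded the pending trace output. A sketch of the "consume only on success" pattern, with my_seq as a stand-in buffer type:

	/*
	 * Sketch of the pattern trace_print_seq() adopts; my_seq is a
	 * stand-in with a buf/len pair.
	 */
	int flush_to_seq_file(struct seq_file *m, struct my_seq *s)
	{
		int ret = seq_write(m, s->buf, s->len);

		if (!ret)
			s->len = 0;	/* consumed: safe to reuse the buffer */

		return ret;		/* non-zero: seq_file was full, keep the data */
	}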
@@ -85,7 +93,7 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
85 va_list ap; 93 va_list ap;
86 int ret; 94 int ret;
87 95
88 if (!len) 96 if (s->full || !len)
89 return 0; 97 return 0;
90 98
91 va_start(ap, fmt); 99 va_start(ap, fmt);
@@ -93,8 +101,10 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
93 va_end(ap); 101 va_end(ap);
94 102
95 /* If we can't write it all, don't bother writing anything */ 103 /* If we can't write it all, don't bother writing anything */
96 if (ret >= len) 104 if (ret >= len) {
105 s->full = 1;
97 return 0; 106 return 0;
107 }
98 108
99 s->len += ret; 109 s->len += ret;
100 110
@@ -119,14 +129,16 @@ trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
119 int len = (PAGE_SIZE - 1) - s->len; 129 int len = (PAGE_SIZE - 1) - s->len;
120 int ret; 130 int ret;
121 131
122 if (!len) 132 if (s->full || !len)
123 return 0; 133 return 0;
124 134
125 ret = vsnprintf(s->buffer + s->len, len, fmt, args); 135 ret = vsnprintf(s->buffer + s->len, len, fmt, args);
126 136
127 /* If we can't write it all, don't bother writing anything */ 137 /* If we can't write it all, don't bother writing anything */
128 if (ret >= len) 138 if (ret >= len) {
139 s->full = 1;
129 return 0; 140 return 0;
141 }
130 142
131 s->len += ret; 143 s->len += ret;
132 144
@@ -139,14 +151,16 @@ int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
139 int len = (PAGE_SIZE - 1) - s->len; 151 int len = (PAGE_SIZE - 1) - s->len;
140 int ret; 152 int ret;
141 153
142 if (!len) 154 if (s->full || !len)
143 return 0; 155 return 0;
144 156
145 ret = bstr_printf(s->buffer + s->len, len, fmt, binary); 157 ret = bstr_printf(s->buffer + s->len, len, fmt, binary);
146 158
147 /* If we can't write it all, don't bother writing anything */ 159 /* If we can't write it all, don't bother writing anything */
148 if (ret >= len) 160 if (ret >= len) {
161 s->full = 1;
149 return 0; 162 return 0;
163 }
150 164
151 s->len += ret; 165 s->len += ret;
152 166
@@ -167,8 +181,13 @@ int trace_seq_puts(struct trace_seq *s, const char *str)
167{ 181{
168 int len = strlen(str); 182 int len = strlen(str);
169 183
170 if (len > ((PAGE_SIZE - 1) - s->len)) 184 if (s->full)
185 return 0;
186
187 if (len > ((PAGE_SIZE - 1) - s->len)) {
188 s->full = 1;
171 return 0; 189 return 0;
190 }
172 191
173 memcpy(s->buffer + s->len, str, len); 192 memcpy(s->buffer + s->len, str, len);
174 s->len += len; 193 s->len += len;
@@ -178,9 +197,14 @@ int trace_seq_puts(struct trace_seq *s, const char *str)
178 197
179int trace_seq_putc(struct trace_seq *s, unsigned char c) 198int trace_seq_putc(struct trace_seq *s, unsigned char c)
180{ 199{
181 if (s->len >= (PAGE_SIZE - 1)) 200 if (s->full)
182 return 0; 201 return 0;
183 202
203 if (s->len >= (PAGE_SIZE - 1)) {
204 s->full = 1;
205 return 0;
206 }
207
184 s->buffer[s->len++] = c; 208 s->buffer[s->len++] = c;
185 209
186 return 1; 210 return 1;
@@ -188,9 +212,14 @@ int trace_seq_putc(struct trace_seq *s, unsigned char c)
188 212
189int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len) 213int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len)
190{ 214{
191 if (len > ((PAGE_SIZE - 1) - s->len)) 215 if (s->full)
192 return 0; 216 return 0;
193 217
218 if (len > ((PAGE_SIZE - 1) - s->len)) {
219 s->full = 1;
220 return 0;
221 }
222
194 memcpy(s->buffer + s->len, mem, len); 223 memcpy(s->buffer + s->len, mem, len);
195 s->len += len; 224 s->len += len;
196 225
@@ -203,6 +232,9 @@ int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, size_t len)
203 const unsigned char *data = mem; 232 const unsigned char *data = mem;
204 int i, j; 233 int i, j;
205 234
235 if (s->full)
236 return 0;
237
206#ifdef __BIG_ENDIAN 238#ifdef __BIG_ENDIAN
207 for (i = 0, j = 0; i < len; i++) { 239 for (i = 0, j = 0; i < len; i++) {
208#else 240#else
@@ -220,8 +252,13 @@ void *trace_seq_reserve(struct trace_seq *s, size_t len)
220{ 252{
221 void *ret; 253 void *ret;
222 254
223 if (len > ((PAGE_SIZE - 1) - s->len)) 255 if (s->full)
256 return 0;
257
258 if (len > ((PAGE_SIZE - 1) - s->len)) {
259 s->full = 1;
224 return NULL; 260 return NULL;
261 }
225 262
226 ret = s->buffer + s->len; 263 ret = s->buffer + s->len;
227 s->len += len; 264 s->len += len;
@@ -233,8 +270,14 @@ int trace_seq_path(struct trace_seq *s, struct path *path)
233{ 270{
234 unsigned char *p; 271 unsigned char *p;
235 272
236 if (s->len >= (PAGE_SIZE - 1)) 273 if (s->full)
274 return 0;
275
276 if (s->len >= (PAGE_SIZE - 1)) {
277 s->full = 1;
237 return 0; 278 return 0;
279 }
280
238 p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len); 281 p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
239 if (!IS_ERR(p)) { 282 if (!IS_ERR(p)) {
240 p = mangle_path(s->buffer + s->len, p, "\n"); 283 p = mangle_path(s->buffer + s->len, p, "\n");
@@ -247,6 +290,7 @@ int trace_seq_path(struct trace_seq *s, struct path *path)
247 return 1; 290 return 1;
248 } 291 }
249 292
293 s->full = 1;
250 return 0; 294 return 0;
251} 295}
252 296
@@ -373,6 +417,9 @@ int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
373 unsigned long vmstart = 0; 417 unsigned long vmstart = 0;
374 int ret = 1; 418 int ret = 1;
375 419
420 if (s->full)
421 return 0;
422
376 if (mm) { 423 if (mm) {
377 const struct vm_area_struct *vma; 424 const struct vm_area_struct *vma;
378 425
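
The remaining trace_output.c hunks all follow one pattern: trace_seq grows a 'full' flag that every writer checks first and latches on overflow, so once the one-page buffer fills, each later write fails fast as a no-op instead of probing (and possibly partially consuming) the remaining space. A minimal sketch of the latch, assuming a trace_seq-like struct with buffer, len, and the new flag:

	/* Minimal sketch of the overflow latch the trace_seq helpers adopt. */
	struct my_seq {
		unsigned char	buffer[PAGE_SIZE];
		unsigned int	len;
		int		full;
	};

	static int my_seq_puts(struct my_seq *s, const char *str)
	{
		int len = strlen(str);

		if (s->full)			/* already overflowed: stay a no-op */
			return 0;

		if (len > ((PAGE_SIZE - 1) - s->len)) {
			s->full = 1;		/* latch: later writes fail fast */
			return 0;
		}

		memcpy(s->buffer + s->len, str, len);
		s->len += len;
		return len;
	}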
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 26185d727676..0271742abb8d 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -28,8 +28,8 @@ static int wakeup_current_cpu;
28static unsigned wakeup_prio = -1; 28static unsigned wakeup_prio = -1;
29static int wakeup_rt; 29static int wakeup_rt;
30 30
31static raw_spinlock_t wakeup_lock = 31static arch_spinlock_t wakeup_lock =
32 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 32 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
33 33
34static void __wakeup_reset(struct trace_array *tr); 34static void __wakeup_reset(struct trace_array *tr);
35 35
@@ -143,7 +143,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
143 goto out; 143 goto out;
144 144
145 local_irq_save(flags); 145 local_irq_save(flags);
146 __raw_spin_lock(&wakeup_lock); 146 arch_spin_lock(&wakeup_lock);
147 147
148 /* We could race with grabbing wakeup_lock */ 148 /* We could race with grabbing wakeup_lock */
149 if (unlikely(!tracer_enabled || next != wakeup_task)) 149 if (unlikely(!tracer_enabled || next != wakeup_task))
@@ -169,7 +169,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
169 169
170out_unlock: 170out_unlock:
171 __wakeup_reset(wakeup_trace); 171 __wakeup_reset(wakeup_trace);
172 __raw_spin_unlock(&wakeup_lock); 172 arch_spin_unlock(&wakeup_lock);
173 local_irq_restore(flags); 173 local_irq_restore(flags);
174out: 174out:
175 atomic_dec(&wakeup_trace->data[cpu]->disabled); 175 atomic_dec(&wakeup_trace->data[cpu]->disabled);
@@ -193,9 +193,9 @@ static void wakeup_reset(struct trace_array *tr)
193 tracing_reset_online_cpus(tr); 193 tracing_reset_online_cpus(tr);
194 194
195 local_irq_save(flags); 195 local_irq_save(flags);
196 __raw_spin_lock(&wakeup_lock); 196 arch_spin_lock(&wakeup_lock);
197 __wakeup_reset(tr); 197 __wakeup_reset(tr);
198 __raw_spin_unlock(&wakeup_lock); 198 arch_spin_unlock(&wakeup_lock);
199 local_irq_restore(flags); 199 local_irq_restore(flags);
200} 200}
201 201
@@ -225,7 +225,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
225 goto out; 225 goto out;
226 226
227 /* interrupts should be off from try_to_wake_up */ 227 /* interrupts should be off from try_to_wake_up */
228 __raw_spin_lock(&wakeup_lock); 228 arch_spin_lock(&wakeup_lock);
229 229
230 /* check for races. */ 230 /* check for races. */
231 if (!tracer_enabled || p->prio >= wakeup_prio) 231 if (!tracer_enabled || p->prio >= wakeup_prio)
@@ -255,7 +255,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
255 trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); 255 trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
256 256
257out_locked: 257out_locked:
258 __raw_spin_unlock(&wakeup_lock); 258 arch_spin_unlock(&wakeup_lock);
259out: 259out:
260 atomic_dec(&wakeup_trace->data[cpu]->disabled); 260 atomic_dec(&wakeup_trace->data[cpu]->disabled);
261} 261}
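
This rename, repeated in trace_selftest.c and trace_stack.c below, is part of the series-wide locking cleanup: the lowest-level, never-sleeping lock becomes arch_spinlock_t with arch_spin_*() operations, freeing the raw_spinlock_t name for the layer above it. The usage pattern is unchanged; a sketch with an illustrative counter:

	/*
	 * Sketch of the renamed low-level lock API; the counter is
	 * illustrative. arch_spin_lock() does not disable interrupts
	 * itself, hence the explicit local_irq_save().
	 */
	static arch_spinlock_t example_lock =
		(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
	static unsigned long example_count;

	static void example_inc(void)
	{
		unsigned long flags;

		local_irq_save(flags);
		arch_spin_lock(&example_lock);
		example_count++;
		arch_spin_unlock(&example_lock);
		local_irq_restore(flags);
	}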
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index dc98309e839a..280fea470d67 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -67,7 +67,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
67 67
68 /* Don't allow flipping of max traces now */ 68 /* Don't allow flipping of max traces now */
69 local_irq_save(flags); 69 local_irq_save(flags);
70 __raw_spin_lock(&ftrace_max_lock); 70 arch_spin_lock(&ftrace_max_lock);
71 71
72 cnt = ring_buffer_entries(tr->buffer); 72 cnt = ring_buffer_entries(tr->buffer);
73 73
@@ -85,7 +85,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
85 break; 85 break;
86 } 86 }
87 tracing_on(); 87 tracing_on();
88 __raw_spin_unlock(&ftrace_max_lock); 88 arch_spin_unlock(&ftrace_max_lock);
89 local_irq_restore(flags); 89 local_irq_restore(flags);
90 90
91 if (count) 91 if (count)
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 8504ac71e4e8..678a5120ee30 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -27,8 +27,8 @@ static struct stack_trace max_stack_trace = {
27}; 27};
28 28
29static unsigned long max_stack_size; 29static unsigned long max_stack_size;
30static raw_spinlock_t max_stack_lock = 30static arch_spinlock_t max_stack_lock =
31 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 31 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
32 32
33static int stack_trace_disabled __read_mostly; 33static int stack_trace_disabled __read_mostly;
34static DEFINE_PER_CPU(int, trace_active); 34static DEFINE_PER_CPU(int, trace_active);
@@ -54,7 +54,7 @@ static inline void check_stack(void)
54 return; 54 return;
55 55
56 local_irq_save(flags); 56 local_irq_save(flags);
57 __raw_spin_lock(&max_stack_lock); 57 arch_spin_lock(&max_stack_lock);
58 58
59 /* a race could have already updated it */ 59 /* a race could have already updated it */
60 if (this_size <= max_stack_size) 60 if (this_size <= max_stack_size)
@@ -103,7 +103,7 @@ static inline void check_stack(void)
103 } 103 }
104 104
105 out: 105 out:
106 __raw_spin_unlock(&max_stack_lock); 106 arch_spin_unlock(&max_stack_lock);
107 local_irq_restore(flags); 107 local_irq_restore(flags);
108} 108}
109 109
@@ -171,9 +171,9 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
171 return ret; 171 return ret;
172 172
173 local_irq_save(flags); 173 local_irq_save(flags);
174 __raw_spin_lock(&max_stack_lock); 174 arch_spin_lock(&max_stack_lock);
175 *ptr = val; 175 *ptr = val;
176 __raw_spin_unlock(&max_stack_lock); 176 arch_spin_unlock(&max_stack_lock);
177 local_irq_restore(flags); 177 local_irq_restore(flags);
178 178
179 return count; 179 return count;
@@ -207,7 +207,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
207static void *t_start(struct seq_file *m, loff_t *pos) 207static void *t_start(struct seq_file *m, loff_t *pos)
208{ 208{
209 local_irq_disable(); 209 local_irq_disable();
210 __raw_spin_lock(&max_stack_lock); 210 arch_spin_lock(&max_stack_lock);
211 211
212 if (*pos == 0) 212 if (*pos == 0)
213 return SEQ_START_TOKEN; 213 return SEQ_START_TOKEN;
@@ -217,7 +217,7 @@ static void *t_start(struct seq_file *m, loff_t *pos)
217 217
218static void t_stop(struct seq_file *m, void *p) 218static void t_stop(struct seq_file *m, void *p)
219{ 219{
220 __raw_spin_unlock(&max_stack_lock); 220 arch_spin_unlock(&max_stack_lock);
221 local_irq_enable(); 221 local_irq_enable();
222} 222}
223 223
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 57501d90096a..75289f372dd2 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -217,10 +217,6 @@ int syscall_enter_define_fields(struct ftrace_event_call *call)
217 int i; 217 int i;
218 int offset = offsetof(typeof(trace), args); 218 int offset = offsetof(typeof(trace), args);
219 219
220 ret = trace_define_common_fields(call);
221 if (ret)
222 return ret;
223
224 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER); 220 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
225 if (ret) 221 if (ret)
226 return ret; 222 return ret;
@@ -241,10 +237,6 @@ int syscall_exit_define_fields(struct ftrace_event_call *call)
241 struct syscall_trace_exit trace; 237 struct syscall_trace_exit trace;
242 int ret; 238 int ret;
243 239
244 ret = trace_define_common_fields(call);
245 if (ret)
246 return ret;
247
248 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER); 240 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
249 if (ret) 241 if (ret)
250 return ret; 242 return ret;
@@ -333,10 +325,7 @@ int reg_event_syscall_enter(struct ftrace_event_call *call)
333 mutex_lock(&syscall_trace_lock); 325 mutex_lock(&syscall_trace_lock);
334 if (!sys_refcount_enter) 326 if (!sys_refcount_enter)
335 ret = register_trace_sys_enter(ftrace_syscall_enter); 327 ret = register_trace_sys_enter(ftrace_syscall_enter);
336 if (ret) { 328 if (!ret) {
337 pr_info("event trace: Could not activate"
338 "syscall entry trace point");
339 } else {
340 set_bit(num, enabled_enter_syscalls); 329 set_bit(num, enabled_enter_syscalls);
341 sys_refcount_enter++; 330 sys_refcount_enter++;
342 } 331 }
@@ -370,10 +359,7 @@ int reg_event_syscall_exit(struct ftrace_event_call *call)
370 mutex_lock(&syscall_trace_lock); 359 mutex_lock(&syscall_trace_lock);
371 if (!sys_refcount_exit) 360 if (!sys_refcount_exit)
372 ret = register_trace_sys_exit(ftrace_syscall_exit); 361 ret = register_trace_sys_exit(ftrace_syscall_exit);
373 if (ret) { 362 if (!ret) {
374 pr_info("event trace: Could not activate"
375 "syscall exit trace point");
376 } else {
377 set_bit(num, enabled_exit_syscalls); 363 set_bit(num, enabled_exit_syscalls);
378 sys_refcount_exit++; 364 sys_refcount_exit++;
379 } 365 }
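
Two independent fixes land in trace_syscalls.c. The define_fields callbacks stop calling trace_define_common_fields() for the same reason as in trace_kprobe.c above: the core now defines the common fields before invoking each event's callback. And the registration paths invert their test to the positive `if (!ret)` form, dropping a pr_info() whose split format string was missing a space ("...activatesyscall entry..."). A sketch of the resulting refcounted registration pattern; the my_-prefixed names are hypothetical:

	/*
	 * Sketch of the refcounted tracepoint registration the syscall
	 * events use after this change; my_-prefixed names are made up.
	 */
	static DEFINE_MUTEX(my_trace_lock);
	static int my_refcount_enter;

	static int my_reg_event_syscall_enter(int num)
	{
		int ret = 0;

		mutex_lock(&my_trace_lock);
		if (!my_refcount_enter)		/* first user attaches the probe */
			ret = register_trace_sys_enter(ftrace_syscall_enter);
		if (!ret) {			/* attached now, or already attached */
			set_bit(num, enabled_enter_syscalls);
			my_refcount_enter++;
		}
		mutex_unlock(&my_trace_lock);
		return ret;
	}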