aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/acct.c3
-rw-r--r--kernel/audit_tree.c13
-rw-r--r--kernel/auditsc.c1
-rw-r--r--kernel/bounds.c2
-rw-r--r--kernel/cgroup.c1
-rw-r--r--kernel/cpu.c24
-rw-r--r--kernel/exit.c38
-rw-r--r--kernel/fork.c15
-rw-r--r--kernel/futex.c77
-rw-r--r--kernel/hrtimer.c50
-rw-r--r--kernel/hw_breakpoint.c14
-rw-r--r--kernel/irq/autoprobe.c20
-rw-r--r--kernel/irq/chip.c86
-rw-r--r--kernel/irq/handle.c22
-rw-r--r--kernel/irq/internals.h2
-rw-r--r--kernel/irq/manage.c50
-rw-r--r--kernel/irq/migration.c2
-rw-r--r--kernel/irq/numa_migrate.c8
-rw-r--r--kernel/irq/pm.c8
-rw-r--r--kernel/irq/proc.c4
-rw-r--r--kernel/irq/spurious.c14
-rw-r--r--kernel/kexec.c61
-rw-r--r--kernel/kfifo.c408
-rw-r--r--kernel/kmod.c12
-rw-r--r--kernel/kprobes.c2
-rw-r--r--kernel/ksysfs.c21
-rw-r--r--kernel/kthread.c23
-rw-r--r--kernel/lockdep.c20
-rw-r--r--kernel/module.c58
-rw-r--r--kernel/mutex-debug.h12
-rw-r--r--kernel/panic.c3
-rw-r--r--kernel/params.c8
-rw-r--r--kernel/perf_event.c171
-rw-r--r--kernel/pid.c12
-rw-r--r--kernel/power/console.c7
-rw-r--r--kernel/printk.c119
-rw-r--r--kernel/relay.c2
-rw-r--r--kernel/resource.c32
-rw-r--r--kernel/rtmutex-debug.c4
-rw-r--r--kernel/rtmutex.c106
-rw-r--r--kernel/sched.c531
-rw-r--r--kernel/sched_clock.c23
-rw-r--r--kernel/sched_cpupri.c10
-rw-r--r--kernel/sched_cpupri.h2
-rw-r--r--kernel/sched_debug.c4
-rw-r--r--kernel/sched_fair.c59
-rw-r--r--kernel/sched_idletask.c4
-rw-r--r--kernel/sched_rt.c64
-rw-r--r--kernel/signal.c66
-rw-r--r--kernel/smp.c37
-rw-r--r--kernel/spinlock.c306
-rw-r--r--kernel/sys.c10
-rw-r--r--kernel/sysctl.c20
-rw-r--r--kernel/sysctl_binary.c38
-rw-r--r--kernel/time.c1
-rw-r--r--kernel/time/clockevents.c32
-rw-r--r--kernel/time/tick-broadcast.c42
-rw-r--r--kernel/time/tick-common.c20
-rw-r--r--kernel/time/tick-internal.h1
-rw-r--r--kernel/time/timecompare.c2
-rw-r--r--kernel/time/timekeeping.c27
-rw-r--r--kernel/time/timer_list.c10
-rw-r--r--kernel/time/timer_stats.c17
-rw-r--r--kernel/timer.c5
-rw-r--r--kernel/trace/Kconfig112
-rw-r--r--kernel/trace/ftrace.c36
-rw-r--r--kernel/trace/power-traces.c2
-rw-r--r--kernel/trace/ring_buffer.c49
-rw-r--r--kernel/trace/trace.c238
-rw-r--r--kernel/trace/trace.h23
-rw-r--r--kernel/trace/trace_clock.c8
-rw-r--r--kernel/trace/trace_event_profile.c6
-rw-r--r--kernel/trace/trace_events.c41
-rw-r--r--kernel/trace/trace_events_filter.c29
-rw-r--r--kernel/trace/trace_export.c11
-rw-r--r--kernel/trace/trace_irqsoff.c2
-rw-r--r--kernel/trace/trace_kprobe.c47
-rw-r--r--kernel/trace/trace_ksym.c188
-rw-r--r--kernel/trace/trace_sched_wakeup.c16
-rw-r--r--kernel/trace/trace_selftest.c4
-rw-r--r--kernel/trace/trace_stack.c16
-rw-r--r--kernel/trace/trace_syscalls.c18
-rw-r--r--kernel/trace/trace_sysprof.c1
83 files changed, 2170 insertions, 1543 deletions
diff --git a/kernel/acct.c b/kernel/acct.c
index 9a4715a2f6bf..a6605ca921b6 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -536,7 +536,8 @@ static void do_acct_process(struct bsd_acct_struct *acct,
536 do_div(elapsed, AHZ); 536 do_div(elapsed, AHZ);
537 ac.ac_btime = get_seconds() - elapsed; 537 ac.ac_btime = get_seconds() - elapsed;
538 /* we really need to bite the bullet and change layout */ 538 /* we really need to bite the bullet and change layout */
539 current_uid_gid(&ac.ac_uid, &ac.ac_gid); 539 ac.ac_uid = orig_cred->uid;
540 ac.ac_gid = orig_cred->gid;
540#if ACCT_VERSION==2 541#if ACCT_VERSION==2
541 ac.ac_ahz = AHZ; 542 ac.ac_ahz = AHZ;
542#endif 543#endif
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 2451dc6f3282..4b05bd9479db 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -277,7 +277,7 @@ static void untag_chunk(struct node *p)
277 owner->root = NULL; 277 owner->root = NULL;
278 } 278 }
279 279
280 for (i = j = 0; i < size; i++, j++) { 280 for (i = j = 0; j <= size; i++, j++) {
281 struct audit_tree *s; 281 struct audit_tree *s;
282 if (&chunk->owners[j] == p) { 282 if (&chunk->owners[j] == p) {
283 list_del_init(&p->list); 283 list_del_init(&p->list);
@@ -290,7 +290,7 @@ static void untag_chunk(struct node *p)
290 if (!s) /* result of earlier fallback */ 290 if (!s) /* result of earlier fallback */
291 continue; 291 continue;
292 get_tree(s); 292 get_tree(s);
293 list_replace_init(&chunk->owners[i].list, &new->owners[j].list); 293 list_replace_init(&chunk->owners[j].list, &new->owners[i].list);
294 } 294 }
295 295
296 list_replace_rcu(&chunk->hash, &new->hash); 296 list_replace_rcu(&chunk->hash, &new->hash);
@@ -373,15 +373,17 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
373 for (n = 0; n < old->count; n++) { 373 for (n = 0; n < old->count; n++) {
374 if (old->owners[n].owner == tree) { 374 if (old->owners[n].owner == tree) {
375 spin_unlock(&hash_lock); 375 spin_unlock(&hash_lock);
376 put_inotify_watch(watch); 376 put_inotify_watch(&old->watch);
377 return 0; 377 return 0;
378 } 378 }
379 } 379 }
380 spin_unlock(&hash_lock); 380 spin_unlock(&hash_lock);
381 381
382 chunk = alloc_chunk(old->count + 1); 382 chunk = alloc_chunk(old->count + 1);
383 if (!chunk) 383 if (!chunk) {
384 put_inotify_watch(&old->watch);
384 return -ENOMEM; 385 return -ENOMEM;
386 }
385 387
386 mutex_lock(&inode->inotify_mutex); 388 mutex_lock(&inode->inotify_mutex);
387 if (inotify_clone_watch(&old->watch, &chunk->watch) < 0) { 389 if (inotify_clone_watch(&old->watch, &chunk->watch) < 0) {
@@ -425,7 +427,8 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
425 spin_unlock(&hash_lock); 427 spin_unlock(&hash_lock);
426 inotify_evict_watch(&old->watch); 428 inotify_evict_watch(&old->watch);
427 mutex_unlock(&inode->inotify_mutex); 429 mutex_unlock(&inode->inotify_mutex);
428 put_inotify_watch(&old->watch); 430 put_inotify_watch(&old->watch); /* pair to inotify_find_watch */
431 put_inotify_watch(&old->watch); /* and kill it */
429 return 0; 432 return 0;
430} 433}
431 434
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 267e484f0198..fc0f928167e7 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -250,7 +250,6 @@ struct audit_context {
250#endif 250#endif
251}; 251};
252 252
253#define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE])
254static inline int open_arg(int flags, int mask) 253static inline int open_arg(int flags, int mask)
255{ 254{
256 int n = ACC_MODE(flags); 255 int n = ACC_MODE(flags);
diff --git a/kernel/bounds.c b/kernel/bounds.c
index 3c5301381837..98a51f26c136 100644
--- a/kernel/bounds.c
+++ b/kernel/bounds.c
@@ -12,7 +12,7 @@
12 12
13void foo(void) 13void foo(void)
14{ 14{
15 /* The enum constants to put into include/linux/bounds.h */ 15 /* The enum constants to put into include/generated/bounds.h */
16 DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS); 16 DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS);
17 DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES); 17 DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES);
18 /* End of constants */ 18 /* End of constants */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 0249f4be9b5c..1fbcc748044a 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2468,7 +2468,6 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
2468 /* make sure l doesn't vanish out from under us */ 2468 /* make sure l doesn't vanish out from under us */
2469 down_write(&l->mutex); 2469 down_write(&l->mutex);
2470 mutex_unlock(&cgrp->pidlist_mutex); 2470 mutex_unlock(&cgrp->pidlist_mutex);
2471 l->use_count++;
2472 return l; 2471 return l;
2473 } 2472 }
2474 } 2473 }
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 291ac586f37f..1c8ddd6ee940 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -209,6 +209,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
209 return -ENOMEM; 209 return -ENOMEM;
210 210
211 cpu_hotplug_begin(); 211 cpu_hotplug_begin();
212 set_cpu_active(cpu, false);
212 err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod, 213 err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
213 hcpu, -1, &nr_calls); 214 hcpu, -1, &nr_calls);
214 if (err == NOTIFY_BAD) { 215 if (err == NOTIFY_BAD) {
@@ -280,18 +281,6 @@ int __ref cpu_down(unsigned int cpu)
280 goto out; 281 goto out;
281 } 282 }
282 283
283 set_cpu_active(cpu, false);
284
285 /*
286 * Make sure the all cpus did the reschedule and are not
287 * using stale version of the cpu_active_mask.
288 * This is not strictly necessary becuase stop_machine()
289 * that we run down the line already provides the required
290 * synchronization. But it's really a side effect and we do not
291 * want to depend on the innards of the stop_machine here.
292 */
293 synchronize_sched();
294
295 err = _cpu_down(cpu, 0); 284 err = _cpu_down(cpu, 0);
296 285
297out: 286out:
@@ -382,19 +371,12 @@ int disable_nonboot_cpus(void)
382 return error; 371 return error;
383 cpu_maps_update_begin(); 372 cpu_maps_update_begin();
384 first_cpu = cpumask_first(cpu_online_mask); 373 first_cpu = cpumask_first(cpu_online_mask);
385 /* We take down all of the non-boot CPUs in one shot to avoid races 374 /*
375 * We take down all of the non-boot CPUs in one shot to avoid races
386 * with the userspace trying to use the CPU hotplug at the same time 376 * with the userspace trying to use the CPU hotplug at the same time
387 */ 377 */
388 cpumask_clear(frozen_cpus); 378 cpumask_clear(frozen_cpus);
389 379
390 for_each_online_cpu(cpu) {
391 if (cpu == first_cpu)
392 continue;
393 set_cpu_active(cpu, false);
394 }
395
396 synchronize_sched();
397
398 printk("Disabling non-boot CPUs ...\n"); 380 printk("Disabling non-boot CPUs ...\n");
399 for_each_online_cpu(cpu) { 381 for_each_online_cpu(cpu) {
400 if (cpu == first_cpu) 382 if (cpu == first_cpu)
diff --git a/kernel/exit.c b/kernel/exit.c
index 6f50ef55a6f3..546774a31a66 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -68,10 +68,10 @@ static void __unhash_process(struct task_struct *p)
68 detach_pid(p, PIDTYPE_SID); 68 detach_pid(p, PIDTYPE_SID);
69 69
70 list_del_rcu(&p->tasks); 70 list_del_rcu(&p->tasks);
71 list_del_init(&p->sibling);
71 __get_cpu_var(process_counts)--; 72 __get_cpu_var(process_counts)--;
72 } 73 }
73 list_del_rcu(&p->thread_group); 74 list_del_rcu(&p->thread_group);
74 list_del_init(&p->sibling);
75} 75}
76 76
77/* 77/*
@@ -736,12 +736,9 @@ static struct task_struct *find_new_reaper(struct task_struct *father)
736/* 736/*
737* Any that need to be release_task'd are put on the @dead list. 737* Any that need to be release_task'd are put on the @dead list.
738 */ 738 */
739static void reparent_thread(struct task_struct *father, struct task_struct *p, 739static void reparent_leader(struct task_struct *father, struct task_struct *p,
740 struct list_head *dead) 740 struct list_head *dead)
741{ 741{
742 if (p->pdeath_signal)
743 group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p);
744
745 list_move_tail(&p->sibling, &p->real_parent->children); 742 list_move_tail(&p->sibling, &p->real_parent->children);
746 743
747 if (task_detached(p)) 744 if (task_detached(p))
@@ -780,12 +777,18 @@ static void forget_original_parent(struct task_struct *father)
780 reaper = find_new_reaper(father); 777 reaper = find_new_reaper(father);
781 778
782 list_for_each_entry_safe(p, n, &father->children, sibling) { 779 list_for_each_entry_safe(p, n, &father->children, sibling) {
783 p->real_parent = reaper; 780 struct task_struct *t = p;
784 if (p->parent == father) { 781 do {
785 BUG_ON(task_ptrace(p)); 782 t->real_parent = reaper;
786 p->parent = p->real_parent; 783 if (t->parent == father) {
787 } 784 BUG_ON(task_ptrace(t));
788 reparent_thread(father, p, &dead_children); 785 t->parent = t->real_parent;
786 }
787 if (t->pdeath_signal)
788 group_send_sig_info(t->pdeath_signal,
789 SEND_SIG_NOINFO, t);
790 } while_each_thread(p, t);
791 reparent_leader(father, p, &dead_children);
789 } 792 }
790 write_unlock_irq(&tasklist_lock); 793 write_unlock_irq(&tasklist_lock);
791 794
@@ -933,7 +936,7 @@ NORET_TYPE void do_exit(long code)
933 * an exiting task cleaning up the robust pi futexes. 936 * an exiting task cleaning up the robust pi futexes.
934 */ 937 */
935 smp_mb(); 938 smp_mb();
936 spin_unlock_wait(&tsk->pi_lock); 939 raw_spin_unlock_wait(&tsk->pi_lock);
937 940
938 if (unlikely(in_atomic())) 941 if (unlikely(in_atomic()))
939 printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n", 942 printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
@@ -1551,14 +1554,9 @@ static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk)
1551 struct task_struct *p; 1554 struct task_struct *p;
1552 1555
1553 list_for_each_entry(p, &tsk->children, sibling) { 1556 list_for_each_entry(p, &tsk->children, sibling) {
1554 /* 1557 int ret = wait_consider_task(wo, 0, p);
1555 * Do not consider detached threads. 1558 if (ret)
1556 */ 1559 return ret;
1557 if (!task_detached(p)) {
1558 int ret = wait_consider_task(wo, 0, p);
1559 if (ret)
1560 return ret;
1561 }
1562 } 1560 }
1563 1561
1564 return 0; 1562 return 0;
diff --git a/kernel/fork.c b/kernel/fork.c
index 1415dc4598ae..5b2959b3ffc2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -939,9 +939,9 @@ SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
939 939
940static void rt_mutex_init_task(struct task_struct *p) 940static void rt_mutex_init_task(struct task_struct *p)
941{ 941{
942 spin_lock_init(&p->pi_lock); 942 raw_spin_lock_init(&p->pi_lock);
943#ifdef CONFIG_RT_MUTEXES 943#ifdef CONFIG_RT_MUTEXES
944 plist_head_init(&p->pi_waiters, &p->pi_lock); 944 plist_head_init_raw(&p->pi_waiters, &p->pi_lock);
945 p->pi_blocked_on = NULL; 945 p->pi_blocked_on = NULL;
946#endif 946#endif
947} 947}
@@ -1127,6 +1127,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1127#ifdef CONFIG_DEBUG_MUTEXES 1127#ifdef CONFIG_DEBUG_MUTEXES
1128 p->blocked_on = NULL; /* not blocked yet */ 1128 p->blocked_on = NULL; /* not blocked yet */
1129#endif 1129#endif
1130#ifdef CONFIG_CGROUP_MEM_RES_CTLR
1131 p->memcg_batch.do_batch = 0;
1132 p->memcg_batch.memcg = NULL;
1133#endif
1130 1134
1131 p->bts = NULL; 1135 p->bts = NULL;
1132 1136
@@ -1206,9 +1210,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1206 p->sas_ss_sp = p->sas_ss_size = 0; 1210 p->sas_ss_sp = p->sas_ss_size = 0;
1207 1211
1208 /* 1212 /*
1209 * Syscall tracing should be turned off in the child regardless 1213 * Syscall tracing and stepping should be turned off in the
1210 * of CLONE_PTRACE. 1214 * child regardless of CLONE_PTRACE.
1211 */ 1215 */
1216 user_disable_single_step(p);
1212 clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE); 1217 clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
1213#ifdef TIF_SYSCALL_EMU 1218#ifdef TIF_SYSCALL_EMU
1214 clear_tsk_thread_flag(p, TIF_SYSCALL_EMU); 1219 clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
@@ -1286,7 +1291,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1286 } 1291 }
1287 1292
1288 if (likely(p->pid)) { 1293 if (likely(p->pid)) {
1289 list_add_tail(&p->sibling, &p->real_parent->children);
1290 tracehook_finish_clone(p, clone_flags, trace); 1294 tracehook_finish_clone(p, clone_flags, trace);
1291 1295
1292 if (thread_group_leader(p)) { 1296 if (thread_group_leader(p)) {
@@ -1298,6 +1302,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1298 p->signal->tty = tty_kref_get(current->signal->tty); 1302 p->signal->tty = tty_kref_get(current->signal->tty);
1299 attach_pid(p, PIDTYPE_PGID, task_pgrp(current)); 1303 attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
1300 attach_pid(p, PIDTYPE_SID, task_session(current)); 1304 attach_pid(p, PIDTYPE_SID, task_session(current));
1305 list_add_tail(&p->sibling, &p->real_parent->children);
1301 list_add_tail_rcu(&p->tasks, &init_task.tasks); 1306 list_add_tail_rcu(&p->tasks, &init_task.tasks);
1302 __get_cpu_var(process_counts)++; 1307 __get_cpu_var(process_counts)++;
1303 } 1308 }
diff --git a/kernel/futex.c b/kernel/futex.c
index d73ef1f3e55d..d9b3a2228f9d 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -203,8 +203,6 @@ static void drop_futex_key_refs(union futex_key *key)
203 * @uaddr: virtual address of the futex 203 * @uaddr: virtual address of the futex
204 * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED 204 * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
205 * @key: address where result is stored. 205 * @key: address where result is stored.
206 * @rw: mapping needs to be read/write (values: VERIFY_READ,
207 * VERIFY_WRITE)
208 * 206 *
209 * Returns a negative error code or 0 207 * Returns a negative error code or 0
210 * The key words are stored in *key on success. 208 * The key words are stored in *key on success.
@@ -216,7 +214,7 @@ static void drop_futex_key_refs(union futex_key *key)
216 * lock_page() might sleep, the caller should not hold a spinlock. 214 * lock_page() might sleep, the caller should not hold a spinlock.
217 */ 215 */
218static int 216static int
219get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) 217get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
220{ 218{
221 unsigned long address = (unsigned long)uaddr; 219 unsigned long address = (unsigned long)uaddr;
222 struct mm_struct *mm = current->mm; 220 struct mm_struct *mm = current->mm;
@@ -239,7 +237,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
239 * but access_ok() should be faster than find_vma() 237 * but access_ok() should be faster than find_vma()
240 */ 238 */
241 if (!fshared) { 239 if (!fshared) {
242 if (unlikely(!access_ok(rw, uaddr, sizeof(u32)))) 240 if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))))
243 return -EFAULT; 241 return -EFAULT;
244 key->private.mm = mm; 242 key->private.mm = mm;
245 key->private.address = address; 243 key->private.address = address;
@@ -248,7 +246,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
248 } 246 }
249 247
250again: 248again:
251 err = get_user_pages_fast(address, 1, rw == VERIFY_WRITE, &page); 249 err = get_user_pages_fast(address, 1, 1, &page);
252 if (err < 0) 250 if (err < 0)
253 return err; 251 return err;
254 252
@@ -403,9 +401,9 @@ static void free_pi_state(struct futex_pi_state *pi_state)
403 * and has cleaned up the pi_state already 401 * and has cleaned up the pi_state already
404 */ 402 */
405 if (pi_state->owner) { 403 if (pi_state->owner) {
406 spin_lock_irq(&pi_state->owner->pi_lock); 404 raw_spin_lock_irq(&pi_state->owner->pi_lock);
407 list_del_init(&pi_state->list); 405 list_del_init(&pi_state->list);
408 spin_unlock_irq(&pi_state->owner->pi_lock); 406 raw_spin_unlock_irq(&pi_state->owner->pi_lock);
409 407
410 rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner); 408 rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner);
411 } 409 }
@@ -470,18 +468,18 @@ void exit_pi_state_list(struct task_struct *curr)
470 * pi_state_list anymore, but we have to be careful 468 * pi_state_list anymore, but we have to be careful
471 * versus waiters unqueueing themselves: 469 * versus waiters unqueueing themselves:
472 */ 470 */
473 spin_lock_irq(&curr->pi_lock); 471 raw_spin_lock_irq(&curr->pi_lock);
474 while (!list_empty(head)) { 472 while (!list_empty(head)) {
475 473
476 next = head->next; 474 next = head->next;
477 pi_state = list_entry(next, struct futex_pi_state, list); 475 pi_state = list_entry(next, struct futex_pi_state, list);
478 key = pi_state->key; 476 key = pi_state->key;
479 hb = hash_futex(&key); 477 hb = hash_futex(&key);
480 spin_unlock_irq(&curr->pi_lock); 478 raw_spin_unlock_irq(&curr->pi_lock);
481 479
482 spin_lock(&hb->lock); 480 spin_lock(&hb->lock);
483 481
484 spin_lock_irq(&curr->pi_lock); 482 raw_spin_lock_irq(&curr->pi_lock);
485 /* 483 /*
486 * We dropped the pi-lock, so re-check whether this 484 * We dropped the pi-lock, so re-check whether this
487 * task still owns the PI-state: 485 * task still owns the PI-state:
@@ -495,15 +493,15 @@ void exit_pi_state_list(struct task_struct *curr)
495 WARN_ON(list_empty(&pi_state->list)); 493 WARN_ON(list_empty(&pi_state->list));
496 list_del_init(&pi_state->list); 494 list_del_init(&pi_state->list);
497 pi_state->owner = NULL; 495 pi_state->owner = NULL;
498 spin_unlock_irq(&curr->pi_lock); 496 raw_spin_unlock_irq(&curr->pi_lock);
499 497
500 rt_mutex_unlock(&pi_state->pi_mutex); 498 rt_mutex_unlock(&pi_state->pi_mutex);
501 499
502 spin_unlock(&hb->lock); 500 spin_unlock(&hb->lock);
503 501
504 spin_lock_irq(&curr->pi_lock); 502 raw_spin_lock_irq(&curr->pi_lock);
505 } 503 }
506 spin_unlock_irq(&curr->pi_lock); 504 raw_spin_unlock_irq(&curr->pi_lock);
507} 505}
508 506
509static int 507static int
@@ -558,7 +556,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
558 * change of the task flags, we do this protected by 556 * change of the task flags, we do this protected by
559 * p->pi_lock: 557 * p->pi_lock:
560 */ 558 */
561 spin_lock_irq(&p->pi_lock); 559 raw_spin_lock_irq(&p->pi_lock);
562 if (unlikely(p->flags & PF_EXITING)) { 560 if (unlikely(p->flags & PF_EXITING)) {
563 /* 561 /*
564 * The task is on the way out. When PF_EXITPIDONE is 562 * The task is on the way out. When PF_EXITPIDONE is
@@ -567,7 +565,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
567 */ 565 */
568 int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN; 566 int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;
569 567
570 spin_unlock_irq(&p->pi_lock); 568 raw_spin_unlock_irq(&p->pi_lock);
571 put_task_struct(p); 569 put_task_struct(p);
572 return ret; 570 return ret;
573 } 571 }
@@ -586,7 +584,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
586 WARN_ON(!list_empty(&pi_state->list)); 584 WARN_ON(!list_empty(&pi_state->list));
587 list_add(&pi_state->list, &p->pi_state_list); 585 list_add(&pi_state->list, &p->pi_state_list);
588 pi_state->owner = p; 586 pi_state->owner = p;
589 spin_unlock_irq(&p->pi_lock); 587 raw_spin_unlock_irq(&p->pi_lock);
590 588
591 put_task_struct(p); 589 put_task_struct(p);
592 590
@@ -760,7 +758,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
760 if (!pi_state) 758 if (!pi_state)
761 return -EINVAL; 759 return -EINVAL;
762 760
763 spin_lock(&pi_state->pi_mutex.wait_lock); 761 raw_spin_lock(&pi_state->pi_mutex.wait_lock);
764 new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); 762 new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
765 763
766 /* 764 /*
@@ -789,23 +787,23 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
789 else if (curval != uval) 787 else if (curval != uval)
790 ret = -EINVAL; 788 ret = -EINVAL;
791 if (ret) { 789 if (ret) {
792 spin_unlock(&pi_state->pi_mutex.wait_lock); 790 raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
793 return ret; 791 return ret;
794 } 792 }
795 } 793 }
796 794
797 spin_lock_irq(&pi_state->owner->pi_lock); 795 raw_spin_lock_irq(&pi_state->owner->pi_lock);
798 WARN_ON(list_empty(&pi_state->list)); 796 WARN_ON(list_empty(&pi_state->list));
799 list_del_init(&pi_state->list); 797 list_del_init(&pi_state->list);
800 spin_unlock_irq(&pi_state->owner->pi_lock); 798 raw_spin_unlock_irq(&pi_state->owner->pi_lock);
801 799
802 spin_lock_irq(&new_owner->pi_lock); 800 raw_spin_lock_irq(&new_owner->pi_lock);
803 WARN_ON(!list_empty(&pi_state->list)); 801 WARN_ON(!list_empty(&pi_state->list));
804 list_add(&pi_state->list, &new_owner->pi_state_list); 802 list_add(&pi_state->list, &new_owner->pi_state_list);
805 pi_state->owner = new_owner; 803 pi_state->owner = new_owner;
806 spin_unlock_irq(&new_owner->pi_lock); 804 raw_spin_unlock_irq(&new_owner->pi_lock);
807 805
808 spin_unlock(&pi_state->pi_mutex.wait_lock); 806 raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
809 rt_mutex_unlock(&pi_state->pi_mutex); 807 rt_mutex_unlock(&pi_state->pi_mutex);
810 808
811 return 0; 809 return 0;
@@ -867,7 +865,7 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
867 if (!bitset) 865 if (!bitset)
868 return -EINVAL; 866 return -EINVAL;
869 867
870 ret = get_futex_key(uaddr, fshared, &key, VERIFY_READ); 868 ret = get_futex_key(uaddr, fshared, &key);
871 if (unlikely(ret != 0)) 869 if (unlikely(ret != 0))
872 goto out; 870 goto out;
873 871
@@ -913,10 +911,10 @@ futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
913 int ret, op_ret; 911 int ret, op_ret;
914 912
915retry: 913retry:
916 ret = get_futex_key(uaddr1, fshared, &key1, VERIFY_READ); 914 ret = get_futex_key(uaddr1, fshared, &key1);
917 if (unlikely(ret != 0)) 915 if (unlikely(ret != 0))
918 goto out; 916 goto out;
919 ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_WRITE); 917 ret = get_futex_key(uaddr2, fshared, &key2);
920 if (unlikely(ret != 0)) 918 if (unlikely(ret != 0))
921 goto out_put_key1; 919 goto out_put_key1;
922 920
@@ -1010,7 +1008,7 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
1010 plist_add(&q->list, &hb2->chain); 1008 plist_add(&q->list, &hb2->chain);
1011 q->lock_ptr = &hb2->lock; 1009 q->lock_ptr = &hb2->lock;
1012#ifdef CONFIG_DEBUG_PI_LIST 1010#ifdef CONFIG_DEBUG_PI_LIST
1013 q->list.plist.lock = &hb2->lock; 1011 q->list.plist.spinlock = &hb2->lock;
1014#endif 1012#endif
1015 } 1013 }
1016 get_futex_key_refs(key2); 1014 get_futex_key_refs(key2);
@@ -1046,7 +1044,7 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
1046 1044
1047 q->lock_ptr = &hb->lock; 1045 q->lock_ptr = &hb->lock;
1048#ifdef CONFIG_DEBUG_PI_LIST 1046#ifdef CONFIG_DEBUG_PI_LIST
1049 q->list.plist.lock = &hb->lock; 1047 q->list.plist.spinlock = &hb->lock;
1050#endif 1048#endif
1051 1049
1052 wake_up_state(q->task, TASK_NORMAL); 1050 wake_up_state(q->task, TASK_NORMAL);
@@ -1175,11 +1173,10 @@ retry:
1175 pi_state = NULL; 1173 pi_state = NULL;
1176 } 1174 }
1177 1175
1178 ret = get_futex_key(uaddr1, fshared, &key1, VERIFY_READ); 1176 ret = get_futex_key(uaddr1, fshared, &key1);
1179 if (unlikely(ret != 0)) 1177 if (unlikely(ret != 0))
1180 goto out; 1178 goto out;
1181 ret = get_futex_key(uaddr2, fshared, &key2, 1179 ret = get_futex_key(uaddr2, fshared, &key2);
1182 requeue_pi ? VERIFY_WRITE : VERIFY_READ);
1183 if (unlikely(ret != 0)) 1180 if (unlikely(ret != 0))
1184 goto out_put_key1; 1181 goto out_put_key1;
1185 1182
@@ -1394,7 +1391,7 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
1394 1391
1395 plist_node_init(&q->list, prio); 1392 plist_node_init(&q->list, prio);
1396#ifdef CONFIG_DEBUG_PI_LIST 1393#ifdef CONFIG_DEBUG_PI_LIST
1397 q->list.plist.lock = &hb->lock; 1394 q->list.plist.spinlock = &hb->lock;
1398#endif 1395#endif
1399 plist_add(&q->list, &hb->chain); 1396 plist_add(&q->list, &hb->chain);
1400 q->task = current; 1397 q->task = current;
@@ -1529,18 +1526,18 @@ retry:
1529 * itself. 1526 * itself.
1530 */ 1527 */
1531 if (pi_state->owner != NULL) { 1528 if (pi_state->owner != NULL) {
1532 spin_lock_irq(&pi_state->owner->pi_lock); 1529 raw_spin_lock_irq(&pi_state->owner->pi_lock);
1533 WARN_ON(list_empty(&pi_state->list)); 1530 WARN_ON(list_empty(&pi_state->list));
1534 list_del_init(&pi_state->list); 1531 list_del_init(&pi_state->list);
1535 spin_unlock_irq(&pi_state->owner->pi_lock); 1532 raw_spin_unlock_irq(&pi_state->owner->pi_lock);
1536 } 1533 }
1537 1534
1538 pi_state->owner = newowner; 1535 pi_state->owner = newowner;
1539 1536
1540 spin_lock_irq(&newowner->pi_lock); 1537 raw_spin_lock_irq(&newowner->pi_lock);
1541 WARN_ON(!list_empty(&pi_state->list)); 1538 WARN_ON(!list_empty(&pi_state->list));
1542 list_add(&pi_state->list, &newowner->pi_state_list); 1539 list_add(&pi_state->list, &newowner->pi_state_list);
1543 spin_unlock_irq(&newowner->pi_lock); 1540 raw_spin_unlock_irq(&newowner->pi_lock);
1544 return 0; 1541 return 0;
1545 1542
1546 /* 1543 /*
@@ -1738,7 +1735,7 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, int fshared,
1738 */ 1735 */
1739retry: 1736retry:
1740 q->key = FUTEX_KEY_INIT; 1737 q->key = FUTEX_KEY_INIT;
1741 ret = get_futex_key(uaddr, fshared, &q->key, VERIFY_READ); 1738 ret = get_futex_key(uaddr, fshared, &q->key);
1742 if (unlikely(ret != 0)) 1739 if (unlikely(ret != 0))
1743 return ret; 1740 return ret;
1744 1741
@@ -1904,7 +1901,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared,
1904 q.requeue_pi_key = NULL; 1901 q.requeue_pi_key = NULL;
1905retry: 1902retry:
1906 q.key = FUTEX_KEY_INIT; 1903 q.key = FUTEX_KEY_INIT;
1907 ret = get_futex_key(uaddr, fshared, &q.key, VERIFY_WRITE); 1904 ret = get_futex_key(uaddr, fshared, &q.key);
1908 if (unlikely(ret != 0)) 1905 if (unlikely(ret != 0))
1909 goto out; 1906 goto out;
1910 1907
@@ -2023,7 +2020,7 @@ retry:
2023 if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current)) 2020 if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current))
2024 return -EPERM; 2021 return -EPERM;
2025 2022
2026 ret = get_futex_key(uaddr, fshared, &key, VERIFY_WRITE); 2023 ret = get_futex_key(uaddr, fshared, &key);
2027 if (unlikely(ret != 0)) 2024 if (unlikely(ret != 0))
2028 goto out; 2025 goto out;
2029 2026
@@ -2215,7 +2212,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
2215 rt_waiter.task = NULL; 2212 rt_waiter.task = NULL;
2216 2213
2217 key2 = FUTEX_KEY_INIT; 2214 key2 = FUTEX_KEY_INIT;
2218 ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_WRITE); 2215 ret = get_futex_key(uaddr2, fshared, &key2);
2219 if (unlikely(ret != 0)) 2216 if (unlikely(ret != 0))
2220 goto out; 2217 goto out;
2221 2218
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index d2f9239dc6ba..0086628b6e97 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -127,11 +127,11 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
127 for (;;) { 127 for (;;) {
128 base = timer->base; 128 base = timer->base;
129 if (likely(base != NULL)) { 129 if (likely(base != NULL)) {
130 spin_lock_irqsave(&base->cpu_base->lock, *flags); 130 raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
131 if (likely(base == timer->base)) 131 if (likely(base == timer->base))
132 return base; 132 return base;
133 /* The timer has migrated to another CPU: */ 133 /* The timer has migrated to another CPU: */
134 spin_unlock_irqrestore(&base->cpu_base->lock, *flags); 134 raw_spin_unlock_irqrestore(&base->cpu_base->lock, *flags);
135 } 135 }
136 cpu_relax(); 136 cpu_relax();
137 } 137 }
@@ -208,13 +208,13 @@ again:
208 208
209 /* See the comment in lock_timer_base() */ 209 /* See the comment in lock_timer_base() */
210 timer->base = NULL; 210 timer->base = NULL;
211 spin_unlock(&base->cpu_base->lock); 211 raw_spin_unlock(&base->cpu_base->lock);
212 spin_lock(&new_base->cpu_base->lock); 212 raw_spin_lock(&new_base->cpu_base->lock);
213 213
214 if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) { 214 if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) {
215 cpu = this_cpu; 215 cpu = this_cpu;
216 spin_unlock(&new_base->cpu_base->lock); 216 raw_spin_unlock(&new_base->cpu_base->lock);
217 spin_lock(&base->cpu_base->lock); 217 raw_spin_lock(&base->cpu_base->lock);
218 timer->base = base; 218 timer->base = base;
219 goto again; 219 goto again;
220 } 220 }
@@ -230,7 +230,7 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
230{ 230{
231 struct hrtimer_clock_base *base = timer->base; 231 struct hrtimer_clock_base *base = timer->base;
232 232
233 spin_lock_irqsave(&base->cpu_base->lock, *flags); 233 raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
234 234
235 return base; 235 return base;
236} 236}
@@ -628,12 +628,12 @@ static void retrigger_next_event(void *arg)
628 base = &__get_cpu_var(hrtimer_bases); 628 base = &__get_cpu_var(hrtimer_bases);
629 629
630 /* Adjust CLOCK_REALTIME offset */ 630 /* Adjust CLOCK_REALTIME offset */
631 spin_lock(&base->lock); 631 raw_spin_lock(&base->lock);
632 base->clock_base[CLOCK_REALTIME].offset = 632 base->clock_base[CLOCK_REALTIME].offset =
633 timespec_to_ktime(realtime_offset); 633 timespec_to_ktime(realtime_offset);
634 634
635 hrtimer_force_reprogram(base, 0); 635 hrtimer_force_reprogram(base, 0);
636 spin_unlock(&base->lock); 636 raw_spin_unlock(&base->lock);
637} 637}
638 638
639/* 639/*
@@ -694,9 +694,9 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
694{ 694{
695 if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) { 695 if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) {
696 if (wakeup) { 696 if (wakeup) {
697 spin_unlock(&base->cpu_base->lock); 697 raw_spin_unlock(&base->cpu_base->lock);
698 raise_softirq_irqoff(HRTIMER_SOFTIRQ); 698 raise_softirq_irqoff(HRTIMER_SOFTIRQ);
699 spin_lock(&base->cpu_base->lock); 699 raw_spin_lock(&base->cpu_base->lock);
700 } else 700 } else
701 __raise_softirq_irqoff(HRTIMER_SOFTIRQ); 701 __raise_softirq_irqoff(HRTIMER_SOFTIRQ);
702 702
@@ -790,7 +790,7 @@ static inline void timer_stats_account_hrtimer(struct hrtimer *timer)
790static inline 790static inline
791void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) 791void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
792{ 792{
793 spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags); 793 raw_spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags);
794} 794}
795 795
796/** 796/**
@@ -1123,7 +1123,7 @@ ktime_t hrtimer_get_next_event(void)
1123 unsigned long flags; 1123 unsigned long flags;
1124 int i; 1124 int i;
1125 1125
1126 spin_lock_irqsave(&cpu_base->lock, flags); 1126 raw_spin_lock_irqsave(&cpu_base->lock, flags);
1127 1127
1128 if (!hrtimer_hres_active()) { 1128 if (!hrtimer_hres_active()) {
1129 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) { 1129 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
@@ -1140,7 +1140,7 @@ ktime_t hrtimer_get_next_event(void)
1140 } 1140 }
1141 } 1141 }
1142 1142
1143 spin_unlock_irqrestore(&cpu_base->lock, flags); 1143 raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
1144 1144
1145 if (mindelta.tv64 < 0) 1145 if (mindelta.tv64 < 0)
1146 mindelta.tv64 = 0; 1146 mindelta.tv64 = 0;
@@ -1222,11 +1222,11 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now)
1222 * they get migrated to another cpu, therefore its safe to unlock 1222 * they get migrated to another cpu, therefore its safe to unlock
1223 * the timer base. 1223 * the timer base.
1224 */ 1224 */
1225 spin_unlock(&cpu_base->lock); 1225 raw_spin_unlock(&cpu_base->lock);
1226 trace_hrtimer_expire_entry(timer, now); 1226 trace_hrtimer_expire_entry(timer, now);
1227 restart = fn(timer); 1227 restart = fn(timer);
1228 trace_hrtimer_expire_exit(timer); 1228 trace_hrtimer_expire_exit(timer);
1229 spin_lock(&cpu_base->lock); 1229 raw_spin_lock(&cpu_base->lock);
1230 1230
1231 /* 1231 /*
1232 * Note: We clear the CALLBACK bit after enqueue_hrtimer and 1232 * Note: We clear the CALLBACK bit after enqueue_hrtimer and
@@ -1261,7 +1261,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
1261retry: 1261retry:
1262 expires_next.tv64 = KTIME_MAX; 1262 expires_next.tv64 = KTIME_MAX;
1263 1263
1264 spin_lock(&cpu_base->lock); 1264 raw_spin_lock(&cpu_base->lock);
1265 /* 1265 /*
1266 * We set expires_next to KTIME_MAX here with cpu_base->lock 1266 * We set expires_next to KTIME_MAX here with cpu_base->lock
1267 * held to prevent that a timer is enqueued in our queue via 1267 * held to prevent that a timer is enqueued in our queue via
@@ -1317,7 +1317,7 @@ retry:
1317 * against it. 1317 * against it.
1318 */ 1318 */
1319 cpu_base->expires_next = expires_next; 1319 cpu_base->expires_next = expires_next;
1320 spin_unlock(&cpu_base->lock); 1320 raw_spin_unlock(&cpu_base->lock);
1321 1321
1322 /* Reprogramming necessary ? */ 1322 /* Reprogramming necessary ? */
1323 if (expires_next.tv64 == KTIME_MAX || 1323 if (expires_next.tv64 == KTIME_MAX ||
@@ -1457,7 +1457,7 @@ void hrtimer_run_queues(void)
1457 gettime = 0; 1457 gettime = 0;
1458 } 1458 }
1459 1459
1460 spin_lock(&cpu_base->lock); 1460 raw_spin_lock(&cpu_base->lock);
1461 1461
1462 while ((node = base->first)) { 1462 while ((node = base->first)) {
1463 struct hrtimer *timer; 1463 struct hrtimer *timer;
@@ -1469,7 +1469,7 @@ void hrtimer_run_queues(void)
1469 1469
1470 __run_hrtimer(timer, &base->softirq_time); 1470 __run_hrtimer(timer, &base->softirq_time);
1471 } 1471 }
1472 spin_unlock(&cpu_base->lock); 1472 raw_spin_unlock(&cpu_base->lock);
1473 } 1473 }
1474} 1474}
1475 1475
@@ -1625,7 +1625,7 @@ static void __cpuinit init_hrtimers_cpu(int cpu)
1625 struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu); 1625 struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
1626 int i; 1626 int i;
1627 1627
1628 spin_lock_init(&cpu_base->lock); 1628 raw_spin_lock_init(&cpu_base->lock);
1629 1629
1630 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) 1630 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
1631 cpu_base->clock_base[i].cpu_base = cpu_base; 1631 cpu_base->clock_base[i].cpu_base = cpu_base;
@@ -1683,16 +1683,16 @@ static void migrate_hrtimers(int scpu)
1683 * The caller is globally serialized and nobody else 1683 * The caller is globally serialized and nobody else
1684 * takes two locks at once, deadlock is not possible. 1684 * takes two locks at once, deadlock is not possible.
1685 */ 1685 */
1686 spin_lock(&new_base->lock); 1686 raw_spin_lock(&new_base->lock);
1687 spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); 1687 raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
1688 1688
1689 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { 1689 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
1690 migrate_hrtimer_list(&old_base->clock_base[i], 1690 migrate_hrtimer_list(&old_base->clock_base[i],
1691 &new_base->clock_base[i]); 1691 &new_base->clock_base[i]);
1692 } 1692 }
1693 1693
1694 spin_unlock(&old_base->lock); 1694 raw_spin_unlock(&old_base->lock);
1695 spin_unlock(&new_base->lock); 1695 raw_spin_unlock(&new_base->lock);
1696 1696
1697 /* Check, if we got expired work to do */ 1697 /* Check, if we got expired work to do */
1698 __hrtimer_peek_ahead_timers(); 1698 __hrtimer_peek_ahead_timers();
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 366eedf949c0..50dbd5999588 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -40,6 +40,7 @@
40#include <linux/percpu.h> 40#include <linux/percpu.h>
41#include <linux/sched.h> 41#include <linux/sched.h>
42#include <linux/init.h> 42#include <linux/init.h>
43#include <linux/cpu.h>
43#include <linux/smp.h> 44#include <linux/smp.h>
44 45
45#include <linux/hw_breakpoint.h> 46#include <linux/hw_breakpoint.h>
@@ -96,7 +97,7 @@ static int task_bp_pinned(struct task_struct *tsk)
96 97
97 list = &ctx->event_list; 98 list = &ctx->event_list;
98 99
99 spin_lock_irqsave(&ctx->lock, flags); 100 raw_spin_lock_irqsave(&ctx->lock, flags);
100 101
101 /* 102 /*
102 * The current breakpoint counter is not included in the list 103 * The current breakpoint counter is not included in the list
@@ -107,7 +108,7 @@ static int task_bp_pinned(struct task_struct *tsk)
107 count++; 108 count++;
108 } 109 }
109 110
110 spin_unlock_irqrestore(&ctx->lock, flags); 111 raw_spin_unlock_irqrestore(&ctx->lock, flags);
111 112
112 return count; 113 return count;
113} 114}
@@ -388,7 +389,8 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr,
388 if (!cpu_events) 389 if (!cpu_events)
389 return ERR_PTR(-ENOMEM); 390 return ERR_PTR(-ENOMEM);
390 391
391 for_each_possible_cpu(cpu) { 392 get_online_cpus();
393 for_each_online_cpu(cpu) {
392 pevent = per_cpu_ptr(cpu_events, cpu); 394 pevent = per_cpu_ptr(cpu_events, cpu);
393 bp = perf_event_create_kernel_counter(attr, cpu, -1, triggered); 395 bp = perf_event_create_kernel_counter(attr, cpu, -1, triggered);
394 396
@@ -399,18 +401,20 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr,
399 goto fail; 401 goto fail;
400 } 402 }
401 } 403 }
404 put_online_cpus();
402 405
403 return cpu_events; 406 return cpu_events;
404 407
405fail: 408fail:
406 for_each_possible_cpu(cpu) { 409 for_each_online_cpu(cpu) {
407 pevent = per_cpu_ptr(cpu_events, cpu); 410 pevent = per_cpu_ptr(cpu_events, cpu);
408 if (IS_ERR(*pevent)) 411 if (IS_ERR(*pevent))
409 break; 412 break;
410 unregister_hw_breakpoint(*pevent); 413 unregister_hw_breakpoint(*pevent);
411 } 414 }
415 put_online_cpus();
416
412 free_percpu(cpu_events); 417 free_percpu(cpu_events);
413 /* return the error if any */
414 return ERR_PTR(err); 418 return ERR_PTR(err);
415} 419}
416EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint); 420EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index 1de9700f416e..2295a31ef110 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -45,7 +45,7 @@ unsigned long probe_irq_on(void)
45 * flush such a longstanding irq before considering it as spurious. 45 * flush such a longstanding irq before considering it as spurious.
46 */ 46 */
47 for_each_irq_desc_reverse(i, desc) { 47 for_each_irq_desc_reverse(i, desc) {
48 spin_lock_irq(&desc->lock); 48 raw_spin_lock_irq(&desc->lock);
49 if (!desc->action && !(desc->status & IRQ_NOPROBE)) { 49 if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
50 /* 50 /*
51 * An old-style architecture might still have 51 * An old-style architecture might still have
@@ -61,7 +61,7 @@ unsigned long probe_irq_on(void)
61 desc->chip->set_type(i, IRQ_TYPE_PROBE); 61 desc->chip->set_type(i, IRQ_TYPE_PROBE);
62 desc->chip->startup(i); 62 desc->chip->startup(i);
63 } 63 }
64 spin_unlock_irq(&desc->lock); 64 raw_spin_unlock_irq(&desc->lock);
65 } 65 }
66 66
67 /* Wait for longstanding interrupts to trigger. */ 67 /* Wait for longstanding interrupts to trigger. */
@@ -73,13 +73,13 @@ unsigned long probe_irq_on(void)
73 * happened in the previous stage, it may have masked itself) 73 * happened in the previous stage, it may have masked itself)
74 */ 74 */
75 for_each_irq_desc_reverse(i, desc) { 75 for_each_irq_desc_reverse(i, desc) {
76 spin_lock_irq(&desc->lock); 76 raw_spin_lock_irq(&desc->lock);
77 if (!desc->action && !(desc->status & IRQ_NOPROBE)) { 77 if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
78 desc->status |= IRQ_AUTODETECT | IRQ_WAITING; 78 desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
79 if (desc->chip->startup(i)) 79 if (desc->chip->startup(i))
80 desc->status |= IRQ_PENDING; 80 desc->status |= IRQ_PENDING;
81 } 81 }
82 spin_unlock_irq(&desc->lock); 82 raw_spin_unlock_irq(&desc->lock);
83 } 83 }
84 84
85 /* 85 /*
@@ -91,7 +91,7 @@ unsigned long probe_irq_on(void)
91 * Now filter out any obviously spurious interrupts 91 * Now filter out any obviously spurious interrupts
92 */ 92 */
93 for_each_irq_desc(i, desc) { 93 for_each_irq_desc(i, desc) {
94 spin_lock_irq(&desc->lock); 94 raw_spin_lock_irq(&desc->lock);
95 status = desc->status; 95 status = desc->status;
96 96
97 if (status & IRQ_AUTODETECT) { 97 if (status & IRQ_AUTODETECT) {
@@ -103,7 +103,7 @@ unsigned long probe_irq_on(void)
103 if (i < 32) 103 if (i < 32)
104 mask |= 1 << i; 104 mask |= 1 << i;
105 } 105 }
106 spin_unlock_irq(&desc->lock); 106 raw_spin_unlock_irq(&desc->lock);
107 } 107 }
108 108
109 return mask; 109 return mask;
@@ -129,7 +129,7 @@ unsigned int probe_irq_mask(unsigned long val)
129 int i; 129 int i;
130 130
131 for_each_irq_desc(i, desc) { 131 for_each_irq_desc(i, desc) {
132 spin_lock_irq(&desc->lock); 132 raw_spin_lock_irq(&desc->lock);
133 status = desc->status; 133 status = desc->status;
134 134
135 if (status & IRQ_AUTODETECT) { 135 if (status & IRQ_AUTODETECT) {
@@ -139,7 +139,7 @@ unsigned int probe_irq_mask(unsigned long val)
139 desc->status = status & ~IRQ_AUTODETECT; 139 desc->status = status & ~IRQ_AUTODETECT;
140 desc->chip->shutdown(i); 140 desc->chip->shutdown(i);
141 } 141 }
142 spin_unlock_irq(&desc->lock); 142 raw_spin_unlock_irq(&desc->lock);
143 } 143 }
144 mutex_unlock(&probing_active); 144 mutex_unlock(&probing_active);
145 145
@@ -171,7 +171,7 @@ int probe_irq_off(unsigned long val)
171 unsigned int status; 171 unsigned int status;
172 172
173 for_each_irq_desc(i, desc) { 173 for_each_irq_desc(i, desc) {
174 spin_lock_irq(&desc->lock); 174 raw_spin_lock_irq(&desc->lock);
175 status = desc->status; 175 status = desc->status;
176 176
177 if (status & IRQ_AUTODETECT) { 177 if (status & IRQ_AUTODETECT) {
@@ -183,7 +183,7 @@ int probe_irq_off(unsigned long val)
183 desc->status = status & ~IRQ_AUTODETECT; 183 desc->status = status & ~IRQ_AUTODETECT;
184 desc->chip->shutdown(i); 184 desc->chip->shutdown(i);
185 } 185 }
186 spin_unlock_irq(&desc->lock); 186 raw_spin_unlock_irq(&desc->lock);
187 } 187 }
188 mutex_unlock(&probing_active); 188 mutex_unlock(&probing_active);
189 189
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index ba566c261adc..ecc3fa28f666 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -34,7 +34,7 @@ void dynamic_irq_init(unsigned int irq)
34 } 34 }
35 35
36 /* Ensure we don't have left over values from a previous use of this irq */ 36 /* Ensure we don't have left over values from a previous use of this irq */
37 spin_lock_irqsave(&desc->lock, flags); 37 raw_spin_lock_irqsave(&desc->lock, flags);
38 desc->status = IRQ_DISABLED; 38 desc->status = IRQ_DISABLED;
39 desc->chip = &no_irq_chip; 39 desc->chip = &no_irq_chip;
40 desc->handle_irq = handle_bad_irq; 40 desc->handle_irq = handle_bad_irq;
@@ -51,7 +51,7 @@ void dynamic_irq_init(unsigned int irq)
51 cpumask_clear(desc->pending_mask); 51 cpumask_clear(desc->pending_mask);
52#endif 52#endif
53#endif 53#endif
54 spin_unlock_irqrestore(&desc->lock, flags); 54 raw_spin_unlock_irqrestore(&desc->lock, flags);
55} 55}
56 56
57/** 57/**
@@ -68,9 +68,9 @@ void dynamic_irq_cleanup(unsigned int irq)
68 return; 68 return;
69 } 69 }
70 70
71 spin_lock_irqsave(&desc->lock, flags); 71 raw_spin_lock_irqsave(&desc->lock, flags);
72 if (desc->action) { 72 if (desc->action) {
73 spin_unlock_irqrestore(&desc->lock, flags); 73 raw_spin_unlock_irqrestore(&desc->lock, flags);
74 WARN(1, KERN_ERR "Destroying IRQ%d without calling free_irq\n", 74 WARN(1, KERN_ERR "Destroying IRQ%d without calling free_irq\n",
75 irq); 75 irq);
76 return; 76 return;
@@ -82,7 +82,7 @@ void dynamic_irq_cleanup(unsigned int irq)
82 desc->chip = &no_irq_chip; 82 desc->chip = &no_irq_chip;
83 desc->name = NULL; 83 desc->name = NULL;
84 clear_kstat_irqs(desc); 84 clear_kstat_irqs(desc);
85 spin_unlock_irqrestore(&desc->lock, flags); 85 raw_spin_unlock_irqrestore(&desc->lock, flags);
86} 86}
87 87
88 88
@@ -104,10 +104,10 @@ int set_irq_chip(unsigned int irq, struct irq_chip *chip)
104 if (!chip) 104 if (!chip)
105 chip = &no_irq_chip; 105 chip = &no_irq_chip;
106 106
107 spin_lock_irqsave(&desc->lock, flags); 107 raw_spin_lock_irqsave(&desc->lock, flags);
108 irq_chip_set_defaults(chip); 108 irq_chip_set_defaults(chip);
109 desc->chip = chip; 109 desc->chip = chip;
110 spin_unlock_irqrestore(&desc->lock, flags); 110 raw_spin_unlock_irqrestore(&desc->lock, flags);
111 111
112 return 0; 112 return 0;
113} 113}
@@ -133,9 +133,9 @@ int set_irq_type(unsigned int irq, unsigned int type)
133 if (type == IRQ_TYPE_NONE) 133 if (type == IRQ_TYPE_NONE)
134 return 0; 134 return 0;
135 135
136 spin_lock_irqsave(&desc->lock, flags); 136 raw_spin_lock_irqsave(&desc->lock, flags);
137 ret = __irq_set_trigger(desc, irq, type); 137 ret = __irq_set_trigger(desc, irq, type);
138 spin_unlock_irqrestore(&desc->lock, flags); 138 raw_spin_unlock_irqrestore(&desc->lock, flags);
139 return ret; 139 return ret;
140} 140}
141EXPORT_SYMBOL(set_irq_type); 141EXPORT_SYMBOL(set_irq_type);
@@ -158,9 +158,9 @@ int set_irq_data(unsigned int irq, void *data)
158 return -EINVAL; 158 return -EINVAL;
159 } 159 }
160 160
161 spin_lock_irqsave(&desc->lock, flags); 161 raw_spin_lock_irqsave(&desc->lock, flags);
162 desc->handler_data = data; 162 desc->handler_data = data;
163 spin_unlock_irqrestore(&desc->lock, flags); 163 raw_spin_unlock_irqrestore(&desc->lock, flags);
164 return 0; 164 return 0;
165} 165}
166EXPORT_SYMBOL(set_irq_data); 166EXPORT_SYMBOL(set_irq_data);
@@ -183,11 +183,11 @@ int set_irq_msi(unsigned int irq, struct msi_desc *entry)
183 return -EINVAL; 183 return -EINVAL;
184 } 184 }
185 185
186 spin_lock_irqsave(&desc->lock, flags); 186 raw_spin_lock_irqsave(&desc->lock, flags);
187 desc->msi_desc = entry; 187 desc->msi_desc = entry;
188 if (entry) 188 if (entry)
189 entry->irq = irq; 189 entry->irq = irq;
190 spin_unlock_irqrestore(&desc->lock, flags); 190 raw_spin_unlock_irqrestore(&desc->lock, flags);
191 return 0; 191 return 0;
192} 192}
193 193
@@ -214,9 +214,9 @@ int set_irq_chip_data(unsigned int irq, void *data)
214 return -EINVAL; 214 return -EINVAL;
215 } 215 }
216 216
217 spin_lock_irqsave(&desc->lock, flags); 217 raw_spin_lock_irqsave(&desc->lock, flags);
218 desc->chip_data = data; 218 desc->chip_data = data;
219 spin_unlock_irqrestore(&desc->lock, flags); 219 raw_spin_unlock_irqrestore(&desc->lock, flags);
220 220
221 return 0; 221 return 0;
222} 222}
@@ -241,12 +241,12 @@ void set_irq_nested_thread(unsigned int irq, int nest)
241 if (!desc) 241 if (!desc)
242 return; 242 return;
243 243
244 spin_lock_irqsave(&desc->lock, flags); 244 raw_spin_lock_irqsave(&desc->lock, flags);
245 if (nest) 245 if (nest)
246 desc->status |= IRQ_NESTED_THREAD; 246 desc->status |= IRQ_NESTED_THREAD;
247 else 247 else
248 desc->status &= ~IRQ_NESTED_THREAD; 248 desc->status &= ~IRQ_NESTED_THREAD;
249 spin_unlock_irqrestore(&desc->lock, flags); 249 raw_spin_unlock_irqrestore(&desc->lock, flags);
250} 250}
251EXPORT_SYMBOL_GPL(set_irq_nested_thread); 251EXPORT_SYMBOL_GPL(set_irq_nested_thread);
252 252
@@ -343,7 +343,7 @@ void handle_nested_irq(unsigned int irq)
343 343
344 might_sleep(); 344 might_sleep();
345 345
346 spin_lock_irq(&desc->lock); 346 raw_spin_lock_irq(&desc->lock);
347 347
348 kstat_incr_irqs_this_cpu(irq, desc); 348 kstat_incr_irqs_this_cpu(irq, desc);
349 349
@@ -352,17 +352,17 @@ void handle_nested_irq(unsigned int irq)
352 goto out_unlock; 352 goto out_unlock;
353 353
354 desc->status |= IRQ_INPROGRESS; 354 desc->status |= IRQ_INPROGRESS;
355 spin_unlock_irq(&desc->lock); 355 raw_spin_unlock_irq(&desc->lock);
356 356
357 action_ret = action->thread_fn(action->irq, action->dev_id); 357 action_ret = action->thread_fn(action->irq, action->dev_id);
358 if (!noirqdebug) 358 if (!noirqdebug)
359 note_interrupt(irq, desc, action_ret); 359 note_interrupt(irq, desc, action_ret);
360 360
361 spin_lock_irq(&desc->lock); 361 raw_spin_lock_irq(&desc->lock);
362 desc->status &= ~IRQ_INPROGRESS; 362 desc->status &= ~IRQ_INPROGRESS;
363 363
364out_unlock: 364out_unlock:
365 spin_unlock_irq(&desc->lock); 365 raw_spin_unlock_irq(&desc->lock);
366} 366}
367EXPORT_SYMBOL_GPL(handle_nested_irq); 367EXPORT_SYMBOL_GPL(handle_nested_irq);
368 368
@@ -384,7 +384,7 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc)
384 struct irqaction *action; 384 struct irqaction *action;
385 irqreturn_t action_ret; 385 irqreturn_t action_ret;
386 386
387 spin_lock(&desc->lock); 387 raw_spin_lock(&desc->lock);
388 388
389 if (unlikely(desc->status & IRQ_INPROGRESS)) 389 if (unlikely(desc->status & IRQ_INPROGRESS))
390 goto out_unlock; 390 goto out_unlock;
@@ -396,16 +396,16 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc)
396 goto out_unlock; 396 goto out_unlock;
397 397
398 desc->status |= IRQ_INPROGRESS; 398 desc->status |= IRQ_INPROGRESS;
399 spin_unlock(&desc->lock); 399 raw_spin_unlock(&desc->lock);
400 400
401 action_ret = handle_IRQ_event(irq, action); 401 action_ret = handle_IRQ_event(irq, action);
402 if (!noirqdebug) 402 if (!noirqdebug)
403 note_interrupt(irq, desc, action_ret); 403 note_interrupt(irq, desc, action_ret);
404 404
405 spin_lock(&desc->lock); 405 raw_spin_lock(&desc->lock);
406 desc->status &= ~IRQ_INPROGRESS; 406 desc->status &= ~IRQ_INPROGRESS;
407out_unlock: 407out_unlock:
408 spin_unlock(&desc->lock); 408 raw_spin_unlock(&desc->lock);
409} 409}
410 410
411/** 411/**
@@ -424,7 +424,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
424 struct irqaction *action; 424 struct irqaction *action;
425 irqreturn_t action_ret; 425 irqreturn_t action_ret;
426 426
427 spin_lock(&desc->lock); 427 raw_spin_lock(&desc->lock);
428 mask_ack_irq(desc, irq); 428 mask_ack_irq(desc, irq);
429 429
430 if (unlikely(desc->status & IRQ_INPROGRESS)) 430 if (unlikely(desc->status & IRQ_INPROGRESS))
@@ -441,13 +441,13 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
441 goto out_unlock; 441 goto out_unlock;
442 442
443 desc->status |= IRQ_INPROGRESS; 443 desc->status |= IRQ_INPROGRESS;
444 spin_unlock(&desc->lock); 444 raw_spin_unlock(&desc->lock);
445 445
446 action_ret = handle_IRQ_event(irq, action); 446 action_ret = handle_IRQ_event(irq, action);
447 if (!noirqdebug) 447 if (!noirqdebug)
448 note_interrupt(irq, desc, action_ret); 448 note_interrupt(irq, desc, action_ret);
449 449
450 spin_lock(&desc->lock); 450 raw_spin_lock(&desc->lock);
451 desc->status &= ~IRQ_INPROGRESS; 451 desc->status &= ~IRQ_INPROGRESS;
452 452
453 if (unlikely(desc->status & IRQ_ONESHOT)) 453 if (unlikely(desc->status & IRQ_ONESHOT))
@@ -455,7 +455,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
455 else if (!(desc->status & IRQ_DISABLED) && desc->chip->unmask) 455 else if (!(desc->status & IRQ_DISABLED) && desc->chip->unmask)
456 desc->chip->unmask(irq); 456 desc->chip->unmask(irq);
457out_unlock: 457out_unlock:
458 spin_unlock(&desc->lock); 458 raw_spin_unlock(&desc->lock);
459} 459}
460EXPORT_SYMBOL_GPL(handle_level_irq); 460EXPORT_SYMBOL_GPL(handle_level_irq);
461 461
@@ -475,7 +475,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
475 struct irqaction *action; 475 struct irqaction *action;
476 irqreturn_t action_ret; 476 irqreturn_t action_ret;
477 477
478 spin_lock(&desc->lock); 478 raw_spin_lock(&desc->lock);
479 479
480 if (unlikely(desc->status & IRQ_INPROGRESS)) 480 if (unlikely(desc->status & IRQ_INPROGRESS))
481 goto out; 481 goto out;
@@ -497,18 +497,18 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
497 497
498 desc->status |= IRQ_INPROGRESS; 498 desc->status |= IRQ_INPROGRESS;
499 desc->status &= ~IRQ_PENDING; 499 desc->status &= ~IRQ_PENDING;
500 spin_unlock(&desc->lock); 500 raw_spin_unlock(&desc->lock);
501 501
502 action_ret = handle_IRQ_event(irq, action); 502 action_ret = handle_IRQ_event(irq, action);
503 if (!noirqdebug) 503 if (!noirqdebug)
504 note_interrupt(irq, desc, action_ret); 504 note_interrupt(irq, desc, action_ret);
505 505
506 spin_lock(&desc->lock); 506 raw_spin_lock(&desc->lock);
507 desc->status &= ~IRQ_INPROGRESS; 507 desc->status &= ~IRQ_INPROGRESS;
508out: 508out:
509 desc->chip->eoi(irq); 509 desc->chip->eoi(irq);
510 510
511 spin_unlock(&desc->lock); 511 raw_spin_unlock(&desc->lock);
512} 512}
513 513
514/** 514/**
@@ -530,7 +530,7 @@ out:
530void 530void
531handle_edge_irq(unsigned int irq, struct irq_desc *desc) 531handle_edge_irq(unsigned int irq, struct irq_desc *desc)
532{ 532{
533 spin_lock(&desc->lock); 533 raw_spin_lock(&desc->lock);
534 534
535 desc->status &= ~(IRQ_REPLAY | IRQ_WAITING); 535 desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
536 536
@@ -576,17 +576,17 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
576 } 576 }
577 577
578 desc->status &= ~IRQ_PENDING; 578 desc->status &= ~IRQ_PENDING;
579 spin_unlock(&desc->lock); 579 raw_spin_unlock(&desc->lock);
580 action_ret = handle_IRQ_event(irq, action); 580 action_ret = handle_IRQ_event(irq, action);
581 if (!noirqdebug) 581 if (!noirqdebug)
582 note_interrupt(irq, desc, action_ret); 582 note_interrupt(irq, desc, action_ret);
583 spin_lock(&desc->lock); 583 raw_spin_lock(&desc->lock);
584 584
585 } while ((desc->status & (IRQ_PENDING | IRQ_DISABLED)) == IRQ_PENDING); 585 } while ((desc->status & (IRQ_PENDING | IRQ_DISABLED)) == IRQ_PENDING);
586 586
587 desc->status &= ~IRQ_INPROGRESS; 587 desc->status &= ~IRQ_INPROGRESS;
588out_unlock: 588out_unlock:
589 spin_unlock(&desc->lock); 589 raw_spin_unlock(&desc->lock);
590} 590}
591 591
592/** 592/**
@@ -643,7 +643,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
643 } 643 }
644 644
645 chip_bus_lock(irq, desc); 645 chip_bus_lock(irq, desc);
646 spin_lock_irqsave(&desc->lock, flags); 646 raw_spin_lock_irqsave(&desc->lock, flags);
647 647
648 /* Uninstall? */ 648 /* Uninstall? */
649 if (handle == handle_bad_irq) { 649 if (handle == handle_bad_irq) {
@@ -661,7 +661,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
661 desc->depth = 0; 661 desc->depth = 0;
662 desc->chip->startup(irq); 662 desc->chip->startup(irq);
663 } 663 }
664 spin_unlock_irqrestore(&desc->lock, flags); 664 raw_spin_unlock_irqrestore(&desc->lock, flags);
665 chip_bus_sync_unlock(irq, desc); 665 chip_bus_sync_unlock(irq, desc);
666} 666}
667EXPORT_SYMBOL_GPL(__set_irq_handler); 667EXPORT_SYMBOL_GPL(__set_irq_handler);
@@ -692,9 +692,9 @@ void __init set_irq_noprobe(unsigned int irq)
692 return; 692 return;
693 } 693 }
694 694
695 spin_lock_irqsave(&desc->lock, flags); 695 raw_spin_lock_irqsave(&desc->lock, flags);
696 desc->status |= IRQ_NOPROBE; 696 desc->status |= IRQ_NOPROBE;
697 spin_unlock_irqrestore(&desc->lock, flags); 697 raw_spin_unlock_irqrestore(&desc->lock, flags);
698} 698}
699 699
700void __init set_irq_probe(unsigned int irq) 700void __init set_irq_probe(unsigned int irq)
@@ -707,7 +707,7 @@ void __init set_irq_probe(unsigned int irq)
707 return; 707 return;
708 } 708 }
709 709
710 spin_lock_irqsave(&desc->lock, flags); 710 raw_spin_lock_irqsave(&desc->lock, flags);
711 desc->status &= ~IRQ_NOPROBE; 711 desc->status &= ~IRQ_NOPROBE;
712 spin_unlock_irqrestore(&desc->lock, flags); 712 raw_spin_unlock_irqrestore(&desc->lock, flags);
713} 713}
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 17c71bb565c6..814940e7f485 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -80,7 +80,7 @@ static struct irq_desc irq_desc_init = {
80 .chip = &no_irq_chip, 80 .chip = &no_irq_chip,
81 .handle_irq = handle_bad_irq, 81 .handle_irq = handle_bad_irq,
82 .depth = 1, 82 .depth = 1,
83 .lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock), 83 .lock = __RAW_SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
84}; 84};
85 85
86void __ref init_kstat_irqs(struct irq_desc *desc, int node, int nr) 86void __ref init_kstat_irqs(struct irq_desc *desc, int node, int nr)
@@ -108,7 +108,7 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int node)
108{ 108{
109 memcpy(desc, &irq_desc_init, sizeof(struct irq_desc)); 109 memcpy(desc, &irq_desc_init, sizeof(struct irq_desc));
110 110
111 spin_lock_init(&desc->lock); 111 raw_spin_lock_init(&desc->lock);
112 desc->irq = irq; 112 desc->irq = irq;
113#ifdef CONFIG_SMP 113#ifdef CONFIG_SMP
114 desc->node = node; 114 desc->node = node;
@@ -130,7 +130,7 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int node)
130/* 130/*
131 * Protect the sparse_irqs: 131 * Protect the sparse_irqs:
132 */ 132 */
133DEFINE_SPINLOCK(sparse_irq_lock); 133DEFINE_RAW_SPINLOCK(sparse_irq_lock);
134 134
135struct irq_desc **irq_desc_ptrs __read_mostly; 135struct irq_desc **irq_desc_ptrs __read_mostly;
136 136
@@ -141,7 +141,7 @@ static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_sm
141 .chip = &no_irq_chip, 141 .chip = &no_irq_chip,
142 .handle_irq = handle_bad_irq, 142 .handle_irq = handle_bad_irq,
143 .depth = 1, 143 .depth = 1,
144 .lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock), 144 .lock = __RAW_SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
145 } 145 }
146}; 146};
147 147
@@ -212,7 +212,7 @@ struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node)
212 if (desc) 212 if (desc)
213 return desc; 213 return desc;
214 214
215 spin_lock_irqsave(&sparse_irq_lock, flags); 215 raw_spin_lock_irqsave(&sparse_irq_lock, flags);
216 216
217 /* We have to check it to avoid races with another CPU */ 217 /* We have to check it to avoid races with another CPU */
218 desc = irq_desc_ptrs[irq]; 218 desc = irq_desc_ptrs[irq];
@@ -234,7 +234,7 @@ struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node)
234 irq_desc_ptrs[irq] = desc; 234 irq_desc_ptrs[irq] = desc;
235 235
236out_unlock: 236out_unlock:
237 spin_unlock_irqrestore(&sparse_irq_lock, flags); 237 raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
238 238
239 return desc; 239 return desc;
240} 240}
@@ -247,7 +247,7 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
247 .chip = &no_irq_chip, 247 .chip = &no_irq_chip,
248 .handle_irq = handle_bad_irq, 248 .handle_irq = handle_bad_irq,
249 .depth = 1, 249 .depth = 1,
250 .lock = __SPIN_LOCK_UNLOCKED(irq_desc->lock), 250 .lock = __RAW_SPIN_LOCK_UNLOCKED(irq_desc->lock),
251 } 251 }
252}; 252};
253 253
@@ -473,7 +473,7 @@ unsigned int __do_IRQ(unsigned int irq)
473 return 1; 473 return 1;
474 } 474 }
475 475
476 spin_lock(&desc->lock); 476 raw_spin_lock(&desc->lock);
477 if (desc->chip->ack) 477 if (desc->chip->ack)
478 desc->chip->ack(irq); 478 desc->chip->ack(irq);
479 /* 479 /*
@@ -517,13 +517,13 @@ unsigned int __do_IRQ(unsigned int irq)
517 for (;;) { 517 for (;;) {
518 irqreturn_t action_ret; 518 irqreturn_t action_ret;
519 519
520 spin_unlock(&desc->lock); 520 raw_spin_unlock(&desc->lock);
521 521
522 action_ret = handle_IRQ_event(irq, action); 522 action_ret = handle_IRQ_event(irq, action);
523 if (!noirqdebug) 523 if (!noirqdebug)
524 note_interrupt(irq, desc, action_ret); 524 note_interrupt(irq, desc, action_ret);
525 525
526 spin_lock(&desc->lock); 526 raw_spin_lock(&desc->lock);
527 if (likely(!(desc->status & IRQ_PENDING))) 527 if (likely(!(desc->status & IRQ_PENDING)))
528 break; 528 break;
529 desc->status &= ~IRQ_PENDING; 529 desc->status &= ~IRQ_PENDING;
@@ -536,7 +536,7 @@ out:
536 * disabled while the handler was running. 536 * disabled while the handler was running.
537 */ 537 */
538 desc->chip->end(irq); 538 desc->chip->end(irq);
539 spin_unlock(&desc->lock); 539 raw_spin_unlock(&desc->lock);
540 540
541 return 1; 541 return 1;
542} 542}
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 1b5d742c6a77..b2821f070a3d 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -18,7 +18,7 @@ extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume);
18extern struct lock_class_key irq_desc_lock_class; 18extern struct lock_class_key irq_desc_lock_class;
19extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr); 19extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr);
20extern void clear_kstat_irqs(struct irq_desc *desc); 20extern void clear_kstat_irqs(struct irq_desc *desc);
21extern spinlock_t sparse_irq_lock; 21extern raw_spinlock_t sparse_irq_lock;
22 22
23#ifdef CONFIG_SPARSE_IRQ 23#ifdef CONFIG_SPARSE_IRQ
24/* irq_desc_ptrs allocated at boot time */ 24/* irq_desc_ptrs allocated at boot time */
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 7305b297d1eb..eb6078ca60c7 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -46,9 +46,9 @@ void synchronize_irq(unsigned int irq)
46 cpu_relax(); 46 cpu_relax();
47 47
48 /* Ok, that indicated we're done: double-check carefully. */ 48 /* Ok, that indicated we're done: double-check carefully. */
49 spin_lock_irqsave(&desc->lock, flags); 49 raw_spin_lock_irqsave(&desc->lock, flags);
50 status = desc->status; 50 status = desc->status;
51 spin_unlock_irqrestore(&desc->lock, flags); 51 raw_spin_unlock_irqrestore(&desc->lock, flags);
52 52
53 /* Oops, that failed? */ 53 /* Oops, that failed? */
54 } while (status & IRQ_INPROGRESS); 54 } while (status & IRQ_INPROGRESS);
@@ -114,7 +114,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
114 if (!desc->chip->set_affinity) 114 if (!desc->chip->set_affinity)
115 return -EINVAL; 115 return -EINVAL;
116 116
117 spin_lock_irqsave(&desc->lock, flags); 117 raw_spin_lock_irqsave(&desc->lock, flags);
118 118
119#ifdef CONFIG_GENERIC_PENDING_IRQ 119#ifdef CONFIG_GENERIC_PENDING_IRQ
120 if (desc->status & IRQ_MOVE_PCNTXT) { 120 if (desc->status & IRQ_MOVE_PCNTXT) {
@@ -134,7 +134,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
134 } 134 }
135#endif 135#endif
136 desc->status |= IRQ_AFFINITY_SET; 136 desc->status |= IRQ_AFFINITY_SET;
137 spin_unlock_irqrestore(&desc->lock, flags); 137 raw_spin_unlock_irqrestore(&desc->lock, flags);
138 return 0; 138 return 0;
139} 139}
140 140
@@ -181,11 +181,11 @@ int irq_select_affinity_usr(unsigned int irq)
181 unsigned long flags; 181 unsigned long flags;
182 int ret; 182 int ret;
183 183
184 spin_lock_irqsave(&desc->lock, flags); 184 raw_spin_lock_irqsave(&desc->lock, flags);
185 ret = setup_affinity(irq, desc); 185 ret = setup_affinity(irq, desc);
186 if (!ret) 186 if (!ret)
187 irq_set_thread_affinity(desc); 187 irq_set_thread_affinity(desc);
188 spin_unlock_irqrestore(&desc->lock, flags); 188 raw_spin_unlock_irqrestore(&desc->lock, flags);
189 189
190 return ret; 190 return ret;
191} 191}
@@ -231,9 +231,9 @@ void disable_irq_nosync(unsigned int irq)
231 return; 231 return;
232 232
233 chip_bus_lock(irq, desc); 233 chip_bus_lock(irq, desc);
234 spin_lock_irqsave(&desc->lock, flags); 234 raw_spin_lock_irqsave(&desc->lock, flags);
235 __disable_irq(desc, irq, false); 235 __disable_irq(desc, irq, false);
236 spin_unlock_irqrestore(&desc->lock, flags); 236 raw_spin_unlock_irqrestore(&desc->lock, flags);
237 chip_bus_sync_unlock(irq, desc); 237 chip_bus_sync_unlock(irq, desc);
238} 238}
239EXPORT_SYMBOL(disable_irq_nosync); 239EXPORT_SYMBOL(disable_irq_nosync);
@@ -308,9 +308,9 @@ void enable_irq(unsigned int irq)
308 return; 308 return;
309 309
310 chip_bus_lock(irq, desc); 310 chip_bus_lock(irq, desc);
311 spin_lock_irqsave(&desc->lock, flags); 311 raw_spin_lock_irqsave(&desc->lock, flags);
312 __enable_irq(desc, irq, false); 312 __enable_irq(desc, irq, false);
313 spin_unlock_irqrestore(&desc->lock, flags); 313 raw_spin_unlock_irqrestore(&desc->lock, flags);
314 chip_bus_sync_unlock(irq, desc); 314 chip_bus_sync_unlock(irq, desc);
315} 315}
316EXPORT_SYMBOL(enable_irq); 316EXPORT_SYMBOL(enable_irq);
@@ -347,7 +347,7 @@ int set_irq_wake(unsigned int irq, unsigned int on)
347 /* wakeup-capable irqs can be shared between drivers that 347 /* wakeup-capable irqs can be shared between drivers that
348 * don't need to have the same sleep mode behaviors. 348 * don't need to have the same sleep mode behaviors.
349 */ 349 */
350 spin_lock_irqsave(&desc->lock, flags); 350 raw_spin_lock_irqsave(&desc->lock, flags);
351 if (on) { 351 if (on) {
352 if (desc->wake_depth++ == 0) { 352 if (desc->wake_depth++ == 0) {
353 ret = set_irq_wake_real(irq, on); 353 ret = set_irq_wake_real(irq, on);
@@ -368,7 +368,7 @@ int set_irq_wake(unsigned int irq, unsigned int on)
368 } 368 }
369 } 369 }
370 370
371 spin_unlock_irqrestore(&desc->lock, flags); 371 raw_spin_unlock_irqrestore(&desc->lock, flags);
372 return ret; 372 return ret;
373} 373}
374EXPORT_SYMBOL(set_irq_wake); 374EXPORT_SYMBOL(set_irq_wake);
@@ -484,12 +484,12 @@ static int irq_wait_for_interrupt(struct irqaction *action)
484static void irq_finalize_oneshot(unsigned int irq, struct irq_desc *desc) 484static void irq_finalize_oneshot(unsigned int irq, struct irq_desc *desc)
485{ 485{
486 chip_bus_lock(irq, desc); 486 chip_bus_lock(irq, desc);
487 spin_lock_irq(&desc->lock); 487 raw_spin_lock_irq(&desc->lock);
488 if (!(desc->status & IRQ_DISABLED) && (desc->status & IRQ_MASKED)) { 488 if (!(desc->status & IRQ_DISABLED) && (desc->status & IRQ_MASKED)) {
489 desc->status &= ~IRQ_MASKED; 489 desc->status &= ~IRQ_MASKED;
490 desc->chip->unmask(irq); 490 desc->chip->unmask(irq);
491 } 491 }
492 spin_unlock_irq(&desc->lock); 492 raw_spin_unlock_irq(&desc->lock);
493 chip_bus_sync_unlock(irq, desc); 493 chip_bus_sync_unlock(irq, desc);
494} 494}
495 495
@@ -514,9 +514,9 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action)
514 return; 514 return;
515 } 515 }
516 516
517 spin_lock_irq(&desc->lock); 517 raw_spin_lock_irq(&desc->lock);
518 cpumask_copy(mask, desc->affinity); 518 cpumask_copy(mask, desc->affinity);
519 spin_unlock_irq(&desc->lock); 519 raw_spin_unlock_irq(&desc->lock);
520 520
521 set_cpus_allowed_ptr(current, mask); 521 set_cpus_allowed_ptr(current, mask);
522 free_cpumask_var(mask); 522 free_cpumask_var(mask);
@@ -545,7 +545,7 @@ static int irq_thread(void *data)
545 545
546 atomic_inc(&desc->threads_active); 546 atomic_inc(&desc->threads_active);
547 547
548 spin_lock_irq(&desc->lock); 548 raw_spin_lock_irq(&desc->lock);
549 if (unlikely(desc->status & IRQ_DISABLED)) { 549 if (unlikely(desc->status & IRQ_DISABLED)) {
550 /* 550 /*
551 * CHECKME: We might need a dedicated 551 * CHECKME: We might need a dedicated
@@ -555,9 +555,9 @@ static int irq_thread(void *data)
555 * retriggers the interrupt itself --- tglx 555 * retriggers the interrupt itself --- tglx
556 */ 556 */
557 desc->status |= IRQ_PENDING; 557 desc->status |= IRQ_PENDING;
558 spin_unlock_irq(&desc->lock); 558 raw_spin_unlock_irq(&desc->lock);
559 } else { 559 } else {
560 spin_unlock_irq(&desc->lock); 560 raw_spin_unlock_irq(&desc->lock);
561 561
562 action->thread_fn(action->irq, action->dev_id); 562 action->thread_fn(action->irq, action->dev_id);
563 563
@@ -679,7 +679,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
679 /* 679 /*
680 * The following block of code has to be executed atomically 680 * The following block of code has to be executed atomically
681 */ 681 */
682 spin_lock_irqsave(&desc->lock, flags); 682 raw_spin_lock_irqsave(&desc->lock, flags);
683 old_ptr = &desc->action; 683 old_ptr = &desc->action;
684 old = *old_ptr; 684 old = *old_ptr;
685 if (old) { 685 if (old) {
@@ -775,7 +775,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
775 __enable_irq(desc, irq, false); 775 __enable_irq(desc, irq, false);
776 } 776 }
777 777
778 spin_unlock_irqrestore(&desc->lock, flags); 778 raw_spin_unlock_irqrestore(&desc->lock, flags);
779 779
780 /* 780 /*
781 * Strictly no need to wake it up, but hung_task complains 781 * Strictly no need to wake it up, but hung_task complains
@@ -802,7 +802,7 @@ mismatch:
802 ret = -EBUSY; 802 ret = -EBUSY;
803 803
804out_thread: 804out_thread:
805 spin_unlock_irqrestore(&desc->lock, flags); 805 raw_spin_unlock_irqrestore(&desc->lock, flags);
806 if (new->thread) { 806 if (new->thread) {
807 struct task_struct *t = new->thread; 807 struct task_struct *t = new->thread;
808 808
@@ -844,7 +844,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
844 if (!desc) 844 if (!desc)
845 return NULL; 845 return NULL;
846 846
847 spin_lock_irqsave(&desc->lock, flags); 847 raw_spin_lock_irqsave(&desc->lock, flags);
848 848
849 /* 849 /*
850 * There can be multiple actions per IRQ descriptor, find the right 850 * There can be multiple actions per IRQ descriptor, find the right
@@ -856,7 +856,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
856 856
857 if (!action) { 857 if (!action) {
858 WARN(1, "Trying to free already-free IRQ %d\n", irq); 858 WARN(1, "Trying to free already-free IRQ %d\n", irq);
859 spin_unlock_irqrestore(&desc->lock, flags); 859 raw_spin_unlock_irqrestore(&desc->lock, flags);
860 860
861 return NULL; 861 return NULL;
862 } 862 }
@@ -884,7 +884,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
884 desc->chip->disable(irq); 884 desc->chip->disable(irq);
885 } 885 }
886 886
887 spin_unlock_irqrestore(&desc->lock, flags); 887 raw_spin_unlock_irqrestore(&desc->lock, flags);
888 888
889 unregister_handler_proc(irq, action); 889 unregister_handler_proc(irq, action);
890 890
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index fcb6c96f2627..241962280836 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -27,7 +27,7 @@ void move_masked_irq(int irq)
27 if (!desc->chip->set_affinity) 27 if (!desc->chip->set_affinity)
28 return; 28 return;
29 29
30 assert_spin_locked(&desc->lock); 30 assert_raw_spin_locked(&desc->lock);
31 31
32 /* 32 /*
33 * If there was a valid mask to work with, please 33 * If there was a valid mask to work with, please
diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c
index 3fd30197da2e..26bac9d8f860 100644
--- a/kernel/irq/numa_migrate.c
+++ b/kernel/irq/numa_migrate.c
@@ -42,7 +42,7 @@ static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc,
42 "for migration.\n", irq); 42 "for migration.\n", irq);
43 return false; 43 return false;
44 } 44 }
45 spin_lock_init(&desc->lock); 45 raw_spin_lock_init(&desc->lock);
46 desc->node = node; 46 desc->node = node;
47 lockdep_set_class(&desc->lock, &irq_desc_lock_class); 47 lockdep_set_class(&desc->lock, &irq_desc_lock_class);
48 init_copy_kstat_irqs(old_desc, desc, node, nr_cpu_ids); 48 init_copy_kstat_irqs(old_desc, desc, node, nr_cpu_ids);
@@ -67,7 +67,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
67 67
68 irq = old_desc->irq; 68 irq = old_desc->irq;
69 69
70 spin_lock_irqsave(&sparse_irq_lock, flags); 70 raw_spin_lock_irqsave(&sparse_irq_lock, flags);
71 71
72 /* We have to check it to avoid races with another CPU */ 72 /* We have to check it to avoid races with another CPU */
73 desc = irq_desc_ptrs[irq]; 73 desc = irq_desc_ptrs[irq];
@@ -91,7 +91,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
91 } 91 }
92 92
93 irq_desc_ptrs[irq] = desc; 93 irq_desc_ptrs[irq] = desc;
94 spin_unlock_irqrestore(&sparse_irq_lock, flags); 94 raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
95 95
96 /* free the old one */ 96 /* free the old one */
97 free_one_irq_desc(old_desc, desc); 97 free_one_irq_desc(old_desc, desc);
@@ -100,7 +100,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
100 return desc; 100 return desc;
101 101
102out_unlock: 102out_unlock:
103 spin_unlock_irqrestore(&sparse_irq_lock, flags); 103 raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
104 104
105 return desc; 105 return desc;
106} 106}
diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c
index a0bb09e79867..0d4005d85b03 100644
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -28,9 +28,9 @@ void suspend_device_irqs(void)
28 for_each_irq_desc(irq, desc) { 28 for_each_irq_desc(irq, desc) {
29 unsigned long flags; 29 unsigned long flags;
30 30
31 spin_lock_irqsave(&desc->lock, flags); 31 raw_spin_lock_irqsave(&desc->lock, flags);
32 __disable_irq(desc, irq, true); 32 __disable_irq(desc, irq, true);
33 spin_unlock_irqrestore(&desc->lock, flags); 33 raw_spin_unlock_irqrestore(&desc->lock, flags);
34 } 34 }
35 35
36 for_each_irq_desc(irq, desc) 36 for_each_irq_desc(irq, desc)
@@ -56,9 +56,9 @@ void resume_device_irqs(void)
56 if (!(desc->status & IRQ_SUSPENDED)) 56 if (!(desc->status & IRQ_SUSPENDED))
57 continue; 57 continue;
58 58
59 spin_lock_irqsave(&desc->lock, flags); 59 raw_spin_lock_irqsave(&desc->lock, flags);
60 __enable_irq(desc, irq, true); 60 __enable_irq(desc, irq, true);
61 spin_unlock_irqrestore(&desc->lock, flags); 61 raw_spin_unlock_irqrestore(&desc->lock, flags);
62 } 62 }
63} 63}
64EXPORT_SYMBOL_GPL(resume_device_irqs); 64EXPORT_SYMBOL_GPL(resume_device_irqs);
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 0832145fea97..6f50eccc79c0 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -179,7 +179,7 @@ static int name_unique(unsigned int irq, struct irqaction *new_action)
179 unsigned long flags; 179 unsigned long flags;
180 int ret = 1; 180 int ret = 1;
181 181
182 spin_lock_irqsave(&desc->lock, flags); 182 raw_spin_lock_irqsave(&desc->lock, flags);
183 for (action = desc->action ; action; action = action->next) { 183 for (action = desc->action ; action; action = action->next) {
184 if ((action != new_action) && action->name && 184 if ((action != new_action) && action->name &&
185 !strcmp(new_action->name, action->name)) { 185 !strcmp(new_action->name, action->name)) {
@@ -187,7 +187,7 @@ static int name_unique(unsigned int irq, struct irqaction *new_action)
187 break; 187 break;
188 } 188 }
189 } 189 }
190 spin_unlock_irqrestore(&desc->lock, flags); 190 raw_spin_unlock_irqrestore(&desc->lock, flags);
191 return ret; 191 return ret;
192} 192}
193 193
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index e49ea1c5232d..89fb90ae534f 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -28,7 +28,7 @@ static int try_one_irq(int irq, struct irq_desc *desc)
28 struct irqaction *action; 28 struct irqaction *action;
29 int ok = 0, work = 0; 29 int ok = 0, work = 0;
30 30
31 spin_lock(&desc->lock); 31 raw_spin_lock(&desc->lock);
32 /* Already running on another processor */ 32 /* Already running on another processor */
33 if (desc->status & IRQ_INPROGRESS) { 33 if (desc->status & IRQ_INPROGRESS) {
34 /* 34 /*
@@ -37,13 +37,13 @@ static int try_one_irq(int irq, struct irq_desc *desc)
37 */ 37 */
38 if (desc->action && (desc->action->flags & IRQF_SHARED)) 38 if (desc->action && (desc->action->flags & IRQF_SHARED))
39 desc->status |= IRQ_PENDING; 39 desc->status |= IRQ_PENDING;
40 spin_unlock(&desc->lock); 40 raw_spin_unlock(&desc->lock);
41 return ok; 41 return ok;
42 } 42 }
43 /* Honour the normal IRQ locking */ 43 /* Honour the normal IRQ locking */
44 desc->status |= IRQ_INPROGRESS; 44 desc->status |= IRQ_INPROGRESS;
45 action = desc->action; 45 action = desc->action;
46 spin_unlock(&desc->lock); 46 raw_spin_unlock(&desc->lock);
47 47
48 while (action) { 48 while (action) {
49 /* Only shared IRQ handlers are safe to call */ 49 /* Only shared IRQ handlers are safe to call */
@@ -56,7 +56,7 @@ static int try_one_irq(int irq, struct irq_desc *desc)
56 } 56 }
57 local_irq_disable(); 57 local_irq_disable();
58 /* Now clean up the flags */ 58 /* Now clean up the flags */
59 spin_lock(&desc->lock); 59 raw_spin_lock(&desc->lock);
60 action = desc->action; 60 action = desc->action;
61 61
62 /* 62 /*
@@ -68,9 +68,9 @@ static int try_one_irq(int irq, struct irq_desc *desc)
68 * Perform real IRQ processing for the IRQ we deferred 68 * Perform real IRQ processing for the IRQ we deferred
69 */ 69 */
70 work = 1; 70 work = 1;
71 spin_unlock(&desc->lock); 71 raw_spin_unlock(&desc->lock);
72 handle_IRQ_event(irq, action); 72 handle_IRQ_event(irq, action);
73 spin_lock(&desc->lock); 73 raw_spin_lock(&desc->lock);
74 desc->status &= ~IRQ_PENDING; 74 desc->status &= ~IRQ_PENDING;
75 } 75 }
76 desc->status &= ~IRQ_INPROGRESS; 76 desc->status &= ~IRQ_INPROGRESS;
@@ -80,7 +80,7 @@ static int try_one_irq(int irq, struct irq_desc *desc)
80 */ 80 */
81 if (work && desc->chip && desc->chip->end) 81 if (work && desc->chip && desc->chip->end)
82 desc->chip->end(irq); 82 desc->chip->end(irq);
83 spin_unlock(&desc->lock); 83 raw_spin_unlock(&desc->lock);
84 84
85 return ok; 85 return ok;
86} 86}
diff --git a/kernel/kexec.c b/kernel/kexec.c
index f336e2107f98..a9a93d9ee7a7 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -21,7 +21,7 @@
21#include <linux/hardirq.h> 21#include <linux/hardirq.h>
22#include <linux/elf.h> 22#include <linux/elf.h>
23#include <linux/elfcore.h> 23#include <linux/elfcore.h>
24#include <linux/utsrelease.h> 24#include <generated/utsrelease.h>
25#include <linux/utsname.h> 25#include <linux/utsname.h>
26#include <linux/numa.h> 26#include <linux/numa.h>
27#include <linux/suspend.h> 27#include <linux/suspend.h>
@@ -31,6 +31,7 @@
31#include <linux/cpu.h> 31#include <linux/cpu.h>
32#include <linux/console.h> 32#include <linux/console.h>
33#include <linux/vmalloc.h> 33#include <linux/vmalloc.h>
34#include <linux/swap.h>
34 35
35#include <asm/page.h> 36#include <asm/page.h>
36#include <asm/uaccess.h> 37#include <asm/uaccess.h>
@@ -1082,6 +1083,64 @@ void crash_kexec(struct pt_regs *regs)
1082 } 1083 }
1083} 1084}
1084 1085
1086size_t crash_get_memory_size(void)
1087{
1088 size_t size;
1089 mutex_lock(&kexec_mutex);
1090 size = crashk_res.end - crashk_res.start + 1;
1091 mutex_unlock(&kexec_mutex);
1092 return size;
1093}
1094
1095static void free_reserved_phys_range(unsigned long begin, unsigned long end)
1096{
1097 unsigned long addr;
1098
1099 for (addr = begin; addr < end; addr += PAGE_SIZE) {
1100 ClearPageReserved(pfn_to_page(addr >> PAGE_SHIFT));
1101 init_page_count(pfn_to_page(addr >> PAGE_SHIFT));
1102 free_page((unsigned long)__va(addr));
1103 totalram_pages++;
1104 }
1105}
1106
1107int crash_shrink_memory(unsigned long new_size)
1108{
1109 int ret = 0;
1110 unsigned long start, end;
1111
1112 mutex_lock(&kexec_mutex);
1113
1114 if (kexec_crash_image) {
1115 ret = -ENOENT;
1116 goto unlock;
1117 }
1118 start = crashk_res.start;
1119 end = crashk_res.end;
1120
1121 if (new_size >= end - start + 1) {
1122 ret = -EINVAL;
1123 if (new_size == end - start + 1)
1124 ret = 0;
1125 goto unlock;
1126 }
1127
1128 start = roundup(start, PAGE_SIZE);
1129 end = roundup(start + new_size, PAGE_SIZE);
1130
1131 free_reserved_phys_range(end, crashk_res.end);
1132
1133 if (start == end) {
1134 crashk_res.end = end;
1135 release_resource(&crashk_res);
1136 } else
1137 crashk_res.end = end - 1;
1138
1139unlock:
1140 mutex_unlock(&kexec_mutex);
1141 return ret;
1142}
1143
1085static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data, 1144static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
1086 size_t data_len) 1145 size_t data_len)
1087{ 1146{
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index 3765ff3c1bbe..32c5c15d750d 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -1,6 +1,7 @@
1/* 1/*
2 * A simple kernel FIFO implementation. 2 * A generic kernel FIFO implementation.
3 * 3 *
4 * Copyright (C) 2009 Stefani Seibold <stefani@seibold.net>
4 * Copyright (C) 2004 Stelian Pop <stelian@popies.net> 5 * Copyright (C) 2004 Stelian Pop <stelian@popies.net>
5 * 6 *
6 * This program is free software; you can redistribute it and/or modify 7 * This program is free software; you can redistribute it and/or modify
@@ -25,50 +26,48 @@
25#include <linux/err.h> 26#include <linux/err.h>
26#include <linux/kfifo.h> 27#include <linux/kfifo.h>
27#include <linux/log2.h> 28#include <linux/log2.h>
29#include <linux/uaccess.h>
30
31static void _kfifo_init(struct kfifo *fifo, void *buffer,
32 unsigned int size)
33{
34 fifo->buffer = buffer;
35 fifo->size = size;
36
37 kfifo_reset(fifo);
38}
28 39
29/** 40/**
30 * kfifo_init - allocates a new FIFO using a preallocated buffer 41 * kfifo_init - initialize a FIFO using a preallocated buffer
42 * @fifo: the fifo to assign the buffer
31 * @buffer: the preallocated buffer to be used. 43 * @buffer: the preallocated buffer to be used.
32 * @size: the size of the internal buffer, this have to be a power of 2. 44 * @size: the size of the internal buffer, this has to be a power of 2.
33 * @gfp_mask: get_free_pages mask, passed to kmalloc()
34 * @lock: the lock to be used to protect the fifo buffer
35 * 45 *
36 * Do NOT pass the kfifo to kfifo_free() after use! Simply free the
37 * &struct kfifo with kfree().
38 */ 46 */
39struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size, 47void kfifo_init(struct kfifo *fifo, void *buffer, unsigned int size)
40 gfp_t gfp_mask, spinlock_t *lock)
41{ 48{
42 struct kfifo *fifo;
43
44 /* size must be a power of 2 */ 49 /* size must be a power of 2 */
45 BUG_ON(!is_power_of_2(size)); 50 BUG_ON(!is_power_of_2(size));
46 51
47 fifo = kmalloc(sizeof(struct kfifo), gfp_mask); 52 _kfifo_init(fifo, buffer, size);
48 if (!fifo)
49 return ERR_PTR(-ENOMEM);
50
51 fifo->buffer = buffer;
52 fifo->size = size;
53 fifo->in = fifo->out = 0;
54 fifo->lock = lock;
55
56 return fifo;
57} 53}
58EXPORT_SYMBOL(kfifo_init); 54EXPORT_SYMBOL(kfifo_init);
59 55
60/** 56/**
61 * kfifo_alloc - allocates a new FIFO and its internal buffer 57 * kfifo_alloc - allocates a new FIFO internal buffer
62 * @size: the size of the internal buffer to be allocated. 58 * @fifo: the fifo to assign then new buffer
59 * @size: the size of the buffer to be allocated, this has to be a power of 2.
63 * @gfp_mask: get_free_pages mask, passed to kmalloc() 60 * @gfp_mask: get_free_pages mask, passed to kmalloc()
64 * @lock: the lock to be used to protect the fifo buffer 61 *
62 * This function dynamically allocates a new fifo internal buffer
65 * 63 *
66 * The size will be rounded-up to a power of 2. 64 * The size will be rounded-up to a power of 2.
65 * The buffer will be released with kfifo_free().
66 * Return 0 if no error, otherwise an error code
67 */ 67 */
68struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask, spinlock_t *lock) 68int kfifo_alloc(struct kfifo *fifo, unsigned int size, gfp_t gfp_mask)
69{ 69{
70 unsigned char *buffer; 70 unsigned char *buffer;
71 struct kfifo *ret;
72 71
73 /* 72 /*
74 * round up to the next power of 2, since our 'let the indices 73 * round up to the next power of 2, since our 'let the indices
@@ -80,48 +79,92 @@ struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask, spinlock_t *lock)
80 } 79 }
81 80
82 buffer = kmalloc(size, gfp_mask); 81 buffer = kmalloc(size, gfp_mask);
83 if (!buffer) 82 if (!buffer) {
84 return ERR_PTR(-ENOMEM); 83 _kfifo_init(fifo, 0, 0);
85 84 return -ENOMEM;
86 ret = kfifo_init(buffer, size, gfp_mask, lock); 85 }
87 86
88 if (IS_ERR(ret)) 87 _kfifo_init(fifo, buffer, size);
89 kfree(buffer);
90 88
91 return ret; 89 return 0;
92} 90}
93EXPORT_SYMBOL(kfifo_alloc); 91EXPORT_SYMBOL(kfifo_alloc);
94 92
95/** 93/**
96 * kfifo_free - frees the FIFO 94 * kfifo_free - frees the FIFO internal buffer
97 * @fifo: the fifo to be freed. 95 * @fifo: the fifo to be freed.
98 */ 96 */
99void kfifo_free(struct kfifo *fifo) 97void kfifo_free(struct kfifo *fifo)
100{ 98{
101 kfree(fifo->buffer); 99 kfree(fifo->buffer);
102 kfree(fifo);
103} 100}
104EXPORT_SYMBOL(kfifo_free); 101EXPORT_SYMBOL(kfifo_free);
105 102
106/** 103/**
107 * __kfifo_put - puts some data into the FIFO, no locking version 104 * kfifo_skip - skip output data
108 * @fifo: the fifo to be used. 105 * @fifo: the fifo to be used.
109 * @buffer: the data to be added. 106 * @len: number of bytes to skip
110 * @len: the length of the data to be added.
111 *
112 * This function copies at most @len bytes from the @buffer into
113 * the FIFO depending on the free space, and returns the number of
114 * bytes copied.
115 *
116 * Note that with only one concurrent reader and one concurrent
117 * writer, you don't need extra locking to use these functions.
118 */ 107 */
119unsigned int __kfifo_put(struct kfifo *fifo, 108void kfifo_skip(struct kfifo *fifo, unsigned int len)
120 const unsigned char *buffer, unsigned int len) 109{
110 if (len < kfifo_len(fifo)) {
111 __kfifo_add_out(fifo, len);
112 return;
113 }
114 kfifo_reset_out(fifo);
115}
116EXPORT_SYMBOL(kfifo_skip);
117
118static inline void __kfifo_in_data(struct kfifo *fifo,
119 const void *from, unsigned int len, unsigned int off)
121{ 120{
122 unsigned int l; 121 unsigned int l;
123 122
124 len = min(len, fifo->size - fifo->in + fifo->out); 123 /*
124 * Ensure that we sample the fifo->out index -before- we
125 * start putting bytes into the kfifo.
126 */
127
128 smp_mb();
129
130 off = __kfifo_off(fifo, fifo->in + off);
131
132 /* first put the data starting from fifo->in to buffer end */
133 l = min(len, fifo->size - off);
134 memcpy(fifo->buffer + off, from, l);
135
136 /* then put the rest (if any) at the beginning of the buffer */
137 memcpy(fifo->buffer, from + l, len - l);
138}
139
140static inline void __kfifo_out_data(struct kfifo *fifo,
141 void *to, unsigned int len, unsigned int off)
142{
143 unsigned int l;
144
145 /*
146 * Ensure that we sample the fifo->in index -before- we
147 * start removing bytes from the kfifo.
148 */
149
150 smp_rmb();
151
152 off = __kfifo_off(fifo, fifo->out + off);
153
154 /* first get the data from fifo->out until the end of the buffer */
155 l = min(len, fifo->size - off);
156 memcpy(to, fifo->buffer + off, l);
157
158 /* then get the rest (if any) from the beginning of the buffer */
159 memcpy(to + l, fifo->buffer, len - l);
160}
161
162static inline int __kfifo_from_user_data(struct kfifo *fifo,
163 const void __user *from, unsigned int len, unsigned int off,
164 unsigned *lenout)
165{
166 unsigned int l;
167 int ret;
125 168
126 /* 169 /*
127 * Ensure that we sample the fifo->out index -before- we 170 * Ensure that we sample the fifo->out index -before- we
@@ -130,68 +173,271 @@ unsigned int __kfifo_put(struct kfifo *fifo,
130 173
131 smp_mb(); 174 smp_mb();
132 175
176 off = __kfifo_off(fifo, fifo->in + off);
177
133 /* first put the data starting from fifo->in to buffer end */ 178 /* first put the data starting from fifo->in to buffer end */
134 l = min(len, fifo->size - (fifo->in & (fifo->size - 1))); 179 l = min(len, fifo->size - off);
135 memcpy(fifo->buffer + (fifo->in & (fifo->size - 1)), buffer, l); 180 ret = copy_from_user(fifo->buffer + off, from, l);
181 if (unlikely(ret)) {
182 *lenout = ret;
183 return -EFAULT;
184 }
185 *lenout = l;
136 186
137 /* then put the rest (if any) at the beginning of the buffer */ 187 /* then put the rest (if any) at the beginning of the buffer */
138 memcpy(fifo->buffer, buffer + l, len - l); 188 ret = copy_from_user(fifo->buffer, from + l, len - l);
189 *lenout += ret ? ret : len - l;
190 return ret ? -EFAULT : 0;
191}
192
193static inline int __kfifo_to_user_data(struct kfifo *fifo,
194 void __user *to, unsigned int len, unsigned int off, unsigned *lenout)
195{
196 unsigned int l;
197 int ret;
139 198
140 /* 199 /*
141 * Ensure that we add the bytes to the kfifo -before- 200 * Ensure that we sample the fifo->in index -before- we
142 * we update the fifo->in index. 201 * start removing bytes from the kfifo.
143 */ 202 */
144 203
145 smp_wmb(); 204 smp_rmb();
205
206 off = __kfifo_off(fifo, fifo->out + off);
207
208 /* first get the data from fifo->out until the end of the buffer */
209 l = min(len, fifo->size - off);
210 ret = copy_to_user(to, fifo->buffer + off, l);
211 *lenout = l;
212 if (unlikely(ret)) {
213 *lenout -= ret;
214 return -EFAULT;
215 }
216
217 /* then get the rest (if any) from the beginning of the buffer */
218 len -= l;
219 ret = copy_to_user(to + l, fifo->buffer, len);
220 if (unlikely(ret)) {
221 *lenout += len - ret;
222 return -EFAULT;
223 }
224 *lenout += len;
225 return 0;
226}
227
228unsigned int __kfifo_in_n(struct kfifo *fifo,
229 const void *from, unsigned int len, unsigned int recsize)
230{
231 if (kfifo_avail(fifo) < len + recsize)
232 return len + 1;
233
234 __kfifo_in_data(fifo, from, len, recsize);
235 return 0;
236}
237EXPORT_SYMBOL(__kfifo_in_n);
146 238
147 fifo->in += len; 239/**
240 * kfifo_in - puts some data into the FIFO
241 * @fifo: the fifo to be used.
242 * @from: the data to be added.
243 * @len: the length of the data to be added.
244 *
245 * This function copies at most @len bytes from the @from buffer into
246 * the FIFO depending on the free space, and returns the number of
247 * bytes copied.
248 *
249 * Note that with only one concurrent reader and one concurrent
250 * writer, you don't need extra locking to use these functions.
251 */
252unsigned int kfifo_in(struct kfifo *fifo, const void *from,
253 unsigned int len)
254{
255 len = min(kfifo_avail(fifo), len);
148 256
257 __kfifo_in_data(fifo, from, len, 0);
258 __kfifo_add_in(fifo, len);
149 return len; 259 return len;
150} 260}
151EXPORT_SYMBOL(__kfifo_put); 261EXPORT_SYMBOL(kfifo_in);
262
263unsigned int __kfifo_in_generic(struct kfifo *fifo,
264 const void *from, unsigned int len, unsigned int recsize)
265{
266 return __kfifo_in_rec(fifo, from, len, recsize);
267}
268EXPORT_SYMBOL(__kfifo_in_generic);
269
270unsigned int __kfifo_out_n(struct kfifo *fifo,
271 void *to, unsigned int len, unsigned int recsize)
272{
273 if (kfifo_len(fifo) < len + recsize)
274 return len;
275
276 __kfifo_out_data(fifo, to, len, recsize);
277 __kfifo_add_out(fifo, len + recsize);
278 return 0;
279}
280EXPORT_SYMBOL(__kfifo_out_n);
152 281
153/** 282/**
154 * __kfifo_get - gets some data from the FIFO, no locking version 283 * kfifo_out - gets some data from the FIFO
155 * @fifo: the fifo to be used. 284 * @fifo: the fifo to be used.
156 * @buffer: where the data must be copied. 285 * @to: where the data must be copied.
157 * @len: the size of the destination buffer. 286 * @len: the size of the destination buffer.
158 * 287 *
159 * This function copies at most @len bytes from the FIFO into the 288 * This function copies at most @len bytes from the FIFO into the
160 * @buffer and returns the number of copied bytes. 289 * @to buffer and returns the number of copied bytes.
161 * 290 *
162 * Note that with only one concurrent reader and one concurrent 291 * Note that with only one concurrent reader and one concurrent
163 * writer, you don't need extra locking to use these functions. 292 * writer, you don't need extra locking to use these functions.
164 */ 293 */
165unsigned int __kfifo_get(struct kfifo *fifo, 294unsigned int kfifo_out(struct kfifo *fifo, void *to, unsigned int len)
166 unsigned char *buffer, unsigned int len)
167{ 295{
168 unsigned int l; 296 len = min(kfifo_len(fifo), len);
169 297
170 len = min(len, fifo->in - fifo->out); 298 __kfifo_out_data(fifo, to, len, 0);
299 __kfifo_add_out(fifo, len);
171 300
172 /* 301 return len;
173 * Ensure that we sample the fifo->in index -before- we 302}
174 * start removing bytes from the kfifo. 303EXPORT_SYMBOL(kfifo_out);
175 */
176 304
177 smp_rmb(); 305/**
306 * kfifo_out_peek - copy some data from the FIFO, but do not remove it
307 * @fifo: the fifo to be used.
308 * @to: where the data must be copied.
309 * @len: the size of the destination buffer.
310 * @offset: offset into the fifo
311 *
312 * This function copies at most @len bytes at @offset from the FIFO
313 * into the @to buffer and returns the number of copied bytes.
314 * The data is not removed from the FIFO.
315 */
316unsigned int kfifo_out_peek(struct kfifo *fifo, void *to, unsigned int len,
317 unsigned offset)
318{
319 len = min(kfifo_len(fifo), len + offset);
178 320
179 /* first get the data from fifo->out until the end of the buffer */ 321 __kfifo_out_data(fifo, to, len, offset);
180 l = min(len, fifo->size - (fifo->out & (fifo->size - 1))); 322 return len;
181 memcpy(buffer, fifo->buffer + (fifo->out & (fifo->size - 1)), l); 323}
324EXPORT_SYMBOL(kfifo_out_peek);
182 325
183 /* then get the rest (if any) from the beginning of the buffer */ 326unsigned int __kfifo_out_generic(struct kfifo *fifo,
184 memcpy(buffer + l, fifo->buffer, len - l); 327 void *to, unsigned int len, unsigned int recsize,
328 unsigned int *total)
329{
330 return __kfifo_out_rec(fifo, to, len, recsize, total);
331}
332EXPORT_SYMBOL(__kfifo_out_generic);
185 333
186 /* 334unsigned int __kfifo_from_user_n(struct kfifo *fifo,
187 * Ensure that we remove the bytes from the kfifo -before- 335 const void __user *from, unsigned int len, unsigned int recsize)
188 * we update the fifo->out index. 336{
189 */ 337 unsigned total;
190 338
191 smp_mb(); 339 if (kfifo_avail(fifo) < len + recsize)
340 return len + 1;
192 341
193 fifo->out += len; 342 __kfifo_from_user_data(fifo, from, len, recsize, &total);
343 return total;
344}
345EXPORT_SYMBOL(__kfifo_from_user_n);
194 346
195 return len; 347/**
348 * kfifo_from_user - puts some data from user space into the FIFO
349 * @fifo: the fifo to be used.
350 * @from: pointer to the data to be added.
351 * @len: the length of the data to be added.
352 *
353 * This function copies at most @len bytes from the @from buffer into the
354 * FIFO, depending on the free space, and returns -EFAULT/0.
355 *
356 * Note that with only one concurrent reader and one concurrent
357 * writer, you don't need extra locking to use these functions.
358 */
359int kfifo_from_user(struct kfifo *fifo,
360 const void __user *from, unsigned int len, unsigned *total)
361{
362 int ret;
363 len = min(kfifo_avail(fifo), len);
364 ret = __kfifo_from_user_data(fifo, from, len, 0, total);
365 if (ret)
366 return ret;
367 __kfifo_add_in(fifo, len);
368 return 0;
196} 369}
197EXPORT_SYMBOL(__kfifo_get); 370EXPORT_SYMBOL(kfifo_from_user);
371
372unsigned int __kfifo_from_user_generic(struct kfifo *fifo,
373 const void __user *from, unsigned int len, unsigned int recsize)
374{
375 return __kfifo_from_user_rec(fifo, from, len, recsize);
376}
377EXPORT_SYMBOL(__kfifo_from_user_generic);
378
379unsigned int __kfifo_to_user_n(struct kfifo *fifo,
380 void __user *to, unsigned int len, unsigned int reclen,
381 unsigned int recsize)
382{
383 unsigned int ret, total;
384
385 if (kfifo_len(fifo) < reclen + recsize)
386 return len;
387
388 ret = __kfifo_to_user_data(fifo, to, reclen, recsize, &total);
389
390 if (likely(ret == 0))
391 __kfifo_add_out(fifo, reclen + recsize);
392
393 return total;
394}
395EXPORT_SYMBOL(__kfifo_to_user_n);
396
397/**
398 * kfifo_to_user - gets data from the FIFO and write it to user space
399 * @fifo: the fifo to be used.
400 * @to: where the data must be copied.
401 * @len: the size of the destination buffer.
402 * @lenout: pointer to output variable with copied data
403 *
404 * This function copies at most @len bytes from the FIFO into the
405 * @to buffer and returns 0 or -EFAULT.
406 *
407 * Note that with only one concurrent reader and one concurrent
408 * writer, you don't need extra locking to use these functions.
409 */
410int kfifo_to_user(struct kfifo *fifo,
411 void __user *to, unsigned int len, unsigned *lenout)
412{
413 int ret;
414 len = min(kfifo_len(fifo), len);
415 ret = __kfifo_to_user_data(fifo, to, len, 0, lenout);
416 __kfifo_add_out(fifo, *lenout);
417 return ret;
418}
419EXPORT_SYMBOL(kfifo_to_user);
420
421unsigned int __kfifo_to_user_generic(struct kfifo *fifo,
422 void __user *to, unsigned int len, unsigned int recsize,
423 unsigned int *total)
424{
425 return __kfifo_to_user_rec(fifo, to, len, recsize, total);
426}
427EXPORT_SYMBOL(__kfifo_to_user_generic);
428
429unsigned int __kfifo_peek_generic(struct kfifo *fifo, unsigned int recsize)
430{
431 if (recsize == 0)
432 return kfifo_avail(fifo);
433
434 return __kfifo_peek_n(fifo, recsize);
435}
436EXPORT_SYMBOL(__kfifo_peek_generic);
437
438void __kfifo_skip_generic(struct kfifo *fifo, unsigned int recsize)
439{
440 __kfifo_skip_rec(fifo, recsize);
441}
442EXPORT_SYMBOL(__kfifo_skip_generic);
443
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 25b103190364..bf0e231d9702 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -520,13 +520,15 @@ int call_usermodehelper_pipe(char *path, char **argv, char **envp,
520 return -ENOMEM; 520 return -ENOMEM;
521 521
522 ret = call_usermodehelper_stdinpipe(sub_info, filp); 522 ret = call_usermodehelper_stdinpipe(sub_info, filp);
523 if (ret < 0) 523 if (ret < 0) {
524 goto out; 524 call_usermodehelper_freeinfo(sub_info);
525 return ret;
526 }
525 527
526 return call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC); 528 ret = call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC);
529 if (ret < 0) /* Failed to execute helper, close pipe */
530 filp_close(*filp, NULL);
527 531
528 out:
529 call_usermodehelper_freeinfo(sub_info);
530 return ret; 532 return ret;
531} 533}
532EXPORT_SYMBOL(call_usermodehelper_pipe); 534EXPORT_SYMBOL(call_usermodehelper_pipe);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index e5342a344c43..b7df302a0204 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1035,7 +1035,7 @@ int __kprobes register_kretprobe(struct kretprobe *rp)
1035 /* Pre-allocate memory for max kretprobe instances */ 1035 /* Pre-allocate memory for max kretprobe instances */
1036 if (rp->maxactive <= 0) { 1036 if (rp->maxactive <= 0) {
1037#ifdef CONFIG_PREEMPT 1037#ifdef CONFIG_PREEMPT
1038 rp->maxactive = max(10, 2 * num_possible_cpus()); 1038 rp->maxactive = max_t(unsigned int, 10, 2*num_possible_cpus());
1039#else 1039#else
1040 rp->maxactive = num_possible_cpus(); 1040 rp->maxactive = num_possible_cpus();
1041#endif 1041#endif
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 528dd78e7e7e..3feaf5a74514 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -100,6 +100,26 @@ static ssize_t kexec_crash_loaded_show(struct kobject *kobj,
100} 100}
101KERNEL_ATTR_RO(kexec_crash_loaded); 101KERNEL_ATTR_RO(kexec_crash_loaded);
102 102
103static ssize_t kexec_crash_size_show(struct kobject *kobj,
104 struct kobj_attribute *attr, char *buf)
105{
106 return sprintf(buf, "%zu\n", crash_get_memory_size());
107}
108static ssize_t kexec_crash_size_store(struct kobject *kobj,
109 struct kobj_attribute *attr,
110 const char *buf, size_t count)
111{
112 unsigned long cnt;
113 int ret;
114
115 if (strict_strtoul(buf, 0, &cnt))
116 return -EINVAL;
117
118 ret = crash_shrink_memory(cnt);
119 return ret < 0 ? ret : count;
120}
121KERNEL_ATTR_RW(kexec_crash_size);
122
103static ssize_t vmcoreinfo_show(struct kobject *kobj, 123static ssize_t vmcoreinfo_show(struct kobject *kobj,
104 struct kobj_attribute *attr, char *buf) 124 struct kobj_attribute *attr, char *buf)
105{ 125{
@@ -147,6 +167,7 @@ static struct attribute * kernel_attrs[] = {
147#ifdef CONFIG_KEXEC 167#ifdef CONFIG_KEXEC
148 &kexec_loaded_attr.attr, 168 &kexec_loaded_attr.attr,
149 &kexec_crash_loaded_attr.attr, 169 &kexec_crash_loaded_attr.attr,
170 &kexec_crash_size_attr.attr,
150 &vmcoreinfo_attr.attr, 171 &vmcoreinfo_attr.attr,
151#endif 172#endif
152 NULL 173 NULL
diff --git a/kernel/kthread.c b/kernel/kthread.c
index ab7ae57773e1..fbb6222fe7e0 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -150,6 +150,29 @@ struct task_struct *kthread_create(int (*threadfn)(void *data),
150EXPORT_SYMBOL(kthread_create); 150EXPORT_SYMBOL(kthread_create);
151 151
152/** 152/**
153 * kthread_bind - bind a just-created kthread to a cpu.
154 * @p: thread created by kthread_create().
155 * @cpu: cpu (might not be online, must be possible) for @p to run on.
156 *
157 * Description: This function is equivalent to set_cpus_allowed(),
158 * except that @cpu doesn't need to be online, and the thread must be
159 * stopped (i.e., just returned from kthread_create()).
160 */
161void kthread_bind(struct task_struct *p, unsigned int cpu)
162{
163 /* Must have done schedule() in kthread() before we set_task_cpu */
164 if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) {
165 WARN_ON(1);
166 return;
167 }
168
169 p->cpus_allowed = cpumask_of_cpu(cpu);
170 p->rt.nr_cpus_allowed = 1;
171 p->flags |= PF_THREAD_BOUND;
172}
173EXPORT_SYMBOL(kthread_bind);
174
175/**
153 * kthread_stop - stop a thread created by kthread_create(). 176 * kthread_stop - stop a thread created by kthread_create().
154 * @k: thread created by kthread_create(). 177 * @k: thread created by kthread_create().
155 * 178 *
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 429540c70d3f..5feaddcdbe49 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -73,11 +73,11 @@ module_param(lock_stat, int, 0644);
73 * to use a raw spinlock - we really dont want the spinlock 73 * to use a raw spinlock - we really dont want the spinlock
74 * code to recurse back into the lockdep code... 74 * code to recurse back into the lockdep code...
75 */ 75 */
76static raw_spinlock_t lockdep_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 76static arch_spinlock_t lockdep_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
77 77
78static int graph_lock(void) 78static int graph_lock(void)
79{ 79{
80 __raw_spin_lock(&lockdep_lock); 80 arch_spin_lock(&lockdep_lock);
81 /* 81 /*
82 * Make sure that if another CPU detected a bug while 82 * Make sure that if another CPU detected a bug while
83 * walking the graph we dont change it (while the other 83 * walking the graph we dont change it (while the other
@@ -85,7 +85,7 @@ static int graph_lock(void)
85 * dropped already) 85 * dropped already)
86 */ 86 */
87 if (!debug_locks) { 87 if (!debug_locks) {
88 __raw_spin_unlock(&lockdep_lock); 88 arch_spin_unlock(&lockdep_lock);
89 return 0; 89 return 0;
90 } 90 }
91 /* prevent any recursions within lockdep from causing deadlocks */ 91 /* prevent any recursions within lockdep from causing deadlocks */
@@ -95,11 +95,11 @@ static int graph_lock(void)
95 95
96static inline int graph_unlock(void) 96static inline int graph_unlock(void)
97{ 97{
98 if (debug_locks && !__raw_spin_is_locked(&lockdep_lock)) 98 if (debug_locks && !arch_spin_is_locked(&lockdep_lock))
99 return DEBUG_LOCKS_WARN_ON(1); 99 return DEBUG_LOCKS_WARN_ON(1);
100 100
101 current->lockdep_recursion--; 101 current->lockdep_recursion--;
102 __raw_spin_unlock(&lockdep_lock); 102 arch_spin_unlock(&lockdep_lock);
103 return 0; 103 return 0;
104} 104}
105 105
@@ -111,7 +111,7 @@ static inline int debug_locks_off_graph_unlock(void)
111{ 111{
112 int ret = debug_locks_off(); 112 int ret = debug_locks_off();
113 113
114 __raw_spin_unlock(&lockdep_lock); 114 arch_spin_unlock(&lockdep_lock);
115 115
116 return ret; 116 return ret;
117} 117}
@@ -1170,9 +1170,9 @@ unsigned long lockdep_count_forward_deps(struct lock_class *class)
1170 this.class = class; 1170 this.class = class;
1171 1171
1172 local_irq_save(flags); 1172 local_irq_save(flags);
1173 __raw_spin_lock(&lockdep_lock); 1173 arch_spin_lock(&lockdep_lock);
1174 ret = __lockdep_count_forward_deps(&this); 1174 ret = __lockdep_count_forward_deps(&this);
1175 __raw_spin_unlock(&lockdep_lock); 1175 arch_spin_unlock(&lockdep_lock);
1176 local_irq_restore(flags); 1176 local_irq_restore(flags);
1177 1177
1178 return ret; 1178 return ret;
@@ -1197,9 +1197,9 @@ unsigned long lockdep_count_backward_deps(struct lock_class *class)
1197 this.class = class; 1197 this.class = class;
1198 1198
1199 local_irq_save(flags); 1199 local_irq_save(flags);
1200 __raw_spin_lock(&lockdep_lock); 1200 arch_spin_lock(&lockdep_lock);
1201 ret = __lockdep_count_backward_deps(&this); 1201 ret = __lockdep_count_backward_deps(&this);
1202 __raw_spin_unlock(&lockdep_lock); 1202 arch_spin_unlock(&lockdep_lock);
1203 local_irq_restore(flags); 1203 local_irq_restore(flags);
1204 1204
1205 return ret; 1205 return ret;
diff --git a/kernel/module.c b/kernel/module.c
index 12afc5a3ddd3..f82386bd9ee9 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -880,11 +880,23 @@ static int try_to_force_load(struct module *mod, const char *reason)
880} 880}
881 881
882#ifdef CONFIG_MODVERSIONS 882#ifdef CONFIG_MODVERSIONS
883/* If the arch applies (non-zero) relocations to kernel kcrctab, unapply them. */
884static unsigned long maybe_relocated(unsigned long crc,
885 const struct module *crc_owner)
886{
887#ifdef ARCH_RELOCATES_KCRCTAB
888 if (crc_owner == NULL)
889 return crc - (unsigned long)reloc_start;
890#endif
891 return crc;
892}
893
883static int check_version(Elf_Shdr *sechdrs, 894static int check_version(Elf_Shdr *sechdrs,
884 unsigned int versindex, 895 unsigned int versindex,
885 const char *symname, 896 const char *symname,
886 struct module *mod, 897 struct module *mod,
887 const unsigned long *crc) 898 const unsigned long *crc,
899 const struct module *crc_owner)
888{ 900{
889 unsigned int i, num_versions; 901 unsigned int i, num_versions;
890 struct modversion_info *versions; 902 struct modversion_info *versions;
@@ -905,10 +917,10 @@ static int check_version(Elf_Shdr *sechdrs,
905 if (strcmp(versions[i].name, symname) != 0) 917 if (strcmp(versions[i].name, symname) != 0)
906 continue; 918 continue;
907 919
908 if (versions[i].crc == *crc) 920 if (versions[i].crc == maybe_relocated(*crc, crc_owner))
909 return 1; 921 return 1;
910 DEBUGP("Found checksum %lX vs module %lX\n", 922 DEBUGP("Found checksum %lX vs module %lX\n",
911 *crc, versions[i].crc); 923 maybe_relocated(*crc, crc_owner), versions[i].crc);
912 goto bad_version; 924 goto bad_version;
913 } 925 }
914 926
@@ -931,7 +943,8 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs,
931 if (!find_symbol(MODULE_SYMBOL_PREFIX "module_layout", NULL, 943 if (!find_symbol(MODULE_SYMBOL_PREFIX "module_layout", NULL,
932 &crc, true, false)) 944 &crc, true, false))
933 BUG(); 945 BUG();
934 return check_version(sechdrs, versindex, "module_layout", mod, crc); 946 return check_version(sechdrs, versindex, "module_layout", mod, crc,
947 NULL);
935} 948}
936 949
937/* First part is kernel version, which we ignore if module has crcs. */ 950/* First part is kernel version, which we ignore if module has crcs. */
@@ -949,7 +962,8 @@ static inline int check_version(Elf_Shdr *sechdrs,
949 unsigned int versindex, 962 unsigned int versindex,
950 const char *symname, 963 const char *symname,
951 struct module *mod, 964 struct module *mod,
952 const unsigned long *crc) 965 const unsigned long *crc,
966 const struct module *crc_owner)
953{ 967{
954 return 1; 968 return 1;
955} 969}
@@ -984,8 +998,8 @@ static const struct kernel_symbol *resolve_symbol(Elf_Shdr *sechdrs,
984 /* use_module can fail due to OOM, 998 /* use_module can fail due to OOM,
985 or module initialization or unloading */ 999 or module initialization or unloading */
986 if (sym) { 1000 if (sym) {
987 if (!check_version(sechdrs, versindex, name, mod, crc) || 1001 if (!check_version(sechdrs, versindex, name, mod, crc, owner)
988 !use_module(mod, owner)) 1002 || !use_module(mod, owner))
989 sym = NULL; 1003 sym = NULL;
990 } 1004 }
991 return sym; 1005 return sym;
@@ -996,6 +1010,12 @@ static const struct kernel_symbol *resolve_symbol(Elf_Shdr *sechdrs,
996 * J. Corbet <corbet@lwn.net> 1010 * J. Corbet <corbet@lwn.net>
997 */ 1011 */
998#if defined(CONFIG_KALLSYMS) && defined(CONFIG_SYSFS) 1012#if defined(CONFIG_KALLSYMS) && defined(CONFIG_SYSFS)
1013
1014static inline bool sect_empty(const Elf_Shdr *sect)
1015{
1016 return !(sect->sh_flags & SHF_ALLOC) || sect->sh_size == 0;
1017}
1018
999struct module_sect_attr 1019struct module_sect_attr
1000{ 1020{
1001 struct module_attribute mattr; 1021 struct module_attribute mattr;
@@ -1037,8 +1057,7 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect,
1037 1057
1038 /* Count loaded sections and allocate structures */ 1058 /* Count loaded sections and allocate structures */
1039 for (i = 0; i < nsect; i++) 1059 for (i = 0; i < nsect; i++)
1040 if (sechdrs[i].sh_flags & SHF_ALLOC 1060 if (!sect_empty(&sechdrs[i]))
1041 && sechdrs[i].sh_size)
1042 nloaded++; 1061 nloaded++;
1043 size[0] = ALIGN(sizeof(*sect_attrs) 1062 size[0] = ALIGN(sizeof(*sect_attrs)
1044 + nloaded * sizeof(sect_attrs->attrs[0]), 1063 + nloaded * sizeof(sect_attrs->attrs[0]),
@@ -1056,9 +1075,7 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect,
1056 sattr = &sect_attrs->attrs[0]; 1075 sattr = &sect_attrs->attrs[0];
1057 gattr = &sect_attrs->grp.attrs[0]; 1076 gattr = &sect_attrs->grp.attrs[0];
1058 for (i = 0; i < nsect; i++) { 1077 for (i = 0; i < nsect; i++) {
1059 if (! (sechdrs[i].sh_flags & SHF_ALLOC)) 1078 if (sect_empty(&sechdrs[i]))
1060 continue;
1061 if (!sechdrs[i].sh_size)
1062 continue; 1079 continue;
1063 sattr->address = sechdrs[i].sh_addr; 1080 sattr->address = sechdrs[i].sh_addr;
1064 sattr->name = kstrdup(secstrings + sechdrs[i].sh_name, 1081 sattr->name = kstrdup(secstrings + sechdrs[i].sh_name,
@@ -1142,7 +1159,7 @@ static void add_notes_attrs(struct module *mod, unsigned int nsect,
1142 /* Count notes sections and allocate structures. */ 1159 /* Count notes sections and allocate structures. */
1143 notes = 0; 1160 notes = 0;
1144 for (i = 0; i < nsect; i++) 1161 for (i = 0; i < nsect; i++)
1145 if ((sechdrs[i].sh_flags & SHF_ALLOC) && 1162 if (!sect_empty(&sechdrs[i]) &&
1146 (sechdrs[i].sh_type == SHT_NOTE)) 1163 (sechdrs[i].sh_type == SHT_NOTE))
1147 ++notes; 1164 ++notes;
1148 1165
@@ -1158,7 +1175,7 @@ static void add_notes_attrs(struct module *mod, unsigned int nsect,
1158 notes_attrs->notes = notes; 1175 notes_attrs->notes = notes;
1159 nattr = &notes_attrs->attrs[0]; 1176 nattr = &notes_attrs->attrs[0];
1160 for (loaded = i = 0; i < nsect; ++i) { 1177 for (loaded = i = 0; i < nsect; ++i) {
1161 if (!(sechdrs[i].sh_flags & SHF_ALLOC)) 1178 if (sect_empty(&sechdrs[i]))
1162 continue; 1179 continue;
1163 if (sechdrs[i].sh_type == SHT_NOTE) { 1180 if (sechdrs[i].sh_type == SHT_NOTE) {
1164 nattr->attr.name = mod->sect_attrs->attrs[loaded].name; 1181 nattr->attr.name = mod->sect_attrs->attrs[loaded].name;
@@ -1896,9 +1913,7 @@ static void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr,
1896 unsigned int i; 1913 unsigned int i;
1897 1914
1898 /* only scan the sections containing data */ 1915 /* only scan the sections containing data */
1899 kmemleak_scan_area(mod->module_core, (unsigned long)mod - 1916 kmemleak_scan_area(mod, sizeof(struct module), GFP_KERNEL);
1900 (unsigned long)mod->module_core,
1901 sizeof(struct module), GFP_KERNEL);
1902 1917
1903 for (i = 1; i < hdr->e_shnum; i++) { 1918 for (i = 1; i < hdr->e_shnum; i++) {
1904 if (!(sechdrs[i].sh_flags & SHF_ALLOC)) 1919 if (!(sechdrs[i].sh_flags & SHF_ALLOC))
@@ -1907,8 +1922,7 @@ static void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr,
1907 && strncmp(secstrings + sechdrs[i].sh_name, ".bss", 4) != 0) 1922 && strncmp(secstrings + sechdrs[i].sh_name, ".bss", 4) != 0)
1908 continue; 1923 continue;
1909 1924
1910 kmemleak_scan_area(mod->module_core, sechdrs[i].sh_addr - 1925 kmemleak_scan_area((void *)sechdrs[i].sh_addr,
1911 (unsigned long)mod->module_core,
1912 sechdrs[i].sh_size, GFP_KERNEL); 1926 sechdrs[i].sh_size, GFP_KERNEL);
1913 } 1927 }
1914} 1928}
@@ -2236,6 +2250,12 @@ static noinline struct module *load_module(void __user *umod,
2236 "_ftrace_events", 2250 "_ftrace_events",
2237 sizeof(*mod->trace_events), 2251 sizeof(*mod->trace_events),
2238 &mod->num_trace_events); 2252 &mod->num_trace_events);
2253 /*
2254 * This section contains pointers to allocated objects in the trace
2255 * code and not scanning it leads to false positives.
2256 */
2257 kmemleak_scan_area(mod->trace_events, sizeof(*mod->trace_events) *
2258 mod->num_trace_events, GFP_KERNEL);
2239#endif 2259#endif
2240#ifdef CONFIG_FTRACE_MCOUNT_RECORD 2260#ifdef CONFIG_FTRACE_MCOUNT_RECORD
2241 /* sechdrs[0].sh_size is always zero */ 2261 /* sechdrs[0].sh_size is always zero */
diff --git a/kernel/mutex-debug.h b/kernel/mutex-debug.h
index 6b2d735846a5..57d527a16f9d 100644
--- a/kernel/mutex-debug.h
+++ b/kernel/mutex-debug.h
@@ -43,13 +43,13 @@ static inline void mutex_clear_owner(struct mutex *lock)
43 \ 43 \
44 DEBUG_LOCKS_WARN_ON(in_interrupt()); \ 44 DEBUG_LOCKS_WARN_ON(in_interrupt()); \
45 local_irq_save(flags); \ 45 local_irq_save(flags); \
46 __raw_spin_lock(&(lock)->raw_lock); \ 46 arch_spin_lock(&(lock)->rlock.raw_lock);\
47 DEBUG_LOCKS_WARN_ON(l->magic != l); \ 47 DEBUG_LOCKS_WARN_ON(l->magic != l); \
48 } while (0) 48 } while (0)
49 49
50#define spin_unlock_mutex(lock, flags) \ 50#define spin_unlock_mutex(lock, flags) \
51 do { \ 51 do { \
52 __raw_spin_unlock(&(lock)->raw_lock); \ 52 arch_spin_unlock(&(lock)->rlock.raw_lock); \
53 local_irq_restore(flags); \ 53 local_irq_restore(flags); \
54 preempt_check_resched(); \ 54 preempt_check_resched(); \
55 } while (0) 55 } while (0)
diff --git a/kernel/panic.c b/kernel/panic.c
index 96b45d0b4ba5..5827f7b97254 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -10,6 +10,7 @@
10 */ 10 */
11#include <linux/debug_locks.h> 11#include <linux/debug_locks.h>
12#include <linux/interrupt.h> 12#include <linux/interrupt.h>
13#include <linux/kmsg_dump.h>
13#include <linux/kallsyms.h> 14#include <linux/kallsyms.h>
14#include <linux/notifier.h> 15#include <linux/notifier.h>
15#include <linux/module.h> 16#include <linux/module.h>
@@ -74,6 +75,7 @@ NORET_TYPE void panic(const char * fmt, ...)
74 dump_stack(); 75 dump_stack();
75#endif 76#endif
76 77
78 kmsg_dump(KMSG_DUMP_PANIC);
77 /* 79 /*
78 * If we have crashed and we have a crash kernel loaded let it handle 80 * If we have crashed and we have a crash kernel loaded let it handle
79 * everything else. 81 * everything else.
@@ -339,6 +341,7 @@ void oops_exit(void)
339{ 341{
340 do_oops_enter_exit(); 342 do_oops_enter_exit();
341 print_oops_end_marker(); 343 print_oops_end_marker();
344 kmsg_dump(KMSG_DUMP_OOPS);
342} 345}
343 346
344#ifdef WANT_WARN_ON_SLOWPATH 347#ifdef WANT_WARN_ON_SLOWPATH
diff --git a/kernel/params.c b/kernel/params.c
index d656c276508d..cf1b69183127 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -24,6 +24,7 @@
24#include <linux/err.h> 24#include <linux/err.h>
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/ctype.h> 26#include <linux/ctype.h>
27#include <linux/string.h>
27 28
28#if 0 29#if 0
29#define DEBUGP printk 30#define DEBUGP printk
@@ -122,9 +123,7 @@ static char *next_arg(char *args, char **param, char **val)
122 next = args + i; 123 next = args + i;
123 124
124 /* Chew up trailing spaces. */ 125 /* Chew up trailing spaces. */
125 while (isspace(*next)) 126 return skip_spaces(next);
126 next++;
127 return next;
128} 127}
129 128
130/* Args looks like "foo=bar,bar2 baz=fuz wiz". */ 129/* Args looks like "foo=bar,bar2 baz=fuz wiz". */
@@ -139,8 +138,7 @@ int parse_args(const char *name,
139 DEBUGP("Parsing ARGS: %s\n", args); 138 DEBUGP("Parsing ARGS: %s\n", args);
140 139
141 /* Chew leading spaces */ 140 /* Chew leading spaces */
142 while (isspace(*args)) 141 args = skip_spaces(args);
143 args++;
144 142
145 while (*args) { 143 while (*args) {
146 int ret; 144 int ret;
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index e73e53c7582f..d27746bd3a06 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -203,14 +203,14 @@ perf_lock_task_context(struct task_struct *task, unsigned long *flags)
203 * if so. If we locked the right context, then it 203 * if so. If we locked the right context, then it
204 * can't get swapped on us any more. 204 * can't get swapped on us any more.
205 */ 205 */
206 spin_lock_irqsave(&ctx->lock, *flags); 206 raw_spin_lock_irqsave(&ctx->lock, *flags);
207 if (ctx != rcu_dereference(task->perf_event_ctxp)) { 207 if (ctx != rcu_dereference(task->perf_event_ctxp)) {
208 spin_unlock_irqrestore(&ctx->lock, *flags); 208 raw_spin_unlock_irqrestore(&ctx->lock, *flags);
209 goto retry; 209 goto retry;
210 } 210 }
211 211
212 if (!atomic_inc_not_zero(&ctx->refcount)) { 212 if (!atomic_inc_not_zero(&ctx->refcount)) {
213 spin_unlock_irqrestore(&ctx->lock, *flags); 213 raw_spin_unlock_irqrestore(&ctx->lock, *flags);
214 ctx = NULL; 214 ctx = NULL;
215 } 215 }
216 } 216 }
@@ -231,7 +231,7 @@ static struct perf_event_context *perf_pin_task_context(struct task_struct *task
231 ctx = perf_lock_task_context(task, &flags); 231 ctx = perf_lock_task_context(task, &flags);
232 if (ctx) { 232 if (ctx) {
233 ++ctx->pin_count; 233 ++ctx->pin_count;
234 spin_unlock_irqrestore(&ctx->lock, flags); 234 raw_spin_unlock_irqrestore(&ctx->lock, flags);
235 } 235 }
236 return ctx; 236 return ctx;
237} 237}
@@ -240,9 +240,9 @@ static void perf_unpin_context(struct perf_event_context *ctx)
240{ 240{
241 unsigned long flags; 241 unsigned long flags;
242 242
243 spin_lock_irqsave(&ctx->lock, flags); 243 raw_spin_lock_irqsave(&ctx->lock, flags);
244 --ctx->pin_count; 244 --ctx->pin_count;
245 spin_unlock_irqrestore(&ctx->lock, flags); 245 raw_spin_unlock_irqrestore(&ctx->lock, flags);
246 put_ctx(ctx); 246 put_ctx(ctx);
247} 247}
248 248
@@ -427,7 +427,7 @@ static void __perf_event_remove_from_context(void *info)
427 if (ctx->task && cpuctx->task_ctx != ctx) 427 if (ctx->task && cpuctx->task_ctx != ctx)
428 return; 428 return;
429 429
430 spin_lock(&ctx->lock); 430 raw_spin_lock(&ctx->lock);
431 /* 431 /*
432 * Protect the list operation against NMI by disabling the 432 * Protect the list operation against NMI by disabling the
433 * events on a global level. 433 * events on a global level.
@@ -449,7 +449,7 @@ static void __perf_event_remove_from_context(void *info)
449 } 449 }
450 450
451 perf_enable(); 451 perf_enable();
452 spin_unlock(&ctx->lock); 452 raw_spin_unlock(&ctx->lock);
453} 453}
454 454
455 455
@@ -488,12 +488,12 @@ retry:
488 task_oncpu_function_call(task, __perf_event_remove_from_context, 488 task_oncpu_function_call(task, __perf_event_remove_from_context,
489 event); 489 event);
490 490
491 spin_lock_irq(&ctx->lock); 491 raw_spin_lock_irq(&ctx->lock);
492 /* 492 /*
493 * If the context is active we need to retry the smp call. 493 * If the context is active we need to retry the smp call.
494 */ 494 */
495 if (ctx->nr_active && !list_empty(&event->group_entry)) { 495 if (ctx->nr_active && !list_empty(&event->group_entry)) {
496 spin_unlock_irq(&ctx->lock); 496 raw_spin_unlock_irq(&ctx->lock);
497 goto retry; 497 goto retry;
498 } 498 }
499 499
@@ -504,7 +504,7 @@ retry:
504 */ 504 */
505 if (!list_empty(&event->group_entry)) 505 if (!list_empty(&event->group_entry))
506 list_del_event(event, ctx); 506 list_del_event(event, ctx);
507 spin_unlock_irq(&ctx->lock); 507 raw_spin_unlock_irq(&ctx->lock);
508} 508}
509 509
510/* 510/*
@@ -535,7 +535,7 @@ static void __perf_event_disable(void *info)
535 if (ctx->task && cpuctx->task_ctx != ctx) 535 if (ctx->task && cpuctx->task_ctx != ctx)
536 return; 536 return;
537 537
538 spin_lock(&ctx->lock); 538 raw_spin_lock(&ctx->lock);
539 539
540 /* 540 /*
541 * If the event is on, turn it off. 541 * If the event is on, turn it off.
@@ -551,7 +551,7 @@ static void __perf_event_disable(void *info)
551 event->state = PERF_EVENT_STATE_OFF; 551 event->state = PERF_EVENT_STATE_OFF;
552 } 552 }
553 553
554 spin_unlock(&ctx->lock); 554 raw_spin_unlock(&ctx->lock);
555} 555}
556 556
557/* 557/*
@@ -584,12 +584,12 @@ void perf_event_disable(struct perf_event *event)
584 retry: 584 retry:
585 task_oncpu_function_call(task, __perf_event_disable, event); 585 task_oncpu_function_call(task, __perf_event_disable, event);
586 586
587 spin_lock_irq(&ctx->lock); 587 raw_spin_lock_irq(&ctx->lock);
588 /* 588 /*
589 * If the event is still active, we need to retry the cross-call. 589 * If the event is still active, we need to retry the cross-call.
590 */ 590 */
591 if (event->state == PERF_EVENT_STATE_ACTIVE) { 591 if (event->state == PERF_EVENT_STATE_ACTIVE) {
592 spin_unlock_irq(&ctx->lock); 592 raw_spin_unlock_irq(&ctx->lock);
593 goto retry; 593 goto retry;
594 } 594 }
595 595
@@ -602,7 +602,7 @@ void perf_event_disable(struct perf_event *event)
602 event->state = PERF_EVENT_STATE_OFF; 602 event->state = PERF_EVENT_STATE_OFF;
603 } 603 }
604 604
605 spin_unlock_irq(&ctx->lock); 605 raw_spin_unlock_irq(&ctx->lock);
606} 606}
607 607
608static int 608static int
@@ -770,7 +770,7 @@ static void __perf_install_in_context(void *info)
770 cpuctx->task_ctx = ctx; 770 cpuctx->task_ctx = ctx;
771 } 771 }
772 772
773 spin_lock(&ctx->lock); 773 raw_spin_lock(&ctx->lock);
774 ctx->is_active = 1; 774 ctx->is_active = 1;
775 update_context_time(ctx); 775 update_context_time(ctx);
776 776
@@ -782,6 +782,9 @@ static void __perf_install_in_context(void *info)
782 782
783 add_event_to_ctx(event, ctx); 783 add_event_to_ctx(event, ctx);
784 784
785 if (event->cpu != -1 && event->cpu != smp_processor_id())
786 goto unlock;
787
785 /* 788 /*
786 * Don't put the event on if it is disabled or if 789 * Don't put the event on if it is disabled or if
787 * it is in a group and the group isn't on. 790 * it is in a group and the group isn't on.
@@ -820,7 +823,7 @@ static void __perf_install_in_context(void *info)
820 unlock: 823 unlock:
821 perf_enable(); 824 perf_enable();
822 825
823 spin_unlock(&ctx->lock); 826 raw_spin_unlock(&ctx->lock);
824} 827}
825 828
826/* 829/*
@@ -856,12 +859,12 @@ retry:
856 task_oncpu_function_call(task, __perf_install_in_context, 859 task_oncpu_function_call(task, __perf_install_in_context,
857 event); 860 event);
858 861
859 spin_lock_irq(&ctx->lock); 862 raw_spin_lock_irq(&ctx->lock);
860 /* 863 /*
861 * we need to retry the smp call. 864 * we need to retry the smp call.
862 */ 865 */
863 if (ctx->is_active && list_empty(&event->group_entry)) { 866 if (ctx->is_active && list_empty(&event->group_entry)) {
864 spin_unlock_irq(&ctx->lock); 867 raw_spin_unlock_irq(&ctx->lock);
865 goto retry; 868 goto retry;
866 } 869 }
867 870
@@ -872,7 +875,7 @@ retry:
872 */ 875 */
873 if (list_empty(&event->group_entry)) 876 if (list_empty(&event->group_entry))
874 add_event_to_ctx(event, ctx); 877 add_event_to_ctx(event, ctx);
875 spin_unlock_irq(&ctx->lock); 878 raw_spin_unlock_irq(&ctx->lock);
876} 879}
877 880
878/* 881/*
@@ -917,7 +920,7 @@ static void __perf_event_enable(void *info)
917 cpuctx->task_ctx = ctx; 920 cpuctx->task_ctx = ctx;
918 } 921 }
919 922
920 spin_lock(&ctx->lock); 923 raw_spin_lock(&ctx->lock);
921 ctx->is_active = 1; 924 ctx->is_active = 1;
922 update_context_time(ctx); 925 update_context_time(ctx);
923 926
@@ -925,6 +928,9 @@ static void __perf_event_enable(void *info)
925 goto unlock; 928 goto unlock;
926 __perf_event_mark_enabled(event, ctx); 929 __perf_event_mark_enabled(event, ctx);
927 930
931 if (event->cpu != -1 && event->cpu != smp_processor_id())
932 goto unlock;
933
928 /* 934 /*
929 * If the event is in a group and isn't the group leader, 935 * If the event is in a group and isn't the group leader,
930 * then don't put it on unless the group is on. 936 * then don't put it on unless the group is on.
@@ -959,7 +965,7 @@ static void __perf_event_enable(void *info)
959 } 965 }
960 966
961 unlock: 967 unlock:
962 spin_unlock(&ctx->lock); 968 raw_spin_unlock(&ctx->lock);
963} 969}
964 970
965/* 971/*
@@ -985,7 +991,7 @@ void perf_event_enable(struct perf_event *event)
985 return; 991 return;
986 } 992 }
987 993
988 spin_lock_irq(&ctx->lock); 994 raw_spin_lock_irq(&ctx->lock);
989 if (event->state >= PERF_EVENT_STATE_INACTIVE) 995 if (event->state >= PERF_EVENT_STATE_INACTIVE)
990 goto out; 996 goto out;
991 997
@@ -1000,10 +1006,10 @@ void perf_event_enable(struct perf_event *event)
1000 event->state = PERF_EVENT_STATE_OFF; 1006 event->state = PERF_EVENT_STATE_OFF;
1001 1007
1002 retry: 1008 retry:
1003 spin_unlock_irq(&ctx->lock); 1009 raw_spin_unlock_irq(&ctx->lock);
1004 task_oncpu_function_call(task, __perf_event_enable, event); 1010 task_oncpu_function_call(task, __perf_event_enable, event);
1005 1011
1006 spin_lock_irq(&ctx->lock); 1012 raw_spin_lock_irq(&ctx->lock);
1007 1013
1008 /* 1014 /*
1009 * If the context is active and the event is still off, 1015 * If the context is active and the event is still off,
@@ -1020,7 +1026,7 @@ void perf_event_enable(struct perf_event *event)
1020 __perf_event_mark_enabled(event, ctx); 1026 __perf_event_mark_enabled(event, ctx);
1021 1027
1022 out: 1028 out:
1023 spin_unlock_irq(&ctx->lock); 1029 raw_spin_unlock_irq(&ctx->lock);
1024} 1030}
1025 1031
1026static int perf_event_refresh(struct perf_event *event, int refresh) 1032static int perf_event_refresh(struct perf_event *event, int refresh)
@@ -1042,7 +1048,7 @@ void __perf_event_sched_out(struct perf_event_context *ctx,
1042{ 1048{
1043 struct perf_event *event; 1049 struct perf_event *event;
1044 1050
1045 spin_lock(&ctx->lock); 1051 raw_spin_lock(&ctx->lock);
1046 ctx->is_active = 0; 1052 ctx->is_active = 0;
1047 if (likely(!ctx->nr_events)) 1053 if (likely(!ctx->nr_events))
1048 goto out; 1054 goto out;
@@ -1055,7 +1061,7 @@ void __perf_event_sched_out(struct perf_event_context *ctx,
1055 } 1061 }
1056 perf_enable(); 1062 perf_enable();
1057 out: 1063 out:
1058 spin_unlock(&ctx->lock); 1064 raw_spin_unlock(&ctx->lock);
1059} 1065}
1060 1066
1061/* 1067/*
@@ -1193,8 +1199,8 @@ void perf_event_task_sched_out(struct task_struct *task,
1193 * order we take the locks because no other cpu could 1199 * order we take the locks because no other cpu could
1194 * be trying to lock both of these tasks. 1200 * be trying to lock both of these tasks.
1195 */ 1201 */
1196 spin_lock(&ctx->lock); 1202 raw_spin_lock(&ctx->lock);
1197 spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING); 1203 raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
1198 if (context_equiv(ctx, next_ctx)) { 1204 if (context_equiv(ctx, next_ctx)) {
1199 /* 1205 /*
1200 * XXX do we need a memory barrier of sorts 1206 * XXX do we need a memory barrier of sorts
@@ -1208,8 +1214,8 @@ void perf_event_task_sched_out(struct task_struct *task,
1208 1214
1209 perf_event_sync_stat(ctx, next_ctx); 1215 perf_event_sync_stat(ctx, next_ctx);
1210 } 1216 }
1211 spin_unlock(&next_ctx->lock); 1217 raw_spin_unlock(&next_ctx->lock);
1212 spin_unlock(&ctx->lock); 1218 raw_spin_unlock(&ctx->lock);
1213 } 1219 }
1214 rcu_read_unlock(); 1220 rcu_read_unlock();
1215 1221
@@ -1251,7 +1257,7 @@ __perf_event_sched_in(struct perf_event_context *ctx,
1251 struct perf_event *event; 1257 struct perf_event *event;
1252 int can_add_hw = 1; 1258 int can_add_hw = 1;
1253 1259
1254 spin_lock(&ctx->lock); 1260 raw_spin_lock(&ctx->lock);
1255 ctx->is_active = 1; 1261 ctx->is_active = 1;
1256 if (likely(!ctx->nr_events)) 1262 if (likely(!ctx->nr_events))
1257 goto out; 1263 goto out;
@@ -1306,7 +1312,7 @@ __perf_event_sched_in(struct perf_event_context *ctx,
1306 } 1312 }
1307 perf_enable(); 1313 perf_enable();
1308 out: 1314 out:
1309 spin_unlock(&ctx->lock); 1315 raw_spin_unlock(&ctx->lock);
1310} 1316}
1311 1317
1312/* 1318/*
@@ -1370,11 +1376,14 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
1370 struct hw_perf_event *hwc; 1376 struct hw_perf_event *hwc;
1371 u64 interrupts, freq; 1377 u64 interrupts, freq;
1372 1378
1373 spin_lock(&ctx->lock); 1379 raw_spin_lock(&ctx->lock);
1374 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 1380 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
1375 if (event->state != PERF_EVENT_STATE_ACTIVE) 1381 if (event->state != PERF_EVENT_STATE_ACTIVE)
1376 continue; 1382 continue;
1377 1383
1384 if (event->cpu != -1 && event->cpu != smp_processor_id())
1385 continue;
1386
1378 hwc = &event->hw; 1387 hwc = &event->hw;
1379 1388
1380 interrupts = hwc->interrupts; 1389 interrupts = hwc->interrupts;
@@ -1425,7 +1434,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
1425 perf_enable(); 1434 perf_enable();
1426 } 1435 }
1427 } 1436 }
1428 spin_unlock(&ctx->lock); 1437 raw_spin_unlock(&ctx->lock);
1429} 1438}
1430 1439
1431/* 1440/*
@@ -1438,7 +1447,7 @@ static void rotate_ctx(struct perf_event_context *ctx)
1438 if (!ctx->nr_events) 1447 if (!ctx->nr_events)
1439 return; 1448 return;
1440 1449
1441 spin_lock(&ctx->lock); 1450 raw_spin_lock(&ctx->lock);
1442 /* 1451 /*
1443 * Rotate the first entry last (works just fine for group events too): 1452 * Rotate the first entry last (works just fine for group events too):
1444 */ 1453 */
@@ -1449,7 +1458,7 @@ static void rotate_ctx(struct perf_event_context *ctx)
1449 } 1458 }
1450 perf_enable(); 1459 perf_enable();
1451 1460
1452 spin_unlock(&ctx->lock); 1461 raw_spin_unlock(&ctx->lock);
1453} 1462}
1454 1463
1455void perf_event_task_tick(struct task_struct *curr, int cpu) 1464void perf_event_task_tick(struct task_struct *curr, int cpu)
@@ -1498,7 +1507,7 @@ static void perf_event_enable_on_exec(struct task_struct *task)
1498 1507
1499 __perf_event_task_sched_out(ctx); 1508 __perf_event_task_sched_out(ctx);
1500 1509
1501 spin_lock(&ctx->lock); 1510 raw_spin_lock(&ctx->lock);
1502 1511
1503 list_for_each_entry(event, &ctx->group_list, group_entry) { 1512 list_for_each_entry(event, &ctx->group_list, group_entry) {
1504 if (!event->attr.enable_on_exec) 1513 if (!event->attr.enable_on_exec)
@@ -1516,7 +1525,7 @@ static void perf_event_enable_on_exec(struct task_struct *task)
1516 if (enabled) 1525 if (enabled)
1517 unclone_ctx(ctx); 1526 unclone_ctx(ctx);
1518 1527
1519 spin_unlock(&ctx->lock); 1528 raw_spin_unlock(&ctx->lock);
1520 1529
1521 perf_event_task_sched_in(task, smp_processor_id()); 1530 perf_event_task_sched_in(task, smp_processor_id());
1522 out: 1531 out:
@@ -1542,10 +1551,10 @@ static void __perf_event_read(void *info)
1542 if (ctx->task && cpuctx->task_ctx != ctx) 1551 if (ctx->task && cpuctx->task_ctx != ctx)
1543 return; 1552 return;
1544 1553
1545 spin_lock(&ctx->lock); 1554 raw_spin_lock(&ctx->lock);
1546 update_context_time(ctx); 1555 update_context_time(ctx);
1547 update_event_times(event); 1556 update_event_times(event);
1548 spin_unlock(&ctx->lock); 1557 raw_spin_unlock(&ctx->lock);
1549 1558
1550 event->pmu->read(event); 1559 event->pmu->read(event);
1551} 1560}
@@ -1563,10 +1572,10 @@ static u64 perf_event_read(struct perf_event *event)
1563 struct perf_event_context *ctx = event->ctx; 1572 struct perf_event_context *ctx = event->ctx;
1564 unsigned long flags; 1573 unsigned long flags;
1565 1574
1566 spin_lock_irqsave(&ctx->lock, flags); 1575 raw_spin_lock_irqsave(&ctx->lock, flags);
1567 update_context_time(ctx); 1576 update_context_time(ctx);
1568 update_event_times(event); 1577 update_event_times(event);
1569 spin_unlock_irqrestore(&ctx->lock, flags); 1578 raw_spin_unlock_irqrestore(&ctx->lock, flags);
1570 } 1579 }
1571 1580
1572 return atomic64_read(&event->count); 1581 return atomic64_read(&event->count);
@@ -1579,7 +1588,7 @@ static void
1579__perf_event_init_context(struct perf_event_context *ctx, 1588__perf_event_init_context(struct perf_event_context *ctx,
1580 struct task_struct *task) 1589 struct task_struct *task)
1581{ 1590{
1582 spin_lock_init(&ctx->lock); 1591 raw_spin_lock_init(&ctx->lock);
1583 mutex_init(&ctx->mutex); 1592 mutex_init(&ctx->mutex);
1584 INIT_LIST_HEAD(&ctx->group_list); 1593 INIT_LIST_HEAD(&ctx->group_list);
1585 INIT_LIST_HEAD(&ctx->event_list); 1594 INIT_LIST_HEAD(&ctx->event_list);
@@ -1595,15 +1604,12 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu)
1595 unsigned long flags; 1604 unsigned long flags;
1596 int err; 1605 int err;
1597 1606
1598 /* 1607 if (pid == -1 && cpu != -1) {
1599 * If cpu is not a wildcard then this is a percpu event:
1600 */
1601 if (cpu != -1) {
1602 /* Must be root to operate on a CPU event: */ 1608 /* Must be root to operate on a CPU event: */
1603 if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) 1609 if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
1604 return ERR_PTR(-EACCES); 1610 return ERR_PTR(-EACCES);
1605 1611
1606 if (cpu < 0 || cpu > num_possible_cpus()) 1612 if (cpu < 0 || cpu >= nr_cpumask_bits)
1607 return ERR_PTR(-EINVAL); 1613 return ERR_PTR(-EINVAL);
1608 1614
1609 /* 1615 /*
@@ -1611,7 +1617,7 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu)
1611 * offline CPU and activate it when the CPU comes up, but 1617 * offline CPU and activate it when the CPU comes up, but
1612 * that's for later. 1618 * that's for later.
1613 */ 1619 */
1614 if (!cpu_isset(cpu, cpu_online_map)) 1620 if (!cpu_online(cpu))
1615 return ERR_PTR(-ENODEV); 1621 return ERR_PTR(-ENODEV);
1616 1622
1617 cpuctx = &per_cpu(perf_cpu_context, cpu); 1623 cpuctx = &per_cpu(perf_cpu_context, cpu);
@@ -1649,7 +1655,7 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu)
1649 ctx = perf_lock_task_context(task, &flags); 1655 ctx = perf_lock_task_context(task, &flags);
1650 if (ctx) { 1656 if (ctx) {
1651 unclone_ctx(ctx); 1657 unclone_ctx(ctx);
1652 spin_unlock_irqrestore(&ctx->lock, flags); 1658 raw_spin_unlock_irqrestore(&ctx->lock, flags);
1653 } 1659 }
1654 1660
1655 if (!ctx) { 1661 if (!ctx) {
@@ -1987,7 +1993,7 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
1987 if (!value) 1993 if (!value)
1988 return -EINVAL; 1994 return -EINVAL;
1989 1995
1990 spin_lock_irq(&ctx->lock); 1996 raw_spin_lock_irq(&ctx->lock);
1991 if (event->attr.freq) { 1997 if (event->attr.freq) {
1992 if (value > sysctl_perf_event_sample_rate) { 1998 if (value > sysctl_perf_event_sample_rate) {
1993 ret = -EINVAL; 1999 ret = -EINVAL;
@@ -2000,7 +2006,7 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
2000 event->hw.sample_period = value; 2006 event->hw.sample_period = value;
2001 } 2007 }
2002unlock: 2008unlock:
2003 spin_unlock_irq(&ctx->lock); 2009 raw_spin_unlock_irq(&ctx->lock);
2004 2010
2005 return ret; 2011 return ret;
2006} 2012}
@@ -3262,6 +3268,12 @@ static void perf_event_task_output(struct perf_event *event,
3262 3268
3263static int perf_event_task_match(struct perf_event *event) 3269static int perf_event_task_match(struct perf_event *event)
3264{ 3270{
3271 if (event->state != PERF_EVENT_STATE_ACTIVE)
3272 return 0;
3273
3274 if (event->cpu != -1 && event->cpu != smp_processor_id())
3275 return 0;
3276
3265 if (event->attr.comm || event->attr.mmap || event->attr.task) 3277 if (event->attr.comm || event->attr.mmap || event->attr.task)
3266 return 1; 3278 return 1;
3267 3279
@@ -3287,12 +3299,11 @@ static void perf_event_task_event(struct perf_task_event *task_event)
3287 rcu_read_lock(); 3299 rcu_read_lock();
3288 cpuctx = &get_cpu_var(perf_cpu_context); 3300 cpuctx = &get_cpu_var(perf_cpu_context);
3289 perf_event_task_ctx(&cpuctx->ctx, task_event); 3301 perf_event_task_ctx(&cpuctx->ctx, task_event);
3290 put_cpu_var(perf_cpu_context);
3291
3292 if (!ctx) 3302 if (!ctx)
3293 ctx = rcu_dereference(task_event->task->perf_event_ctxp); 3303 ctx = rcu_dereference(task_event->task->perf_event_ctxp);
3294 if (ctx) 3304 if (ctx)
3295 perf_event_task_ctx(ctx, task_event); 3305 perf_event_task_ctx(ctx, task_event);
3306 put_cpu_var(perf_cpu_context);
3296 rcu_read_unlock(); 3307 rcu_read_unlock();
3297} 3308}
3298 3309
@@ -3369,6 +3380,12 @@ static void perf_event_comm_output(struct perf_event *event,
3369 3380
3370static int perf_event_comm_match(struct perf_event *event) 3381static int perf_event_comm_match(struct perf_event *event)
3371{ 3382{
3383 if (event->state != PERF_EVENT_STATE_ACTIVE)
3384 return 0;
3385
3386 if (event->cpu != -1 && event->cpu != smp_processor_id())
3387 return 0;
3388
3372 if (event->attr.comm) 3389 if (event->attr.comm)
3373 return 1; 3390 return 1;
3374 3391
@@ -3405,15 +3422,10 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
3405 rcu_read_lock(); 3422 rcu_read_lock();
3406 cpuctx = &get_cpu_var(perf_cpu_context); 3423 cpuctx = &get_cpu_var(perf_cpu_context);
3407 perf_event_comm_ctx(&cpuctx->ctx, comm_event); 3424 perf_event_comm_ctx(&cpuctx->ctx, comm_event);
3408 put_cpu_var(perf_cpu_context);
3409
3410 /*
3411 * doesn't really matter which of the child contexts the
3412 * events ends up in.
3413 */
3414 ctx = rcu_dereference(current->perf_event_ctxp); 3425 ctx = rcu_dereference(current->perf_event_ctxp);
3415 if (ctx) 3426 if (ctx)
3416 perf_event_comm_ctx(ctx, comm_event); 3427 perf_event_comm_ctx(ctx, comm_event);
3428 put_cpu_var(perf_cpu_context);
3417 rcu_read_unlock(); 3429 rcu_read_unlock();
3418} 3430}
3419 3431
@@ -3488,6 +3500,12 @@ static void perf_event_mmap_output(struct perf_event *event,
3488static int perf_event_mmap_match(struct perf_event *event, 3500static int perf_event_mmap_match(struct perf_event *event,
3489 struct perf_mmap_event *mmap_event) 3501 struct perf_mmap_event *mmap_event)
3490{ 3502{
3503 if (event->state != PERF_EVENT_STATE_ACTIVE)
3504 return 0;
3505
3506 if (event->cpu != -1 && event->cpu != smp_processor_id())
3507 return 0;
3508
3491 if (event->attr.mmap) 3509 if (event->attr.mmap)
3492 return 1; 3510 return 1;
3493 3511
@@ -3561,15 +3579,10 @@ got_name:
3561 rcu_read_lock(); 3579 rcu_read_lock();
3562 cpuctx = &get_cpu_var(perf_cpu_context); 3580 cpuctx = &get_cpu_var(perf_cpu_context);
3563 perf_event_mmap_ctx(&cpuctx->ctx, mmap_event); 3581 perf_event_mmap_ctx(&cpuctx->ctx, mmap_event);
3564 put_cpu_var(perf_cpu_context);
3565
3566 /*
3567 * doesn't really matter which of the child contexts the
3568 * events ends up in.
3569 */
3570 ctx = rcu_dereference(current->perf_event_ctxp); 3582 ctx = rcu_dereference(current->perf_event_ctxp);
3571 if (ctx) 3583 if (ctx)
3572 perf_event_mmap_ctx(ctx, mmap_event); 3584 perf_event_mmap_ctx(ctx, mmap_event);
3585 put_cpu_var(perf_cpu_context);
3573 rcu_read_unlock(); 3586 rcu_read_unlock();
3574 3587
3575 kfree(buf); 3588 kfree(buf);
@@ -3860,6 +3873,9 @@ static int perf_swevent_match(struct perf_event *event,
3860 struct perf_sample_data *data, 3873 struct perf_sample_data *data,
3861 struct pt_regs *regs) 3874 struct pt_regs *regs)
3862{ 3875{
3876 if (event->cpu != -1 && event->cpu != smp_processor_id())
3877 return 0;
3878
3863 if (!perf_swevent_is_counting(event)) 3879 if (!perf_swevent_is_counting(event))
3864 return 0; 3880 return 0;
3865 3881
@@ -4564,7 +4580,7 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
4564 if (attr->type >= PERF_TYPE_MAX) 4580 if (attr->type >= PERF_TYPE_MAX)
4565 return -EINVAL; 4581 return -EINVAL;
4566 4582
4567 if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3) 4583 if (attr->__reserved_1 || attr->__reserved_2)
4568 return -EINVAL; 4584 return -EINVAL;
4569 4585
4570 if (attr->sample_type & ~(PERF_SAMPLE_MAX-1)) 4586 if (attr->sample_type & ~(PERF_SAMPLE_MAX-1))
@@ -4717,7 +4733,7 @@ SYSCALL_DEFINE5(perf_event_open,
4717 if (IS_ERR(event)) 4733 if (IS_ERR(event))
4718 goto err_put_context; 4734 goto err_put_context;
4719 4735
4720 err = anon_inode_getfd("[perf_event]", &perf_fops, event, 0); 4736 err = anon_inode_getfd("[perf_event]", &perf_fops, event, O_RDWR);
4721 if (err < 0) 4737 if (err < 0)
4722 goto err_free_put_context; 4738 goto err_free_put_context;
4723 4739
@@ -4992,7 +5008,7 @@ void perf_event_exit_task(struct task_struct *child)
4992 * reading child->perf_event_ctxp, we wait until it has 5008 * reading child->perf_event_ctxp, we wait until it has
4993 * incremented the context's refcount before we do put_ctx below. 5009 * incremented the context's refcount before we do put_ctx below.
4994 */ 5010 */
4995 spin_lock(&child_ctx->lock); 5011 raw_spin_lock(&child_ctx->lock);
4996 child->perf_event_ctxp = NULL; 5012 child->perf_event_ctxp = NULL;
4997 /* 5013 /*
4998 * If this context is a clone; unclone it so it can't get 5014 * If this context is a clone; unclone it so it can't get
@@ -5001,7 +5017,7 @@ void perf_event_exit_task(struct task_struct *child)
5001 */ 5017 */
5002 unclone_ctx(child_ctx); 5018 unclone_ctx(child_ctx);
5003 update_context_time(child_ctx); 5019 update_context_time(child_ctx);
5004 spin_unlock_irqrestore(&child_ctx->lock, flags); 5020 raw_spin_unlock_irqrestore(&child_ctx->lock, flags);
5005 5021
5006 /* 5022 /*
5007 * Report the task dead after unscheduling the events so that we 5023 * Report the task dead after unscheduling the events so that we
@@ -5141,7 +5157,7 @@ int perf_event_init_task(struct task_struct *child)
5141 GFP_KERNEL); 5157 GFP_KERNEL);
5142 if (!child_ctx) { 5158 if (!child_ctx) {
5143 ret = -ENOMEM; 5159 ret = -ENOMEM;
5144 goto exit; 5160 break;
5145 } 5161 }
5146 5162
5147 __perf_event_init_context(child_ctx, child); 5163 __perf_event_init_context(child_ctx, child);
@@ -5157,7 +5173,7 @@ int perf_event_init_task(struct task_struct *child)
5157 } 5173 }
5158 } 5174 }
5159 5175
5160 if (inherited_all) { 5176 if (child_ctx && inherited_all) {
5161 /* 5177 /*
5162 * Mark the child context as a clone of the parent 5178 * Mark the child context as a clone of the parent
5163 * context, or of whatever the parent is a clone of. 5179 * context, or of whatever the parent is a clone of.
@@ -5177,7 +5193,6 @@ int perf_event_init_task(struct task_struct *child)
5177 get_ctx(child_ctx->parent_ctx); 5193 get_ctx(child_ctx->parent_ctx);
5178 } 5194 }
5179 5195
5180exit:
5181 mutex_unlock(&parent_ctx->mutex); 5196 mutex_unlock(&parent_ctx->mutex);
5182 5197
5183 perf_unpin_context(parent_ctx); 5198 perf_unpin_context(parent_ctx);
@@ -5292,11 +5307,11 @@ perf_set_reserve_percpu(struct sysdev_class *class,
5292 perf_reserved_percpu = val; 5307 perf_reserved_percpu = val;
5293 for_each_online_cpu(cpu) { 5308 for_each_online_cpu(cpu) {
5294 cpuctx = &per_cpu(perf_cpu_context, cpu); 5309 cpuctx = &per_cpu(perf_cpu_context, cpu);
5295 spin_lock_irq(&cpuctx->ctx.lock); 5310 raw_spin_lock_irq(&cpuctx->ctx.lock);
5296 mpt = min(perf_max_events - cpuctx->ctx.nr_events, 5311 mpt = min(perf_max_events - cpuctx->ctx.nr_events,
5297 perf_max_events - perf_reserved_percpu); 5312 perf_max_events - perf_reserved_percpu);
5298 cpuctx->max_pertask = mpt; 5313 cpuctx->max_pertask = mpt;
5299 spin_unlock_irq(&cpuctx->ctx.lock); 5314 raw_spin_unlock_irq(&cpuctx->ctx.lock);
5300 } 5315 }
5301 spin_unlock(&perf_resource_lock); 5316 spin_unlock(&perf_resource_lock);
5302 5317
diff --git a/kernel/pid.c b/kernel/pid.c
index d3f722d20f9c..2e17c9c92cbe 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -141,11 +141,12 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
141 * installing it: 141 * installing it:
142 */ 142 */
143 spin_lock_irq(&pidmap_lock); 143 spin_lock_irq(&pidmap_lock);
144 if (map->page) 144 if (!map->page) {
145 kfree(page);
146 else
147 map->page = page; 145 map->page = page;
146 page = NULL;
147 }
148 spin_unlock_irq(&pidmap_lock); 148 spin_unlock_irq(&pidmap_lock);
149 kfree(page);
149 if (unlikely(!map->page)) 150 if (unlikely(!map->page))
150 break; 151 break;
151 } 152 }
@@ -268,12 +269,11 @@ struct pid *alloc_pid(struct pid_namespace *ns)
268 for (type = 0; type < PIDTYPE_MAX; ++type) 269 for (type = 0; type < PIDTYPE_MAX; ++type)
269 INIT_HLIST_HEAD(&pid->tasks[type]); 270 INIT_HLIST_HEAD(&pid->tasks[type]);
270 271
272 upid = pid->numbers + ns->level;
271 spin_lock_irq(&pidmap_lock); 273 spin_lock_irq(&pidmap_lock);
272 for (i = ns->level; i >= 0; i--) { 274 for ( ; upid >= pid->numbers; --upid)
273 upid = &pid->numbers[i];
274 hlist_add_head_rcu(&upid->pid_chain, 275 hlist_add_head_rcu(&upid->pid_chain,
275 &pid_hash[pid_hashfn(upid->nr, upid->ns)]); 276 &pid_hash[pid_hashfn(upid->nr, upid->ns)]);
276 }
277 spin_unlock_irq(&pidmap_lock); 277 spin_unlock_irq(&pidmap_lock);
278 278
279out: 279out:
diff --git a/kernel/power/console.c b/kernel/power/console.c
index 5187136fe1de..218e5af90156 100644
--- a/kernel/power/console.c
+++ b/kernel/power/console.c
@@ -6,7 +6,7 @@
6 6
7#include <linux/vt_kern.h> 7#include <linux/vt_kern.h>
8#include <linux/kbd_kern.h> 8#include <linux/kbd_kern.h>
9#include <linux/console.h> 9#include <linux/vt.h>
10#include <linux/module.h> 10#include <linux/module.h>
11#include "power.h" 11#include "power.h"
12 12
@@ -21,8 +21,7 @@ int pm_prepare_console(void)
21 if (orig_fgconsole < 0) 21 if (orig_fgconsole < 0)
22 return 1; 22 return 1;
23 23
24 orig_kmsg = kmsg_redirect; 24 orig_kmsg = vt_kmsg_redirect(SUSPEND_CONSOLE);
25 kmsg_redirect = SUSPEND_CONSOLE;
26 return 0; 25 return 0;
27} 26}
28 27
@@ -30,7 +29,7 @@ void pm_restore_console(void)
30{ 29{
31 if (orig_fgconsole >= 0) { 30 if (orig_fgconsole >= 0) {
32 vt_move_to_console(orig_fgconsole, 0); 31 vt_move_to_console(orig_fgconsole, 0);
33 kmsg_redirect = orig_kmsg; 32 vt_kmsg_redirect(orig_kmsg);
34 } 33 }
35} 34}
36#endif 35#endif
diff --git a/kernel/printk.c b/kernel/printk.c
index b5ac4d99c667..17463ca2e229 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -34,6 +34,7 @@
34#include <linux/syscalls.h> 34#include <linux/syscalls.h>
35#include <linux/kexec.h> 35#include <linux/kexec.h>
36#include <linux/ratelimit.h> 36#include <linux/ratelimit.h>
37#include <linux/kmsg_dump.h>
37 38
38#include <asm/uaccess.h> 39#include <asm/uaccess.h>
39 40
@@ -1405,4 +1406,122 @@ bool printk_timed_ratelimit(unsigned long *caller_jiffies,
1405 return false; 1406 return false;
1406} 1407}
1407EXPORT_SYMBOL(printk_timed_ratelimit); 1408EXPORT_SYMBOL(printk_timed_ratelimit);
1409
1410static DEFINE_SPINLOCK(dump_list_lock);
1411static LIST_HEAD(dump_list);
1412
1413/**
1414 * kmsg_dump_register - register a kernel log dumper.
1415 * @dumper: pointer to the kmsg_dumper structure
1416 *
1417 * Adds a kernel log dumper to the system. The dump callback in the
1418 * structure will be called when the kernel oopses or panics and must be
1419 * set. Returns zero on success and %-EINVAL or %-EBUSY otherwise.
1420 */
1421int kmsg_dump_register(struct kmsg_dumper *dumper)
1422{
1423 unsigned long flags;
1424 int err = -EBUSY;
1425
1426 /* The dump callback needs to be set */
1427 if (!dumper->dump)
1428 return -EINVAL;
1429
1430 spin_lock_irqsave(&dump_list_lock, flags);
1431 /* Don't allow registering multiple times */
1432 if (!dumper->registered) {
1433 dumper->registered = 1;
1434 list_add_tail(&dumper->list, &dump_list);
1435 err = 0;
1436 }
1437 spin_unlock_irqrestore(&dump_list_lock, flags);
1438
1439 return err;
1440}
1441EXPORT_SYMBOL_GPL(kmsg_dump_register);
1442
1443/**
1444 * kmsg_dump_unregister - unregister a kmsg dumper.
1445 * @dumper: pointer to the kmsg_dumper structure
1446 *
1447 * Removes a dump device from the system. Returns zero on success and
1448 * %-EINVAL otherwise.
1449 */
1450int kmsg_dump_unregister(struct kmsg_dumper *dumper)
1451{
1452 unsigned long flags;
1453 int err = -EINVAL;
1454
1455 spin_lock_irqsave(&dump_list_lock, flags);
1456 if (dumper->registered) {
1457 dumper->registered = 0;
1458 list_del(&dumper->list);
1459 err = 0;
1460 }
1461 spin_unlock_irqrestore(&dump_list_lock, flags);
1462
1463 return err;
1464}
1465EXPORT_SYMBOL_GPL(kmsg_dump_unregister);
1466
1467static const char const *kmsg_reasons[] = {
1468 [KMSG_DUMP_OOPS] = "oops",
1469 [KMSG_DUMP_PANIC] = "panic",
1470};
1471
1472static const char *kmsg_to_str(enum kmsg_dump_reason reason)
1473{
1474 if (reason >= ARRAY_SIZE(kmsg_reasons) || reason < 0)
1475 return "unknown";
1476
1477 return kmsg_reasons[reason];
1478}
1479
1480/**
1481 * kmsg_dump - dump kernel log to kernel message dumpers.
1482 * @reason: the reason (oops, panic etc) for dumping
1483 *
1484 * Iterate through each of the dump devices and call the oops/panic
1485 * callbacks with the log buffer.
1486 */
1487void kmsg_dump(enum kmsg_dump_reason reason)
1488{
1489 unsigned long end;
1490 unsigned chars;
1491 struct kmsg_dumper *dumper;
1492 const char *s1, *s2;
1493 unsigned long l1, l2;
1494 unsigned long flags;
1495
1496 /* Theoretically, the log could move on after we do this, but
1497 there's not a lot we can do about that. The new messages
1498 will overwrite the start of what we dump. */
1499 spin_lock_irqsave(&logbuf_lock, flags);
1500 end = log_end & LOG_BUF_MASK;
1501 chars = logged_chars;
1502 spin_unlock_irqrestore(&logbuf_lock, flags);
1503
1504 if (logged_chars > end) {
1505 s1 = log_buf + log_buf_len - logged_chars + end;
1506 l1 = logged_chars - end;
1507
1508 s2 = log_buf;
1509 l2 = end;
1510 } else {
1511 s1 = "";
1512 l1 = 0;
1513
1514 s2 = log_buf + end - logged_chars;
1515 l2 = logged_chars;
1516 }
1517
1518 if (!spin_trylock_irqsave(&dump_list_lock, flags)) {
1519 printk(KERN_ERR "dump_kmsg: dump list lock is held during %s, skipping dump\n",
1520 kmsg_to_str(reason));
1521 return;
1522 }
1523 list_for_each_entry(dumper, &dump_list, list)
1524 dumper->dump(dumper, reason, s1, l1, s2, l2);
1525 spin_unlock_irqrestore(&dump_list_lock, flags);
1526}
1408#endif 1527#endif
diff --git a/kernel/relay.c b/kernel/relay.c
index 760c26209a3c..c705a41b4ba3 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -1198,7 +1198,7 @@ static void relay_pipe_buf_release(struct pipe_inode_info *pipe,
1198 relay_consume_bytes(rbuf, buf->private); 1198 relay_consume_bytes(rbuf, buf->private);
1199} 1199}
1200 1200
1201static struct pipe_buf_operations relay_pipe_buf_ops = { 1201static const struct pipe_buf_operations relay_pipe_buf_ops = {
1202 .can_merge = 0, 1202 .can_merge = 0,
1203 .map = generic_pipe_buf_map, 1203 .map = generic_pipe_buf_map,
1204 .unmap = generic_pipe_buf_unmap, 1204 .unmap = generic_pipe_buf_unmap,
diff --git a/kernel/resource.c b/kernel/resource.c
index dc15686b7a77..af96c1e4b54b 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -308,37 +308,37 @@ static int find_resource(struct resource *root, struct resource *new,
308 void *alignf_data) 308 void *alignf_data)
309{ 309{
310 struct resource *this = root->child; 310 struct resource *this = root->child;
311 resource_size_t start, end; 311 struct resource tmp = *new;
312 312
313 start = root->start; 313 tmp.start = root->start;
314 /* 314 /*
315 * Skip past an allocated resource that starts at 0, since the assignment 315 * Skip past an allocated resource that starts at 0, since the assignment
316 * of this->start - 1 to new->end below would cause an underflow. 316 * of this->start - 1 to tmp->end below would cause an underflow.
317 */ 317 */
318 if (this && this->start == 0) { 318 if (this && this->start == 0) {
319 start = this->end + 1; 319 tmp.start = this->end + 1;
320 this = this->sibling; 320 this = this->sibling;
321 } 321 }
322 for(;;) { 322 for(;;) {
323 if (this) 323 if (this)
324 end = this->start - 1; 324 tmp.end = this->start - 1;
325 else 325 else
326 end = root->end; 326 tmp.end = root->end;
327 if (start < min) 327 if (tmp.start < min)
328 start = min; 328 tmp.start = min;
329 if (end > max) 329 if (tmp.end > max)
330 end = max; 330 tmp.end = max;
331 start = ALIGN(start, align); 331 tmp.start = ALIGN(tmp.start, align);
332 if (alignf) 332 if (alignf)
333 alignf(alignf_data, new, size, align); 333 alignf(alignf_data, &tmp, size, align);
334 if (start < end && end - start >= size - 1) { 334 if (tmp.start < tmp.end && tmp.end - tmp.start >= size - 1) {
335 new->start = start; 335 new->start = tmp.start;
336 new->end = start + size - 1; 336 new->end = tmp.start + size - 1;
337 return 0; 337 return 0;
338 } 338 }
339 if (!this) 339 if (!this)
340 break; 340 break;
341 start = this->end + 1; 341 tmp.start = this->end + 1;
342 this = this->sibling; 342 this = this->sibling;
343 } 343 }
344 return -EBUSY; 344 return -EBUSY;
diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c
index 5fcb4fe645e2..ddabb54bb5c8 100644
--- a/kernel/rtmutex-debug.c
+++ b/kernel/rtmutex-debug.c
@@ -37,8 +37,8 @@ do { \
37 if (rt_trace_on) { \ 37 if (rt_trace_on) { \
38 rt_trace_on = 0; \ 38 rt_trace_on = 0; \
39 console_verbose(); \ 39 console_verbose(); \
40 if (spin_is_locked(&current->pi_lock)) \ 40 if (raw_spin_is_locked(&current->pi_lock)) \
41 spin_unlock(&current->pi_lock); \ 41 raw_spin_unlock(&current->pi_lock); \
42 } \ 42 } \
43} while (0) 43} while (0)
44 44
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 29bd4baf9e75..a9604815786a 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -138,9 +138,9 @@ static void rt_mutex_adjust_prio(struct task_struct *task)
138{ 138{
139 unsigned long flags; 139 unsigned long flags;
140 140
141 spin_lock_irqsave(&task->pi_lock, flags); 141 raw_spin_lock_irqsave(&task->pi_lock, flags);
142 __rt_mutex_adjust_prio(task); 142 __rt_mutex_adjust_prio(task);
143 spin_unlock_irqrestore(&task->pi_lock, flags); 143 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
144} 144}
145 145
146/* 146/*
@@ -195,7 +195,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
195 /* 195 /*
196 * Task can not go away as we did a get_task() before ! 196 * Task can not go away as we did a get_task() before !
197 */ 197 */
198 spin_lock_irqsave(&task->pi_lock, flags); 198 raw_spin_lock_irqsave(&task->pi_lock, flags);
199 199
200 waiter = task->pi_blocked_on; 200 waiter = task->pi_blocked_on;
201 /* 201 /*
@@ -231,8 +231,8 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
231 goto out_unlock_pi; 231 goto out_unlock_pi;
232 232
233 lock = waiter->lock; 233 lock = waiter->lock;
234 if (!spin_trylock(&lock->wait_lock)) { 234 if (!raw_spin_trylock(&lock->wait_lock)) {
235 spin_unlock_irqrestore(&task->pi_lock, flags); 235 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
236 cpu_relax(); 236 cpu_relax();
237 goto retry; 237 goto retry;
238 } 238 }
@@ -240,7 +240,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
240 /* Deadlock detection */ 240 /* Deadlock detection */
241 if (lock == orig_lock || rt_mutex_owner(lock) == top_task) { 241 if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
242 debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock); 242 debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock);
243 spin_unlock(&lock->wait_lock); 243 raw_spin_unlock(&lock->wait_lock);
244 ret = deadlock_detect ? -EDEADLK : 0; 244 ret = deadlock_detect ? -EDEADLK : 0;
245 goto out_unlock_pi; 245 goto out_unlock_pi;
246 } 246 }
@@ -253,13 +253,13 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
253 plist_add(&waiter->list_entry, &lock->wait_list); 253 plist_add(&waiter->list_entry, &lock->wait_list);
254 254
255 /* Release the task */ 255 /* Release the task */
256 spin_unlock_irqrestore(&task->pi_lock, flags); 256 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
257 put_task_struct(task); 257 put_task_struct(task);
258 258
259 /* Grab the next task */ 259 /* Grab the next task */
260 task = rt_mutex_owner(lock); 260 task = rt_mutex_owner(lock);
261 get_task_struct(task); 261 get_task_struct(task);
262 spin_lock_irqsave(&task->pi_lock, flags); 262 raw_spin_lock_irqsave(&task->pi_lock, flags);
263 263
264 if (waiter == rt_mutex_top_waiter(lock)) { 264 if (waiter == rt_mutex_top_waiter(lock)) {
265 /* Boost the owner */ 265 /* Boost the owner */
@@ -277,10 +277,10 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
277 __rt_mutex_adjust_prio(task); 277 __rt_mutex_adjust_prio(task);
278 } 278 }
279 279
280 spin_unlock_irqrestore(&task->pi_lock, flags); 280 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
281 281
282 top_waiter = rt_mutex_top_waiter(lock); 282 top_waiter = rt_mutex_top_waiter(lock);
283 spin_unlock(&lock->wait_lock); 283 raw_spin_unlock(&lock->wait_lock);
284 284
285 if (!detect_deadlock && waiter != top_waiter) 285 if (!detect_deadlock && waiter != top_waiter)
286 goto out_put_task; 286 goto out_put_task;
@@ -288,7 +288,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
288 goto again; 288 goto again;
289 289
290 out_unlock_pi: 290 out_unlock_pi:
291 spin_unlock_irqrestore(&task->pi_lock, flags); 291 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
292 out_put_task: 292 out_put_task:
293 put_task_struct(task); 293 put_task_struct(task);
294 294
@@ -313,9 +313,9 @@ static inline int try_to_steal_lock(struct rt_mutex *lock,
313 if (pendowner == task) 313 if (pendowner == task)
314 return 1; 314 return 1;
315 315
316 spin_lock_irqsave(&pendowner->pi_lock, flags); 316 raw_spin_lock_irqsave(&pendowner->pi_lock, flags);
317 if (task->prio >= pendowner->prio) { 317 if (task->prio >= pendowner->prio) {
318 spin_unlock_irqrestore(&pendowner->pi_lock, flags); 318 raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
319 return 0; 319 return 0;
320 } 320 }
321 321
@@ -325,7 +325,7 @@ static inline int try_to_steal_lock(struct rt_mutex *lock,
325 * priority. 325 * priority.
326 */ 326 */
327 if (likely(!rt_mutex_has_waiters(lock))) { 327 if (likely(!rt_mutex_has_waiters(lock))) {
328 spin_unlock_irqrestore(&pendowner->pi_lock, flags); 328 raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
329 return 1; 329 return 1;
330 } 330 }
331 331
@@ -333,7 +333,7 @@ static inline int try_to_steal_lock(struct rt_mutex *lock,
333 next = rt_mutex_top_waiter(lock); 333 next = rt_mutex_top_waiter(lock);
334 plist_del(&next->pi_list_entry, &pendowner->pi_waiters); 334 plist_del(&next->pi_list_entry, &pendowner->pi_waiters);
335 __rt_mutex_adjust_prio(pendowner); 335 __rt_mutex_adjust_prio(pendowner);
336 spin_unlock_irqrestore(&pendowner->pi_lock, flags); 336 raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
337 337
338 /* 338 /*
339 * We are going to steal the lock and a waiter was 339 * We are going to steal the lock and a waiter was
@@ -350,10 +350,10 @@ static inline int try_to_steal_lock(struct rt_mutex *lock,
350 * might be task: 350 * might be task:
351 */ 351 */
352 if (likely(next->task != task)) { 352 if (likely(next->task != task)) {
353 spin_lock_irqsave(&task->pi_lock, flags); 353 raw_spin_lock_irqsave(&task->pi_lock, flags);
354 plist_add(&next->pi_list_entry, &task->pi_waiters); 354 plist_add(&next->pi_list_entry, &task->pi_waiters);
355 __rt_mutex_adjust_prio(task); 355 __rt_mutex_adjust_prio(task);
356 spin_unlock_irqrestore(&task->pi_lock, flags); 356 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
357 } 357 }
358 return 1; 358 return 1;
359} 359}
@@ -420,7 +420,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
420 unsigned long flags; 420 unsigned long flags;
421 int chain_walk = 0, res; 421 int chain_walk = 0, res;
422 422
423 spin_lock_irqsave(&task->pi_lock, flags); 423 raw_spin_lock_irqsave(&task->pi_lock, flags);
424 __rt_mutex_adjust_prio(task); 424 __rt_mutex_adjust_prio(task);
425 waiter->task = task; 425 waiter->task = task;
426 waiter->lock = lock; 426 waiter->lock = lock;
@@ -434,17 +434,17 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
434 434
435 task->pi_blocked_on = waiter; 435 task->pi_blocked_on = waiter;
436 436
437 spin_unlock_irqrestore(&task->pi_lock, flags); 437 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
438 438
439 if (waiter == rt_mutex_top_waiter(lock)) { 439 if (waiter == rt_mutex_top_waiter(lock)) {
440 spin_lock_irqsave(&owner->pi_lock, flags); 440 raw_spin_lock_irqsave(&owner->pi_lock, flags);
441 plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters); 441 plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters);
442 plist_add(&waiter->pi_list_entry, &owner->pi_waiters); 442 plist_add(&waiter->pi_list_entry, &owner->pi_waiters);
443 443
444 __rt_mutex_adjust_prio(owner); 444 __rt_mutex_adjust_prio(owner);
445 if (owner->pi_blocked_on) 445 if (owner->pi_blocked_on)
446 chain_walk = 1; 446 chain_walk = 1;
447 spin_unlock_irqrestore(&owner->pi_lock, flags); 447 raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
448 } 448 }
449 else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock)) 449 else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock))
450 chain_walk = 1; 450 chain_walk = 1;
@@ -459,12 +459,12 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
459 */ 459 */
460 get_task_struct(owner); 460 get_task_struct(owner);
461 461
462 spin_unlock(&lock->wait_lock); 462 raw_spin_unlock(&lock->wait_lock);
463 463
464 res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter, 464 res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter,
465 task); 465 task);
466 466
467 spin_lock(&lock->wait_lock); 467 raw_spin_lock(&lock->wait_lock);
468 468
469 return res; 469 return res;
470} 470}
@@ -483,7 +483,7 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
483 struct task_struct *pendowner; 483 struct task_struct *pendowner;
484 unsigned long flags; 484 unsigned long flags;
485 485
486 spin_lock_irqsave(&current->pi_lock, flags); 486 raw_spin_lock_irqsave(&current->pi_lock, flags);
487 487
488 waiter = rt_mutex_top_waiter(lock); 488 waiter = rt_mutex_top_waiter(lock);
489 plist_del(&waiter->list_entry, &lock->wait_list); 489 plist_del(&waiter->list_entry, &lock->wait_list);
@@ -500,7 +500,7 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
500 500
501 rt_mutex_set_owner(lock, pendowner, RT_MUTEX_OWNER_PENDING); 501 rt_mutex_set_owner(lock, pendowner, RT_MUTEX_OWNER_PENDING);
502 502
503 spin_unlock_irqrestore(&current->pi_lock, flags); 503 raw_spin_unlock_irqrestore(&current->pi_lock, flags);
504 504
505 /* 505 /*
506 * Clear the pi_blocked_on variable and enqueue a possible 506 * Clear the pi_blocked_on variable and enqueue a possible
@@ -509,7 +509,7 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
509 * waiter with higher priority than pending-owner->normal_prio 509 * waiter with higher priority than pending-owner->normal_prio
510 * is blocked on the unboosted (pending) owner. 510 * is blocked on the unboosted (pending) owner.
511 */ 511 */
512 spin_lock_irqsave(&pendowner->pi_lock, flags); 512 raw_spin_lock_irqsave(&pendowner->pi_lock, flags);
513 513
514 WARN_ON(!pendowner->pi_blocked_on); 514 WARN_ON(!pendowner->pi_blocked_on);
515 WARN_ON(pendowner->pi_blocked_on != waiter); 515 WARN_ON(pendowner->pi_blocked_on != waiter);
@@ -523,7 +523,7 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
523 next = rt_mutex_top_waiter(lock); 523 next = rt_mutex_top_waiter(lock);
524 plist_add(&next->pi_list_entry, &pendowner->pi_waiters); 524 plist_add(&next->pi_list_entry, &pendowner->pi_waiters);
525 } 525 }
526 spin_unlock_irqrestore(&pendowner->pi_lock, flags); 526 raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
527 527
528 wake_up_process(pendowner); 528 wake_up_process(pendowner);
529} 529}
@@ -541,15 +541,15 @@ static void remove_waiter(struct rt_mutex *lock,
541 unsigned long flags; 541 unsigned long flags;
542 int chain_walk = 0; 542 int chain_walk = 0;
543 543
544 spin_lock_irqsave(&current->pi_lock, flags); 544 raw_spin_lock_irqsave(&current->pi_lock, flags);
545 plist_del(&waiter->list_entry, &lock->wait_list); 545 plist_del(&waiter->list_entry, &lock->wait_list);
546 waiter->task = NULL; 546 waiter->task = NULL;
547 current->pi_blocked_on = NULL; 547 current->pi_blocked_on = NULL;
548 spin_unlock_irqrestore(&current->pi_lock, flags); 548 raw_spin_unlock_irqrestore(&current->pi_lock, flags);
549 549
550 if (first && owner != current) { 550 if (first && owner != current) {
551 551
552 spin_lock_irqsave(&owner->pi_lock, flags); 552 raw_spin_lock_irqsave(&owner->pi_lock, flags);
553 553
554 plist_del(&waiter->pi_list_entry, &owner->pi_waiters); 554 plist_del(&waiter->pi_list_entry, &owner->pi_waiters);
555 555
@@ -564,7 +564,7 @@ static void remove_waiter(struct rt_mutex *lock,
564 if (owner->pi_blocked_on) 564 if (owner->pi_blocked_on)
565 chain_walk = 1; 565 chain_walk = 1;
566 566
567 spin_unlock_irqrestore(&owner->pi_lock, flags); 567 raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
568 } 568 }
569 569
570 WARN_ON(!plist_node_empty(&waiter->pi_list_entry)); 570 WARN_ON(!plist_node_empty(&waiter->pi_list_entry));
@@ -575,11 +575,11 @@ static void remove_waiter(struct rt_mutex *lock,
575 /* gets dropped in rt_mutex_adjust_prio_chain()! */ 575 /* gets dropped in rt_mutex_adjust_prio_chain()! */
576 get_task_struct(owner); 576 get_task_struct(owner);
577 577
578 spin_unlock(&lock->wait_lock); 578 raw_spin_unlock(&lock->wait_lock);
579 579
580 rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current); 580 rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current);
581 581
582 spin_lock(&lock->wait_lock); 582 raw_spin_lock(&lock->wait_lock);
583} 583}
584 584
585/* 585/*
@@ -592,15 +592,15 @@ void rt_mutex_adjust_pi(struct task_struct *task)
592 struct rt_mutex_waiter *waiter; 592 struct rt_mutex_waiter *waiter;
593 unsigned long flags; 593 unsigned long flags;
594 594
595 spin_lock_irqsave(&task->pi_lock, flags); 595 raw_spin_lock_irqsave(&task->pi_lock, flags);
596 596
597 waiter = task->pi_blocked_on; 597 waiter = task->pi_blocked_on;
598 if (!waiter || waiter->list_entry.prio == task->prio) { 598 if (!waiter || waiter->list_entry.prio == task->prio) {
599 spin_unlock_irqrestore(&task->pi_lock, flags); 599 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
600 return; 600 return;
601 } 601 }
602 602
603 spin_unlock_irqrestore(&task->pi_lock, flags); 603 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
604 604
605 /* gets dropped in rt_mutex_adjust_prio_chain()! */ 605 /* gets dropped in rt_mutex_adjust_prio_chain()! */
606 get_task_struct(task); 606 get_task_struct(task);
@@ -672,14 +672,14 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
672 break; 672 break;
673 } 673 }
674 674
675 spin_unlock(&lock->wait_lock); 675 raw_spin_unlock(&lock->wait_lock);
676 676
677 debug_rt_mutex_print_deadlock(waiter); 677 debug_rt_mutex_print_deadlock(waiter);
678 678
679 if (waiter->task) 679 if (waiter->task)
680 schedule_rt_mutex(lock); 680 schedule_rt_mutex(lock);
681 681
682 spin_lock(&lock->wait_lock); 682 raw_spin_lock(&lock->wait_lock);
683 set_current_state(state); 683 set_current_state(state);
684 } 684 }
685 685
@@ -700,11 +700,11 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
700 debug_rt_mutex_init_waiter(&waiter); 700 debug_rt_mutex_init_waiter(&waiter);
701 waiter.task = NULL; 701 waiter.task = NULL;
702 702
703 spin_lock(&lock->wait_lock); 703 raw_spin_lock(&lock->wait_lock);
704 704
705 /* Try to acquire the lock again: */ 705 /* Try to acquire the lock again: */
706 if (try_to_take_rt_mutex(lock)) { 706 if (try_to_take_rt_mutex(lock)) {
707 spin_unlock(&lock->wait_lock); 707 raw_spin_unlock(&lock->wait_lock);
708 return 0; 708 return 0;
709 } 709 }
710 710
@@ -731,7 +731,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
731 */ 731 */
732 fixup_rt_mutex_waiters(lock); 732 fixup_rt_mutex_waiters(lock);
733 733
734 spin_unlock(&lock->wait_lock); 734 raw_spin_unlock(&lock->wait_lock);
735 735
736 /* Remove pending timer: */ 736 /* Remove pending timer: */
737 if (unlikely(timeout)) 737 if (unlikely(timeout))
@@ -758,7 +758,7 @@ rt_mutex_slowtrylock(struct rt_mutex *lock)
758{ 758{
759 int ret = 0; 759 int ret = 0;
760 760
761 spin_lock(&lock->wait_lock); 761 raw_spin_lock(&lock->wait_lock);
762 762
763 if (likely(rt_mutex_owner(lock) != current)) { 763 if (likely(rt_mutex_owner(lock) != current)) {
764 764
@@ -770,7 +770,7 @@ rt_mutex_slowtrylock(struct rt_mutex *lock)
770 fixup_rt_mutex_waiters(lock); 770 fixup_rt_mutex_waiters(lock);
771 } 771 }
772 772
773 spin_unlock(&lock->wait_lock); 773 raw_spin_unlock(&lock->wait_lock);
774 774
775 return ret; 775 return ret;
776} 776}
@@ -781,7 +781,7 @@ rt_mutex_slowtrylock(struct rt_mutex *lock)
781static void __sched 781static void __sched
782rt_mutex_slowunlock(struct rt_mutex *lock) 782rt_mutex_slowunlock(struct rt_mutex *lock)
783{ 783{
784 spin_lock(&lock->wait_lock); 784 raw_spin_lock(&lock->wait_lock);
785 785
786 debug_rt_mutex_unlock(lock); 786 debug_rt_mutex_unlock(lock);
787 787
@@ -789,13 +789,13 @@ rt_mutex_slowunlock(struct rt_mutex *lock)
789 789
790 if (!rt_mutex_has_waiters(lock)) { 790 if (!rt_mutex_has_waiters(lock)) {
791 lock->owner = NULL; 791 lock->owner = NULL;
792 spin_unlock(&lock->wait_lock); 792 raw_spin_unlock(&lock->wait_lock);
793 return; 793 return;
794 } 794 }
795 795
796 wakeup_next_waiter(lock); 796 wakeup_next_waiter(lock);
797 797
798 spin_unlock(&lock->wait_lock); 798 raw_spin_unlock(&lock->wait_lock);
799 799
800 /* Undo pi boosting if necessary: */ 800 /* Undo pi boosting if necessary: */
801 rt_mutex_adjust_prio(current); 801 rt_mutex_adjust_prio(current);
@@ -970,8 +970,8 @@ EXPORT_SYMBOL_GPL(rt_mutex_destroy);
970void __rt_mutex_init(struct rt_mutex *lock, const char *name) 970void __rt_mutex_init(struct rt_mutex *lock, const char *name)
971{ 971{
972 lock->owner = NULL; 972 lock->owner = NULL;
973 spin_lock_init(&lock->wait_lock); 973 raw_spin_lock_init(&lock->wait_lock);
974 plist_head_init(&lock->wait_list, &lock->wait_lock); 974 plist_head_init_raw(&lock->wait_list, &lock->wait_lock);
975 975
976 debug_rt_mutex_init(lock, name); 976 debug_rt_mutex_init(lock, name);
977} 977}
@@ -1032,7 +1032,7 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1032{ 1032{
1033 int ret; 1033 int ret;
1034 1034
1035 spin_lock(&lock->wait_lock); 1035 raw_spin_lock(&lock->wait_lock);
1036 1036
1037 mark_rt_mutex_waiters(lock); 1037 mark_rt_mutex_waiters(lock);
1038 1038
@@ -1040,7 +1040,7 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1040 /* We got the lock for task. */ 1040 /* We got the lock for task. */
1041 debug_rt_mutex_lock(lock); 1041 debug_rt_mutex_lock(lock);
1042 rt_mutex_set_owner(lock, task, 0); 1042 rt_mutex_set_owner(lock, task, 0);
1043 spin_unlock(&lock->wait_lock); 1043 raw_spin_unlock(&lock->wait_lock);
1044 rt_mutex_deadlock_account_lock(lock, task); 1044 rt_mutex_deadlock_account_lock(lock, task);
1045 return 1; 1045 return 1;
1046 } 1046 }
@@ -1056,7 +1056,7 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1056 */ 1056 */
1057 ret = 0; 1057 ret = 0;
1058 } 1058 }
1059 spin_unlock(&lock->wait_lock); 1059 raw_spin_unlock(&lock->wait_lock);
1060 1060
1061 debug_rt_mutex_print_deadlock(waiter); 1061 debug_rt_mutex_print_deadlock(waiter);
1062 1062
@@ -1106,7 +1106,7 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
1106{ 1106{
1107 int ret; 1107 int ret;
1108 1108
1109 spin_lock(&lock->wait_lock); 1109 raw_spin_lock(&lock->wait_lock);
1110 1110
1111 set_current_state(TASK_INTERRUPTIBLE); 1111 set_current_state(TASK_INTERRUPTIBLE);
1112 1112
@@ -1124,7 +1124,7 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
1124 */ 1124 */
1125 fixup_rt_mutex_waiters(lock); 1125 fixup_rt_mutex_waiters(lock);
1126 1126
1127 spin_unlock(&lock->wait_lock); 1127 raw_spin_unlock(&lock->wait_lock);
1128 1128
1129 /* 1129 /*
1130 * Readjust priority, when we did not get the lock. We might have been 1130 * Readjust priority, when we did not get the lock. We might have been
diff --git a/kernel/sched.c b/kernel/sched.c
index fd05861b2111..4508fe7048be 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -141,7 +141,7 @@ struct rt_prio_array {
141 141
142struct rt_bandwidth { 142struct rt_bandwidth {
143 /* nests inside the rq lock: */ 143 /* nests inside the rq lock: */
144 spinlock_t rt_runtime_lock; 144 raw_spinlock_t rt_runtime_lock;
145 ktime_t rt_period; 145 ktime_t rt_period;
146 u64 rt_runtime; 146 u64 rt_runtime;
147 struct hrtimer rt_period_timer; 147 struct hrtimer rt_period_timer;
@@ -178,7 +178,7 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
178 rt_b->rt_period = ns_to_ktime(period); 178 rt_b->rt_period = ns_to_ktime(period);
179 rt_b->rt_runtime = runtime; 179 rt_b->rt_runtime = runtime;
180 180
181 spin_lock_init(&rt_b->rt_runtime_lock); 181 raw_spin_lock_init(&rt_b->rt_runtime_lock);
182 182
183 hrtimer_init(&rt_b->rt_period_timer, 183 hrtimer_init(&rt_b->rt_period_timer,
184 CLOCK_MONOTONIC, HRTIMER_MODE_REL); 184 CLOCK_MONOTONIC, HRTIMER_MODE_REL);
@@ -200,7 +200,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
200 if (hrtimer_active(&rt_b->rt_period_timer)) 200 if (hrtimer_active(&rt_b->rt_period_timer))
201 return; 201 return;
202 202
203 spin_lock(&rt_b->rt_runtime_lock); 203 raw_spin_lock(&rt_b->rt_runtime_lock);
204 for (;;) { 204 for (;;) {
205 unsigned long delta; 205 unsigned long delta;
206 ktime_t soft, hard; 206 ktime_t soft, hard;
@@ -217,7 +217,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
217 __hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta, 217 __hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta,
218 HRTIMER_MODE_ABS_PINNED, 0); 218 HRTIMER_MODE_ABS_PINNED, 0);
219 } 219 }
220 spin_unlock(&rt_b->rt_runtime_lock); 220 raw_spin_unlock(&rt_b->rt_runtime_lock);
221} 221}
222 222
223#ifdef CONFIG_RT_GROUP_SCHED 223#ifdef CONFIG_RT_GROUP_SCHED
@@ -470,7 +470,7 @@ struct rt_rq {
470 u64 rt_time; 470 u64 rt_time;
471 u64 rt_runtime; 471 u64 rt_runtime;
472 /* Nests inside the rq lock: */ 472 /* Nests inside the rq lock: */
473 spinlock_t rt_runtime_lock; 473 raw_spinlock_t rt_runtime_lock;
474 474
475#ifdef CONFIG_RT_GROUP_SCHED 475#ifdef CONFIG_RT_GROUP_SCHED
476 unsigned long rt_nr_boosted; 476 unsigned long rt_nr_boosted;
@@ -525,7 +525,7 @@ static struct root_domain def_root_domain;
525 */ 525 */
526struct rq { 526struct rq {
527 /* runqueue lock: */ 527 /* runqueue lock: */
528 spinlock_t lock; 528 raw_spinlock_t lock;
529 529
530 /* 530 /*
531 * nr_running and cpu_load should be in the same cacheline because 531 * nr_running and cpu_load should be in the same cacheline because
@@ -685,7 +685,7 @@ inline void update_rq_clock(struct rq *rq)
685 */ 685 */
686int runqueue_is_locked(int cpu) 686int runqueue_is_locked(int cpu)
687{ 687{
688 return spin_is_locked(&cpu_rq(cpu)->lock); 688 return raw_spin_is_locked(&cpu_rq(cpu)->lock);
689} 689}
690 690
691/* 691/*
@@ -893,7 +893,7 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
893 */ 893 */
894 spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); 894 spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
895 895
896 spin_unlock_irq(&rq->lock); 896 raw_spin_unlock_irq(&rq->lock);
897} 897}
898 898
899#else /* __ARCH_WANT_UNLOCKED_CTXSW */ 899#else /* __ARCH_WANT_UNLOCKED_CTXSW */
@@ -917,9 +917,9 @@ static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
917 next->oncpu = 1; 917 next->oncpu = 1;
918#endif 918#endif
919#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW 919#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
920 spin_unlock_irq(&rq->lock); 920 raw_spin_unlock_irq(&rq->lock);
921#else 921#else
922 spin_unlock(&rq->lock); 922 raw_spin_unlock(&rq->lock);
923#endif 923#endif
924} 924}
925 925
@@ -949,10 +949,10 @@ static inline struct rq *__task_rq_lock(struct task_struct *p)
949{ 949{
950 for (;;) { 950 for (;;) {
951 struct rq *rq = task_rq(p); 951 struct rq *rq = task_rq(p);
952 spin_lock(&rq->lock); 952 raw_spin_lock(&rq->lock);
953 if (likely(rq == task_rq(p))) 953 if (likely(rq == task_rq(p)))
954 return rq; 954 return rq;
955 spin_unlock(&rq->lock); 955 raw_spin_unlock(&rq->lock);
956 } 956 }
957} 957}
958 958
@@ -969,10 +969,10 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
969 for (;;) { 969 for (;;) {
970 local_irq_save(*flags); 970 local_irq_save(*flags);
971 rq = task_rq(p); 971 rq = task_rq(p);
972 spin_lock(&rq->lock); 972 raw_spin_lock(&rq->lock);
973 if (likely(rq == task_rq(p))) 973 if (likely(rq == task_rq(p)))
974 return rq; 974 return rq;
975 spin_unlock_irqrestore(&rq->lock, *flags); 975 raw_spin_unlock_irqrestore(&rq->lock, *flags);
976 } 976 }
977} 977}
978 978
@@ -981,19 +981,19 @@ void task_rq_unlock_wait(struct task_struct *p)
981 struct rq *rq = task_rq(p); 981 struct rq *rq = task_rq(p);
982 982
983 smp_mb(); /* spin-unlock-wait is not a full memory barrier */ 983 smp_mb(); /* spin-unlock-wait is not a full memory barrier */
984 spin_unlock_wait(&rq->lock); 984 raw_spin_unlock_wait(&rq->lock);
985} 985}
986 986
987static void __task_rq_unlock(struct rq *rq) 987static void __task_rq_unlock(struct rq *rq)
988 __releases(rq->lock) 988 __releases(rq->lock)
989{ 989{
990 spin_unlock(&rq->lock); 990 raw_spin_unlock(&rq->lock);
991} 991}
992 992
993static inline void task_rq_unlock(struct rq *rq, unsigned long *flags) 993static inline void task_rq_unlock(struct rq *rq, unsigned long *flags)
994 __releases(rq->lock) 994 __releases(rq->lock)
995{ 995{
996 spin_unlock_irqrestore(&rq->lock, *flags); 996 raw_spin_unlock_irqrestore(&rq->lock, *flags);
997} 997}
998 998
999/* 999/*
@@ -1006,7 +1006,7 @@ static struct rq *this_rq_lock(void)
1006 1006
1007 local_irq_disable(); 1007 local_irq_disable();
1008 rq = this_rq(); 1008 rq = this_rq();
1009 spin_lock(&rq->lock); 1009 raw_spin_lock(&rq->lock);
1010 1010
1011 return rq; 1011 return rq;
1012} 1012}
@@ -1053,10 +1053,10 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer)
1053 1053
1054 WARN_ON_ONCE(cpu_of(rq) != smp_processor_id()); 1054 WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
1055 1055
1056 spin_lock(&rq->lock); 1056 raw_spin_lock(&rq->lock);
1057 update_rq_clock(rq); 1057 update_rq_clock(rq);
1058 rq->curr->sched_class->task_tick(rq, rq->curr, 1); 1058 rq->curr->sched_class->task_tick(rq, rq->curr, 1);
1059 spin_unlock(&rq->lock); 1059 raw_spin_unlock(&rq->lock);
1060 1060
1061 return HRTIMER_NORESTART; 1061 return HRTIMER_NORESTART;
1062} 1062}
@@ -1069,10 +1069,10 @@ static void __hrtick_start(void *arg)
1069{ 1069{
1070 struct rq *rq = arg; 1070 struct rq *rq = arg;
1071 1071
1072 spin_lock(&rq->lock); 1072 raw_spin_lock(&rq->lock);
1073 hrtimer_restart(&rq->hrtick_timer); 1073 hrtimer_restart(&rq->hrtick_timer);
1074 rq->hrtick_csd_pending = 0; 1074 rq->hrtick_csd_pending = 0;
1075 spin_unlock(&rq->lock); 1075 raw_spin_unlock(&rq->lock);
1076} 1076}
1077 1077
1078/* 1078/*
@@ -1179,7 +1179,7 @@ static void resched_task(struct task_struct *p)
1179{ 1179{
1180 int cpu; 1180 int cpu;
1181 1181
1182 assert_spin_locked(&task_rq(p)->lock); 1182 assert_raw_spin_locked(&task_rq(p)->lock);
1183 1183
1184 if (test_tsk_need_resched(p)) 1184 if (test_tsk_need_resched(p))
1185 return; 1185 return;
@@ -1201,10 +1201,10 @@ static void resched_cpu(int cpu)
1201 struct rq *rq = cpu_rq(cpu); 1201 struct rq *rq = cpu_rq(cpu);
1202 unsigned long flags; 1202 unsigned long flags;
1203 1203
1204 if (!spin_trylock_irqsave(&rq->lock, flags)) 1204 if (!raw_spin_trylock_irqsave(&rq->lock, flags))
1205 return; 1205 return;
1206 resched_task(cpu_curr(cpu)); 1206 resched_task(cpu_curr(cpu));
1207 spin_unlock_irqrestore(&rq->lock, flags); 1207 raw_spin_unlock_irqrestore(&rq->lock, flags);
1208} 1208}
1209 1209
1210#ifdef CONFIG_NO_HZ 1210#ifdef CONFIG_NO_HZ
@@ -1273,7 +1273,7 @@ static void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
1273#else /* !CONFIG_SMP */ 1273#else /* !CONFIG_SMP */
1274static void resched_task(struct task_struct *p) 1274static void resched_task(struct task_struct *p)
1275{ 1275{
1276 assert_spin_locked(&task_rq(p)->lock); 1276 assert_raw_spin_locked(&task_rq(p)->lock);
1277 set_tsk_need_resched(p); 1277 set_tsk_need_resched(p);
1278} 1278}
1279 1279
@@ -1600,11 +1600,11 @@ static void update_group_shares_cpu(struct task_group *tg, int cpu,
1600 struct rq *rq = cpu_rq(cpu); 1600 struct rq *rq = cpu_rq(cpu);
1601 unsigned long flags; 1601 unsigned long flags;
1602 1602
1603 spin_lock_irqsave(&rq->lock, flags); 1603 raw_spin_lock_irqsave(&rq->lock, flags);
1604 tg->cfs_rq[cpu]->rq_weight = boost ? 0 : rq_weight; 1604 tg->cfs_rq[cpu]->rq_weight = boost ? 0 : rq_weight;
1605 tg->cfs_rq[cpu]->shares = boost ? 0 : shares; 1605 tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
1606 __set_se_shares(tg->se[cpu], shares); 1606 __set_se_shares(tg->se[cpu], shares);
1607 spin_unlock_irqrestore(&rq->lock, flags); 1607 raw_spin_unlock_irqrestore(&rq->lock, flags);
1608 } 1608 }
1609} 1609}
1610 1610
@@ -1706,9 +1706,9 @@ static void update_shares_locked(struct rq *rq, struct sched_domain *sd)
1706 if (root_task_group_empty()) 1706 if (root_task_group_empty())
1707 return; 1707 return;
1708 1708
1709 spin_unlock(&rq->lock); 1709 raw_spin_unlock(&rq->lock);
1710 update_shares(sd); 1710 update_shares(sd);
1711 spin_lock(&rq->lock); 1711 raw_spin_lock(&rq->lock);
1712} 1712}
1713 1713
1714static void update_h_load(long cpu) 1714static void update_h_load(long cpu)
@@ -1748,7 +1748,7 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
1748 __acquires(busiest->lock) 1748 __acquires(busiest->lock)
1749 __acquires(this_rq->lock) 1749 __acquires(this_rq->lock)
1750{ 1750{
1751 spin_unlock(&this_rq->lock); 1751 raw_spin_unlock(&this_rq->lock);
1752 double_rq_lock(this_rq, busiest); 1752 double_rq_lock(this_rq, busiest);
1753 1753
1754 return 1; 1754 return 1;
@@ -1769,14 +1769,16 @@ static int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
1769{ 1769{
1770 int ret = 0; 1770 int ret = 0;
1771 1771
1772 if (unlikely(!spin_trylock(&busiest->lock))) { 1772 if (unlikely(!raw_spin_trylock(&busiest->lock))) {
1773 if (busiest < this_rq) { 1773 if (busiest < this_rq) {
1774 spin_unlock(&this_rq->lock); 1774 raw_spin_unlock(&this_rq->lock);
1775 spin_lock(&busiest->lock); 1775 raw_spin_lock(&busiest->lock);
1776 spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING); 1776 raw_spin_lock_nested(&this_rq->lock,
1777 SINGLE_DEPTH_NESTING);
1777 ret = 1; 1778 ret = 1;
1778 } else 1779 } else
1779 spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING); 1780 raw_spin_lock_nested(&busiest->lock,
1781 SINGLE_DEPTH_NESTING);
1780 } 1782 }
1781 return ret; 1783 return ret;
1782} 1784}
@@ -1790,7 +1792,7 @@ static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
1790{ 1792{
1791 if (unlikely(!irqs_disabled())) { 1793 if (unlikely(!irqs_disabled())) {
1792 /* printk() doesn't work good under rq->lock */ 1794 /* printk() doesn't work good under rq->lock */
1793 spin_unlock(&this_rq->lock); 1795 raw_spin_unlock(&this_rq->lock);
1794 BUG_ON(1); 1796 BUG_ON(1);
1795 } 1797 }
1796 1798
@@ -1800,7 +1802,7 @@ static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
1800static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest) 1802static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
1801 __releases(busiest->lock) 1803 __releases(busiest->lock)
1802{ 1804{
1803 spin_unlock(&busiest->lock); 1805 raw_spin_unlock(&busiest->lock);
1804 lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_); 1806 lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
1805} 1807}
1806#endif 1808#endif
@@ -2000,39 +2002,6 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
2000 p->sched_class->prio_changed(rq, p, oldprio, running); 2002 p->sched_class->prio_changed(rq, p, oldprio, running);
2001} 2003}
2002 2004
2003/**
2004 * kthread_bind - bind a just-created kthread to a cpu.
2005 * @p: thread created by kthread_create().
2006 * @cpu: cpu (might not be online, must be possible) for @k to run on.
2007 *
2008 * Description: This function is equivalent to set_cpus_allowed(),
2009 * except that @cpu doesn't need to be online, and the thread must be
2010 * stopped (i.e., just returned from kthread_create()).
2011 *
2012 * Function lives here instead of kthread.c because it messes with
2013 * scheduler internals which require locking.
2014 */
2015void kthread_bind(struct task_struct *p, unsigned int cpu)
2016{
2017 struct rq *rq = cpu_rq(cpu);
2018 unsigned long flags;
2019
2020 /* Must have done schedule() in kthread() before we set_task_cpu */
2021 if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) {
2022 WARN_ON(1);
2023 return;
2024 }
2025
2026 spin_lock_irqsave(&rq->lock, flags);
2027 update_rq_clock(rq);
2028 set_task_cpu(p, cpu);
2029 p->cpus_allowed = cpumask_of_cpu(cpu);
2030 p->rt.nr_cpus_allowed = 1;
2031 p->flags |= PF_THREAD_BOUND;
2032 spin_unlock_irqrestore(&rq->lock, flags);
2033}
2034EXPORT_SYMBOL(kthread_bind);
2035
2036#ifdef CONFIG_SMP 2005#ifdef CONFIG_SMP
2037/* 2006/*
2038 * Is this task likely cache-hot: 2007 * Is this task likely cache-hot:
@@ -2042,6 +2011,9 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
2042{ 2011{
2043 s64 delta; 2012 s64 delta;
2044 2013
2014 if (p->sched_class != &fair_sched_class)
2015 return 0;
2016
2045 /* 2017 /*
2046 * Buddy candidates are cache hot: 2018 * Buddy candidates are cache hot:
2047 */ 2019 */
@@ -2050,9 +2022,6 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
2050 &p->se == cfs_rq_of(&p->se)->last)) 2022 &p->se == cfs_rq_of(&p->se)->last))
2051 return 1; 2023 return 1;
2052 2024
2053 if (p->sched_class != &fair_sched_class)
2054 return 0;
2055
2056 if (sysctl_sched_migration_cost == -1) 2025 if (sysctl_sched_migration_cost == -1)
2057 return 1; 2026 return 1;
2058 if (sysctl_sched_migration_cost == 0) 2027 if (sysctl_sched_migration_cost == 0)
@@ -2063,22 +2032,23 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
2063 return delta < (s64)sysctl_sched_migration_cost; 2032 return delta < (s64)sysctl_sched_migration_cost;
2064} 2033}
2065 2034
2066
2067void set_task_cpu(struct task_struct *p, unsigned int new_cpu) 2035void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
2068{ 2036{
2069 int old_cpu = task_cpu(p); 2037#ifdef CONFIG_SCHED_DEBUG
2070 struct cfs_rq *old_cfsrq = task_cfs_rq(p), 2038 /*
2071 *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu); 2039 * We should never call set_task_cpu() on a blocked task,
2040 * ttwu() will sort out the placement.
2041 */
2042 WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING &&
2043 !(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE));
2044#endif
2072 2045
2073 trace_sched_migrate_task(p, new_cpu); 2046 trace_sched_migrate_task(p, new_cpu);
2074 2047
2075 if (old_cpu != new_cpu) { 2048 if (task_cpu(p) != new_cpu) {
2076 p->se.nr_migrations++; 2049 p->se.nr_migrations++;
2077 perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 2050 perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 1, NULL, 0);
2078 1, 1, NULL, 0);
2079 } 2051 }
2080 p->se.vruntime -= old_cfsrq->min_vruntime -
2081 new_cfsrq->min_vruntime;
2082 2052
2083 __set_task_cpu(p, new_cpu); 2053 __set_task_cpu(p, new_cpu);
2084} 2054}
@@ -2103,13 +2073,10 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
2103 2073
2104 /* 2074 /*
2105 * If the task is not on a runqueue (and not running), then 2075 * If the task is not on a runqueue (and not running), then
2106 * it is sufficient to simply update the task's cpu field. 2076 * the next wake-up will properly place the task.
2107 */ 2077 */
2108 if (!p->se.on_rq && !task_running(rq, p)) { 2078 if (!p->se.on_rq && !task_running(rq, p))
2109 update_rq_clock(rq);
2110 set_task_cpu(p, dest_cpu);
2111 return 0; 2079 return 0;
2112 }
2113 2080
2114 init_completion(&req->done); 2081 init_completion(&req->done);
2115 req->task = p; 2082 req->task = p;
@@ -2315,10 +2282,73 @@ void task_oncpu_function_call(struct task_struct *p,
2315} 2282}
2316 2283
2317#ifdef CONFIG_SMP 2284#ifdef CONFIG_SMP
2285static int select_fallback_rq(int cpu, struct task_struct *p)
2286{
2287 int dest_cpu;
2288 const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu));
2289
2290 /* Look for allowed, online CPU in same node. */
2291 for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask)
2292 if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
2293 return dest_cpu;
2294
2295 /* Any allowed, online CPU? */
2296 dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask);
2297 if (dest_cpu < nr_cpu_ids)
2298 return dest_cpu;
2299
2300 /* No more Mr. Nice Guy. */
2301 if (dest_cpu >= nr_cpu_ids) {
2302 rcu_read_lock();
2303 cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
2304 rcu_read_unlock();
2305 dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
2306
2307 /*
2308 * Don't tell them about moving exiting tasks or
2309 * kernel threads (both mm NULL), since they never
2310 * leave kernel.
2311 */
2312 if (p->mm && printk_ratelimit()) {
2313 printk(KERN_INFO "process %d (%s) no "
2314 "longer affine to cpu%d\n",
2315 task_pid_nr(p), p->comm, cpu);
2316 }
2317 }
2318
2319 return dest_cpu;
2320}
2321
2322/*
2323 * Called from:
2324 *
2325 * - fork, @p is stable because it isn't on the tasklist yet
2326 *
2327 * - exec, @p is unstable, retry loop
2328 *
2329 * - wake-up, we serialize ->cpus_allowed against TASK_WAKING so
2330 * we should be good.
2331 */
2318static inline 2332static inline
2319int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags) 2333int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
2320{ 2334{
2321 return p->sched_class->select_task_rq(p, sd_flags, wake_flags); 2335 int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags);
2336
2337 /*
2338 * In order not to call set_task_cpu() on a blocking task we need
2339 * to rely on ttwu() to place the task on a valid ->cpus_allowed
2340 * cpu.
2341 *
2342 * Since this is common to all placement strategies, this lives here.
2343 *
2344 * [ this allows ->select_task() to simply return task_cpu(p) and
2345 * not worry about this generic constraint ]
2346 */
2347 if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) ||
2348 !cpu_online(cpu)))
2349 cpu = select_fallback_rq(task_cpu(p), p);
2350
2351 return cpu;
2322} 2352}
2323#endif 2353#endif
2324 2354
@@ -2373,6 +2403,10 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
2373 if (task_contributes_to_load(p)) 2403 if (task_contributes_to_load(p))
2374 rq->nr_uninterruptible--; 2404 rq->nr_uninterruptible--;
2375 p->state = TASK_WAKING; 2405 p->state = TASK_WAKING;
2406
2407 if (p->sched_class->task_waking)
2408 p->sched_class->task_waking(rq, p);
2409
2376 __task_rq_unlock(rq); 2410 __task_rq_unlock(rq);
2377 2411
2378 cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags); 2412 cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
@@ -2436,8 +2470,8 @@ out_running:
2436 2470
2437 p->state = TASK_RUNNING; 2471 p->state = TASK_RUNNING;
2438#ifdef CONFIG_SMP 2472#ifdef CONFIG_SMP
2439 if (p->sched_class->task_wake_up) 2473 if (p->sched_class->task_woken)
2440 p->sched_class->task_wake_up(rq, p); 2474 p->sched_class->task_woken(rq, p);
2441 2475
2442 if (unlikely(rq->idle_stamp)) { 2476 if (unlikely(rq->idle_stamp)) {
2443 u64 delta = rq->clock - rq->idle_stamp; 2477 u64 delta = rq->clock - rq->idle_stamp;
@@ -2536,14 +2570,6 @@ static void __sched_fork(struct task_struct *p)
2536#ifdef CONFIG_PREEMPT_NOTIFIERS 2570#ifdef CONFIG_PREEMPT_NOTIFIERS
2537 INIT_HLIST_HEAD(&p->preempt_notifiers); 2571 INIT_HLIST_HEAD(&p->preempt_notifiers);
2538#endif 2572#endif
2539
2540 /*
2541 * We mark the process as running here, but have not actually
2542 * inserted it onto the runqueue yet. This guarantees that
2543 * nobody will actually run it, and a signal or other external
2544 * event cannot wake it up and insert it on the runqueue either.
2545 */
2546 p->state = TASK_RUNNING;
2547} 2573}
2548 2574
2549/* 2575/*
@@ -2554,6 +2580,12 @@ void sched_fork(struct task_struct *p, int clone_flags)
2554 int cpu = get_cpu(); 2580 int cpu = get_cpu();
2555 2581
2556 __sched_fork(p); 2582 __sched_fork(p);
2583 /*
2584 * We mark the process as waking here. This guarantees that
2585 * nobody will actually run it, and a signal or other external
2586 * event cannot wake it up and insert it on the runqueue either.
2587 */
2588 p->state = TASK_WAKING;
2557 2589
2558 /* 2590 /*
2559 * Revert to default priority/policy on fork if requested. 2591 * Revert to default priority/policy on fork if requested.
@@ -2622,14 +2654,15 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
2622 struct rq *rq; 2654 struct rq *rq;
2623 2655
2624 rq = task_rq_lock(p, &flags); 2656 rq = task_rq_lock(p, &flags);
2625 BUG_ON(p->state != TASK_RUNNING); 2657 BUG_ON(p->state != TASK_WAKING);
2658 p->state = TASK_RUNNING;
2626 update_rq_clock(rq); 2659 update_rq_clock(rq);
2627 activate_task(rq, p, 0); 2660 activate_task(rq, p, 0);
2628 trace_sched_wakeup_new(rq, p, 1); 2661 trace_sched_wakeup_new(rq, p, 1);
2629 check_preempt_curr(rq, p, WF_FORK); 2662 check_preempt_curr(rq, p, WF_FORK);
2630#ifdef CONFIG_SMP 2663#ifdef CONFIG_SMP
2631 if (p->sched_class->task_wake_up) 2664 if (p->sched_class->task_woken)
2632 p->sched_class->task_wake_up(rq, p); 2665 p->sched_class->task_woken(rq, p);
2633#endif 2666#endif
2634 task_rq_unlock(rq, &flags); 2667 task_rq_unlock(rq, &flags);
2635} 2668}
@@ -2781,10 +2814,10 @@ static inline void post_schedule(struct rq *rq)
2781 if (rq->post_schedule) { 2814 if (rq->post_schedule) {
2782 unsigned long flags; 2815 unsigned long flags;
2783 2816
2784 spin_lock_irqsave(&rq->lock, flags); 2817 raw_spin_lock_irqsave(&rq->lock, flags);
2785 if (rq->curr->sched_class->post_schedule) 2818 if (rq->curr->sched_class->post_schedule)
2786 rq->curr->sched_class->post_schedule(rq); 2819 rq->curr->sched_class->post_schedule(rq);
2787 spin_unlock_irqrestore(&rq->lock, flags); 2820 raw_spin_unlock_irqrestore(&rq->lock, flags);
2788 2821
2789 rq->post_schedule = 0; 2822 rq->post_schedule = 0;
2790 } 2823 }
@@ -3066,15 +3099,15 @@ static void double_rq_lock(struct rq *rq1, struct rq *rq2)
3066{ 3099{
3067 BUG_ON(!irqs_disabled()); 3100 BUG_ON(!irqs_disabled());
3068 if (rq1 == rq2) { 3101 if (rq1 == rq2) {
3069 spin_lock(&rq1->lock); 3102 raw_spin_lock(&rq1->lock);
3070 __acquire(rq2->lock); /* Fake it out ;) */ 3103 __acquire(rq2->lock); /* Fake it out ;) */
3071 } else { 3104 } else {
3072 if (rq1 < rq2) { 3105 if (rq1 < rq2) {
3073 spin_lock(&rq1->lock); 3106 raw_spin_lock(&rq1->lock);
3074 spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING); 3107 raw_spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING);
3075 } else { 3108 } else {
3076 spin_lock(&rq2->lock); 3109 raw_spin_lock(&rq2->lock);
3077 spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING); 3110 raw_spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING);
3078 } 3111 }
3079 } 3112 }
3080 update_rq_clock(rq1); 3113 update_rq_clock(rq1);
@@ -3091,29 +3124,44 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
3091 __releases(rq1->lock) 3124 __releases(rq1->lock)
3092 __releases(rq2->lock) 3125 __releases(rq2->lock)
3093{ 3126{
3094 spin_unlock(&rq1->lock); 3127 raw_spin_unlock(&rq1->lock);
3095 if (rq1 != rq2) 3128 if (rq1 != rq2)
3096 spin_unlock(&rq2->lock); 3129 raw_spin_unlock(&rq2->lock);
3097 else 3130 else
3098 __release(rq2->lock); 3131 __release(rq2->lock);
3099} 3132}
3100 3133
3101/* 3134/*
3102 * If dest_cpu is allowed for this process, migrate the task to it. 3135 * sched_exec - execve() is a valuable balancing opportunity, because at
3103 * This is accomplished by forcing the cpu_allowed mask to only 3136 * this point the task has the smallest effective memory and cache footprint.
3104 * allow dest_cpu, which will force the cpu onto dest_cpu. Then
3105 * the cpu_allowed mask is restored.
3106 */ 3137 */
3107static void sched_migrate_task(struct task_struct *p, int dest_cpu) 3138void sched_exec(void)
3108{ 3139{
3140 struct task_struct *p = current;
3109 struct migration_req req; 3141 struct migration_req req;
3142 int dest_cpu, this_cpu;
3110 unsigned long flags; 3143 unsigned long flags;
3111 struct rq *rq; 3144 struct rq *rq;
3112 3145
3146again:
3147 this_cpu = get_cpu();
3148 dest_cpu = select_task_rq(p, SD_BALANCE_EXEC, 0);
3149 if (dest_cpu == this_cpu) {
3150 put_cpu();
3151 return;
3152 }
3153
3113 rq = task_rq_lock(p, &flags); 3154 rq = task_rq_lock(p, &flags);
3155 put_cpu();
3156
3157 /*
3158 * select_task_rq() can race against ->cpus_allowed
3159 */
3114 if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed) 3160 if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)
3115 || unlikely(!cpu_active(dest_cpu))) 3161 || unlikely(!cpu_active(dest_cpu))) {
3116 goto out; 3162 task_rq_unlock(rq, &flags);
3163 goto again;
3164 }
3117 3165
3118 /* force the process onto the specified CPU */ 3166 /* force the process onto the specified CPU */
3119 if (migrate_task(p, dest_cpu, &req)) { 3167 if (migrate_task(p, dest_cpu, &req)) {
@@ -3128,24 +3176,10 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu)
3128 3176
3129 return; 3177 return;
3130 } 3178 }
3131out:
3132 task_rq_unlock(rq, &flags); 3179 task_rq_unlock(rq, &flags);
3133} 3180}
3134 3181
3135/* 3182/*
3136 * sched_exec - execve() is a valuable balancing opportunity, because at
3137 * this point the task has the smallest effective memory and cache footprint.
3138 */
3139void sched_exec(void)
3140{
3141 int new_cpu, this_cpu = get_cpu();
3142 new_cpu = select_task_rq(current, SD_BALANCE_EXEC, 0);
3143 put_cpu();
3144 if (new_cpu != this_cpu)
3145 sched_migrate_task(current, new_cpu);
3146}
3147
3148/*
3149 * pull_task - move a task from a remote runqueue to the local runqueue. 3183 * pull_task - move a task from a remote runqueue to the local runqueue.
3150 * Both runqueues must be locked. 3184 * Both runqueues must be locked.
3151 */ 3185 */
@@ -4186,14 +4220,15 @@ redo:
4186 4220
4187 if (unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2)) { 4221 if (unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2)) {
4188 4222
4189 spin_lock_irqsave(&busiest->lock, flags); 4223 raw_spin_lock_irqsave(&busiest->lock, flags);
4190 4224
4191 /* don't kick the migration_thread, if the curr 4225 /* don't kick the migration_thread, if the curr
4192 * task on busiest cpu can't be moved to this_cpu 4226 * task on busiest cpu can't be moved to this_cpu
4193 */ 4227 */
4194 if (!cpumask_test_cpu(this_cpu, 4228 if (!cpumask_test_cpu(this_cpu,
4195 &busiest->curr->cpus_allowed)) { 4229 &busiest->curr->cpus_allowed)) {
4196 spin_unlock_irqrestore(&busiest->lock, flags); 4230 raw_spin_unlock_irqrestore(&busiest->lock,
4231 flags);
4197 all_pinned = 1; 4232 all_pinned = 1;
4198 goto out_one_pinned; 4233 goto out_one_pinned;
4199 } 4234 }
@@ -4203,7 +4238,7 @@ redo:
4203 busiest->push_cpu = this_cpu; 4238 busiest->push_cpu = this_cpu;
4204 active_balance = 1; 4239 active_balance = 1;
4205 } 4240 }
4206 spin_unlock_irqrestore(&busiest->lock, flags); 4241 raw_spin_unlock_irqrestore(&busiest->lock, flags);
4207 if (active_balance) 4242 if (active_balance)
4208 wake_up_process(busiest->migration_thread); 4243 wake_up_process(busiest->migration_thread);
4209 4244
@@ -4385,10 +4420,10 @@ redo:
4385 /* 4420 /*
4386 * Should not call ttwu while holding a rq->lock 4421 * Should not call ttwu while holding a rq->lock
4387 */ 4422 */
4388 spin_unlock(&this_rq->lock); 4423 raw_spin_unlock(&this_rq->lock);
4389 if (active_balance) 4424 if (active_balance)
4390 wake_up_process(busiest->migration_thread); 4425 wake_up_process(busiest->migration_thread);
4391 spin_lock(&this_rq->lock); 4426 raw_spin_lock(&this_rq->lock);
4392 4427
4393 } else 4428 } else
4394 sd->nr_balance_failed = 0; 4429 sd->nr_balance_failed = 0;
@@ -5257,11 +5292,11 @@ void scheduler_tick(void)
5257 5292
5258 sched_clock_tick(); 5293 sched_clock_tick();
5259 5294
5260 spin_lock(&rq->lock); 5295 raw_spin_lock(&rq->lock);
5261 update_rq_clock(rq); 5296 update_rq_clock(rq);
5262 update_cpu_load(rq); 5297 update_cpu_load(rq);
5263 curr->sched_class->task_tick(rq, curr, 0); 5298 curr->sched_class->task_tick(rq, curr, 0);
5264 spin_unlock(&rq->lock); 5299 raw_spin_unlock(&rq->lock);
5265 5300
5266 perf_event_task_tick(curr, cpu); 5301 perf_event_task_tick(curr, cpu);
5267 5302
@@ -5455,7 +5490,7 @@ need_resched_nonpreemptible:
5455 if (sched_feat(HRTICK)) 5490 if (sched_feat(HRTICK))
5456 hrtick_clear(rq); 5491 hrtick_clear(rq);
5457 5492
5458 spin_lock_irq(&rq->lock); 5493 raw_spin_lock_irq(&rq->lock);
5459 update_rq_clock(rq); 5494 update_rq_clock(rq);
5460 clear_tsk_need_resched(prev); 5495 clear_tsk_need_resched(prev);
5461 5496
@@ -5491,12 +5526,15 @@ need_resched_nonpreemptible:
5491 cpu = smp_processor_id(); 5526 cpu = smp_processor_id();
5492 rq = cpu_rq(cpu); 5527 rq = cpu_rq(cpu);
5493 } else 5528 } else
5494 spin_unlock_irq(&rq->lock); 5529 raw_spin_unlock_irq(&rq->lock);
5495 5530
5496 post_schedule(rq); 5531 post_schedule(rq);
5497 5532
5498 if (unlikely(reacquire_kernel_lock(current) < 0)) 5533 if (unlikely(reacquire_kernel_lock(current) < 0)) {
5534 prev = rq->curr;
5535 switch_count = &prev->nivcsw;
5499 goto need_resched_nonpreemptible; 5536 goto need_resched_nonpreemptible;
5537 }
5500 5538
5501 preempt_enable_no_resched(); 5539 preempt_enable_no_resched();
5502 if (need_resched()) 5540 if (need_resched())
@@ -5908,14 +5946,15 @@ EXPORT_SYMBOL(wait_for_completion_killable);
5908 */ 5946 */
5909bool try_wait_for_completion(struct completion *x) 5947bool try_wait_for_completion(struct completion *x)
5910{ 5948{
5949 unsigned long flags;
5911 int ret = 1; 5950 int ret = 1;
5912 5951
5913 spin_lock_irq(&x->wait.lock); 5952 spin_lock_irqsave(&x->wait.lock, flags);
5914 if (!x->done) 5953 if (!x->done)
5915 ret = 0; 5954 ret = 0;
5916 else 5955 else
5917 x->done--; 5956 x->done--;
5918 spin_unlock_irq(&x->wait.lock); 5957 spin_unlock_irqrestore(&x->wait.lock, flags);
5919 return ret; 5958 return ret;
5920} 5959}
5921EXPORT_SYMBOL(try_wait_for_completion); 5960EXPORT_SYMBOL(try_wait_for_completion);
@@ -5930,12 +5969,13 @@ EXPORT_SYMBOL(try_wait_for_completion);
5930 */ 5969 */
5931bool completion_done(struct completion *x) 5970bool completion_done(struct completion *x)
5932{ 5971{
5972 unsigned long flags;
5933 int ret = 1; 5973 int ret = 1;
5934 5974
5935 spin_lock_irq(&x->wait.lock); 5975 spin_lock_irqsave(&x->wait.lock, flags);
5936 if (!x->done) 5976 if (!x->done)
5937 ret = 0; 5977 ret = 0;
5938 spin_unlock_irq(&x->wait.lock); 5978 spin_unlock_irqrestore(&x->wait.lock, flags);
5939 return ret; 5979 return ret;
5940} 5980}
5941EXPORT_SYMBOL(completion_done); 5981EXPORT_SYMBOL(completion_done);
@@ -6320,7 +6360,7 @@ recheck:
6320 * make sure no PI-waiters arrive (or leave) while we are 6360 * make sure no PI-waiters arrive (or leave) while we are
6321 * changing the priority of the task: 6361 * changing the priority of the task:
6322 */ 6362 */
6323 spin_lock_irqsave(&p->pi_lock, flags); 6363 raw_spin_lock_irqsave(&p->pi_lock, flags);
6324 /* 6364 /*
6325 * To be able to change p->policy safely, the apropriate 6365 * To be able to change p->policy safely, the apropriate
6326 * runqueue lock must be held. 6366 * runqueue lock must be held.
@@ -6330,7 +6370,7 @@ recheck:
6330 if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { 6370 if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
6331 policy = oldpolicy = -1; 6371 policy = oldpolicy = -1;
6332 __task_rq_unlock(rq); 6372 __task_rq_unlock(rq);
6333 spin_unlock_irqrestore(&p->pi_lock, flags); 6373 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
6334 goto recheck; 6374 goto recheck;
6335 } 6375 }
6336 update_rq_clock(rq); 6376 update_rq_clock(rq);
@@ -6354,7 +6394,7 @@ recheck:
6354 check_class_changed(rq, p, prev_class, oldprio, running); 6394 check_class_changed(rq, p, prev_class, oldprio, running);
6355 } 6395 }
6356 __task_rq_unlock(rq); 6396 __task_rq_unlock(rq);
6357 spin_unlock_irqrestore(&p->pi_lock, flags); 6397 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
6358 6398
6359 rt_mutex_adjust_pi(p); 6399 rt_mutex_adjust_pi(p);
6360 6400
@@ -6454,7 +6494,7 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
6454 return -EINVAL; 6494 return -EINVAL;
6455 6495
6456 retval = -ESRCH; 6496 retval = -ESRCH;
6457 read_lock(&tasklist_lock); 6497 rcu_read_lock();
6458 p = find_process_by_pid(pid); 6498 p = find_process_by_pid(pid);
6459 if (p) { 6499 if (p) {
6460 retval = security_task_getscheduler(p); 6500 retval = security_task_getscheduler(p);
@@ -6462,7 +6502,7 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
6462 retval = p->policy 6502 retval = p->policy
6463 | (p->sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0); 6503 | (p->sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0);
6464 } 6504 }
6465 read_unlock(&tasklist_lock); 6505 rcu_read_unlock();
6466 return retval; 6506 return retval;
6467} 6507}
6468 6508
@@ -6480,7 +6520,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
6480 if (!param || pid < 0) 6520 if (!param || pid < 0)
6481 return -EINVAL; 6521 return -EINVAL;
6482 6522
6483 read_lock(&tasklist_lock); 6523 rcu_read_lock();
6484 p = find_process_by_pid(pid); 6524 p = find_process_by_pid(pid);
6485 retval = -ESRCH; 6525 retval = -ESRCH;
6486 if (!p) 6526 if (!p)
@@ -6491,7 +6531,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
6491 goto out_unlock; 6531 goto out_unlock;
6492 6532
6493 lp.sched_priority = p->rt_priority; 6533 lp.sched_priority = p->rt_priority;
6494 read_unlock(&tasklist_lock); 6534 rcu_read_unlock();
6495 6535
6496 /* 6536 /*
6497 * This one might sleep, we cannot do it with a spinlock held ... 6537 * This one might sleep, we cannot do it with a spinlock held ...
@@ -6501,7 +6541,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
6501 return retval; 6541 return retval;
6502 6542
6503out_unlock: 6543out_unlock:
6504 read_unlock(&tasklist_lock); 6544 rcu_read_unlock();
6505 return retval; 6545 return retval;
6506} 6546}
6507 6547
@@ -6512,22 +6552,18 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
6512 int retval; 6552 int retval;
6513 6553
6514 get_online_cpus(); 6554 get_online_cpus();
6515 read_lock(&tasklist_lock); 6555 rcu_read_lock();
6516 6556
6517 p = find_process_by_pid(pid); 6557 p = find_process_by_pid(pid);
6518 if (!p) { 6558 if (!p) {
6519 read_unlock(&tasklist_lock); 6559 rcu_read_unlock();
6520 put_online_cpus(); 6560 put_online_cpus();
6521 return -ESRCH; 6561 return -ESRCH;
6522 } 6562 }
6523 6563
6524 /* 6564 /* Prevent p going away */
6525 * It is not safe to call set_cpus_allowed with the
6526 * tasklist_lock held. We will bump the task_struct's
6527 * usage count and then drop tasklist_lock.
6528 */
6529 get_task_struct(p); 6565 get_task_struct(p);
6530 read_unlock(&tasklist_lock); 6566 rcu_read_unlock();
6531 6567
6532 if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { 6568 if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
6533 retval = -ENOMEM; 6569 retval = -ENOMEM;
@@ -6613,7 +6649,7 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
6613 int retval; 6649 int retval;
6614 6650
6615 get_online_cpus(); 6651 get_online_cpus();
6616 read_lock(&tasklist_lock); 6652 rcu_read_lock();
6617 6653
6618 retval = -ESRCH; 6654 retval = -ESRCH;
6619 p = find_process_by_pid(pid); 6655 p = find_process_by_pid(pid);
@@ -6629,7 +6665,7 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
6629 task_rq_unlock(rq, &flags); 6665 task_rq_unlock(rq, &flags);
6630 6666
6631out_unlock: 6667out_unlock:
6632 read_unlock(&tasklist_lock); 6668 rcu_read_unlock();
6633 put_online_cpus(); 6669 put_online_cpus();
6634 6670
6635 return retval; 6671 return retval;
@@ -6684,7 +6720,7 @@ SYSCALL_DEFINE0(sched_yield)
6684 */ 6720 */
6685 __release(rq->lock); 6721 __release(rq->lock);
6686 spin_release(&rq->lock.dep_map, 1, _THIS_IP_); 6722 spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
6687 _raw_spin_unlock(&rq->lock); 6723 do_raw_spin_unlock(&rq->lock);
6688 preempt_enable_no_resched(); 6724 preempt_enable_no_resched();
6689 6725
6690 schedule(); 6726 schedule();
@@ -6873,7 +6909,7 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
6873 return -EINVAL; 6909 return -EINVAL;
6874 6910
6875 retval = -ESRCH; 6911 retval = -ESRCH;
6876 read_lock(&tasklist_lock); 6912 rcu_read_lock();
6877 p = find_process_by_pid(pid); 6913 p = find_process_by_pid(pid);
6878 if (!p) 6914 if (!p)
6879 goto out_unlock; 6915 goto out_unlock;
@@ -6886,13 +6922,13 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
6886 time_slice = p->sched_class->get_rr_interval(rq, p); 6922 time_slice = p->sched_class->get_rr_interval(rq, p);
6887 task_rq_unlock(rq, &flags); 6923 task_rq_unlock(rq, &flags);
6888 6924
6889 read_unlock(&tasklist_lock); 6925 rcu_read_unlock();
6890 jiffies_to_timespec(time_slice, &t); 6926 jiffies_to_timespec(time_slice, &t);
6891 retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0; 6927 retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0;
6892 return retval; 6928 return retval;
6893 6929
6894out_unlock: 6930out_unlock:
6895 read_unlock(&tasklist_lock); 6931 rcu_read_unlock();
6896 return retval; 6932 return retval;
6897} 6933}
6898 6934
@@ -6980,9 +7016,10 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
6980 struct rq *rq = cpu_rq(cpu); 7016 struct rq *rq = cpu_rq(cpu);
6981 unsigned long flags; 7017 unsigned long flags;
6982 7018
6983 spin_lock_irqsave(&rq->lock, flags); 7019 raw_spin_lock_irqsave(&rq->lock, flags);
6984 7020
6985 __sched_fork(idle); 7021 __sched_fork(idle);
7022 idle->state = TASK_RUNNING;
6986 idle->se.exec_start = sched_clock(); 7023 idle->se.exec_start = sched_clock();
6987 7024
6988 cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu)); 7025 cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu));
@@ -6992,7 +7029,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
6992#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) 7029#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
6993 idle->oncpu = 1; 7030 idle->oncpu = 1;
6994#endif 7031#endif
6995 spin_unlock_irqrestore(&rq->lock, flags); 7032 raw_spin_unlock_irqrestore(&rq->lock, flags);
6996 7033
6997 /* Set the preempt count _outside_ the spinlocks! */ 7034 /* Set the preempt count _outside_ the spinlocks! */
6998#if defined(CONFIG_PREEMPT) 7035#if defined(CONFIG_PREEMPT)
@@ -7097,7 +7134,23 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
7097 struct rq *rq; 7134 struct rq *rq;
7098 int ret = 0; 7135 int ret = 0;
7099 7136
7137 /*
7138 * Since we rely on wake-ups to migrate sleeping tasks, don't change
7139 * the ->cpus_allowed mask from under waking tasks, which would be
7140 * possible when we change rq->lock in ttwu(), so synchronize against
7141 * TASK_WAKING to avoid that.
7142 */
7143again:
7144 while (p->state == TASK_WAKING)
7145 cpu_relax();
7146
7100 rq = task_rq_lock(p, &flags); 7147 rq = task_rq_lock(p, &flags);
7148
7149 if (p->state == TASK_WAKING) {
7150 task_rq_unlock(rq, &flags);
7151 goto again;
7152 }
7153
7101 if (!cpumask_intersects(new_mask, cpu_active_mask)) { 7154 if (!cpumask_intersects(new_mask, cpu_active_mask)) {
7102 ret = -EINVAL; 7155 ret = -EINVAL;
7103 goto out; 7156 goto out;
@@ -7153,7 +7206,7 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
7153static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) 7206static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
7154{ 7207{
7155 struct rq *rq_dest, *rq_src; 7208 struct rq *rq_dest, *rq_src;
7156 int ret = 0, on_rq; 7209 int ret = 0;
7157 7210
7158 if (unlikely(!cpu_active(dest_cpu))) 7211 if (unlikely(!cpu_active(dest_cpu)))
7159 return ret; 7212 return ret;
@@ -7169,12 +7222,13 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
7169 if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) 7222 if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
7170 goto fail; 7223 goto fail;
7171 7224
7172 on_rq = p->se.on_rq; 7225 /*
7173 if (on_rq) 7226 * If we're not on a rq, the next wake-up will ensure we're
7227 * placed properly.
7228 */
7229 if (p->se.on_rq) {
7174 deactivate_task(rq_src, p, 0); 7230 deactivate_task(rq_src, p, 0);
7175 7231 set_task_cpu(p, dest_cpu);
7176 set_task_cpu(p, dest_cpu);
7177 if (on_rq) {
7178 activate_task(rq_dest, p, 0); 7232 activate_task(rq_dest, p, 0);
7179 check_preempt_curr(rq_dest, p, 0); 7233 check_preempt_curr(rq_dest, p, 0);
7180 } 7234 }
@@ -7209,10 +7263,10 @@ static int migration_thread(void *data)
7209 struct migration_req *req; 7263 struct migration_req *req;
7210 struct list_head *head; 7264 struct list_head *head;
7211 7265
7212 spin_lock_irq(&rq->lock); 7266 raw_spin_lock_irq(&rq->lock);
7213 7267
7214 if (cpu_is_offline(cpu)) { 7268 if (cpu_is_offline(cpu)) {
7215 spin_unlock_irq(&rq->lock); 7269 raw_spin_unlock_irq(&rq->lock);
7216 break; 7270 break;
7217 } 7271 }
7218 7272
@@ -7224,7 +7278,7 @@ static int migration_thread(void *data)
7224 head = &rq->migration_queue; 7278 head = &rq->migration_queue;
7225 7279
7226 if (list_empty(head)) { 7280 if (list_empty(head)) {
7227 spin_unlock_irq(&rq->lock); 7281 raw_spin_unlock_irq(&rq->lock);
7228 schedule(); 7282 schedule();
7229 set_current_state(TASK_INTERRUPTIBLE); 7283 set_current_state(TASK_INTERRUPTIBLE);
7230 continue; 7284 continue;
@@ -7233,14 +7287,14 @@ static int migration_thread(void *data)
7233 list_del_init(head->next); 7287 list_del_init(head->next);
7234 7288
7235 if (req->task != NULL) { 7289 if (req->task != NULL) {
7236 spin_unlock(&rq->lock); 7290 raw_spin_unlock(&rq->lock);
7237 __migrate_task(req->task, cpu, req->dest_cpu); 7291 __migrate_task(req->task, cpu, req->dest_cpu);
7238 } else if (likely(cpu == (badcpu = smp_processor_id()))) { 7292 } else if (likely(cpu == (badcpu = smp_processor_id()))) {
7239 req->dest_cpu = RCU_MIGRATION_GOT_QS; 7293 req->dest_cpu = RCU_MIGRATION_GOT_QS;
7240 spin_unlock(&rq->lock); 7294 raw_spin_unlock(&rq->lock);
7241 } else { 7295 } else {
7242 req->dest_cpu = RCU_MIGRATION_MUST_SYNC; 7296 req->dest_cpu = RCU_MIGRATION_MUST_SYNC;
7243 spin_unlock(&rq->lock); 7297 raw_spin_unlock(&rq->lock);
7244 WARN_ONCE(1, "migration_thread() on CPU %d, expected %d\n", badcpu, cpu); 7298 WARN_ONCE(1, "migration_thread() on CPU %d, expected %d\n", badcpu, cpu);
7245 } 7299 }
7246 local_irq_enable(); 7300 local_irq_enable();
@@ -7270,37 +7324,10 @@ static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu)
7270static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) 7324static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
7271{ 7325{
7272 int dest_cpu; 7326 int dest_cpu;
7273 const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(dead_cpu));
7274 7327
7275again: 7328again:
7276 /* Look for allowed, online CPU in same node. */ 7329 dest_cpu = select_fallback_rq(dead_cpu, p);
7277 for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask)
7278 if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
7279 goto move;
7280
7281 /* Any allowed, online CPU? */
7282 dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask);
7283 if (dest_cpu < nr_cpu_ids)
7284 goto move;
7285
7286 /* No more Mr. Nice Guy. */
7287 if (dest_cpu >= nr_cpu_ids) {
7288 cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
7289 dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
7290
7291 /*
7292 * Don't tell them about moving exiting tasks or
7293 * kernel threads (both mm NULL), since they never
7294 * leave kernel.
7295 */
7296 if (p->mm && printk_ratelimit()) {
7297 printk(KERN_INFO "process %d (%s) no "
7298 "longer affine to cpu%d\n",
7299 task_pid_nr(p), p->comm, dead_cpu);
7300 }
7301 }
7302 7330
7303move:
7304 /* It can have affinity changed while we were choosing. */ 7331 /* It can have affinity changed while we were choosing. */
7305 if (unlikely(!__migrate_task_irq(p, dead_cpu, dest_cpu))) 7332 if (unlikely(!__migrate_task_irq(p, dead_cpu, dest_cpu)))
7306 goto again; 7333 goto again;
@@ -7363,14 +7390,14 @@ void sched_idle_next(void)
7363 * Strictly not necessary since rest of the CPUs are stopped by now 7390 * Strictly not necessary since rest of the CPUs are stopped by now
7364 * and interrupts disabled on the current cpu. 7391 * and interrupts disabled on the current cpu.
7365 */ 7392 */
7366 spin_lock_irqsave(&rq->lock, flags); 7393 raw_spin_lock_irqsave(&rq->lock, flags);
7367 7394
7368 __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1); 7395 __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1);
7369 7396
7370 update_rq_clock(rq); 7397 update_rq_clock(rq);
7371 activate_task(rq, p, 0); 7398 activate_task(rq, p, 0);
7372 7399
7373 spin_unlock_irqrestore(&rq->lock, flags); 7400 raw_spin_unlock_irqrestore(&rq->lock, flags);
7374} 7401}
7375 7402
7376/* 7403/*
@@ -7406,9 +7433,9 @@ static void migrate_dead(unsigned int dead_cpu, struct task_struct *p)
7406 * that's OK. No task can be added to this CPU, so iteration is 7433 * that's OK. No task can be added to this CPU, so iteration is
7407 * fine. 7434 * fine.
7408 */ 7435 */
7409 spin_unlock_irq(&rq->lock); 7436 raw_spin_unlock_irq(&rq->lock);
7410 move_task_off_dead_cpu(dead_cpu, p); 7437 move_task_off_dead_cpu(dead_cpu, p);
7411 spin_lock_irq(&rq->lock); 7438 raw_spin_lock_irq(&rq->lock);
7412 7439
7413 put_task_struct(p); 7440 put_task_struct(p);
7414} 7441}
@@ -7674,13 +7701,13 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
7674 7701
7675 /* Update our root-domain */ 7702 /* Update our root-domain */
7676 rq = cpu_rq(cpu); 7703 rq = cpu_rq(cpu);
7677 spin_lock_irqsave(&rq->lock, flags); 7704 raw_spin_lock_irqsave(&rq->lock, flags);
7678 if (rq->rd) { 7705 if (rq->rd) {
7679 BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); 7706 BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
7680 7707
7681 set_rq_online(rq); 7708 set_rq_online(rq);
7682 } 7709 }
7683 spin_unlock_irqrestore(&rq->lock, flags); 7710 raw_spin_unlock_irqrestore(&rq->lock, flags);
7684 break; 7711 break;
7685 7712
7686#ifdef CONFIG_HOTPLUG_CPU 7713#ifdef CONFIG_HOTPLUG_CPU
@@ -7705,13 +7732,13 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
7705 put_task_struct(rq->migration_thread); 7732 put_task_struct(rq->migration_thread);
7706 rq->migration_thread = NULL; 7733 rq->migration_thread = NULL;
7707 /* Idle task back to normal (off runqueue, low prio) */ 7734 /* Idle task back to normal (off runqueue, low prio) */
7708 spin_lock_irq(&rq->lock); 7735 raw_spin_lock_irq(&rq->lock);
7709 update_rq_clock(rq); 7736 update_rq_clock(rq);
7710 deactivate_task(rq, rq->idle, 0); 7737 deactivate_task(rq, rq->idle, 0);
7711 __setscheduler(rq, rq->idle, SCHED_NORMAL, 0); 7738 __setscheduler(rq, rq->idle, SCHED_NORMAL, 0);
7712 rq->idle->sched_class = &idle_sched_class; 7739 rq->idle->sched_class = &idle_sched_class;
7713 migrate_dead_tasks(cpu); 7740 migrate_dead_tasks(cpu);
7714 spin_unlock_irq(&rq->lock); 7741 raw_spin_unlock_irq(&rq->lock);
7715 cpuset_unlock(); 7742 cpuset_unlock();
7716 migrate_nr_uninterruptible(rq); 7743 migrate_nr_uninterruptible(rq);
7717 BUG_ON(rq->nr_running != 0); 7744 BUG_ON(rq->nr_running != 0);
@@ -7721,30 +7748,30 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
7721 * they didn't take sched_hotcpu_mutex. Just wake up 7748 * they didn't take sched_hotcpu_mutex. Just wake up
7722 * the requestors. 7749 * the requestors.
7723 */ 7750 */
7724 spin_lock_irq(&rq->lock); 7751 raw_spin_lock_irq(&rq->lock);
7725 while (!list_empty(&rq->migration_queue)) { 7752 while (!list_empty(&rq->migration_queue)) {
7726 struct migration_req *req; 7753 struct migration_req *req;
7727 7754
7728 req = list_entry(rq->migration_queue.next, 7755 req = list_entry(rq->migration_queue.next,
7729 struct migration_req, list); 7756 struct migration_req, list);
7730 list_del_init(&req->list); 7757 list_del_init(&req->list);
7731 spin_unlock_irq(&rq->lock); 7758 raw_spin_unlock_irq(&rq->lock);
7732 complete(&req->done); 7759 complete(&req->done);
7733 spin_lock_irq(&rq->lock); 7760 raw_spin_lock_irq(&rq->lock);
7734 } 7761 }
7735 spin_unlock_irq(&rq->lock); 7762 raw_spin_unlock_irq(&rq->lock);
7736 break; 7763 break;
7737 7764
7738 case CPU_DYING: 7765 case CPU_DYING:
7739 case CPU_DYING_FROZEN: 7766 case CPU_DYING_FROZEN:
7740 /* Update our root-domain */ 7767 /* Update our root-domain */
7741 rq = cpu_rq(cpu); 7768 rq = cpu_rq(cpu);
7742 spin_lock_irqsave(&rq->lock, flags); 7769 raw_spin_lock_irqsave(&rq->lock, flags);
7743 if (rq->rd) { 7770 if (rq->rd) {
7744 BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); 7771 BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
7745 set_rq_offline(rq); 7772 set_rq_offline(rq);
7746 } 7773 }
7747 spin_unlock_irqrestore(&rq->lock, flags); 7774 raw_spin_unlock_irqrestore(&rq->lock, flags);
7748 break; 7775 break;
7749#endif 7776#endif
7750 } 7777 }
@@ -7974,7 +8001,7 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
7974 struct root_domain *old_rd = NULL; 8001 struct root_domain *old_rd = NULL;
7975 unsigned long flags; 8002 unsigned long flags;
7976 8003
7977 spin_lock_irqsave(&rq->lock, flags); 8004 raw_spin_lock_irqsave(&rq->lock, flags);
7978 8005
7979 if (rq->rd) { 8006 if (rq->rd) {
7980 old_rd = rq->rd; 8007 old_rd = rq->rd;
@@ -8000,7 +8027,7 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
8000 if (cpumask_test_cpu(rq->cpu, cpu_active_mask)) 8027 if (cpumask_test_cpu(rq->cpu, cpu_active_mask))
8001 set_rq_online(rq); 8028 set_rq_online(rq);
8002 8029
8003 spin_unlock_irqrestore(&rq->lock, flags); 8030 raw_spin_unlock_irqrestore(&rq->lock, flags);
8004 8031
8005 if (old_rd) 8032 if (old_rd)
8006 free_rootdomain(old_rd); 8033 free_rootdomain(old_rd);
@@ -9357,13 +9384,13 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
9357#ifdef CONFIG_SMP 9384#ifdef CONFIG_SMP
9358 rt_rq->rt_nr_migratory = 0; 9385 rt_rq->rt_nr_migratory = 0;
9359 rt_rq->overloaded = 0; 9386 rt_rq->overloaded = 0;
9360 plist_head_init(&rt_rq->pushable_tasks, &rq->lock); 9387 plist_head_init_raw(&rt_rq->pushable_tasks, &rq->lock);
9361#endif 9388#endif
9362 9389
9363 rt_rq->rt_time = 0; 9390 rt_rq->rt_time = 0;
9364 rt_rq->rt_throttled = 0; 9391 rt_rq->rt_throttled = 0;
9365 rt_rq->rt_runtime = 0; 9392 rt_rq->rt_runtime = 0;
9366 spin_lock_init(&rt_rq->rt_runtime_lock); 9393 raw_spin_lock_init(&rt_rq->rt_runtime_lock);
9367 9394
9368#ifdef CONFIG_RT_GROUP_SCHED 9395#ifdef CONFIG_RT_GROUP_SCHED
9369 rt_rq->rt_nr_boosted = 0; 9396 rt_rq->rt_nr_boosted = 0;
@@ -9523,7 +9550,7 @@ void __init sched_init(void)
9523 struct rq *rq; 9550 struct rq *rq;
9524 9551
9525 rq = cpu_rq(i); 9552 rq = cpu_rq(i);
9526 spin_lock_init(&rq->lock); 9553 raw_spin_lock_init(&rq->lock);
9527 rq->nr_running = 0; 9554 rq->nr_running = 0;
9528 rq->calc_load_active = 0; 9555 rq->calc_load_active = 0;
9529 rq->calc_load_update = jiffies + LOAD_FREQ; 9556 rq->calc_load_update = jiffies + LOAD_FREQ;
@@ -9621,7 +9648,7 @@ void __init sched_init(void)
9621#endif 9648#endif
9622 9649
9623#ifdef CONFIG_RT_MUTEXES 9650#ifdef CONFIG_RT_MUTEXES
9624 plist_head_init(&init_task.pi_waiters, &init_task.pi_lock); 9651 plist_head_init_raw(&init_task.pi_waiters, &init_task.pi_lock);
9625#endif 9652#endif
9626 9653
9627 /* 9654 /*
@@ -9665,7 +9692,7 @@ void __init sched_init(void)
9665#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP 9692#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
9666static inline int preempt_count_equals(int preempt_offset) 9693static inline int preempt_count_equals(int preempt_offset)
9667{ 9694{
9668 int nested = preempt_count() & ~PREEMPT_ACTIVE; 9695 int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth();
9669 9696
9670 return (nested == PREEMPT_INATOMIC_BASE + preempt_offset); 9697 return (nested == PREEMPT_INATOMIC_BASE + preempt_offset);
9671} 9698}
@@ -9746,13 +9773,13 @@ void normalize_rt_tasks(void)
9746 continue; 9773 continue;
9747 } 9774 }
9748 9775
9749 spin_lock(&p->pi_lock); 9776 raw_spin_lock(&p->pi_lock);
9750 rq = __task_rq_lock(p); 9777 rq = __task_rq_lock(p);
9751 9778
9752 normalize_task(rq, p); 9779 normalize_task(rq, p);
9753 9780
9754 __task_rq_unlock(rq); 9781 __task_rq_unlock(rq);
9755 spin_unlock(&p->pi_lock); 9782 raw_spin_unlock(&p->pi_lock);
9756 } while_each_thread(g, p); 9783 } while_each_thread(g, p);
9757 9784
9758 read_unlock_irqrestore(&tasklist_lock, flags); 9785 read_unlock_irqrestore(&tasklist_lock, flags);
@@ -10080,7 +10107,7 @@ void sched_move_task(struct task_struct *tsk)
10080 10107
10081#ifdef CONFIG_FAIR_GROUP_SCHED 10108#ifdef CONFIG_FAIR_GROUP_SCHED
10082 if (tsk->sched_class->moved_group) 10109 if (tsk->sched_class->moved_group)
10083 tsk->sched_class->moved_group(tsk); 10110 tsk->sched_class->moved_group(tsk, on_rq);
10084#endif 10111#endif
10085 10112
10086 if (unlikely(running)) 10113 if (unlikely(running))
@@ -10115,9 +10142,9 @@ static void set_se_shares(struct sched_entity *se, unsigned long shares)
10115 struct rq *rq = cfs_rq->rq; 10142 struct rq *rq = cfs_rq->rq;
10116 unsigned long flags; 10143 unsigned long flags;
10117 10144
10118 spin_lock_irqsave(&rq->lock, flags); 10145 raw_spin_lock_irqsave(&rq->lock, flags);
10119 __set_se_shares(se, shares); 10146 __set_se_shares(se, shares);
10120 spin_unlock_irqrestore(&rq->lock, flags); 10147 raw_spin_unlock_irqrestore(&rq->lock, flags);
10121} 10148}
10122 10149
10123static DEFINE_MUTEX(shares_mutex); 10150static DEFINE_MUTEX(shares_mutex);
@@ -10302,18 +10329,18 @@ static int tg_set_bandwidth(struct task_group *tg,
10302 if (err) 10329 if (err)
10303 goto unlock; 10330 goto unlock;
10304 10331
10305 spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock); 10332 raw_spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock);
10306 tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period); 10333 tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period);
10307 tg->rt_bandwidth.rt_runtime = rt_runtime; 10334 tg->rt_bandwidth.rt_runtime = rt_runtime;
10308 10335
10309 for_each_possible_cpu(i) { 10336 for_each_possible_cpu(i) {
10310 struct rt_rq *rt_rq = tg->rt_rq[i]; 10337 struct rt_rq *rt_rq = tg->rt_rq[i];
10311 10338
10312 spin_lock(&rt_rq->rt_runtime_lock); 10339 raw_spin_lock(&rt_rq->rt_runtime_lock);
10313 rt_rq->rt_runtime = rt_runtime; 10340 rt_rq->rt_runtime = rt_runtime;
10314 spin_unlock(&rt_rq->rt_runtime_lock); 10341 raw_spin_unlock(&rt_rq->rt_runtime_lock);
10315 } 10342 }
10316 spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock); 10343 raw_spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock);
10317 unlock: 10344 unlock:
10318 read_unlock(&tasklist_lock); 10345 read_unlock(&tasklist_lock);
10319 mutex_unlock(&rt_constraints_mutex); 10346 mutex_unlock(&rt_constraints_mutex);
@@ -10418,15 +10445,15 @@ static int sched_rt_global_constraints(void)
10418 if (sysctl_sched_rt_runtime == 0) 10445 if (sysctl_sched_rt_runtime == 0)
10419 return -EBUSY; 10446 return -EBUSY;
10420 10447
10421 spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags); 10448 raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
10422 for_each_possible_cpu(i) { 10449 for_each_possible_cpu(i) {
10423 struct rt_rq *rt_rq = &cpu_rq(i)->rt; 10450 struct rt_rq *rt_rq = &cpu_rq(i)->rt;
10424 10451
10425 spin_lock(&rt_rq->rt_runtime_lock); 10452 raw_spin_lock(&rt_rq->rt_runtime_lock);
10426 rt_rq->rt_runtime = global_rt_runtime(); 10453 rt_rq->rt_runtime = global_rt_runtime();
10427 spin_unlock(&rt_rq->rt_runtime_lock); 10454 raw_spin_unlock(&rt_rq->rt_runtime_lock);
10428 } 10455 }
10429 spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags); 10456 raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);
10430 10457
10431 return 0; 10458 return 0;
10432} 10459}
@@ -10717,9 +10744,9 @@ static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu)
10717 /* 10744 /*
10718 * Take rq->lock to make 64-bit read safe on 32-bit platforms. 10745 * Take rq->lock to make 64-bit read safe on 32-bit platforms.
10719 */ 10746 */
10720 spin_lock_irq(&cpu_rq(cpu)->lock); 10747 raw_spin_lock_irq(&cpu_rq(cpu)->lock);
10721 data = *cpuusage; 10748 data = *cpuusage;
10722 spin_unlock_irq(&cpu_rq(cpu)->lock); 10749 raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
10723#else 10750#else
10724 data = *cpuusage; 10751 data = *cpuusage;
10725#endif 10752#endif
@@ -10735,9 +10762,9 @@ static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
10735 /* 10762 /*
10736 * Take rq->lock to make 64-bit write safe on 32-bit platforms. 10763 * Take rq->lock to make 64-bit write safe on 32-bit platforms.
10737 */ 10764 */
10738 spin_lock_irq(&cpu_rq(cpu)->lock); 10765 raw_spin_lock_irq(&cpu_rq(cpu)->lock);
10739 *cpuusage = val; 10766 *cpuusage = val;
10740 spin_unlock_irq(&cpu_rq(cpu)->lock); 10767 raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
10741#else 10768#else
10742 *cpuusage = val; 10769 *cpuusage = val;
10743#endif 10770#endif
@@ -10971,9 +10998,9 @@ void synchronize_sched_expedited(void)
10971 init_completion(&req->done); 10998 init_completion(&req->done);
10972 req->task = NULL; 10999 req->task = NULL;
10973 req->dest_cpu = RCU_MIGRATION_NEED_QS; 11000 req->dest_cpu = RCU_MIGRATION_NEED_QS;
10974 spin_lock_irqsave(&rq->lock, flags); 11001 raw_spin_lock_irqsave(&rq->lock, flags);
10975 list_add(&req->list, &rq->migration_queue); 11002 list_add(&req->list, &rq->migration_queue);
10976 spin_unlock_irqrestore(&rq->lock, flags); 11003 raw_spin_unlock_irqrestore(&rq->lock, flags);
10977 wake_up_process(rq->migration_thread); 11004 wake_up_process(rq->migration_thread);
10978 } 11005 }
10979 for_each_online_cpu(cpu) { 11006 for_each_online_cpu(cpu) {
@@ -10981,11 +11008,11 @@ void synchronize_sched_expedited(void)
10981 req = &per_cpu(rcu_migration_req, cpu); 11008 req = &per_cpu(rcu_migration_req, cpu);
10982 rq = cpu_rq(cpu); 11009 rq = cpu_rq(cpu);
10983 wait_for_completion(&req->done); 11010 wait_for_completion(&req->done);
10984 spin_lock_irqsave(&rq->lock, flags); 11011 raw_spin_lock_irqsave(&rq->lock, flags);
10985 if (unlikely(req->dest_cpu == RCU_MIGRATION_MUST_SYNC)) 11012 if (unlikely(req->dest_cpu == RCU_MIGRATION_MUST_SYNC))
10986 need_full_sync = 1; 11013 need_full_sync = 1;
10987 req->dest_cpu = RCU_MIGRATION_IDLE; 11014 req->dest_cpu = RCU_MIGRATION_IDLE;
10988 spin_unlock_irqrestore(&rq->lock, flags); 11015 raw_spin_unlock_irqrestore(&rq->lock, flags);
10989 } 11016 }
10990 rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE; 11017 rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE;
10991 synchronize_sched_expedited_count++; 11018 synchronize_sched_expedited_count++;
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
index 479ce5682d7c..5b496132c28a 100644
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@ -236,6 +236,18 @@ void sched_clock_idle_wakeup_event(u64 delta_ns)
236} 236}
237EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event); 237EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
238 238
239unsigned long long cpu_clock(int cpu)
240{
241 unsigned long long clock;
242 unsigned long flags;
243
244 local_irq_save(flags);
245 clock = sched_clock_cpu(cpu);
246 local_irq_restore(flags);
247
248 return clock;
249}
250
239#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ 251#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
240 252
241void sched_clock_init(void) 253void sched_clock_init(void)
@@ -251,17 +263,12 @@ u64 sched_clock_cpu(int cpu)
251 return sched_clock(); 263 return sched_clock();
252} 264}
253 265
254#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
255 266
256unsigned long long cpu_clock(int cpu) 267unsigned long long cpu_clock(int cpu)
257{ 268{
258 unsigned long long clock; 269 return sched_clock_cpu(cpu);
259 unsigned long flags; 270}
260 271
261 local_irq_save(flags); 272#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
262 clock = sched_clock_cpu(cpu);
263 local_irq_restore(flags);
264 273
265 return clock;
266}
267EXPORT_SYMBOL_GPL(cpu_clock); 274EXPORT_SYMBOL_GPL(cpu_clock);
diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c
index 0f052fc674d5..597b33099dfa 100644
--- a/kernel/sched_cpupri.c
+++ b/kernel/sched_cpupri.c
@@ -135,26 +135,26 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
135 if (likely(newpri != CPUPRI_INVALID)) { 135 if (likely(newpri != CPUPRI_INVALID)) {
136 struct cpupri_vec *vec = &cp->pri_to_cpu[newpri]; 136 struct cpupri_vec *vec = &cp->pri_to_cpu[newpri];
137 137
138 spin_lock_irqsave(&vec->lock, flags); 138 raw_spin_lock_irqsave(&vec->lock, flags);
139 139
140 cpumask_set_cpu(cpu, vec->mask); 140 cpumask_set_cpu(cpu, vec->mask);
141 vec->count++; 141 vec->count++;
142 if (vec->count == 1) 142 if (vec->count == 1)
143 set_bit(newpri, cp->pri_active); 143 set_bit(newpri, cp->pri_active);
144 144
145 spin_unlock_irqrestore(&vec->lock, flags); 145 raw_spin_unlock_irqrestore(&vec->lock, flags);
146 } 146 }
147 if (likely(oldpri != CPUPRI_INVALID)) { 147 if (likely(oldpri != CPUPRI_INVALID)) {
148 struct cpupri_vec *vec = &cp->pri_to_cpu[oldpri]; 148 struct cpupri_vec *vec = &cp->pri_to_cpu[oldpri];
149 149
150 spin_lock_irqsave(&vec->lock, flags); 150 raw_spin_lock_irqsave(&vec->lock, flags);
151 151
152 vec->count--; 152 vec->count--;
153 if (!vec->count) 153 if (!vec->count)
154 clear_bit(oldpri, cp->pri_active); 154 clear_bit(oldpri, cp->pri_active);
155 cpumask_clear_cpu(cpu, vec->mask); 155 cpumask_clear_cpu(cpu, vec->mask);
156 156
157 spin_unlock_irqrestore(&vec->lock, flags); 157 raw_spin_unlock_irqrestore(&vec->lock, flags);
158 } 158 }
159 159
160 *currpri = newpri; 160 *currpri = newpri;
@@ -180,7 +180,7 @@ int cpupri_init(struct cpupri *cp, bool bootmem)
180 for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) { 180 for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) {
181 struct cpupri_vec *vec = &cp->pri_to_cpu[i]; 181 struct cpupri_vec *vec = &cp->pri_to_cpu[i];
182 182
183 spin_lock_init(&vec->lock); 183 raw_spin_lock_init(&vec->lock);
184 vec->count = 0; 184 vec->count = 0;
185 if (!zalloc_cpumask_var(&vec->mask, gfp)) 185 if (!zalloc_cpumask_var(&vec->mask, gfp))
186 goto cleanup; 186 goto cleanup;
diff --git a/kernel/sched_cpupri.h b/kernel/sched_cpupri.h
index 9a7e859b8fbf..7cb5bb6b95be 100644
--- a/kernel/sched_cpupri.h
+++ b/kernel/sched_cpupri.h
@@ -12,7 +12,7 @@
12/* values 2-101 are RT priorities 0-99 */ 12/* values 2-101 are RT priorities 0-99 */
13 13
14struct cpupri_vec { 14struct cpupri_vec {
15 spinlock_t lock; 15 raw_spinlock_t lock;
16 int count; 16 int count;
17 cpumask_var_t mask; 17 cpumask_var_t mask;
18}; 18};
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 5ae24fc65d75..67f95aada4b9 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -184,7 +184,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
184 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock", 184 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock",
185 SPLIT_NS(cfs_rq->exec_clock)); 185 SPLIT_NS(cfs_rq->exec_clock));
186 186
187 spin_lock_irqsave(&rq->lock, flags); 187 raw_spin_lock_irqsave(&rq->lock, flags);
188 if (cfs_rq->rb_leftmost) 188 if (cfs_rq->rb_leftmost)
189 MIN_vruntime = (__pick_next_entity(cfs_rq))->vruntime; 189 MIN_vruntime = (__pick_next_entity(cfs_rq))->vruntime;
190 last = __pick_last_entity(cfs_rq); 190 last = __pick_last_entity(cfs_rq);
@@ -192,7 +192,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
192 max_vruntime = last->vruntime; 192 max_vruntime = last->vruntime;
193 min_vruntime = cfs_rq->min_vruntime; 193 min_vruntime = cfs_rq->min_vruntime;
194 rq0_min_vruntime = cpu_rq(0)->cfs.min_vruntime; 194 rq0_min_vruntime = cpu_rq(0)->cfs.min_vruntime;
195 spin_unlock_irqrestore(&rq->lock, flags); 195 raw_spin_unlock_irqrestore(&rq->lock, flags);
196 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "MIN_vruntime", 196 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "MIN_vruntime",
197 SPLIT_NS(MIN_vruntime)); 197 SPLIT_NS(MIN_vruntime));
198 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "min_vruntime", 198 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "min_vruntime",
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 804a411838f1..8fe7ee81c552 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -510,6 +510,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
510 curr->sum_exec_runtime += delta_exec; 510 curr->sum_exec_runtime += delta_exec;
511 schedstat_add(cfs_rq, exec_clock, delta_exec); 511 schedstat_add(cfs_rq, exec_clock, delta_exec);
512 delta_exec_weighted = calc_delta_fair(delta_exec, curr); 512 delta_exec_weighted = calc_delta_fair(delta_exec, curr);
513
513 curr->vruntime += delta_exec_weighted; 514 curr->vruntime += delta_exec_weighted;
514 update_min_vruntime(cfs_rq); 515 update_min_vruntime(cfs_rq);
515} 516}
@@ -765,16 +766,26 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
765 se->vruntime = vruntime; 766 se->vruntime = vruntime;
766} 767}
767 768
769#define ENQUEUE_WAKEUP 1
770#define ENQUEUE_MIGRATE 2
771
768static void 772static void
769enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup) 773enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
770{ 774{
771 /* 775 /*
776 * Update the normalized vruntime before updating min_vruntime
777 * through callig update_curr().
778 */
779 if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATE))
780 se->vruntime += cfs_rq->min_vruntime;
781
782 /*
772 * Update run-time statistics of the 'current'. 783 * Update run-time statistics of the 'current'.
773 */ 784 */
774 update_curr(cfs_rq); 785 update_curr(cfs_rq);
775 account_entity_enqueue(cfs_rq, se); 786 account_entity_enqueue(cfs_rq, se);
776 787
777 if (wakeup) { 788 if (flags & ENQUEUE_WAKEUP) {
778 place_entity(cfs_rq, se, 0); 789 place_entity(cfs_rq, se, 0);
779 enqueue_sleeper(cfs_rq, se); 790 enqueue_sleeper(cfs_rq, se);
780 } 791 }
@@ -828,6 +839,14 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
828 __dequeue_entity(cfs_rq, se); 839 __dequeue_entity(cfs_rq, se);
829 account_entity_dequeue(cfs_rq, se); 840 account_entity_dequeue(cfs_rq, se);
830 update_min_vruntime(cfs_rq); 841 update_min_vruntime(cfs_rq);
842
843 /*
844 * Normalize the entity after updating the min_vruntime because the
845 * update can refer to the ->curr item and we need to reflect this
846 * movement in our normalized position.
847 */
848 if (!sleep)
849 se->vruntime -= cfs_rq->min_vruntime;
831} 850}
832 851
833/* 852/*
@@ -1038,13 +1057,19 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
1038{ 1057{
1039 struct cfs_rq *cfs_rq; 1058 struct cfs_rq *cfs_rq;
1040 struct sched_entity *se = &p->se; 1059 struct sched_entity *se = &p->se;
1060 int flags = 0;
1061
1062 if (wakeup)
1063 flags |= ENQUEUE_WAKEUP;
1064 if (p->state == TASK_WAKING)
1065 flags |= ENQUEUE_MIGRATE;
1041 1066
1042 for_each_sched_entity(se) { 1067 for_each_sched_entity(se) {
1043 if (se->on_rq) 1068 if (se->on_rq)
1044 break; 1069 break;
1045 cfs_rq = cfs_rq_of(se); 1070 cfs_rq = cfs_rq_of(se);
1046 enqueue_entity(cfs_rq, se, wakeup); 1071 enqueue_entity(cfs_rq, se, flags);
1047 wakeup = 1; 1072 flags = ENQUEUE_WAKEUP;
1048 } 1073 }
1049 1074
1050 hrtick_update(rq); 1075 hrtick_update(rq);
@@ -1120,6 +1145,14 @@ static void yield_task_fair(struct rq *rq)
1120 1145
1121#ifdef CONFIG_SMP 1146#ifdef CONFIG_SMP
1122 1147
1148static void task_waking_fair(struct rq *rq, struct task_struct *p)
1149{
1150 struct sched_entity *se = &p->se;
1151 struct cfs_rq *cfs_rq = cfs_rq_of(se);
1152
1153 se->vruntime -= cfs_rq->min_vruntime;
1154}
1155
1123#ifdef CONFIG_FAIR_GROUP_SCHED 1156#ifdef CONFIG_FAIR_GROUP_SCHED
1124/* 1157/*
1125 * effective_load() calculates the load change as seen from the root_task_group 1158 * effective_load() calculates the load change as seen from the root_task_group
@@ -1429,6 +1462,9 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
1429 } 1462 }
1430 1463
1431 for_each_domain(cpu, tmp) { 1464 for_each_domain(cpu, tmp) {
1465 if (!(tmp->flags & SD_LOAD_BALANCE))
1466 continue;
1467
1432 /* 1468 /*
1433 * If power savings logic is enabled for a domain, see if we 1469 * If power savings logic is enabled for a domain, see if we
1434 * are not overloaded, if so, don't balance wider. 1470 * are not overloaded, if so, don't balance wider.
@@ -1472,7 +1508,7 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
1472 * If there's an idle sibling in this domain, make that 1508 * If there's an idle sibling in this domain, make that
1473 * the wake_affine target instead of the current cpu. 1509 * the wake_affine target instead of the current cpu.
1474 */ 1510 */
1475 if (tmp->flags & SD_PREFER_SIBLING) 1511 if (tmp->flags & SD_SHARE_PKG_RESOURCES)
1476 target = select_idle_sibling(p, tmp, target); 1512 target = select_idle_sibling(p, tmp, target);
1477 1513
1478 if (target >= 0) { 1514 if (target >= 0) {
@@ -1955,7 +1991,7 @@ static void task_fork_fair(struct task_struct *p)
1955 struct rq *rq = this_rq(); 1991 struct rq *rq = this_rq();
1956 unsigned long flags; 1992 unsigned long flags;
1957 1993
1958 spin_lock_irqsave(&rq->lock, flags); 1994 raw_spin_lock_irqsave(&rq->lock, flags);
1959 1995
1960 if (unlikely(task_cpu(p) != this_cpu)) 1996 if (unlikely(task_cpu(p) != this_cpu))
1961 __set_task_cpu(p, this_cpu); 1997 __set_task_cpu(p, this_cpu);
@@ -1975,7 +2011,9 @@ static void task_fork_fair(struct task_struct *p)
1975 resched_task(rq->curr); 2011 resched_task(rq->curr);
1976 } 2012 }
1977 2013
1978 spin_unlock_irqrestore(&rq->lock, flags); 2014 se->vruntime -= cfs_rq->min_vruntime;
2015
2016 raw_spin_unlock_irqrestore(&rq->lock, flags);
1979} 2017}
1980 2018
1981/* 2019/*
@@ -2028,12 +2066,13 @@ static void set_curr_task_fair(struct rq *rq)
2028} 2066}
2029 2067
2030#ifdef CONFIG_FAIR_GROUP_SCHED 2068#ifdef CONFIG_FAIR_GROUP_SCHED
2031static void moved_group_fair(struct task_struct *p) 2069static void moved_group_fair(struct task_struct *p, int on_rq)
2032{ 2070{
2033 struct cfs_rq *cfs_rq = task_cfs_rq(p); 2071 struct cfs_rq *cfs_rq = task_cfs_rq(p);
2034 2072
2035 update_curr(cfs_rq); 2073 update_curr(cfs_rq);
2036 place_entity(cfs_rq, &p->se, 1); 2074 if (!on_rq)
2075 place_entity(cfs_rq, &p->se, 1);
2037} 2076}
2038#endif 2077#endif
2039 2078
@@ -2073,6 +2112,8 @@ static const struct sched_class fair_sched_class = {
2073 .move_one_task = move_one_task_fair, 2112 .move_one_task = move_one_task_fair,
2074 .rq_online = rq_online_fair, 2113 .rq_online = rq_online_fair,
2075 .rq_offline = rq_offline_fair, 2114 .rq_offline = rq_offline_fair,
2115
2116 .task_waking = task_waking_fair,
2076#endif 2117#endif
2077 2118
2078 .set_curr_task = set_curr_task_fair, 2119 .set_curr_task = set_curr_task_fair,
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index 33d5384a73a8..5f93b570d383 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -34,10 +34,10 @@ static struct task_struct *pick_next_task_idle(struct rq *rq)
34static void 34static void
35dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep) 35dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep)
36{ 36{
37 spin_unlock_irq(&rq->lock); 37 raw_spin_unlock_irq(&rq->lock);
38 printk(KERN_ERR "bad: scheduling from the idle thread!\n"); 38 printk(KERN_ERR "bad: scheduling from the idle thread!\n");
39 dump_stack(); 39 dump_stack();
40 spin_lock_irq(&rq->lock); 40 raw_spin_lock_irq(&rq->lock);
41} 41}
42 42
43static void put_prev_task_idle(struct rq *rq, struct task_struct *prev) 43static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index aecbd9c6b20c..f48328ac216f 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -327,7 +327,7 @@ static int do_balance_runtime(struct rt_rq *rt_rq)
327 327
328 weight = cpumask_weight(rd->span); 328 weight = cpumask_weight(rd->span);
329 329
330 spin_lock(&rt_b->rt_runtime_lock); 330 raw_spin_lock(&rt_b->rt_runtime_lock);
331 rt_period = ktime_to_ns(rt_b->rt_period); 331 rt_period = ktime_to_ns(rt_b->rt_period);
332 for_each_cpu(i, rd->span) { 332 for_each_cpu(i, rd->span) {
333 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); 333 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
@@ -336,7 +336,7 @@ static int do_balance_runtime(struct rt_rq *rt_rq)
336 if (iter == rt_rq) 336 if (iter == rt_rq)
337 continue; 337 continue;
338 338
339 spin_lock(&iter->rt_runtime_lock); 339 raw_spin_lock(&iter->rt_runtime_lock);
340 /* 340 /*
341 * Either all rqs have inf runtime and there's nothing to steal 341 * Either all rqs have inf runtime and there's nothing to steal
342 * or __disable_runtime() below sets a specific rq to inf to 342 * or __disable_runtime() below sets a specific rq to inf to
@@ -358,14 +358,14 @@ static int do_balance_runtime(struct rt_rq *rt_rq)
358 rt_rq->rt_runtime += diff; 358 rt_rq->rt_runtime += diff;
359 more = 1; 359 more = 1;
360 if (rt_rq->rt_runtime == rt_period) { 360 if (rt_rq->rt_runtime == rt_period) {
361 spin_unlock(&iter->rt_runtime_lock); 361 raw_spin_unlock(&iter->rt_runtime_lock);
362 break; 362 break;
363 } 363 }
364 } 364 }
365next: 365next:
366 spin_unlock(&iter->rt_runtime_lock); 366 raw_spin_unlock(&iter->rt_runtime_lock);
367 } 367 }
368 spin_unlock(&rt_b->rt_runtime_lock); 368 raw_spin_unlock(&rt_b->rt_runtime_lock);
369 369
370 return more; 370 return more;
371} 371}
@@ -386,8 +386,8 @@ static void __disable_runtime(struct rq *rq)
386 s64 want; 386 s64 want;
387 int i; 387 int i;
388 388
389 spin_lock(&rt_b->rt_runtime_lock); 389 raw_spin_lock(&rt_b->rt_runtime_lock);
390 spin_lock(&rt_rq->rt_runtime_lock); 390 raw_spin_lock(&rt_rq->rt_runtime_lock);
391 /* 391 /*
392 * Either we're all inf and nobody needs to borrow, or we're 392 * Either we're all inf and nobody needs to borrow, or we're
393 * already disabled and thus have nothing to do, or we have 393 * already disabled and thus have nothing to do, or we have
@@ -396,7 +396,7 @@ static void __disable_runtime(struct rq *rq)
396 if (rt_rq->rt_runtime == RUNTIME_INF || 396 if (rt_rq->rt_runtime == RUNTIME_INF ||
397 rt_rq->rt_runtime == rt_b->rt_runtime) 397 rt_rq->rt_runtime == rt_b->rt_runtime)
398 goto balanced; 398 goto balanced;
399 spin_unlock(&rt_rq->rt_runtime_lock); 399 raw_spin_unlock(&rt_rq->rt_runtime_lock);
400 400
401 /* 401 /*
402 * Calculate the difference between what we started out with 402 * Calculate the difference between what we started out with
@@ -418,7 +418,7 @@ static void __disable_runtime(struct rq *rq)
418 if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF) 418 if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF)
419 continue; 419 continue;
420 420
421 spin_lock(&iter->rt_runtime_lock); 421 raw_spin_lock(&iter->rt_runtime_lock);
422 if (want > 0) { 422 if (want > 0) {
423 diff = min_t(s64, iter->rt_runtime, want); 423 diff = min_t(s64, iter->rt_runtime, want);
424 iter->rt_runtime -= diff; 424 iter->rt_runtime -= diff;
@@ -427,13 +427,13 @@ static void __disable_runtime(struct rq *rq)
427 iter->rt_runtime -= want; 427 iter->rt_runtime -= want;
428 want -= want; 428 want -= want;
429 } 429 }
430 spin_unlock(&iter->rt_runtime_lock); 430 raw_spin_unlock(&iter->rt_runtime_lock);
431 431
432 if (!want) 432 if (!want)
433 break; 433 break;
434 } 434 }
435 435
436 spin_lock(&rt_rq->rt_runtime_lock); 436 raw_spin_lock(&rt_rq->rt_runtime_lock);
437 /* 437 /*
438 * We cannot be left wanting - that would mean some runtime 438 * We cannot be left wanting - that would mean some runtime
439 * leaked out of the system. 439 * leaked out of the system.
@@ -445,8 +445,8 @@ balanced:
445 * runtime - in which case borrowing doesn't make sense. 445 * runtime - in which case borrowing doesn't make sense.
446 */ 446 */
447 rt_rq->rt_runtime = RUNTIME_INF; 447 rt_rq->rt_runtime = RUNTIME_INF;
448 spin_unlock(&rt_rq->rt_runtime_lock); 448 raw_spin_unlock(&rt_rq->rt_runtime_lock);
449 spin_unlock(&rt_b->rt_runtime_lock); 449 raw_spin_unlock(&rt_b->rt_runtime_lock);
450 } 450 }
451} 451}
452 452
@@ -454,9 +454,9 @@ static void disable_runtime(struct rq *rq)
454{ 454{
455 unsigned long flags; 455 unsigned long flags;
456 456
457 spin_lock_irqsave(&rq->lock, flags); 457 raw_spin_lock_irqsave(&rq->lock, flags);
458 __disable_runtime(rq); 458 __disable_runtime(rq);
459 spin_unlock_irqrestore(&rq->lock, flags); 459 raw_spin_unlock_irqrestore(&rq->lock, flags);
460} 460}
461 461
462static void __enable_runtime(struct rq *rq) 462static void __enable_runtime(struct rq *rq)
@@ -472,13 +472,13 @@ static void __enable_runtime(struct rq *rq)
472 for_each_leaf_rt_rq(rt_rq, rq) { 472 for_each_leaf_rt_rq(rt_rq, rq) {
473 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); 473 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
474 474
475 spin_lock(&rt_b->rt_runtime_lock); 475 raw_spin_lock(&rt_b->rt_runtime_lock);
476 spin_lock(&rt_rq->rt_runtime_lock); 476 raw_spin_lock(&rt_rq->rt_runtime_lock);
477 rt_rq->rt_runtime = rt_b->rt_runtime; 477 rt_rq->rt_runtime = rt_b->rt_runtime;
478 rt_rq->rt_time = 0; 478 rt_rq->rt_time = 0;
479 rt_rq->rt_throttled = 0; 479 rt_rq->rt_throttled = 0;
480 spin_unlock(&rt_rq->rt_runtime_lock); 480 raw_spin_unlock(&rt_rq->rt_runtime_lock);
481 spin_unlock(&rt_b->rt_runtime_lock); 481 raw_spin_unlock(&rt_b->rt_runtime_lock);
482 } 482 }
483} 483}
484 484
@@ -486,9 +486,9 @@ static void enable_runtime(struct rq *rq)
486{ 486{
487 unsigned long flags; 487 unsigned long flags;
488 488
489 spin_lock_irqsave(&rq->lock, flags); 489 raw_spin_lock_irqsave(&rq->lock, flags);
490 __enable_runtime(rq); 490 __enable_runtime(rq);
491 spin_unlock_irqrestore(&rq->lock, flags); 491 raw_spin_unlock_irqrestore(&rq->lock, flags);
492} 492}
493 493
494static int balance_runtime(struct rt_rq *rt_rq) 494static int balance_runtime(struct rt_rq *rt_rq)
@@ -496,9 +496,9 @@ static int balance_runtime(struct rt_rq *rt_rq)
496 int more = 0; 496 int more = 0;
497 497
498 if (rt_rq->rt_time > rt_rq->rt_runtime) { 498 if (rt_rq->rt_time > rt_rq->rt_runtime) {
499 spin_unlock(&rt_rq->rt_runtime_lock); 499 raw_spin_unlock(&rt_rq->rt_runtime_lock);
500 more = do_balance_runtime(rt_rq); 500 more = do_balance_runtime(rt_rq);
501 spin_lock(&rt_rq->rt_runtime_lock); 501 raw_spin_lock(&rt_rq->rt_runtime_lock);
502 } 502 }
503 503
504 return more; 504 return more;
@@ -524,11 +524,11 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
524 struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i); 524 struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
525 struct rq *rq = rq_of_rt_rq(rt_rq); 525 struct rq *rq = rq_of_rt_rq(rt_rq);
526 526
527 spin_lock(&rq->lock); 527 raw_spin_lock(&rq->lock);
528 if (rt_rq->rt_time) { 528 if (rt_rq->rt_time) {
529 u64 runtime; 529 u64 runtime;
530 530
531 spin_lock(&rt_rq->rt_runtime_lock); 531 raw_spin_lock(&rt_rq->rt_runtime_lock);
532 if (rt_rq->rt_throttled) 532 if (rt_rq->rt_throttled)
533 balance_runtime(rt_rq); 533 balance_runtime(rt_rq);
534 runtime = rt_rq->rt_runtime; 534 runtime = rt_rq->rt_runtime;
@@ -539,13 +539,13 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
539 } 539 }
540 if (rt_rq->rt_time || rt_rq->rt_nr_running) 540 if (rt_rq->rt_time || rt_rq->rt_nr_running)
541 idle = 0; 541 idle = 0;
542 spin_unlock(&rt_rq->rt_runtime_lock); 542 raw_spin_unlock(&rt_rq->rt_runtime_lock);
543 } else if (rt_rq->rt_nr_running) 543 } else if (rt_rq->rt_nr_running)
544 idle = 0; 544 idle = 0;
545 545
546 if (enqueue) 546 if (enqueue)
547 sched_rt_rq_enqueue(rt_rq); 547 sched_rt_rq_enqueue(rt_rq);
548 spin_unlock(&rq->lock); 548 raw_spin_unlock(&rq->lock);
549 } 549 }
550 550
551 return idle; 551 return idle;
@@ -624,11 +624,11 @@ static void update_curr_rt(struct rq *rq)
624 rt_rq = rt_rq_of_se(rt_se); 624 rt_rq = rt_rq_of_se(rt_se);
625 625
626 if (sched_rt_runtime(rt_rq) != RUNTIME_INF) { 626 if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
627 spin_lock(&rt_rq->rt_runtime_lock); 627 raw_spin_lock(&rt_rq->rt_runtime_lock);
628 rt_rq->rt_time += delta_exec; 628 rt_rq->rt_time += delta_exec;
629 if (sched_rt_runtime_exceeded(rt_rq)) 629 if (sched_rt_runtime_exceeded(rt_rq))
630 resched_task(curr); 630 resched_task(curr);
631 spin_unlock(&rt_rq->rt_runtime_lock); 631 raw_spin_unlock(&rt_rq->rt_runtime_lock);
632 } 632 }
633 } 633 }
634} 634}
@@ -1246,7 +1246,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
1246 task_running(rq, task) || 1246 task_running(rq, task) ||
1247 !task->se.on_rq)) { 1247 !task->se.on_rq)) {
1248 1248
1249 spin_unlock(&lowest_rq->lock); 1249 raw_spin_unlock(&lowest_rq->lock);
1250 lowest_rq = NULL; 1250 lowest_rq = NULL;
1251 break; 1251 break;
1252 } 1252 }
@@ -1472,7 +1472,7 @@ static void post_schedule_rt(struct rq *rq)
1472 * If we are not running and we are not going to reschedule soon, we should 1472 * If we are not running and we are not going to reschedule soon, we should
1473 * try to push tasks away now 1473 * try to push tasks away now
1474 */ 1474 */
1475static void task_wake_up_rt(struct rq *rq, struct task_struct *p) 1475static void task_woken_rt(struct rq *rq, struct task_struct *p)
1476{ 1476{
1477 if (!task_running(rq, p) && 1477 if (!task_running(rq, p) &&
1478 !test_tsk_need_resched(rq->curr) && 1478 !test_tsk_need_resched(rq->curr) &&
@@ -1753,7 +1753,7 @@ static const struct sched_class rt_sched_class = {
1753 .rq_offline = rq_offline_rt, 1753 .rq_offline = rq_offline_rt,
1754 .pre_schedule = pre_schedule_rt, 1754 .pre_schedule = pre_schedule_rt,
1755 .post_schedule = post_schedule_rt, 1755 .post_schedule = post_schedule_rt,
1756 .task_wake_up = task_wake_up_rt, 1756 .task_woken = task_woken_rt,
1757 .switched_from = switched_from_rt, 1757 .switched_from = switched_from_rt,
1758#endif 1758#endif
1759 1759
diff --git a/kernel/signal.c b/kernel/signal.c
index 6b982f2cf524..934ae5e687b9 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -218,13 +218,13 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi
218 struct user_struct *user; 218 struct user_struct *user;
219 219
220 /* 220 /*
221 * We won't get problems with the target's UID changing under us 221 * Protect access to @t credentials. This can go away when all
222 * because changing it requires RCU be used, and if t != current, the 222 * callers hold rcu read lock.
223 * caller must be holding the RCU readlock (by way of a spinlock) and
224 * we use RCU protection here
225 */ 223 */
224 rcu_read_lock();
226 user = get_uid(__task_cred(t)->user); 225 user = get_uid(__task_cred(t)->user);
227 atomic_inc(&user->sigpending); 226 atomic_inc(&user->sigpending);
227 rcu_read_unlock();
228 228
229 if (override_rlimit || 229 if (override_rlimit ||
230 atomic_read(&user->sigpending) <= 230 atomic_read(&user->sigpending) <=
@@ -423,7 +423,7 @@ still_pending:
423 */ 423 */
424 info->si_signo = sig; 424 info->si_signo = sig;
425 info->si_errno = 0; 425 info->si_errno = 0;
426 info->si_code = 0; 426 info->si_code = SI_USER;
427 info->si_pid = 0; 427 info->si_pid = 0;
428 info->si_uid = 0; 428 info->si_uid = 0;
429 } 429 }
@@ -607,6 +607,17 @@ static int rm_from_queue(unsigned long mask, struct sigpending *s)
607 return 1; 607 return 1;
608} 608}
609 609
610static inline int is_si_special(const struct siginfo *info)
611{
612 return info <= SEND_SIG_FORCED;
613}
614
615static inline bool si_fromuser(const struct siginfo *info)
616{
617 return info == SEND_SIG_NOINFO ||
618 (!is_si_special(info) && SI_FROMUSER(info));
619}
620
610/* 621/*
611 * Bad permissions for sending the signal 622 * Bad permissions for sending the signal
612 * - the caller must hold at least the RCU read lock 623 * - the caller must hold at least the RCU read lock
@@ -621,7 +632,7 @@ static int check_kill_permission(int sig, struct siginfo *info,
621 if (!valid_signal(sig)) 632 if (!valid_signal(sig))
622 return -EINVAL; 633 return -EINVAL;
623 634
624 if (info != SEND_SIG_NOINFO && (is_si_special(info) || SI_FROMKERNEL(info))) 635 if (!si_fromuser(info))
625 return 0; 636 return 0;
626 637
627 error = audit_signal_info(sig, t); /* Let audit system see the signal */ 638 error = audit_signal_info(sig, t); /* Let audit system see the signal */
@@ -949,9 +960,8 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
949 int from_ancestor_ns = 0; 960 int from_ancestor_ns = 0;
950 961
951#ifdef CONFIG_PID_NS 962#ifdef CONFIG_PID_NS
952 if (!is_si_special(info) && SI_FROMUSER(info) && 963 from_ancestor_ns = si_fromuser(info) &&
953 task_pid_nr_ns(current, task_active_pid_ns(t)) <= 0) 964 !task_pid_nr_ns(current, task_active_pid_ns(t));
954 from_ancestor_ns = 1;
955#endif 965#endif
956 966
957 return __send_signal(sig, info, t, group, from_ancestor_ns); 967 return __send_signal(sig, info, t, group, from_ancestor_ns);
@@ -969,7 +979,8 @@ static void print_fatal_signal(struct pt_regs *regs, int signr)
969 for (i = 0; i < 16; i++) { 979 for (i = 0; i < 16; i++) {
970 unsigned char insn; 980 unsigned char insn;
971 981
972 __get_user(insn, (unsigned char *)(regs->ip + i)); 982 if (get_user(insn, (unsigned char *)(regs->ip + i)))
983 break;
973 printk("%02x ", insn); 984 printk("%02x ", insn);
974 } 985 }
975 } 986 }
@@ -1052,12 +1063,6 @@ force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
1052 return ret; 1063 return ret;
1053} 1064}
1054 1065
1055void
1056force_sig_specific(int sig, struct task_struct *t)
1057{
1058 force_sig_info(sig, SEND_SIG_FORCED, t);
1059}
1060
1061/* 1066/*
1062 * Nuke all other threads in the group. 1067 * Nuke all other threads in the group.
1063 */ 1068 */
@@ -1175,19 +1180,19 @@ int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid,
1175 int ret = -EINVAL; 1180 int ret = -EINVAL;
1176 struct task_struct *p; 1181 struct task_struct *p;
1177 const struct cred *pcred; 1182 const struct cred *pcred;
1183 unsigned long flags;
1178 1184
1179 if (!valid_signal(sig)) 1185 if (!valid_signal(sig))
1180 return ret; 1186 return ret;
1181 1187
1182 read_lock(&tasklist_lock); 1188 rcu_read_lock();
1183 p = pid_task(pid, PIDTYPE_PID); 1189 p = pid_task(pid, PIDTYPE_PID);
1184 if (!p) { 1190 if (!p) {
1185 ret = -ESRCH; 1191 ret = -ESRCH;
1186 goto out_unlock; 1192 goto out_unlock;
1187 } 1193 }
1188 pcred = __task_cred(p); 1194 pcred = __task_cred(p);
1189 if ((info == SEND_SIG_NOINFO || 1195 if (si_fromuser(info) &&
1190 (!is_si_special(info) && SI_FROMUSER(info))) &&
1191 euid != pcred->suid && euid != pcred->uid && 1196 euid != pcred->suid && euid != pcred->uid &&
1192 uid != pcred->suid && uid != pcred->uid) { 1197 uid != pcred->suid && uid != pcred->uid) {
1193 ret = -EPERM; 1198 ret = -EPERM;
@@ -1196,14 +1201,16 @@ int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid,
1196 ret = security_task_kill(p, info, sig, secid); 1201 ret = security_task_kill(p, info, sig, secid);
1197 if (ret) 1202 if (ret)
1198 goto out_unlock; 1203 goto out_unlock;
1199 if (sig && p->sighand) { 1204
1200 unsigned long flags; 1205 if (sig) {
1201 spin_lock_irqsave(&p->sighand->siglock, flags); 1206 if (lock_task_sighand(p, &flags)) {
1202 ret = __send_signal(sig, info, p, 1, 0); 1207 ret = __send_signal(sig, info, p, 1, 0);
1203 spin_unlock_irqrestore(&p->sighand->siglock, flags); 1208 unlock_task_sighand(p, &flags);
1209 } else
1210 ret = -ESRCH;
1204 } 1211 }
1205out_unlock: 1212out_unlock:
1206 read_unlock(&tasklist_lock); 1213 rcu_read_unlock();
1207 return ret; 1214 return ret;
1208} 1215}
1209EXPORT_SYMBOL_GPL(kill_pid_info_as_uid); 1216EXPORT_SYMBOL_GPL(kill_pid_info_as_uid);
@@ -1837,11 +1844,6 @@ relock:
1837 1844
1838 for (;;) { 1845 for (;;) {
1839 struct k_sigaction *ka; 1846 struct k_sigaction *ka;
1840
1841 if (unlikely(signal->group_stop_count > 0) &&
1842 do_signal_stop(0))
1843 goto relock;
1844
1845 /* 1847 /*
1846 * Tracing can induce an artifical signal and choose sigaction. 1848 * Tracing can induce an artifical signal and choose sigaction.
1847 * The return value in @signr determines the default action, 1849 * The return value in @signr determines the default action,
@@ -1853,6 +1855,10 @@ relock:
1853 if (unlikely(signr != 0)) 1855 if (unlikely(signr != 0))
1854 ka = return_ka; 1856 ka = return_ka;
1855 else { 1857 else {
1858 if (unlikely(signal->group_stop_count > 0) &&
1859 do_signal_stop(0))
1860 goto relock;
1861
1856 signr = dequeue_signal(current, &current->blocked, 1862 signr = dequeue_signal(current, &current->blocked,
1857 info); 1863 info);
1858 1864
diff --git a/kernel/smp.c b/kernel/smp.c
index a8c76069cf50..f10408422444 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -16,11 +16,11 @@ static DEFINE_PER_CPU(struct call_single_queue, call_single_queue);
16 16
17static struct { 17static struct {
18 struct list_head queue; 18 struct list_head queue;
19 spinlock_t lock; 19 raw_spinlock_t lock;
20} call_function __cacheline_aligned_in_smp = 20} call_function __cacheline_aligned_in_smp =
21 { 21 {
22 .queue = LIST_HEAD_INIT(call_function.queue), 22 .queue = LIST_HEAD_INIT(call_function.queue),
23 .lock = __SPIN_LOCK_UNLOCKED(call_function.lock), 23 .lock = __RAW_SPIN_LOCK_UNLOCKED(call_function.lock),
24 }; 24 };
25 25
26enum { 26enum {
@@ -35,7 +35,7 @@ struct call_function_data {
35 35
36struct call_single_queue { 36struct call_single_queue {
37 struct list_head list; 37 struct list_head list;
38 spinlock_t lock; 38 raw_spinlock_t lock;
39}; 39};
40 40
41static DEFINE_PER_CPU(struct call_function_data, cfd_data); 41static DEFINE_PER_CPU(struct call_function_data, cfd_data);
@@ -80,7 +80,7 @@ static int __cpuinit init_call_single_data(void)
80 for_each_possible_cpu(i) { 80 for_each_possible_cpu(i) {
81 struct call_single_queue *q = &per_cpu(call_single_queue, i); 81 struct call_single_queue *q = &per_cpu(call_single_queue, i);
82 82
83 spin_lock_init(&q->lock); 83 raw_spin_lock_init(&q->lock);
84 INIT_LIST_HEAD(&q->list); 84 INIT_LIST_HEAD(&q->list);
85 } 85 }
86 86
@@ -141,10 +141,10 @@ void generic_exec_single(int cpu, struct call_single_data *data, int wait)
141 unsigned long flags; 141 unsigned long flags;
142 int ipi; 142 int ipi;
143 143
144 spin_lock_irqsave(&dst->lock, flags); 144 raw_spin_lock_irqsave(&dst->lock, flags);
145 ipi = list_empty(&dst->list); 145 ipi = list_empty(&dst->list);
146 list_add_tail(&data->list, &dst->list); 146 list_add_tail(&data->list, &dst->list);
147 spin_unlock_irqrestore(&dst->lock, flags); 147 raw_spin_unlock_irqrestore(&dst->lock, flags);
148 148
149 /* 149 /*
150 * The list addition should be visible before sending the IPI 150 * The list addition should be visible before sending the IPI
@@ -171,7 +171,7 @@ void generic_exec_single(int cpu, struct call_single_data *data, int wait)
171void generic_smp_call_function_interrupt(void) 171void generic_smp_call_function_interrupt(void)
172{ 172{
173 struct call_function_data *data; 173 struct call_function_data *data;
174 int cpu = get_cpu(); 174 int cpu = smp_processor_id();
175 175
176 /* 176 /*
177 * Shouldn't receive this interrupt on a cpu that is not yet online. 177 * Shouldn't receive this interrupt on a cpu that is not yet online.
@@ -201,9 +201,9 @@ void generic_smp_call_function_interrupt(void)
201 refs = atomic_dec_return(&data->refs); 201 refs = atomic_dec_return(&data->refs);
202 WARN_ON(refs < 0); 202 WARN_ON(refs < 0);
203 if (!refs) { 203 if (!refs) {
204 spin_lock(&call_function.lock); 204 raw_spin_lock(&call_function.lock);
205 list_del_rcu(&data->csd.list); 205 list_del_rcu(&data->csd.list);
206 spin_unlock(&call_function.lock); 206 raw_spin_unlock(&call_function.lock);
207 } 207 }
208 208
209 if (refs) 209 if (refs)
@@ -212,7 +212,6 @@ void generic_smp_call_function_interrupt(void)
212 csd_unlock(&data->csd); 212 csd_unlock(&data->csd);
213 } 213 }
214 214
215 put_cpu();
216} 215}
217 216
218/* 217/*
@@ -230,9 +229,9 @@ void generic_smp_call_function_single_interrupt(void)
230 */ 229 */
231 WARN_ON_ONCE(!cpu_online(smp_processor_id())); 230 WARN_ON_ONCE(!cpu_online(smp_processor_id()));
232 231
233 spin_lock(&q->lock); 232 raw_spin_lock(&q->lock);
234 list_replace_init(&q->list, &list); 233 list_replace_init(&q->list, &list);
235 spin_unlock(&q->lock); 234 raw_spin_unlock(&q->lock);
236 235
237 while (!list_empty(&list)) { 236 while (!list_empty(&list)) {
238 struct call_single_data *data; 237 struct call_single_data *data;
@@ -348,7 +347,7 @@ int smp_call_function_any(const struct cpumask *mask,
348 goto call; 347 goto call;
349 348
350 /* Try for same node. */ 349 /* Try for same node. */
351 nodemask = cpumask_of_node(cpu); 350 nodemask = cpumask_of_node(cpu_to_node(cpu));
352 for (cpu = cpumask_first_and(nodemask, mask); cpu < nr_cpu_ids; 351 for (cpu = cpumask_first_and(nodemask, mask); cpu < nr_cpu_ids;
353 cpu = cpumask_next_and(cpu, nodemask, mask)) { 352 cpu = cpumask_next_and(cpu, nodemask, mask)) {
354 if (cpu_online(cpu)) 353 if (cpu_online(cpu))
@@ -449,14 +448,14 @@ void smp_call_function_many(const struct cpumask *mask,
449 cpumask_clear_cpu(this_cpu, data->cpumask); 448 cpumask_clear_cpu(this_cpu, data->cpumask);
450 atomic_set(&data->refs, cpumask_weight(data->cpumask)); 449 atomic_set(&data->refs, cpumask_weight(data->cpumask));
451 450
452 spin_lock_irqsave(&call_function.lock, flags); 451 raw_spin_lock_irqsave(&call_function.lock, flags);
453 /* 452 /*
454 * Place entry at the _HEAD_ of the list, so that any cpu still 453 * Place entry at the _HEAD_ of the list, so that any cpu still
455 * observing the entry in generic_smp_call_function_interrupt() 454 * observing the entry in generic_smp_call_function_interrupt()
456 * will not miss any other list entries: 455 * will not miss any other list entries:
457 */ 456 */
458 list_add_rcu(&data->csd.list, &call_function.queue); 457 list_add_rcu(&data->csd.list, &call_function.queue);
459 spin_unlock_irqrestore(&call_function.lock, flags); 458 raw_spin_unlock_irqrestore(&call_function.lock, flags);
460 459
461 /* 460 /*
462 * Make the list addition visible before sending the ipi. 461 * Make the list addition visible before sending the ipi.
@@ -501,20 +500,20 @@ EXPORT_SYMBOL(smp_call_function);
501 500
502void ipi_call_lock(void) 501void ipi_call_lock(void)
503{ 502{
504 spin_lock(&call_function.lock); 503 raw_spin_lock(&call_function.lock);
505} 504}
506 505
507void ipi_call_unlock(void) 506void ipi_call_unlock(void)
508{ 507{
509 spin_unlock(&call_function.lock); 508 raw_spin_unlock(&call_function.lock);
510} 509}
511 510
512void ipi_call_lock_irq(void) 511void ipi_call_lock_irq(void)
513{ 512{
514 spin_lock_irq(&call_function.lock); 513 raw_spin_lock_irq(&call_function.lock);
515} 514}
516 515
517void ipi_call_unlock_irq(void) 516void ipi_call_unlock_irq(void)
518{ 517{
519 spin_unlock_irq(&call_function.lock); 518 raw_spin_unlock_irq(&call_function.lock);
520} 519}
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index 41e042219ff6..be6517fb9c14 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -32,6 +32,8 @@
32 * include/linux/spinlock_api_smp.h 32 * include/linux/spinlock_api_smp.h
33 */ 33 */
34#else 34#else
35#define raw_read_can_lock(l) read_can_lock(l)
36#define raw_write_can_lock(l) write_can_lock(l)
35/* 37/*
36 * We build the __lock_function inlines here. They are too large for 38 * We build the __lock_function inlines here. They are too large for
37 * inlining all over the place, but here is only one user per function 39 * inlining all over the place, but here is only one user per function
@@ -42,49 +44,49 @@
42 * towards that other CPU that it should break the lock ASAP. 44 * towards that other CPU that it should break the lock ASAP.
43 */ 45 */
44#define BUILD_LOCK_OPS(op, locktype) \ 46#define BUILD_LOCK_OPS(op, locktype) \
45void __lockfunc __##op##_lock(locktype##_t *lock) \ 47void __lockfunc __raw_##op##_lock(locktype##_t *lock) \
46{ \ 48{ \
47 for (;;) { \ 49 for (;;) { \
48 preempt_disable(); \ 50 preempt_disable(); \
49 if (likely(_raw_##op##_trylock(lock))) \ 51 if (likely(do_raw_##op##_trylock(lock))) \
50 break; \ 52 break; \
51 preempt_enable(); \ 53 preempt_enable(); \
52 \ 54 \
53 if (!(lock)->break_lock) \ 55 if (!(lock)->break_lock) \
54 (lock)->break_lock = 1; \ 56 (lock)->break_lock = 1; \
55 while (!op##_can_lock(lock) && (lock)->break_lock) \ 57 while (!raw_##op##_can_lock(lock) && (lock)->break_lock)\
56 _raw_##op##_relax(&lock->raw_lock); \ 58 arch_##op##_relax(&lock->raw_lock); \
57 } \ 59 } \
58 (lock)->break_lock = 0; \ 60 (lock)->break_lock = 0; \
59} \ 61} \
60 \ 62 \
61unsigned long __lockfunc __##op##_lock_irqsave(locktype##_t *lock) \ 63unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock) \
62{ \ 64{ \
63 unsigned long flags; \ 65 unsigned long flags; \
64 \ 66 \
65 for (;;) { \ 67 for (;;) { \
66 preempt_disable(); \ 68 preempt_disable(); \
67 local_irq_save(flags); \ 69 local_irq_save(flags); \
68 if (likely(_raw_##op##_trylock(lock))) \ 70 if (likely(do_raw_##op##_trylock(lock))) \
69 break; \ 71 break; \
70 local_irq_restore(flags); \ 72 local_irq_restore(flags); \
71 preempt_enable(); \ 73 preempt_enable(); \
72 \ 74 \
73 if (!(lock)->break_lock) \ 75 if (!(lock)->break_lock) \
74 (lock)->break_lock = 1; \ 76 (lock)->break_lock = 1; \
75 while (!op##_can_lock(lock) && (lock)->break_lock) \ 77 while (!raw_##op##_can_lock(lock) && (lock)->break_lock)\
76 _raw_##op##_relax(&lock->raw_lock); \ 78 arch_##op##_relax(&lock->raw_lock); \
77 } \ 79 } \
78 (lock)->break_lock = 0; \ 80 (lock)->break_lock = 0; \
79 return flags; \ 81 return flags; \
80} \ 82} \
81 \ 83 \
82void __lockfunc __##op##_lock_irq(locktype##_t *lock) \ 84void __lockfunc __raw_##op##_lock_irq(locktype##_t *lock) \
83{ \ 85{ \
84 _##op##_lock_irqsave(lock); \ 86 _raw_##op##_lock_irqsave(lock); \
85} \ 87} \
86 \ 88 \
87void __lockfunc __##op##_lock_bh(locktype##_t *lock) \ 89void __lockfunc __raw_##op##_lock_bh(locktype##_t *lock) \
88{ \ 90{ \
89 unsigned long flags; \ 91 unsigned long flags; \
90 \ 92 \
@@ -93,7 +95,7 @@ void __lockfunc __##op##_lock_bh(locktype##_t *lock) \
93 /* irq-disabling. We use the generic preemption-aware */ \ 95 /* irq-disabling. We use the generic preemption-aware */ \
94 /* function: */ \ 96 /* function: */ \
95 /**/ \ 97 /**/ \
96 flags = _##op##_lock_irqsave(lock); \ 98 flags = _raw_##op##_lock_irqsave(lock); \
97 local_bh_disable(); \ 99 local_bh_disable(); \
98 local_irq_restore(flags); \ 100 local_irq_restore(flags); \
99} \ 101} \
@@ -107,269 +109,269 @@ void __lockfunc __##op##_lock_bh(locktype##_t *lock) \
107 * __[spin|read|write]_lock_irqsave() 109 * __[spin|read|write]_lock_irqsave()
108 * __[spin|read|write]_lock_bh() 110 * __[spin|read|write]_lock_bh()
109 */ 111 */
110BUILD_LOCK_OPS(spin, spinlock); 112BUILD_LOCK_OPS(spin, raw_spinlock);
111BUILD_LOCK_OPS(read, rwlock); 113BUILD_LOCK_OPS(read, rwlock);
112BUILD_LOCK_OPS(write, rwlock); 114BUILD_LOCK_OPS(write, rwlock);
113 115
114#endif 116#endif
115 117
116#ifdef CONFIG_DEBUG_LOCK_ALLOC 118#ifndef CONFIG_INLINE_SPIN_TRYLOCK
117 119int __lockfunc _raw_spin_trylock(raw_spinlock_t *lock)
118void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass)
119{ 120{
120 preempt_disable(); 121 return __raw_spin_trylock(lock);
121 spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
122 LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
123} 122}
124EXPORT_SYMBOL(_spin_lock_nested); 123EXPORT_SYMBOL(_raw_spin_trylock);
124#endif
125 125
126unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, 126#ifndef CONFIG_INLINE_SPIN_TRYLOCK_BH
127 int subclass) 127int __lockfunc _raw_spin_trylock_bh(raw_spinlock_t *lock)
128{ 128{
129 unsigned long flags; 129 return __raw_spin_trylock_bh(lock);
130
131 local_irq_save(flags);
132 preempt_disable();
133 spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
134 LOCK_CONTENDED_FLAGS(lock, _raw_spin_trylock, _raw_spin_lock,
135 _raw_spin_lock_flags, &flags);
136 return flags;
137} 130}
138EXPORT_SYMBOL(_spin_lock_irqsave_nested); 131EXPORT_SYMBOL(_raw_spin_trylock_bh);
132#endif
139 133
140void __lockfunc _spin_lock_nest_lock(spinlock_t *lock, 134#ifndef CONFIG_INLINE_SPIN_LOCK
141 struct lockdep_map *nest_lock) 135void __lockfunc _raw_spin_lock(raw_spinlock_t *lock)
142{ 136{
143 preempt_disable(); 137 __raw_spin_lock(lock);
144 spin_acquire_nest(&lock->dep_map, 0, 0, nest_lock, _RET_IP_);
145 LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
146} 138}
147EXPORT_SYMBOL(_spin_lock_nest_lock); 139EXPORT_SYMBOL(_raw_spin_lock);
148
149#endif 140#endif
150 141
151#ifndef CONFIG_INLINE_SPIN_TRYLOCK 142#ifndef CONFIG_INLINE_SPIN_LOCK_IRQSAVE
152int __lockfunc _spin_trylock(spinlock_t *lock) 143unsigned long __lockfunc _raw_spin_lock_irqsave(raw_spinlock_t *lock)
153{ 144{
154 return __spin_trylock(lock); 145 return __raw_spin_lock_irqsave(lock);
155} 146}
156EXPORT_SYMBOL(_spin_trylock); 147EXPORT_SYMBOL(_raw_spin_lock_irqsave);
157#endif 148#endif
158 149
159#ifndef CONFIG_INLINE_READ_TRYLOCK 150#ifndef CONFIG_INLINE_SPIN_LOCK_IRQ
160int __lockfunc _read_trylock(rwlock_t *lock) 151void __lockfunc _raw_spin_lock_irq(raw_spinlock_t *lock)
161{ 152{
162 return __read_trylock(lock); 153 __raw_spin_lock_irq(lock);
163} 154}
164EXPORT_SYMBOL(_read_trylock); 155EXPORT_SYMBOL(_raw_spin_lock_irq);
165#endif 156#endif
166 157
167#ifndef CONFIG_INLINE_WRITE_TRYLOCK 158#ifndef CONFIG_INLINE_SPIN_LOCK_BH
168int __lockfunc _write_trylock(rwlock_t *lock) 159void __lockfunc _raw_spin_lock_bh(raw_spinlock_t *lock)
169{ 160{
170 return __write_trylock(lock); 161 __raw_spin_lock_bh(lock);
171} 162}
172EXPORT_SYMBOL(_write_trylock); 163EXPORT_SYMBOL(_raw_spin_lock_bh);
173#endif 164#endif
174 165
175#ifndef CONFIG_INLINE_READ_LOCK 166#ifndef CONFIG_INLINE_SPIN_UNLOCK
176void __lockfunc _read_lock(rwlock_t *lock) 167void __lockfunc _raw_spin_unlock(raw_spinlock_t *lock)
177{ 168{
178 __read_lock(lock); 169 __raw_spin_unlock(lock);
179} 170}
180EXPORT_SYMBOL(_read_lock); 171EXPORT_SYMBOL(_raw_spin_unlock);
181#endif 172#endif
182 173
183#ifndef CONFIG_INLINE_SPIN_LOCK_IRQSAVE 174#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE
184unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock) 175void __lockfunc _raw_spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long flags)
185{ 176{
186 return __spin_lock_irqsave(lock); 177 __raw_spin_unlock_irqrestore(lock, flags);
187} 178}
188EXPORT_SYMBOL(_spin_lock_irqsave); 179EXPORT_SYMBOL(_raw_spin_unlock_irqrestore);
189#endif 180#endif
190 181
191#ifndef CONFIG_INLINE_SPIN_LOCK_IRQ 182#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQ
192void __lockfunc _spin_lock_irq(spinlock_t *lock) 183void __lockfunc _raw_spin_unlock_irq(raw_spinlock_t *lock)
193{ 184{
194 __spin_lock_irq(lock); 185 __raw_spin_unlock_irq(lock);
195} 186}
196EXPORT_SYMBOL(_spin_lock_irq); 187EXPORT_SYMBOL(_raw_spin_unlock_irq);
197#endif 188#endif
198 189
199#ifndef CONFIG_INLINE_SPIN_LOCK_BH 190#ifndef CONFIG_INLINE_SPIN_UNLOCK_BH
200void __lockfunc _spin_lock_bh(spinlock_t *lock) 191void __lockfunc _raw_spin_unlock_bh(raw_spinlock_t *lock)
201{ 192{
202 __spin_lock_bh(lock); 193 __raw_spin_unlock_bh(lock);
203} 194}
204EXPORT_SYMBOL(_spin_lock_bh); 195EXPORT_SYMBOL(_raw_spin_unlock_bh);
205#endif 196#endif
206 197
207#ifndef CONFIG_INLINE_READ_LOCK_IRQSAVE 198#ifndef CONFIG_INLINE_READ_TRYLOCK
208unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock) 199int __lockfunc _raw_read_trylock(rwlock_t *lock)
209{ 200{
210 return __read_lock_irqsave(lock); 201 return __raw_read_trylock(lock);
211} 202}
212EXPORT_SYMBOL(_read_lock_irqsave); 203EXPORT_SYMBOL(_raw_read_trylock);
213#endif 204#endif
214 205
215#ifndef CONFIG_INLINE_READ_LOCK_IRQ 206#ifndef CONFIG_INLINE_READ_LOCK
216void __lockfunc _read_lock_irq(rwlock_t *lock) 207void __lockfunc _raw_read_lock(rwlock_t *lock)
217{ 208{
218 __read_lock_irq(lock); 209 __raw_read_lock(lock);
219} 210}
220EXPORT_SYMBOL(_read_lock_irq); 211EXPORT_SYMBOL(_raw_read_lock);
221#endif 212#endif
222 213
223#ifndef CONFIG_INLINE_READ_LOCK_BH 214#ifndef CONFIG_INLINE_READ_LOCK_IRQSAVE
224void __lockfunc _read_lock_bh(rwlock_t *lock) 215unsigned long __lockfunc _raw_read_lock_irqsave(rwlock_t *lock)
225{ 216{
226 __read_lock_bh(lock); 217 return __raw_read_lock_irqsave(lock);
227} 218}
228EXPORT_SYMBOL(_read_lock_bh); 219EXPORT_SYMBOL(_raw_read_lock_irqsave);
229#endif 220#endif
230 221
231#ifndef CONFIG_INLINE_WRITE_LOCK_IRQSAVE 222#ifndef CONFIG_INLINE_READ_LOCK_IRQ
232unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock) 223void __lockfunc _raw_read_lock_irq(rwlock_t *lock)
233{ 224{
234 return __write_lock_irqsave(lock); 225 __raw_read_lock_irq(lock);
235} 226}
236EXPORT_SYMBOL(_write_lock_irqsave); 227EXPORT_SYMBOL(_raw_read_lock_irq);
237#endif 228#endif
238 229
239#ifndef CONFIG_INLINE_WRITE_LOCK_IRQ 230#ifndef CONFIG_INLINE_READ_LOCK_BH
240void __lockfunc _write_lock_irq(rwlock_t *lock) 231void __lockfunc _raw_read_lock_bh(rwlock_t *lock)
241{ 232{
242 __write_lock_irq(lock); 233 __raw_read_lock_bh(lock);
243} 234}
244EXPORT_SYMBOL(_write_lock_irq); 235EXPORT_SYMBOL(_raw_read_lock_bh);
245#endif 236#endif
246 237
247#ifndef CONFIG_INLINE_WRITE_LOCK_BH 238#ifndef CONFIG_INLINE_READ_UNLOCK
248void __lockfunc _write_lock_bh(rwlock_t *lock) 239void __lockfunc _raw_read_unlock(rwlock_t *lock)
249{ 240{
250 __write_lock_bh(lock); 241 __raw_read_unlock(lock);
251} 242}
252EXPORT_SYMBOL(_write_lock_bh); 243EXPORT_SYMBOL(_raw_read_unlock);
253#endif 244#endif
254 245
255#ifndef CONFIG_INLINE_SPIN_LOCK 246#ifndef CONFIG_INLINE_READ_UNLOCK_IRQRESTORE
256void __lockfunc _spin_lock(spinlock_t *lock) 247void __lockfunc _raw_read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
257{ 248{
258 __spin_lock(lock); 249 __raw_read_unlock_irqrestore(lock, flags);
259} 250}
260EXPORT_SYMBOL(_spin_lock); 251EXPORT_SYMBOL(_raw_read_unlock_irqrestore);
261#endif 252#endif
262 253
263#ifndef CONFIG_INLINE_WRITE_LOCK 254#ifndef CONFIG_INLINE_READ_UNLOCK_IRQ
264void __lockfunc _write_lock(rwlock_t *lock) 255void __lockfunc _raw_read_unlock_irq(rwlock_t *lock)
265{ 256{
266 __write_lock(lock); 257 __raw_read_unlock_irq(lock);
267} 258}
268EXPORT_SYMBOL(_write_lock); 259EXPORT_SYMBOL(_raw_read_unlock_irq);
269#endif 260#endif
270 261
271#ifndef CONFIG_INLINE_SPIN_UNLOCK 262#ifndef CONFIG_INLINE_READ_UNLOCK_BH
272void __lockfunc _spin_unlock(spinlock_t *lock) 263void __lockfunc _raw_read_unlock_bh(rwlock_t *lock)
273{ 264{
274 __spin_unlock(lock); 265 __raw_read_unlock_bh(lock);
275} 266}
276EXPORT_SYMBOL(_spin_unlock); 267EXPORT_SYMBOL(_raw_read_unlock_bh);
277#endif 268#endif
278 269
279#ifndef CONFIG_INLINE_WRITE_UNLOCK 270#ifndef CONFIG_INLINE_WRITE_TRYLOCK
280void __lockfunc _write_unlock(rwlock_t *lock) 271int __lockfunc _raw_write_trylock(rwlock_t *lock)
281{ 272{
282 __write_unlock(lock); 273 return __raw_write_trylock(lock);
283} 274}
284EXPORT_SYMBOL(_write_unlock); 275EXPORT_SYMBOL(_raw_write_trylock);
285#endif 276#endif
286 277
287#ifndef CONFIG_INLINE_READ_UNLOCK 278#ifndef CONFIG_INLINE_WRITE_LOCK
288void __lockfunc _read_unlock(rwlock_t *lock) 279void __lockfunc _raw_write_lock(rwlock_t *lock)
289{ 280{
290 __read_unlock(lock); 281 __raw_write_lock(lock);
291} 282}
292EXPORT_SYMBOL(_read_unlock); 283EXPORT_SYMBOL(_raw_write_lock);
293#endif 284#endif
294 285
295#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE 286#ifndef CONFIG_INLINE_WRITE_LOCK_IRQSAVE
296void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) 287unsigned long __lockfunc _raw_write_lock_irqsave(rwlock_t *lock)
297{ 288{
298 __spin_unlock_irqrestore(lock, flags); 289 return __raw_write_lock_irqsave(lock);
299} 290}
300EXPORT_SYMBOL(_spin_unlock_irqrestore); 291EXPORT_SYMBOL(_raw_write_lock_irqsave);
301#endif 292#endif
302 293
303#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQ 294#ifndef CONFIG_INLINE_WRITE_LOCK_IRQ
304void __lockfunc _spin_unlock_irq(spinlock_t *lock) 295void __lockfunc _raw_write_lock_irq(rwlock_t *lock)
305{ 296{
306 __spin_unlock_irq(lock); 297 __raw_write_lock_irq(lock);
307} 298}
308EXPORT_SYMBOL(_spin_unlock_irq); 299EXPORT_SYMBOL(_raw_write_lock_irq);
309#endif 300#endif
310 301
311#ifndef CONFIG_INLINE_SPIN_UNLOCK_BH 302#ifndef CONFIG_INLINE_WRITE_LOCK_BH
312void __lockfunc _spin_unlock_bh(spinlock_t *lock) 303void __lockfunc _raw_write_lock_bh(rwlock_t *lock)
313{ 304{
314 __spin_unlock_bh(lock); 305 __raw_write_lock_bh(lock);
315} 306}
316EXPORT_SYMBOL(_spin_unlock_bh); 307EXPORT_SYMBOL(_raw_write_lock_bh);
317#endif 308#endif
318 309
319#ifndef CONFIG_INLINE_READ_UNLOCK_IRQRESTORE 310#ifndef CONFIG_INLINE_WRITE_UNLOCK
320void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) 311void __lockfunc _raw_write_unlock(rwlock_t *lock)
321{ 312{
322 __read_unlock_irqrestore(lock, flags); 313 __raw_write_unlock(lock);
323} 314}
324EXPORT_SYMBOL(_read_unlock_irqrestore); 315EXPORT_SYMBOL(_raw_write_unlock);
325#endif 316#endif
326 317
327#ifndef CONFIG_INLINE_READ_UNLOCK_IRQ 318#ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE
328void __lockfunc _read_unlock_irq(rwlock_t *lock) 319void __lockfunc _raw_write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
329{ 320{
330 __read_unlock_irq(lock); 321 __raw_write_unlock_irqrestore(lock, flags);
331} 322}
332EXPORT_SYMBOL(_read_unlock_irq); 323EXPORT_SYMBOL(_raw_write_unlock_irqrestore);
333#endif 324#endif
334 325
335#ifndef CONFIG_INLINE_READ_UNLOCK_BH 326#ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQ
336void __lockfunc _read_unlock_bh(rwlock_t *lock) 327void __lockfunc _raw_write_unlock_irq(rwlock_t *lock)
337{ 328{
338 __read_unlock_bh(lock); 329 __raw_write_unlock_irq(lock);
339} 330}
340EXPORT_SYMBOL(_read_unlock_bh); 331EXPORT_SYMBOL(_raw_write_unlock_irq);
341#endif 332#endif
342 333
343#ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE 334#ifndef CONFIG_INLINE_WRITE_UNLOCK_BH
344void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) 335void __lockfunc _raw_write_unlock_bh(rwlock_t *lock)
345{ 336{
346 __write_unlock_irqrestore(lock, flags); 337 __raw_write_unlock_bh(lock);
347} 338}
348EXPORT_SYMBOL(_write_unlock_irqrestore); 339EXPORT_SYMBOL(_raw_write_unlock_bh);
349#endif 340#endif
350 341
351#ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQ 342#ifdef CONFIG_DEBUG_LOCK_ALLOC
352void __lockfunc _write_unlock_irq(rwlock_t *lock) 343
344void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass)
353{ 345{
354 __write_unlock_irq(lock); 346 preempt_disable();
347 spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
348 LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
355} 349}
356EXPORT_SYMBOL(_write_unlock_irq); 350EXPORT_SYMBOL(_raw_spin_lock_nested);
357#endif
358 351
359#ifndef CONFIG_INLINE_WRITE_UNLOCK_BH 352unsigned long __lockfunc _raw_spin_lock_irqsave_nested(raw_spinlock_t *lock,
360void __lockfunc _write_unlock_bh(rwlock_t *lock) 353 int subclass)
361{ 354{
362 __write_unlock_bh(lock); 355 unsigned long flags;
356
357 local_irq_save(flags);
358 preempt_disable();
359 spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
360 LOCK_CONTENDED_FLAGS(lock, do_raw_spin_trylock, do_raw_spin_lock,
361 do_raw_spin_lock_flags, &flags);
362 return flags;
363} 363}
364EXPORT_SYMBOL(_write_unlock_bh); 364EXPORT_SYMBOL(_raw_spin_lock_irqsave_nested);
365#endif
366 365
367#ifndef CONFIG_INLINE_SPIN_TRYLOCK_BH 366void __lockfunc _raw_spin_lock_nest_lock(raw_spinlock_t *lock,
368int __lockfunc _spin_trylock_bh(spinlock_t *lock) 367 struct lockdep_map *nest_lock)
369{ 368{
370 return __spin_trylock_bh(lock); 369 preempt_disable();
370 spin_acquire_nest(&lock->dep_map, 0, 0, nest_lock, _RET_IP_);
371 LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
371} 372}
372EXPORT_SYMBOL(_spin_trylock_bh); 373EXPORT_SYMBOL(_raw_spin_lock_nest_lock);
374
373#endif 375#endif
374 376
375notrace int in_lock_functions(unsigned long addr) 377notrace int in_lock_functions(unsigned long addr)
diff --git a/kernel/sys.c b/kernel/sys.c
index 585d6cd10040..26a6b73a6b85 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -162,6 +162,7 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
162 if (niceval > 19) 162 if (niceval > 19)
163 niceval = 19; 163 niceval = 19;
164 164
165 rcu_read_lock();
165 read_lock(&tasklist_lock); 166 read_lock(&tasklist_lock);
166 switch (which) { 167 switch (which) {
167 case PRIO_PROCESS: 168 case PRIO_PROCESS:
@@ -189,16 +190,17 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
189 !(user = find_user(who))) 190 !(user = find_user(who)))
190 goto out_unlock; /* No processes for this user */ 191 goto out_unlock; /* No processes for this user */
191 192
192 do_each_thread(g, p) 193 do_each_thread(g, p) {
193 if (__task_cred(p)->uid == who) 194 if (__task_cred(p)->uid == who)
194 error = set_one_prio(p, niceval, error); 195 error = set_one_prio(p, niceval, error);
195 while_each_thread(g, p); 196 } while_each_thread(g, p);
196 if (who != cred->uid) 197 if (who != cred->uid)
197 free_uid(user); /* For find_user() */ 198 free_uid(user); /* For find_user() */
198 break; 199 break;
199 } 200 }
200out_unlock: 201out_unlock:
201 read_unlock(&tasklist_lock); 202 read_unlock(&tasklist_lock);
203 rcu_read_unlock();
202out: 204out:
203 return error; 205 return error;
204} 206}
@@ -252,13 +254,13 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who)
252 !(user = find_user(who))) 254 !(user = find_user(who)))
253 goto out_unlock; /* No processes for this user */ 255 goto out_unlock; /* No processes for this user */
254 256
255 do_each_thread(g, p) 257 do_each_thread(g, p) {
256 if (__task_cred(p)->uid == who) { 258 if (__task_cred(p)->uid == who) {
257 niceval = 20 - task_nice(p); 259 niceval = 20 - task_nice(p);
258 if (niceval > retval) 260 if (niceval > retval)
259 retval = niceval; 261 retval = niceval;
260 } 262 }
261 while_each_thread(g, p); 263 } while_each_thread(g, p);
262 if (who != cred->uid) 264 if (who != cred->uid)
263 free_uid(user); /* for find_user() */ 265 free_uid(user); /* for find_user() */
264 break; 266 break;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 554ac4894f0f..8a68b2448468 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1051,7 +1051,7 @@ static struct ctl_table vm_table[] = {
1051 .extra2 = &one_hundred, 1051 .extra2 = &one_hundred,
1052 }, 1052 },
1053#ifdef CONFIG_HUGETLB_PAGE 1053#ifdef CONFIG_HUGETLB_PAGE
1054 { 1054 {
1055 .procname = "nr_hugepages", 1055 .procname = "nr_hugepages",
1056 .data = NULL, 1056 .data = NULL,
1057 .maxlen = sizeof(unsigned long), 1057 .maxlen = sizeof(unsigned long),
@@ -1059,7 +1059,18 @@ static struct ctl_table vm_table[] = {
1059 .proc_handler = hugetlb_sysctl_handler, 1059 .proc_handler = hugetlb_sysctl_handler,
1060 .extra1 = (void *)&hugetlb_zero, 1060 .extra1 = (void *)&hugetlb_zero,
1061 .extra2 = (void *)&hugetlb_infinity, 1061 .extra2 = (void *)&hugetlb_infinity,
1062 }, 1062 },
1063#ifdef CONFIG_NUMA
1064 {
1065 .procname = "nr_hugepages_mempolicy",
1066 .data = NULL,
1067 .maxlen = sizeof(unsigned long),
1068 .mode = 0644,
1069 .proc_handler = &hugetlb_mempolicy_sysctl_handler,
1070 .extra1 = (void *)&hugetlb_zero,
1071 .extra2 = (void *)&hugetlb_infinity,
1072 },
1073#endif
1063 { 1074 {
1064 .procname = "hugetlb_shm_group", 1075 .procname = "hugetlb_shm_group",
1065 .data = &sysctl_hugetlb_shm_group, 1076 .data = &sysctl_hugetlb_shm_group,
@@ -1120,7 +1131,8 @@ static struct ctl_table vm_table[] = {
1120 .data = &sysctl_max_map_count, 1131 .data = &sysctl_max_map_count,
1121 .maxlen = sizeof(sysctl_max_map_count), 1132 .maxlen = sizeof(sysctl_max_map_count),
1122 .mode = 0644, 1133 .mode = 0644,
1123 .proc_handler = proc_dointvec 1134 .proc_handler = proc_dointvec_minmax,
1135 .extra1 = &zero,
1124 }, 1136 },
1125#else 1137#else
1126 { 1138 {
@@ -1202,6 +1214,7 @@ static struct ctl_table vm_table[] = {
1202 .proc_handler = proc_dointvec_jiffies, 1214 .proc_handler = proc_dointvec_jiffies,
1203 }, 1215 },
1204#endif 1216#endif
1217#ifdef CONFIG_MMU
1205 { 1218 {
1206 .procname = "mmap_min_addr", 1219 .procname = "mmap_min_addr",
1207 .data = &dac_mmap_min_addr, 1220 .data = &dac_mmap_min_addr,
@@ -1209,6 +1222,7 @@ static struct ctl_table vm_table[] = {
1209 .mode = 0644, 1222 .mode = 0644,
1210 .proc_handler = mmap_min_addr_handler, 1223 .proc_handler = mmap_min_addr_handler,
1211 }, 1224 },
1225#endif
1212#ifdef CONFIG_NUMA 1226#ifdef CONFIG_NUMA
1213 { 1227 {
1214 .procname = "numa_zonelist_order", 1228 .procname = "numa_zonelist_order",
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index b75dbf40f573..8f5d16e0707a 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -1399,6 +1399,13 @@ static void deprecated_sysctl_warning(const int *name, int nlen)
1399{ 1399{
1400 int i; 1400 int i;
1401 1401
1402 /*
1403 * CTL_KERN/KERN_VERSION is used by older glibc and cannot
1404 * ever go away.
1405 */
1406 if (name[0] == CTL_KERN && name[1] == KERN_VERSION)
1407 return;
1408
1402 if (printk_ratelimit()) { 1409 if (printk_ratelimit()) {
1403 printk(KERN_INFO 1410 printk(KERN_INFO
1404 "warning: process `%s' used the deprecated sysctl " 1411 "warning: process `%s' used the deprecated sysctl "
@@ -1410,6 +1417,35 @@ static void deprecated_sysctl_warning(const int *name, int nlen)
1410 return; 1417 return;
1411} 1418}
1412 1419
1420#define WARN_ONCE_HASH_BITS 8
1421#define WARN_ONCE_HASH_SIZE (1<<WARN_ONCE_HASH_BITS)
1422
1423static DECLARE_BITMAP(warn_once_bitmap, WARN_ONCE_HASH_SIZE);
1424
1425#define FNV32_OFFSET 2166136261U
1426#define FNV32_PRIME 0x01000193
1427
1428/*
1429 * Print each legacy sysctl (approximately) only once.
1430 * To avoid making the tables non-const use a external
1431 * hash-table instead.
1432 * Worst case hash collision: 6, but very rarely.
1433 * NOTE! We don't use the SMP-safe bit tests. We simply
1434 * don't care enough.
1435 */
1436static void warn_on_bintable(const int *name, int nlen)
1437{
1438 int i;
1439 u32 hash = FNV32_OFFSET;
1440
1441 for (i = 0; i < nlen; i++)
1442 hash = (hash ^ name[i]) * FNV32_PRIME;
1443 hash %= WARN_ONCE_HASH_SIZE;
1444 if (__test_and_set_bit(hash, warn_once_bitmap))
1445 return;
1446 deprecated_sysctl_warning(name, nlen);
1447}
1448
1413static ssize_t do_sysctl(int __user *args_name, int nlen, 1449static ssize_t do_sysctl(int __user *args_name, int nlen,
1414 void __user *oldval, size_t oldlen, void __user *newval, size_t newlen) 1450 void __user *oldval, size_t oldlen, void __user *newval, size_t newlen)
1415{ 1451{
@@ -1424,7 +1460,7 @@ static ssize_t do_sysctl(int __user *args_name, int nlen,
1424 if (get_user(name[i], args_name + i)) 1460 if (get_user(name[i], args_name + i))
1425 return -EFAULT; 1461 return -EFAULT;
1426 1462
1427 deprecated_sysctl_warning(name, nlen); 1463 warn_on_bintable(name, nlen);
1428 1464
1429 return binary_sysctl(name, nlen, oldval, oldlen, newval, newlen); 1465 return binary_sysctl(name, nlen, oldval, oldlen, newval, newlen);
1430} 1466}
diff --git a/kernel/time.c b/kernel/time.c
index c6324d96009e..804798005d19 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -136,6 +136,7 @@ static inline void warp_clock(void)
136 write_seqlock_irq(&xtime_lock); 136 write_seqlock_irq(&xtime_lock);
137 wall_to_monotonic.tv_sec -= sys_tz.tz_minuteswest * 60; 137 wall_to_monotonic.tv_sec -= sys_tz.tz_minuteswest * 60;
138 xtime.tv_sec += sys_tz.tz_minuteswest * 60; 138 xtime.tv_sec += sys_tz.tz_minuteswest * 60;
139 update_xtime_cache(0);
139 write_sequnlock_irq(&xtime_lock); 140 write_sequnlock_irq(&xtime_lock);
140 clock_was_set(); 141 clock_was_set();
141} 142}
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 20a8920029ee..6f740d9f0948 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -30,7 +30,7 @@ static LIST_HEAD(clockevents_released);
30static RAW_NOTIFIER_HEAD(clockevents_chain); 30static RAW_NOTIFIER_HEAD(clockevents_chain);
31 31
32/* Protection for the above */ 32/* Protection for the above */
33static DEFINE_SPINLOCK(clockevents_lock); 33static DEFINE_RAW_SPINLOCK(clockevents_lock);
34 34
35/** 35/**
36 * clockevents_delta2ns - Convert a latch value (device ticks) to nanoseconds 36 * clockevents_delta2ns - Convert a latch value (device ticks) to nanoseconds
@@ -141,9 +141,9 @@ int clockevents_register_notifier(struct notifier_block *nb)
141 unsigned long flags; 141 unsigned long flags;
142 int ret; 142 int ret;
143 143
144 spin_lock_irqsave(&clockevents_lock, flags); 144 raw_spin_lock_irqsave(&clockevents_lock, flags);
145 ret = raw_notifier_chain_register(&clockevents_chain, nb); 145 ret = raw_notifier_chain_register(&clockevents_chain, nb);
146 spin_unlock_irqrestore(&clockevents_lock, flags); 146 raw_spin_unlock_irqrestore(&clockevents_lock, flags);
147 147
148 return ret; 148 return ret;
149} 149}
@@ -185,13 +185,13 @@ void clockevents_register_device(struct clock_event_device *dev)
185 BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); 185 BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
186 BUG_ON(!dev->cpumask); 186 BUG_ON(!dev->cpumask);
187 187
188 spin_lock_irqsave(&clockevents_lock, flags); 188 raw_spin_lock_irqsave(&clockevents_lock, flags);
189 189
190 list_add(&dev->list, &clockevent_devices); 190 list_add(&dev->list, &clockevent_devices);
191 clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev); 191 clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev);
192 clockevents_notify_released(); 192 clockevents_notify_released();
193 193
194 spin_unlock_irqrestore(&clockevents_lock, flags); 194 raw_spin_unlock_irqrestore(&clockevents_lock, flags);
195} 195}
196EXPORT_SYMBOL_GPL(clockevents_register_device); 196EXPORT_SYMBOL_GPL(clockevents_register_device);
197 197
@@ -238,10 +238,11 @@ void clockevents_exchange_device(struct clock_event_device *old,
238 */ 238 */
239void clockevents_notify(unsigned long reason, void *arg) 239void clockevents_notify(unsigned long reason, void *arg)
240{ 240{
241 struct list_head *node, *tmp; 241 struct clock_event_device *dev, *tmp;
242 unsigned long flags; 242 unsigned long flags;
243 int cpu;
243 244
244 spin_lock_irqsave(&clockevents_lock, flags); 245 raw_spin_lock_irqsave(&clockevents_lock, flags);
245 clockevents_do_notify(reason, arg); 246 clockevents_do_notify(reason, arg);
246 247
247 switch (reason) { 248 switch (reason) {
@@ -250,13 +251,24 @@ void clockevents_notify(unsigned long reason, void *arg)
250 * Unregister the clock event devices which were 251 * Unregister the clock event devices which were
251 * released from the users in the notify chain. 252 * released from the users in the notify chain.
252 */ 253 */
253 list_for_each_safe(node, tmp, &clockevents_released) 254 list_for_each_entry_safe(dev, tmp, &clockevents_released, list)
254 list_del(node); 255 list_del(&dev->list);
256 /*
257 * Now check whether the CPU has left unused per cpu devices
258 */
259 cpu = *((int *)arg);
260 list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) {
261 if (cpumask_test_cpu(cpu, dev->cpumask) &&
262 cpumask_weight(dev->cpumask) == 1) {
263 BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
264 list_del(&dev->list);
265 }
266 }
255 break; 267 break;
256 default: 268 default:
257 break; 269 break;
258 } 270 }
259 spin_unlock_irqrestore(&clockevents_lock, flags); 271 raw_spin_unlock_irqrestore(&clockevents_lock, flags);
260} 272}
261EXPORT_SYMBOL_GPL(clockevents_notify); 273EXPORT_SYMBOL_GPL(clockevents_notify);
262#endif 274#endif
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index c2ec25087a35..b3bafd5fc66d 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -31,7 +31,7 @@ static struct tick_device tick_broadcast_device;
31/* FIXME: Use cpumask_var_t. */ 31/* FIXME: Use cpumask_var_t. */
32static DECLARE_BITMAP(tick_broadcast_mask, NR_CPUS); 32static DECLARE_BITMAP(tick_broadcast_mask, NR_CPUS);
33static DECLARE_BITMAP(tmpmask, NR_CPUS); 33static DECLARE_BITMAP(tmpmask, NR_CPUS);
34static DEFINE_SPINLOCK(tick_broadcast_lock); 34static DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
35static int tick_broadcast_force; 35static int tick_broadcast_force;
36 36
37#ifdef CONFIG_TICK_ONESHOT 37#ifdef CONFIG_TICK_ONESHOT
@@ -96,7 +96,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
96 unsigned long flags; 96 unsigned long flags;
97 int ret = 0; 97 int ret = 0;
98 98
99 spin_lock_irqsave(&tick_broadcast_lock, flags); 99 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
100 100
101 /* 101 /*
102 * Devices might be registered with both periodic and oneshot 102 * Devices might be registered with both periodic and oneshot
@@ -122,7 +122,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
122 tick_broadcast_clear_oneshot(cpu); 122 tick_broadcast_clear_oneshot(cpu);
123 } 123 }
124 } 124 }
125 spin_unlock_irqrestore(&tick_broadcast_lock, flags); 125 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
126 return ret; 126 return ret;
127} 127}
128 128
@@ -161,13 +161,13 @@ static void tick_do_broadcast(struct cpumask *mask)
161 */ 161 */
162static void tick_do_periodic_broadcast(void) 162static void tick_do_periodic_broadcast(void)
163{ 163{
164 spin_lock(&tick_broadcast_lock); 164 raw_spin_lock(&tick_broadcast_lock);
165 165
166 cpumask_and(to_cpumask(tmpmask), 166 cpumask_and(to_cpumask(tmpmask),
167 cpu_online_mask, tick_get_broadcast_mask()); 167 cpu_online_mask, tick_get_broadcast_mask());
168 tick_do_broadcast(to_cpumask(tmpmask)); 168 tick_do_broadcast(to_cpumask(tmpmask));
169 169
170 spin_unlock(&tick_broadcast_lock); 170 raw_spin_unlock(&tick_broadcast_lock);
171} 171}
172 172
173/* 173/*
@@ -212,7 +212,7 @@ static void tick_do_broadcast_on_off(unsigned long *reason)
212 unsigned long flags; 212 unsigned long flags;
213 int cpu, bc_stopped; 213 int cpu, bc_stopped;
214 214
215 spin_lock_irqsave(&tick_broadcast_lock, flags); 215 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
216 216
217 cpu = smp_processor_id(); 217 cpu = smp_processor_id();
218 td = &per_cpu(tick_cpu_device, cpu); 218 td = &per_cpu(tick_cpu_device, cpu);
@@ -263,7 +263,7 @@ static void tick_do_broadcast_on_off(unsigned long *reason)
263 tick_broadcast_setup_oneshot(bc); 263 tick_broadcast_setup_oneshot(bc);
264 } 264 }
265out: 265out:
266 spin_unlock_irqrestore(&tick_broadcast_lock, flags); 266 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
267} 267}
268 268
269/* 269/*
@@ -299,7 +299,7 @@ void tick_shutdown_broadcast(unsigned int *cpup)
299 unsigned long flags; 299 unsigned long flags;
300 unsigned int cpu = *cpup; 300 unsigned int cpu = *cpup;
301 301
302 spin_lock_irqsave(&tick_broadcast_lock, flags); 302 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
303 303
304 bc = tick_broadcast_device.evtdev; 304 bc = tick_broadcast_device.evtdev;
305 cpumask_clear_cpu(cpu, tick_get_broadcast_mask()); 305 cpumask_clear_cpu(cpu, tick_get_broadcast_mask());
@@ -309,7 +309,7 @@ void tick_shutdown_broadcast(unsigned int *cpup)
309 clockevents_shutdown(bc); 309 clockevents_shutdown(bc);
310 } 310 }
311 311
312 spin_unlock_irqrestore(&tick_broadcast_lock, flags); 312 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
313} 313}
314 314
315void tick_suspend_broadcast(void) 315void tick_suspend_broadcast(void)
@@ -317,13 +317,13 @@ void tick_suspend_broadcast(void)
317 struct clock_event_device *bc; 317 struct clock_event_device *bc;
318 unsigned long flags; 318 unsigned long flags;
319 319
320 spin_lock_irqsave(&tick_broadcast_lock, flags); 320 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
321 321
322 bc = tick_broadcast_device.evtdev; 322 bc = tick_broadcast_device.evtdev;
323 if (bc) 323 if (bc)
324 clockevents_shutdown(bc); 324 clockevents_shutdown(bc);
325 325
326 spin_unlock_irqrestore(&tick_broadcast_lock, flags); 326 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
327} 327}
328 328
329int tick_resume_broadcast(void) 329int tick_resume_broadcast(void)
@@ -332,7 +332,7 @@ int tick_resume_broadcast(void)
332 unsigned long flags; 332 unsigned long flags;
333 int broadcast = 0; 333 int broadcast = 0;
334 334
335 spin_lock_irqsave(&tick_broadcast_lock, flags); 335 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
336 336
337 bc = tick_broadcast_device.evtdev; 337 bc = tick_broadcast_device.evtdev;
338 338
@@ -351,7 +351,7 @@ int tick_resume_broadcast(void)
351 break; 351 break;
352 } 352 }
353 } 353 }
354 spin_unlock_irqrestore(&tick_broadcast_lock, flags); 354 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
355 355
356 return broadcast; 356 return broadcast;
357} 357}
@@ -405,7 +405,7 @@ static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
405 ktime_t now, next_event; 405 ktime_t now, next_event;
406 int cpu; 406 int cpu;
407 407
408 spin_lock(&tick_broadcast_lock); 408 raw_spin_lock(&tick_broadcast_lock);
409again: 409again:
410 dev->next_event.tv64 = KTIME_MAX; 410 dev->next_event.tv64 = KTIME_MAX;
411 next_event.tv64 = KTIME_MAX; 411 next_event.tv64 = KTIME_MAX;
@@ -443,7 +443,7 @@ again:
443 if (tick_broadcast_set_event(next_event, 0)) 443 if (tick_broadcast_set_event(next_event, 0))
444 goto again; 444 goto again;
445 } 445 }
446 spin_unlock(&tick_broadcast_lock); 446 raw_spin_unlock(&tick_broadcast_lock);
447} 447}
448 448
449/* 449/*
@@ -457,7 +457,7 @@ void tick_broadcast_oneshot_control(unsigned long reason)
457 unsigned long flags; 457 unsigned long flags;
458 int cpu; 458 int cpu;
459 459
460 spin_lock_irqsave(&tick_broadcast_lock, flags); 460 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
461 461
462 /* 462 /*
463 * Periodic mode does not care about the enter/exit of power 463 * Periodic mode does not care about the enter/exit of power
@@ -492,7 +492,7 @@ void tick_broadcast_oneshot_control(unsigned long reason)
492 } 492 }
493 493
494out: 494out:
495 spin_unlock_irqrestore(&tick_broadcast_lock, flags); 495 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
496} 496}
497 497
498/* 498/*
@@ -563,13 +563,13 @@ void tick_broadcast_switch_to_oneshot(void)
563 struct clock_event_device *bc; 563 struct clock_event_device *bc;
564 unsigned long flags; 564 unsigned long flags;
565 565
566 spin_lock_irqsave(&tick_broadcast_lock, flags); 566 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
567 567
568 tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT; 568 tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
569 bc = tick_broadcast_device.evtdev; 569 bc = tick_broadcast_device.evtdev;
570 if (bc) 570 if (bc)
571 tick_broadcast_setup_oneshot(bc); 571 tick_broadcast_setup_oneshot(bc);
572 spin_unlock_irqrestore(&tick_broadcast_lock, flags); 572 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
573} 573}
574 574
575 575
@@ -581,7 +581,7 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
581 unsigned long flags; 581 unsigned long flags;
582 unsigned int cpu = *cpup; 582 unsigned int cpu = *cpup;
583 583
584 spin_lock_irqsave(&tick_broadcast_lock, flags); 584 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
585 585
586 /* 586 /*
587 * Clear the broadcast mask flag for the dead cpu, but do not 587 * Clear the broadcast mask flag for the dead cpu, but do not
@@ -589,7 +589,7 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
589 */ 589 */
590 cpumask_clear_cpu(cpu, tick_get_broadcast_oneshot_mask()); 590 cpumask_clear_cpu(cpu, tick_get_broadcast_oneshot_mask());
591 591
592 spin_unlock_irqrestore(&tick_broadcast_lock, flags); 592 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
593} 593}
594 594
595/* 595/*
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 83c4417b6a3c..b6b898d2eeef 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -34,7 +34,7 @@ DEFINE_PER_CPU(struct tick_device, tick_cpu_device);
34ktime_t tick_next_period; 34ktime_t tick_next_period;
35ktime_t tick_period; 35ktime_t tick_period;
36int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT; 36int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT;
37DEFINE_SPINLOCK(tick_device_lock); 37static DEFINE_RAW_SPINLOCK(tick_device_lock);
38 38
39/* 39/*
40 * Debugging: see timer_list.c 40 * Debugging: see timer_list.c
@@ -209,7 +209,7 @@ static int tick_check_new_device(struct clock_event_device *newdev)
209 int cpu, ret = NOTIFY_OK; 209 int cpu, ret = NOTIFY_OK;
210 unsigned long flags; 210 unsigned long flags;
211 211
212 spin_lock_irqsave(&tick_device_lock, flags); 212 raw_spin_lock_irqsave(&tick_device_lock, flags);
213 213
214 cpu = smp_processor_id(); 214 cpu = smp_processor_id();
215 if (!cpumask_test_cpu(cpu, newdev->cpumask)) 215 if (!cpumask_test_cpu(cpu, newdev->cpumask))
@@ -268,7 +268,7 @@ static int tick_check_new_device(struct clock_event_device *newdev)
268 if (newdev->features & CLOCK_EVT_FEAT_ONESHOT) 268 if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
269 tick_oneshot_notify(); 269 tick_oneshot_notify();
270 270
271 spin_unlock_irqrestore(&tick_device_lock, flags); 271 raw_spin_unlock_irqrestore(&tick_device_lock, flags);
272 return NOTIFY_STOP; 272 return NOTIFY_STOP;
273 273
274out_bc: 274out_bc:
@@ -278,7 +278,7 @@ out_bc:
278 if (tick_check_broadcast_device(newdev)) 278 if (tick_check_broadcast_device(newdev))
279 ret = NOTIFY_STOP; 279 ret = NOTIFY_STOP;
280 280
281 spin_unlock_irqrestore(&tick_device_lock, flags); 281 raw_spin_unlock_irqrestore(&tick_device_lock, flags);
282 282
283 return ret; 283 return ret;
284} 284}
@@ -311,7 +311,7 @@ static void tick_shutdown(unsigned int *cpup)
311 struct clock_event_device *dev = td->evtdev; 311 struct clock_event_device *dev = td->evtdev;
312 unsigned long flags; 312 unsigned long flags;
313 313
314 spin_lock_irqsave(&tick_device_lock, flags); 314 raw_spin_lock_irqsave(&tick_device_lock, flags);
315 td->mode = TICKDEV_MODE_PERIODIC; 315 td->mode = TICKDEV_MODE_PERIODIC;
316 if (dev) { 316 if (dev) {
317 /* 317 /*
@@ -322,7 +322,7 @@ static void tick_shutdown(unsigned int *cpup)
322 clockevents_exchange_device(dev, NULL); 322 clockevents_exchange_device(dev, NULL);
323 td->evtdev = NULL; 323 td->evtdev = NULL;
324 } 324 }
325 spin_unlock_irqrestore(&tick_device_lock, flags); 325 raw_spin_unlock_irqrestore(&tick_device_lock, flags);
326} 326}
327 327
328static void tick_suspend(void) 328static void tick_suspend(void)
@@ -330,9 +330,9 @@ static void tick_suspend(void)
330 struct tick_device *td = &__get_cpu_var(tick_cpu_device); 330 struct tick_device *td = &__get_cpu_var(tick_cpu_device);
331 unsigned long flags; 331 unsigned long flags;
332 332
333 spin_lock_irqsave(&tick_device_lock, flags); 333 raw_spin_lock_irqsave(&tick_device_lock, flags);
334 clockevents_shutdown(td->evtdev); 334 clockevents_shutdown(td->evtdev);
335 spin_unlock_irqrestore(&tick_device_lock, flags); 335 raw_spin_unlock_irqrestore(&tick_device_lock, flags);
336} 336}
337 337
338static void tick_resume(void) 338static void tick_resume(void)
@@ -341,7 +341,7 @@ static void tick_resume(void)
341 unsigned long flags; 341 unsigned long flags;
342 int broadcast = tick_resume_broadcast(); 342 int broadcast = tick_resume_broadcast();
343 343
344 spin_lock_irqsave(&tick_device_lock, flags); 344 raw_spin_lock_irqsave(&tick_device_lock, flags);
345 clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_RESUME); 345 clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_RESUME);
346 346
347 if (!broadcast) { 347 if (!broadcast) {
@@ -350,7 +350,7 @@ static void tick_resume(void)
350 else 350 else
351 tick_resume_oneshot(); 351 tick_resume_oneshot();
352 } 352 }
353 spin_unlock_irqrestore(&tick_device_lock, flags); 353 raw_spin_unlock_irqrestore(&tick_device_lock, flags);
354} 354}
355 355
356/* 356/*
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index b1c05bf75ee0..290eefbc1f60 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -6,7 +6,6 @@
6#define TICK_DO_TIMER_BOOT -2 6#define TICK_DO_TIMER_BOOT -2
7 7
8DECLARE_PER_CPU(struct tick_device, tick_cpu_device); 8DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
9extern spinlock_t tick_device_lock;
10extern ktime_t tick_next_period; 9extern ktime_t tick_next_period;
11extern ktime_t tick_period; 10extern ktime_t tick_period;
12extern int tick_do_timer_cpu __read_mostly; 11extern int tick_do_timer_cpu __read_mostly;
diff --git a/kernel/time/timecompare.c b/kernel/time/timecompare.c
index 96ff643a5a59..12f5c55090be 100644
--- a/kernel/time/timecompare.c
+++ b/kernel/time/timecompare.c
@@ -89,7 +89,7 @@ int timecompare_offset(struct timecompare *sync,
89 * source time 89 * source time
90 */ 90 */
91 sample.offset = 91 sample.offset =
92 ktime_to_ns(ktime_add(end, start)) / 2 - 92 (ktime_to_ns(end) + ktime_to_ns(start)) / 2 -
93 ts; 93 ts;
94 94
95 /* simple insertion sort based on duration */ 95 /* simple insertion sort based on duration */
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index af4135f05825..7faaa32fbf4f 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -165,6 +165,13 @@ struct timespec raw_time;
165/* flag for if timekeeping is suspended */ 165/* flag for if timekeeping is suspended */
166int __read_mostly timekeeping_suspended; 166int __read_mostly timekeeping_suspended;
167 167
168static struct timespec xtime_cache __attribute__ ((aligned (16)));
169void update_xtime_cache(u64 nsec)
170{
171 xtime_cache = xtime;
172 timespec_add_ns(&xtime_cache, nsec);
173}
174
168/* must hold xtime_lock */ 175/* must hold xtime_lock */
169void timekeeping_leap_insert(int leapsecond) 176void timekeeping_leap_insert(int leapsecond)
170{ 177{
@@ -325,6 +332,8 @@ int do_settimeofday(struct timespec *tv)
325 332
326 xtime = *tv; 333 xtime = *tv;
327 334
335 update_xtime_cache(0);
336
328 timekeeper.ntp_error = 0; 337 timekeeper.ntp_error = 0;
329 ntp_clear(); 338 ntp_clear();
330 339
@@ -550,6 +559,7 @@ void __init timekeeping_init(void)
550 } 559 }
551 set_normalized_timespec(&wall_to_monotonic, 560 set_normalized_timespec(&wall_to_monotonic,
552 -boot.tv_sec, -boot.tv_nsec); 561 -boot.tv_sec, -boot.tv_nsec);
562 update_xtime_cache(0);
553 total_sleep_time.tv_sec = 0; 563 total_sleep_time.tv_sec = 0;
554 total_sleep_time.tv_nsec = 0; 564 total_sleep_time.tv_nsec = 0;
555 write_sequnlock_irqrestore(&xtime_lock, flags); 565 write_sequnlock_irqrestore(&xtime_lock, flags);
@@ -583,6 +593,7 @@ static int timekeeping_resume(struct sys_device *dev)
583 wall_to_monotonic = timespec_sub(wall_to_monotonic, ts); 593 wall_to_monotonic = timespec_sub(wall_to_monotonic, ts);
584 total_sleep_time = timespec_add_safe(total_sleep_time, ts); 594 total_sleep_time = timespec_add_safe(total_sleep_time, ts);
585 } 595 }
596 update_xtime_cache(0);
586 /* re-base the last cycle value */ 597 /* re-base the last cycle value */
587 timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); 598 timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock);
588 timekeeper.ntp_error = 0; 599 timekeeper.ntp_error = 0;
@@ -722,6 +733,7 @@ static void timekeeping_adjust(s64 offset)
722 timekeeper.ntp_error_shift; 733 timekeeper.ntp_error_shift;
723} 734}
724 735
736
725/** 737/**
726 * logarithmic_accumulation - shifted accumulation of cycles 738 * logarithmic_accumulation - shifted accumulation of cycles
727 * 739 *
@@ -765,6 +777,7 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
765 return offset; 777 return offset;
766} 778}
767 779
780
768/** 781/**
769 * update_wall_time - Uses the current clocksource to increment the wall time 782 * update_wall_time - Uses the current clocksource to increment the wall time
770 * 783 *
@@ -774,6 +787,7 @@ void update_wall_time(void)
774{ 787{
775 struct clocksource *clock; 788 struct clocksource *clock;
776 cycle_t offset; 789 cycle_t offset;
790 u64 nsecs;
777 int shift = 0, maxshift; 791 int shift = 0, maxshift;
778 792
779 /* Make sure we're fully resumed: */ 793 /* Make sure we're fully resumed: */
@@ -839,6 +853,9 @@ void update_wall_time(void)
839 timekeeper.ntp_error += timekeeper.xtime_nsec << 853 timekeeper.ntp_error += timekeeper.xtime_nsec <<
840 timekeeper.ntp_error_shift; 854 timekeeper.ntp_error_shift;
841 855
856 nsecs = clocksource_cyc2ns(offset, timekeeper.mult, timekeeper.shift);
857 update_xtime_cache(nsecs);
858
842 /* check to see if there is a new clocksource to use */ 859 /* check to see if there is a new clocksource to use */
843 update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult); 860 update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
844} 861}
@@ -875,13 +892,13 @@ void monotonic_to_bootbased(struct timespec *ts)
875 892
876unsigned long get_seconds(void) 893unsigned long get_seconds(void)
877{ 894{
878 return xtime.tv_sec; 895 return xtime_cache.tv_sec;
879} 896}
880EXPORT_SYMBOL(get_seconds); 897EXPORT_SYMBOL(get_seconds);
881 898
882struct timespec __current_kernel_time(void) 899struct timespec __current_kernel_time(void)
883{ 900{
884 return xtime; 901 return xtime_cache;
885} 902}
886 903
887struct timespec current_kernel_time(void) 904struct timespec current_kernel_time(void)
@@ -891,7 +908,8 @@ struct timespec current_kernel_time(void)
891 908
892 do { 909 do {
893 seq = read_seqbegin(&xtime_lock); 910 seq = read_seqbegin(&xtime_lock);
894 now = xtime; 911
912 now = xtime_cache;
895 } while (read_seqretry(&xtime_lock, seq)); 913 } while (read_seqretry(&xtime_lock, seq));
896 914
897 return now; 915 return now;
@@ -905,7 +923,8 @@ struct timespec get_monotonic_coarse(void)
905 923
906 do { 924 do {
907 seq = read_seqbegin(&xtime_lock); 925 seq = read_seqbegin(&xtime_lock);
908 now = xtime; 926
927 now = xtime_cache;
909 mono = wall_to_monotonic; 928 mono = wall_to_monotonic;
910 } while (read_seqretry(&xtime_lock, seq)); 929 } while (read_seqretry(&xtime_lock, seq));
911 930
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 9d80db4747d4..bdfb8dd1050c 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -84,7 +84,7 @@ print_active_timers(struct seq_file *m, struct hrtimer_clock_base *base,
84 84
85next_one: 85next_one:
86 i = 0; 86 i = 0;
87 spin_lock_irqsave(&base->cpu_base->lock, flags); 87 raw_spin_lock_irqsave(&base->cpu_base->lock, flags);
88 88
89 curr = base->first; 89 curr = base->first;
90 /* 90 /*
@@ -100,13 +100,13 @@ next_one:
100 100
101 timer = rb_entry(curr, struct hrtimer, node); 101 timer = rb_entry(curr, struct hrtimer, node);
102 tmp = *timer; 102 tmp = *timer;
103 spin_unlock_irqrestore(&base->cpu_base->lock, flags); 103 raw_spin_unlock_irqrestore(&base->cpu_base->lock, flags);
104 104
105 print_timer(m, timer, &tmp, i, now); 105 print_timer(m, timer, &tmp, i, now);
106 next++; 106 next++;
107 goto next_one; 107 goto next_one;
108 } 108 }
109 spin_unlock_irqrestore(&base->cpu_base->lock, flags); 109 raw_spin_unlock_irqrestore(&base->cpu_base->lock, flags);
110} 110}
111 111
112static void 112static void
@@ -237,10 +237,10 @@ static void timer_list_show_tickdevices(struct seq_file *m)
237#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST 237#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
238 print_tickdevice(m, tick_get_broadcast_device(), -1); 238 print_tickdevice(m, tick_get_broadcast_device(), -1);
239 SEQ_printf(m, "tick_broadcast_mask: %08lx\n", 239 SEQ_printf(m, "tick_broadcast_mask: %08lx\n",
240 tick_get_broadcast_mask()->bits[0]); 240 cpumask_bits(tick_get_broadcast_mask())[0]);
241#ifdef CONFIG_TICK_ONESHOT 241#ifdef CONFIG_TICK_ONESHOT
242 SEQ_printf(m, "tick_broadcast_oneshot_mask: %08lx\n", 242 SEQ_printf(m, "tick_broadcast_oneshot_mask: %08lx\n",
243 tick_get_broadcast_oneshot_mask()->bits[0]); 243 cpumask_bits(tick_get_broadcast_oneshot_mask())[0]);
244#endif 244#endif
245 SEQ_printf(m, "\n"); 245 SEQ_printf(m, "\n");
246#endif 246#endif
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c
index 63b117e9eba1..2f3b585b8d7d 100644
--- a/kernel/time/timer_stats.c
+++ b/kernel/time/timer_stats.c
@@ -86,7 +86,7 @@ static DEFINE_SPINLOCK(table_lock);
86/* 86/*
87 * Per-CPU lookup locks for fast hash lookup: 87 * Per-CPU lookup locks for fast hash lookup:
88 */ 88 */
89static DEFINE_PER_CPU(spinlock_t, tstats_lookup_lock); 89static DEFINE_PER_CPU(raw_spinlock_t, tstats_lookup_lock);
90 90
91/* 91/*
92 * Mutex to serialize state changes with show-stats activities: 92 * Mutex to serialize state changes with show-stats activities:
@@ -238,7 +238,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
238 /* 238 /*
239 * It doesnt matter which lock we take: 239 * It doesnt matter which lock we take:
240 */ 240 */
241 spinlock_t *lock; 241 raw_spinlock_t *lock;
242 struct entry *entry, input; 242 struct entry *entry, input;
243 unsigned long flags; 243 unsigned long flags;
244 244
@@ -253,7 +253,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
253 input.pid = pid; 253 input.pid = pid;
254 input.timer_flag = timer_flag; 254 input.timer_flag = timer_flag;
255 255
256 spin_lock_irqsave(lock, flags); 256 raw_spin_lock_irqsave(lock, flags);
257 if (!timer_stats_active) 257 if (!timer_stats_active)
258 goto out_unlock; 258 goto out_unlock;
259 259
@@ -264,7 +264,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
264 atomic_inc(&overflow_count); 264 atomic_inc(&overflow_count);
265 265
266 out_unlock: 266 out_unlock:
267 spin_unlock_irqrestore(lock, flags); 267 raw_spin_unlock_irqrestore(lock, flags);
268} 268}
269 269
270static void print_name_offset(struct seq_file *m, unsigned long addr) 270static void print_name_offset(struct seq_file *m, unsigned long addr)
@@ -348,10 +348,11 @@ static void sync_access(void)
348 int cpu; 348 int cpu;
349 349
350 for_each_online_cpu(cpu) { 350 for_each_online_cpu(cpu) {
351 spinlock_t *lock = &per_cpu(tstats_lookup_lock, cpu); 351 raw_spinlock_t *lock = &per_cpu(tstats_lookup_lock, cpu);
352 spin_lock_irqsave(lock, flags); 352
353 raw_spin_lock_irqsave(lock, flags);
353 /* nothing */ 354 /* nothing */
354 spin_unlock_irqrestore(lock, flags); 355 raw_spin_unlock_irqrestore(lock, flags);
355 } 356 }
356} 357}
357 358
@@ -409,7 +410,7 @@ void __init init_timer_stats(void)
409 int cpu; 410 int cpu;
410 411
411 for_each_possible_cpu(cpu) 412 for_each_possible_cpu(cpu)
412 spin_lock_init(&per_cpu(tstats_lookup_lock, cpu)); 413 raw_spin_lock_init(&per_cpu(tstats_lookup_lock, cpu));
413} 414}
414 415
415static int __init init_tstats_procfs(void) 416static int __init init_tstats_procfs(void)
diff --git a/kernel/timer.c b/kernel/timer.c
index 5db5a8d26811..c61a7949387f 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -656,8 +656,6 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
656 656
657 debug_activate(timer, expires); 657 debug_activate(timer, expires);
658 658
659 new_base = __get_cpu_var(tvec_bases);
660
661 cpu = smp_processor_id(); 659 cpu = smp_processor_id();
662 660
663#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) 661#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
@@ -1200,6 +1198,7 @@ void update_process_times(int user_tick)
1200 run_local_timers(); 1198 run_local_timers();
1201 rcu_check_callbacks(cpu, user_tick); 1199 rcu_check_callbacks(cpu, user_tick);
1202 printk_tick(); 1200 printk_tick();
1201 perf_event_do_pending();
1203 scheduler_tick(); 1202 scheduler_tick();
1204 run_posix_cpu_timers(p); 1203 run_posix_cpu_timers(p);
1205} 1204}
@@ -1211,8 +1210,6 @@ static void run_timer_softirq(struct softirq_action *h)
1211{ 1210{
1212 struct tvec_base *base = __get_cpu_var(tvec_bases); 1211 struct tvec_base *base = __get_cpu_var(tvec_bases);
1213 1212
1214 perf_event_do_pending();
1215
1216 hrtimer_run_pending(); 1213 hrtimer_run_pending();
1217 1214
1218 if (time_after_eq(jiffies, base->timer_jiffies)) 1215 if (time_after_eq(jiffies, base->timer_jiffies))
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index d006554888dc..6c22d8a2f289 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -12,17 +12,17 @@ config NOP_TRACER
12config HAVE_FTRACE_NMI_ENTER 12config HAVE_FTRACE_NMI_ENTER
13 bool 13 bool
14 help 14 help
15 See Documentation/trace/ftrace-implementation.txt 15 See Documentation/trace/ftrace-design.txt
16 16
17config HAVE_FUNCTION_TRACER 17config HAVE_FUNCTION_TRACER
18 bool 18 bool
19 help 19 help
20 See Documentation/trace/ftrace-implementation.txt 20 See Documentation/trace/ftrace-design.txt
21 21
22config HAVE_FUNCTION_GRAPH_TRACER 22config HAVE_FUNCTION_GRAPH_TRACER
23 bool 23 bool
24 help 24 help
25 See Documentation/trace/ftrace-implementation.txt 25 See Documentation/trace/ftrace-design.txt
26 26
27config HAVE_FUNCTION_GRAPH_FP_TEST 27config HAVE_FUNCTION_GRAPH_FP_TEST
28 bool 28 bool
@@ -34,17 +34,17 @@ config HAVE_FUNCTION_GRAPH_FP_TEST
34config HAVE_FUNCTION_TRACE_MCOUNT_TEST 34config HAVE_FUNCTION_TRACE_MCOUNT_TEST
35 bool 35 bool
36 help 36 help
37 See Documentation/trace/ftrace-implementation.txt 37 See Documentation/trace/ftrace-design.txt
38 38
39config HAVE_DYNAMIC_FTRACE 39config HAVE_DYNAMIC_FTRACE
40 bool 40 bool
41 help 41 help
42 See Documentation/trace/ftrace-implementation.txt 42 See Documentation/trace/ftrace-design.txt
43 43
44config HAVE_FTRACE_MCOUNT_RECORD 44config HAVE_FTRACE_MCOUNT_RECORD
45 bool 45 bool
46 help 46 help
47 See Documentation/trace/ftrace-implementation.txt 47 See Documentation/trace/ftrace-design.txt
48 48
49config HAVE_HW_BRANCH_TRACER 49config HAVE_HW_BRANCH_TRACER
50 bool 50 bool
@@ -52,7 +52,7 @@ config HAVE_HW_BRANCH_TRACER
52config HAVE_SYSCALL_TRACEPOINTS 52config HAVE_SYSCALL_TRACEPOINTS
53 bool 53 bool
54 help 54 help
55 See Documentation/trace/ftrace-implementation.txt 55 See Documentation/trace/ftrace-design.txt
56 56
57config TRACER_MAX_TRACE 57config TRACER_MAX_TRACE
58 bool 58 bool
@@ -83,7 +83,7 @@ config RING_BUFFER_ALLOW_SWAP
83# This allows those options to appear when no other tracer is selected. But the 83# This allows those options to appear when no other tracer is selected. But the
84# options do not appear when something else selects it. We need the two options 84# options do not appear when something else selects it. We need the two options
85# GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the 85# GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the
86# hidding of the automatic options. 86# hiding of the automatic options.
87 87
88config TRACING 88config TRACING
89 bool 89 bool
@@ -119,7 +119,7 @@ menuconfig FTRACE
119 bool "Tracers" 119 bool "Tracers"
120 default y if DEBUG_KERNEL 120 default y if DEBUG_KERNEL
121 help 121 help
122 Enable the kernel tracing infrastructure. 122 Enable the kernel tracing infrastructure.
123 123
124if FTRACE 124if FTRACE
125 125
@@ -133,7 +133,7 @@ config FUNCTION_TRACER
133 help 133 help
134 Enable the kernel to trace every kernel function. This is done 134 Enable the kernel to trace every kernel function. This is done
135 by using a compiler feature to insert a small, 5-byte No-Operation 135 by using a compiler feature to insert a small, 5-byte No-Operation
136 instruction to the beginning of every kernel function, which NOP 136 instruction at the beginning of every kernel function, which NOP
137 sequence is then dynamically patched into a tracer call when 137 sequence is then dynamically patched into a tracer call when
138 tracing is enabled by the administrator. If it's runtime disabled 138 tracing is enabled by the administrator. If it's runtime disabled
139 (the bootup default), then the overhead of the instructions is very 139 (the bootup default), then the overhead of the instructions is very
@@ -150,7 +150,7 @@ config FUNCTION_GRAPH_TRACER
150 and its entry. 150 and its entry.
151 Its first purpose is to trace the duration of functions and 151 Its first purpose is to trace the duration of functions and
152 draw a call graph for each thread with some information like 152 draw a call graph for each thread with some information like
153 the return value. This is done by setting the current return 153 the return value. This is done by setting the current return
154 address on the current task structure into a stack of calls. 154 address on the current task structure into a stack of calls.
155 155
156 156
@@ -173,7 +173,7 @@ config IRQSOFF_TRACER
173 173
174 echo 0 > /sys/kernel/debug/tracing/tracing_max_latency 174 echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
175 175
176 (Note that kernel size and overhead increases with this option 176 (Note that kernel size and overhead increase with this option
177 enabled. This option and the preempt-off timing option can be 177 enabled. This option and the preempt-off timing option can be
178 used together or separately.) 178 used together or separately.)
179 179
@@ -186,7 +186,7 @@ config PREEMPT_TRACER
186 select TRACER_MAX_TRACE 186 select TRACER_MAX_TRACE
187 select RING_BUFFER_ALLOW_SWAP 187 select RING_BUFFER_ALLOW_SWAP
188 help 188 help
189 This option measures the time spent in preemption off critical 189 This option measures the time spent in preemption-off critical
190 sections, with microsecond accuracy. 190 sections, with microsecond accuracy.
191 191
192 The default measurement method is a maximum search, which is 192 The default measurement method is a maximum search, which is
@@ -195,7 +195,7 @@ config PREEMPT_TRACER
195 195
196 echo 0 > /sys/kernel/debug/tracing/tracing_max_latency 196 echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
197 197
198 (Note that kernel size and overhead increases with this option 198 (Note that kernel size and overhead increase with this option
199 enabled. This option and the irqs-off timing option can be 199 enabled. This option and the irqs-off timing option can be
200 used together or separately.) 200 used together or separately.)
201 201
@@ -222,7 +222,7 @@ config ENABLE_DEFAULT_TRACERS
222 depends on !GENERIC_TRACER 222 depends on !GENERIC_TRACER
223 select TRACING 223 select TRACING
224 help 224 help
225 This tracer hooks to various trace points in the kernel 225 This tracer hooks to various trace points in the kernel,
226 allowing the user to pick and choose which trace point they 226 allowing the user to pick and choose which trace point they
227 want to trace. It also includes the sched_switch tracer plugin. 227 want to trace. It also includes the sched_switch tracer plugin.
228 228
@@ -265,19 +265,19 @@ choice
265 The likely/unlikely profiler only looks at the conditions that 265 The likely/unlikely profiler only looks at the conditions that
266 are annotated with a likely or unlikely macro. 266 are annotated with a likely or unlikely macro.
267 267
268 The "all branch" profiler will profile every if statement in the 268 The "all branch" profiler will profile every if-statement in the
269 kernel. This profiler will also enable the likely/unlikely 269 kernel. This profiler will also enable the likely/unlikely
270 profiler as well. 270 profiler.
271 271
272 Either of the above profilers add a bit of overhead to the system. 272 Either of the above profilers adds a bit of overhead to the system.
273 If unsure choose "No branch profiling". 273 If unsure, choose "No branch profiling".
274 274
275config BRANCH_PROFILE_NONE 275config BRANCH_PROFILE_NONE
276 bool "No branch profiling" 276 bool "No branch profiling"
277 help 277 help
278 No branch profiling. Branch profiling adds a bit of overhead. 278 No branch profiling. Branch profiling adds a bit of overhead.
279 Only enable it if you want to analyse the branching behavior. 279 Only enable it if you want to analyse the branching behavior.
280 Otherwise keep it disabled. 280 Otherwise keep it disabled.
281 281
282config PROFILE_ANNOTATED_BRANCHES 282config PROFILE_ANNOTATED_BRANCHES
283 bool "Trace likely/unlikely profiler" 283 bool "Trace likely/unlikely profiler"
@@ -288,7 +288,7 @@ config PROFILE_ANNOTATED_BRANCHES
288 288
289 /sys/kernel/debug/tracing/profile_annotated_branch 289 /sys/kernel/debug/tracing/profile_annotated_branch
290 290
291 Note: this will add a significant overhead, only turn this 291 Note: this will add a significant overhead; only turn this
292 on if you need to profile the system's use of these macros. 292 on if you need to profile the system's use of these macros.
293 293
294config PROFILE_ALL_BRANCHES 294config PROFILE_ALL_BRANCHES
@@ -305,7 +305,7 @@ config PROFILE_ALL_BRANCHES
305 305
306 This configuration, when enabled, will impose a great overhead 306 This configuration, when enabled, will impose a great overhead
307 on the system. This should only be enabled when the system 307 on the system. This should only be enabled when the system
308 is to be analyzed 308 is to be analyzed in much detail.
309endchoice 309endchoice
310 310
311config TRACING_BRANCHES 311config TRACING_BRANCHES
@@ -335,7 +335,7 @@ config POWER_TRACER
335 depends on X86 335 depends on X86
336 select GENERIC_TRACER 336 select GENERIC_TRACER
337 help 337 help
338 This tracer helps developers to analyze and optimize the kernels 338 This tracer helps developers to analyze and optimize the kernel's
339 power management decisions, specifically the C-state and P-state 339 power management decisions, specifically the C-state and P-state
340 behavior. 340 behavior.
341 341
@@ -391,14 +391,14 @@ config HW_BRANCH_TRACER
391 select GENERIC_TRACER 391 select GENERIC_TRACER
392 help 392 help
393 This tracer records all branches on the system in a circular 393 This tracer records all branches on the system in a circular
394 buffer giving access to the last N branches for each cpu. 394 buffer, giving access to the last N branches for each cpu.
395 395
396config KMEMTRACE 396config KMEMTRACE
397 bool "Trace SLAB allocations" 397 bool "Trace SLAB allocations"
398 select GENERIC_TRACER 398 select GENERIC_TRACER
399 help 399 help
400 kmemtrace provides tracing for slab allocator functions, such as 400 kmemtrace provides tracing for slab allocator functions, such as
401 kmalloc, kfree, kmem_cache_alloc, kmem_cache_free etc.. Collected 401 kmalloc, kfree, kmem_cache_alloc, kmem_cache_free, etc. Collected
402 data is then fed to the userspace application in order to analyse 402 data is then fed to the userspace application in order to analyse
403 allocation hotspots, internal fragmentation and so on, making it 403 allocation hotspots, internal fragmentation and so on, making it
404 possible to see how well an allocator performs, as well as debug 404 possible to see how well an allocator performs, as well as debug
@@ -417,15 +417,15 @@ config WORKQUEUE_TRACER
417 bool "Trace workqueues" 417 bool "Trace workqueues"
418 select GENERIC_TRACER 418 select GENERIC_TRACER
419 help 419 help
420 The workqueue tracer provides some statistical informations 420 The workqueue tracer provides some statistical information
421 about each cpu workqueue thread such as the number of the 421 about each cpu workqueue thread such as the number of the
422 works inserted and executed since their creation. It can help 422 works inserted and executed since their creation. It can help
423 to evaluate the amount of work each of them have to perform. 423 to evaluate the amount of work each of them has to perform.
424 For example it can help a developer to decide whether he should 424 For example it can help a developer to decide whether he should
425 choose a per cpu workqueue instead of a singlethreaded one. 425 choose a per-cpu workqueue instead of a singlethreaded one.
426 426
427config BLK_DEV_IO_TRACE 427config BLK_DEV_IO_TRACE
428 bool "Support for tracing block io actions" 428 bool "Support for tracing block IO actions"
429 depends on SYSFS 429 depends on SYSFS
430 depends on BLOCK 430 depends on BLOCK
431 select RELAY 431 select RELAY
@@ -456,15 +456,15 @@ config KPROBE_EVENT
456 select TRACING 456 select TRACING
457 default y 457 default y
458 help 458 help
459 This allows the user to add tracing events (similar to tracepoints) on the fly 459 This allows the user to add tracing events (similar to tracepoints)
460 via the ftrace interface. See Documentation/trace/kprobetrace.txt 460 on the fly via the ftrace interface. See
461 for more details. 461 Documentation/trace/kprobetrace.txt for more details.
462 462
463 Those events can be inserted wherever kprobes can probe, and record 463 Those events can be inserted wherever kprobes can probe, and record
464 various register and memory values. 464 various register and memory values.
465 465
466 This option is also required by perf-probe subcommand of perf tools. If 466 This option is also required by perf-probe subcommand of perf tools.
467 you want to use perf tools, this option is strongly recommended. 467 If you want to use perf tools, this option is strongly recommended.
468 468
469config DYNAMIC_FTRACE 469config DYNAMIC_FTRACE
470 bool "enable/disable ftrace tracepoints dynamically" 470 bool "enable/disable ftrace tracepoints dynamically"
@@ -472,32 +472,32 @@ config DYNAMIC_FTRACE
472 depends on HAVE_DYNAMIC_FTRACE 472 depends on HAVE_DYNAMIC_FTRACE
473 default y 473 default y
474 help 474 help
475 This option will modify all the calls to ftrace dynamically 475 This option will modify all the calls to ftrace dynamically
476 (will patch them out of the binary image and replaces them 476 (will patch them out of the binary image and replace them
477 with a No-Op instruction) as they are called. A table is 477 with a No-Op instruction) as they are called. A table is
478 created to dynamically enable them again. 478 created to dynamically enable them again.
479 479
480 This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but otherwise 480 This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but
481 has native performance as long as no tracing is active. 481 otherwise has native performance as long as no tracing is active.
482 482
483 The changes to the code are done by a kernel thread that 483 The changes to the code are done by a kernel thread that
484 wakes up once a second and checks to see if any ftrace calls 484 wakes up once a second and checks to see if any ftrace calls
485 were made. If so, it runs stop_machine (stops all CPUS) 485 were made. If so, it runs stop_machine (stops all CPUS)
486 and modifies the code to jump over the call to ftrace. 486 and modifies the code to jump over the call to ftrace.
487 487
488config FUNCTION_PROFILER 488config FUNCTION_PROFILER
489 bool "Kernel function profiler" 489 bool "Kernel function profiler"
490 depends on FUNCTION_TRACER 490 depends on FUNCTION_TRACER
491 default n 491 default n
492 help 492 help
493 This option enables the kernel function profiler. A file is created 493 This option enables the kernel function profiler. A file is created
494 in debugfs called function_profile_enabled which defaults to zero. 494 in debugfs called function_profile_enabled which defaults to zero.
495 When a 1 is echoed into this file profiling begins, and when a 495 When a 1 is echoed into this file profiling begins, and when a
496 zero is entered, profiling stops. A file in the trace_stats 496 zero is entered, profiling stops. A "functions" file is created in
497 directory called functions, that show the list of functions that 497 the trace_stats directory; this file shows the list of functions that
498 have been hit and their counters. 498 have been hit and their counters.
499 499
500 If in doubt, say N 500 If in doubt, say N.
501 501
502config FTRACE_MCOUNT_RECORD 502config FTRACE_MCOUNT_RECORD
503 def_bool y 503 def_bool y
@@ -556,8 +556,8 @@ config RING_BUFFER_BENCHMARK
556 tristate "Ring buffer benchmark stress tester" 556 tristate "Ring buffer benchmark stress tester"
557 depends on RING_BUFFER 557 depends on RING_BUFFER
558 help 558 help
559 This option creates a test to stress the ring buffer and bench mark it. 559 This option creates a test to stress the ring buffer and benchmark it.
560 It creates its own ring buffer such that it will not interfer with 560 It creates its own ring buffer such that it will not interfere with
561 any other users of the ring buffer (such as ftrace). It then creates 561 any other users of the ring buffer (such as ftrace). It then creates
562 a producer and consumer that will run for 10 seconds and sleep for 562 a producer and consumer that will run for 10 seconds and sleep for
563 10 seconds. Each interval it will print out the number of events 563 10 seconds. Each interval it will print out the number of events
@@ -566,7 +566,7 @@ config RING_BUFFER_BENCHMARK
566 It does not disable interrupts or raise its priority, so it may be 566 It does not disable interrupts or raise its priority, so it may be
567 affected by processes that are running. 567 affected by processes that are running.
568 568
569 If unsure, say N 569 If unsure, say N.
570 570
571endif # FTRACE 571endif # FTRACE
572 572
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index e51a1bcb7bed..1e6640f80454 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1690,7 +1690,7 @@ ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
1690static int ftrace_match(char *str, char *regex, int len, int type) 1690static int ftrace_match(char *str, char *regex, int len, int type)
1691{ 1691{
1692 int matched = 0; 1692 int matched = 0;
1693 char *ptr; 1693 int slen;
1694 1694
1695 switch (type) { 1695 switch (type) {
1696 case MATCH_FULL: 1696 case MATCH_FULL:
@@ -1706,8 +1706,8 @@ static int ftrace_match(char *str, char *regex, int len, int type)
1706 matched = 1; 1706 matched = 1;
1707 break; 1707 break;
1708 case MATCH_END_ONLY: 1708 case MATCH_END_ONLY:
1709 ptr = strstr(str, regex); 1709 slen = strlen(str);
1710 if (ptr && (ptr[len] == 0)) 1710 if (slen >= len && memcmp(str + slen - len, regex, len) == 0)
1711 matched = 1; 1711 matched = 1;
1712 break; 1712 break;
1713 } 1713 }
@@ -1724,7 +1724,7 @@ ftrace_match_record(struct dyn_ftrace *rec, char *regex, int len, int type)
1724 return ftrace_match(str, regex, len, type); 1724 return ftrace_match(str, regex, len, type);
1725} 1725}
1726 1726
1727static void ftrace_match_records(char *buff, int len, int enable) 1727static int ftrace_match_records(char *buff, int len, int enable)
1728{ 1728{
1729 unsigned int search_len; 1729 unsigned int search_len;
1730 struct ftrace_page *pg; 1730 struct ftrace_page *pg;
@@ -1733,6 +1733,7 @@ static void ftrace_match_records(char *buff, int len, int enable)
1733 char *search; 1733 char *search;
1734 int type; 1734 int type;
1735 int not; 1735 int not;
1736 int found = 0;
1736 1737
1737 flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; 1738 flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
1738 type = filter_parse_regex(buff, len, &search, &not); 1739 type = filter_parse_regex(buff, len, &search, &not);
@@ -1750,6 +1751,7 @@ static void ftrace_match_records(char *buff, int len, int enable)
1750 rec->flags &= ~flag; 1751 rec->flags &= ~flag;
1751 else 1752 else
1752 rec->flags |= flag; 1753 rec->flags |= flag;
1754 found = 1;
1753 } 1755 }
1754 /* 1756 /*
1755 * Only enable filtering if we have a function that 1757 * Only enable filtering if we have a function that
@@ -1759,6 +1761,8 @@ static void ftrace_match_records(char *buff, int len, int enable)
1759 ftrace_filtered = 1; 1761 ftrace_filtered = 1;
1760 } while_for_each_ftrace_rec(); 1762 } while_for_each_ftrace_rec();
1761 mutex_unlock(&ftrace_lock); 1763 mutex_unlock(&ftrace_lock);
1764
1765 return found;
1762} 1766}
1763 1767
1764static int 1768static int
@@ -1780,7 +1784,7 @@ ftrace_match_module_record(struct dyn_ftrace *rec, char *mod,
1780 return 1; 1784 return 1;
1781} 1785}
1782 1786
1783static void ftrace_match_module_records(char *buff, char *mod, int enable) 1787static int ftrace_match_module_records(char *buff, char *mod, int enable)
1784{ 1788{
1785 unsigned search_len = 0; 1789 unsigned search_len = 0;
1786 struct ftrace_page *pg; 1790 struct ftrace_page *pg;
@@ -1789,6 +1793,7 @@ static void ftrace_match_module_records(char *buff, char *mod, int enable)
1789 char *search = buff; 1793 char *search = buff;
1790 unsigned long flag; 1794 unsigned long flag;
1791 int not = 0; 1795 int not = 0;
1796 int found = 0;
1792 1797
1793 flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; 1798 flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
1794 1799
@@ -1819,12 +1824,15 @@ static void ftrace_match_module_records(char *buff, char *mod, int enable)
1819 rec->flags &= ~flag; 1824 rec->flags &= ~flag;
1820 else 1825 else
1821 rec->flags |= flag; 1826 rec->flags |= flag;
1827 found = 1;
1822 } 1828 }
1823 if (enable && (rec->flags & FTRACE_FL_FILTER)) 1829 if (enable && (rec->flags & FTRACE_FL_FILTER))
1824 ftrace_filtered = 1; 1830 ftrace_filtered = 1;
1825 1831
1826 } while_for_each_ftrace_rec(); 1832 } while_for_each_ftrace_rec();
1827 mutex_unlock(&ftrace_lock); 1833 mutex_unlock(&ftrace_lock);
1834
1835 return found;
1828} 1836}
1829 1837
1830/* 1838/*
@@ -1853,8 +1861,9 @@ ftrace_mod_callback(char *func, char *cmd, char *param, int enable)
1853 if (!strlen(mod)) 1861 if (!strlen(mod))
1854 return -EINVAL; 1862 return -EINVAL;
1855 1863
1856 ftrace_match_module_records(func, mod, enable); 1864 if (ftrace_match_module_records(func, mod, enable))
1857 return 0; 1865 return 0;
1866 return -EINVAL;
1858} 1867}
1859 1868
1860static struct ftrace_func_command ftrace_mod_cmd = { 1869static struct ftrace_func_command ftrace_mod_cmd = {
@@ -2151,8 +2160,9 @@ static int ftrace_process_regex(char *buff, int len, int enable)
2151 func = strsep(&next, ":"); 2160 func = strsep(&next, ":");
2152 2161
2153 if (!next) { 2162 if (!next) {
2154 ftrace_match_records(func, len, enable); 2163 if (ftrace_match_records(func, len, enable))
2155 return 0; 2164 return 0;
2165 return ret;
2156 } 2166 }
2157 2167
2158 /* command found */ 2168 /* command found */
@@ -2198,10 +2208,9 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
2198 !trace_parser_cont(parser)) { 2208 !trace_parser_cont(parser)) {
2199 ret = ftrace_process_regex(parser->buffer, 2209 ret = ftrace_process_regex(parser->buffer,
2200 parser->idx, enable); 2210 parser->idx, enable);
2211 trace_parser_clear(parser);
2201 if (ret) 2212 if (ret)
2202 goto out_unlock; 2213 goto out_unlock;
2203
2204 trace_parser_clear(parser);
2205 } 2214 }
2206 2215
2207 ret = read; 2216 ret = read;
@@ -2543,10 +2552,9 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
2543 exists = true; 2552 exists = true;
2544 break; 2553 break;
2545 } 2554 }
2546 if (!exists) { 2555 if (!exists)
2547 array[(*idx)++] = rec->ip; 2556 array[(*idx)++] = rec->ip;
2548 found = 1; 2557 found = 1;
2549 }
2550 } 2558 }
2551 } while_for_each_ftrace_rec(); 2559 } while_for_each_ftrace_rec();
2552 2560
diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c
index e06c6e3d56a3..9f4f565b01e6 100644
--- a/kernel/trace/power-traces.c
+++ b/kernel/trace/power-traces.c
@@ -14,7 +14,5 @@
14#define CREATE_TRACE_POINTS 14#define CREATE_TRACE_POINTS
15#include <trace/events/power.h> 15#include <trace/events/power.h>
16 16
17EXPORT_TRACEPOINT_SYMBOL_GPL(power_start);
18EXPORT_TRACEPOINT_SYMBOL_GPL(power_end);
19EXPORT_TRACEPOINT_SYMBOL_GPL(power_frequency); 17EXPORT_TRACEPOINT_SYMBOL_GPL(power_frequency);
20 18
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index a1ca4956ab5e..edefe3b2801b 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -423,7 +423,7 @@ struct ring_buffer_per_cpu {
423 int cpu; 423 int cpu;
424 struct ring_buffer *buffer; 424 struct ring_buffer *buffer;
425 spinlock_t reader_lock; /* serialize readers */ 425 spinlock_t reader_lock; /* serialize readers */
426 raw_spinlock_t lock; 426 arch_spinlock_t lock;
427 struct lock_class_key lock_key; 427 struct lock_class_key lock_key;
428 struct list_head *pages; 428 struct list_head *pages;
429 struct buffer_page *head_page; /* read from head */ 429 struct buffer_page *head_page; /* read from head */
@@ -998,7 +998,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
998 cpu_buffer->buffer = buffer; 998 cpu_buffer->buffer = buffer;
999 spin_lock_init(&cpu_buffer->reader_lock); 999 spin_lock_init(&cpu_buffer->reader_lock);
1000 lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key); 1000 lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
1001 cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 1001 cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
1002 1002
1003 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), 1003 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
1004 GFP_KERNEL, cpu_to_node(cpu)); 1004 GFP_KERNEL, cpu_to_node(cpu));
@@ -1193,9 +1193,6 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
1193 struct list_head *p; 1193 struct list_head *p;
1194 unsigned i; 1194 unsigned i;
1195 1195
1196 atomic_inc(&cpu_buffer->record_disabled);
1197 synchronize_sched();
1198
1199 spin_lock_irq(&cpu_buffer->reader_lock); 1196 spin_lock_irq(&cpu_buffer->reader_lock);
1200 rb_head_page_deactivate(cpu_buffer); 1197 rb_head_page_deactivate(cpu_buffer);
1201 1198
@@ -1211,12 +1208,9 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
1211 return; 1208 return;
1212 1209
1213 rb_reset_cpu(cpu_buffer); 1210 rb_reset_cpu(cpu_buffer);
1214 spin_unlock_irq(&cpu_buffer->reader_lock);
1215
1216 rb_check_pages(cpu_buffer); 1211 rb_check_pages(cpu_buffer);
1217 1212
1218 atomic_dec(&cpu_buffer->record_disabled); 1213 spin_unlock_irq(&cpu_buffer->reader_lock);
1219
1220} 1214}
1221 1215
1222static void 1216static void
@@ -1227,9 +1221,6 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
1227 struct list_head *p; 1221 struct list_head *p;
1228 unsigned i; 1222 unsigned i;
1229 1223
1230 atomic_inc(&cpu_buffer->record_disabled);
1231 synchronize_sched();
1232
1233 spin_lock_irq(&cpu_buffer->reader_lock); 1224 spin_lock_irq(&cpu_buffer->reader_lock);
1234 rb_head_page_deactivate(cpu_buffer); 1225 rb_head_page_deactivate(cpu_buffer);
1235 1226
@@ -1242,11 +1233,9 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
1242 list_add_tail(&bpage->list, cpu_buffer->pages); 1233 list_add_tail(&bpage->list, cpu_buffer->pages);
1243 } 1234 }
1244 rb_reset_cpu(cpu_buffer); 1235 rb_reset_cpu(cpu_buffer);
1245 spin_unlock_irq(&cpu_buffer->reader_lock);
1246
1247 rb_check_pages(cpu_buffer); 1236 rb_check_pages(cpu_buffer);
1248 1237
1249 atomic_dec(&cpu_buffer->record_disabled); 1238 spin_unlock_irq(&cpu_buffer->reader_lock);
1250} 1239}
1251 1240
1252/** 1241/**
@@ -1254,11 +1243,6 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
1254 * @buffer: the buffer to resize. 1243 * @buffer: the buffer to resize.
1255 * @size: the new size. 1244 * @size: the new size.
1256 * 1245 *
1257 * The tracer is responsible for making sure that the buffer is
1258 * not being used while changing the size.
1259 * Note: We may be able to change the above requirement by using
1260 * RCU synchronizations.
1261 *
1262 * Minimum size is 2 * BUF_PAGE_SIZE. 1246 * Minimum size is 2 * BUF_PAGE_SIZE.
1263 * 1247 *
1264 * Returns -1 on failure. 1248 * Returns -1 on failure.
@@ -1290,6 +1274,11 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1290 if (size == buffer_size) 1274 if (size == buffer_size)
1291 return size; 1275 return size;
1292 1276
1277 atomic_inc(&buffer->record_disabled);
1278
1279 /* Make sure all writers are done with this buffer. */
1280 synchronize_sched();
1281
1293 mutex_lock(&buffer->mutex); 1282 mutex_lock(&buffer->mutex);
1294 get_online_cpus(); 1283 get_online_cpus();
1295 1284
@@ -1352,6 +1341,8 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1352 put_online_cpus(); 1341 put_online_cpus();
1353 mutex_unlock(&buffer->mutex); 1342 mutex_unlock(&buffer->mutex);
1354 1343
1344 atomic_dec(&buffer->record_disabled);
1345
1355 return size; 1346 return size;
1356 1347
1357 free_pages: 1348 free_pages:
@@ -1361,6 +1352,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1361 } 1352 }
1362 put_online_cpus(); 1353 put_online_cpus();
1363 mutex_unlock(&buffer->mutex); 1354 mutex_unlock(&buffer->mutex);
1355 atomic_dec(&buffer->record_disabled);
1364 return -ENOMEM; 1356 return -ENOMEM;
1365 1357
1366 /* 1358 /*
@@ -1370,6 +1362,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1370 out_fail: 1362 out_fail:
1371 put_online_cpus(); 1363 put_online_cpus();
1372 mutex_unlock(&buffer->mutex); 1364 mutex_unlock(&buffer->mutex);
1365 atomic_dec(&buffer->record_disabled);
1373 return -1; 1366 return -1;
1374} 1367}
1375EXPORT_SYMBOL_GPL(ring_buffer_resize); 1368EXPORT_SYMBOL_GPL(ring_buffer_resize);
@@ -2834,7 +2827,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2834 int ret; 2827 int ret;
2835 2828
2836 local_irq_save(flags); 2829 local_irq_save(flags);
2837 __raw_spin_lock(&cpu_buffer->lock); 2830 arch_spin_lock(&cpu_buffer->lock);
2838 2831
2839 again: 2832 again:
2840 /* 2833 /*
@@ -2876,7 +2869,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2876 * Splice the empty reader page into the list around the head. 2869 * Splice the empty reader page into the list around the head.
2877 */ 2870 */
2878 reader = rb_set_head_page(cpu_buffer); 2871 reader = rb_set_head_page(cpu_buffer);
2879 cpu_buffer->reader_page->list.next = reader->list.next; 2872 cpu_buffer->reader_page->list.next = rb_list_head(reader->list.next);
2880 cpu_buffer->reader_page->list.prev = reader->list.prev; 2873 cpu_buffer->reader_page->list.prev = reader->list.prev;
2881 2874
2882 /* 2875 /*
@@ -2913,7 +2906,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2913 * 2906 *
2914 * Now make the new head point back to the reader page. 2907 * Now make the new head point back to the reader page.
2915 */ 2908 */
2916 reader->list.next->prev = &cpu_buffer->reader_page->list; 2909 rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list;
2917 rb_inc_page(cpu_buffer, &cpu_buffer->head_page); 2910 rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
2918 2911
2919 /* Finally update the reader page to the new head */ 2912 /* Finally update the reader page to the new head */
@@ -2923,7 +2916,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2923 goto again; 2916 goto again;
2924 2917
2925 out: 2918 out:
2926 __raw_spin_unlock(&cpu_buffer->lock); 2919 arch_spin_unlock(&cpu_buffer->lock);
2927 local_irq_restore(flags); 2920 local_irq_restore(flags);
2928 2921
2929 return reader; 2922 return reader;
@@ -3286,9 +3279,9 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
3286 synchronize_sched(); 3279 synchronize_sched();
3287 3280
3288 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 3281 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3289 __raw_spin_lock(&cpu_buffer->lock); 3282 arch_spin_lock(&cpu_buffer->lock);
3290 rb_iter_reset(iter); 3283 rb_iter_reset(iter);
3291 __raw_spin_unlock(&cpu_buffer->lock); 3284 arch_spin_unlock(&cpu_buffer->lock);
3292 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3285 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3293 3286
3294 return iter; 3287 return iter;
@@ -3408,11 +3401,11 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
3408 if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing))) 3401 if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
3409 goto out; 3402 goto out;
3410 3403
3411 __raw_spin_lock(&cpu_buffer->lock); 3404 arch_spin_lock(&cpu_buffer->lock);
3412 3405
3413 rb_reset_cpu(cpu_buffer); 3406 rb_reset_cpu(cpu_buffer);
3414 3407
3415 __raw_spin_unlock(&cpu_buffer->lock); 3408 arch_spin_unlock(&cpu_buffer->lock);
3416 3409
3417 out: 3410 out:
3418 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3411 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c82dfd92fdfd..0df1b0f2cb9e 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -12,7 +12,7 @@
12 * Copyright (C) 2004 William Lee Irwin III 12 * Copyright (C) 2004 William Lee Irwin III
13 */ 13 */
14#include <linux/ring_buffer.h> 14#include <linux/ring_buffer.h>
15#include <linux/utsrelease.h> 15#include <generated/utsrelease.h>
16#include <linux/stacktrace.h> 16#include <linux/stacktrace.h>
17#include <linux/writeback.h> 17#include <linux/writeback.h>
18#include <linux/kallsyms.h> 18#include <linux/kallsyms.h>
@@ -313,7 +313,6 @@ static const char *trace_options[] = {
313 "bin", 313 "bin",
314 "block", 314 "block",
315 "stacktrace", 315 "stacktrace",
316 "sched-tree",
317 "trace_printk", 316 "trace_printk",
318 "ftrace_preempt", 317 "ftrace_preempt",
319 "branch", 318 "branch",
@@ -493,15 +492,15 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
493 * protected by per_cpu spinlocks. But the action of the swap 492 * protected by per_cpu spinlocks. But the action of the swap
494 * needs its own lock. 493 * needs its own lock.
495 * 494 *
496 * This is defined as a raw_spinlock_t in order to help 495 * This is defined as a arch_spinlock_t in order to help
497 * with performance when lockdep debugging is enabled. 496 * with performance when lockdep debugging is enabled.
498 * 497 *
499 * It is also used in other places outside the update_max_tr 498 * It is also used in other places outside the update_max_tr
500 * so it needs to be defined outside of the 499 * so it needs to be defined outside of the
501 * CONFIG_TRACER_MAX_TRACE. 500 * CONFIG_TRACER_MAX_TRACE.
502 */ 501 */
503static raw_spinlock_t ftrace_max_lock = 502static arch_spinlock_t ftrace_max_lock =
504 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 503 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
505 504
506#ifdef CONFIG_TRACER_MAX_TRACE 505#ifdef CONFIG_TRACER_MAX_TRACE
507unsigned long __read_mostly tracing_max_latency; 506unsigned long __read_mostly tracing_max_latency;
@@ -555,13 +554,13 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
555 return; 554 return;
556 555
557 WARN_ON_ONCE(!irqs_disabled()); 556 WARN_ON_ONCE(!irqs_disabled());
558 __raw_spin_lock(&ftrace_max_lock); 557 arch_spin_lock(&ftrace_max_lock);
559 558
560 tr->buffer = max_tr.buffer; 559 tr->buffer = max_tr.buffer;
561 max_tr.buffer = buf; 560 max_tr.buffer = buf;
562 561
563 __update_max_tr(tr, tsk, cpu); 562 __update_max_tr(tr, tsk, cpu);
564 __raw_spin_unlock(&ftrace_max_lock); 563 arch_spin_unlock(&ftrace_max_lock);
565} 564}
566 565
567/** 566/**
@@ -581,7 +580,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
581 return; 580 return;
582 581
583 WARN_ON_ONCE(!irqs_disabled()); 582 WARN_ON_ONCE(!irqs_disabled());
584 __raw_spin_lock(&ftrace_max_lock); 583 arch_spin_lock(&ftrace_max_lock);
585 584
586 ftrace_disable_cpu(); 585 ftrace_disable_cpu();
587 586
@@ -603,7 +602,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
603 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY); 602 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
604 603
605 __update_max_tr(tr, tsk, cpu); 604 __update_max_tr(tr, tsk, cpu);
606 __raw_spin_unlock(&ftrace_max_lock); 605 arch_spin_unlock(&ftrace_max_lock);
607} 606}
608#endif /* CONFIG_TRACER_MAX_TRACE */ 607#endif /* CONFIG_TRACER_MAX_TRACE */
609 608
@@ -802,7 +801,7 @@ static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
802static unsigned map_cmdline_to_pid[SAVED_CMDLINES]; 801static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
803static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN]; 802static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
804static int cmdline_idx; 803static int cmdline_idx;
805static raw_spinlock_t trace_cmdline_lock = __RAW_SPIN_LOCK_UNLOCKED; 804static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
806 805
807/* temporary disable recording */ 806/* temporary disable recording */
808static atomic_t trace_record_cmdline_disabled __read_mostly; 807static atomic_t trace_record_cmdline_disabled __read_mostly;
@@ -915,7 +914,7 @@ static void trace_save_cmdline(struct task_struct *tsk)
915 * nor do we want to disable interrupts, 914 * nor do we want to disable interrupts,
916 * so if we miss here, then better luck next time. 915 * so if we miss here, then better luck next time.
917 */ 916 */
918 if (!__raw_spin_trylock(&trace_cmdline_lock)) 917 if (!arch_spin_trylock(&trace_cmdline_lock))
919 return; 918 return;
920 919
921 idx = map_pid_to_cmdline[tsk->pid]; 920 idx = map_pid_to_cmdline[tsk->pid];
@@ -940,7 +939,7 @@ static void trace_save_cmdline(struct task_struct *tsk)
940 939
941 memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN); 940 memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
942 941
943 __raw_spin_unlock(&trace_cmdline_lock); 942 arch_spin_unlock(&trace_cmdline_lock);
944} 943}
945 944
946void trace_find_cmdline(int pid, char comm[]) 945void trace_find_cmdline(int pid, char comm[])
@@ -958,14 +957,14 @@ void trace_find_cmdline(int pid, char comm[])
958 } 957 }
959 958
960 preempt_disable(); 959 preempt_disable();
961 __raw_spin_lock(&trace_cmdline_lock); 960 arch_spin_lock(&trace_cmdline_lock);
962 map = map_pid_to_cmdline[pid]; 961 map = map_pid_to_cmdline[pid];
963 if (map != NO_CMDLINE_MAP) 962 if (map != NO_CMDLINE_MAP)
964 strcpy(comm, saved_cmdlines[map]); 963 strcpy(comm, saved_cmdlines[map]);
965 else 964 else
966 strcpy(comm, "<...>"); 965 strcpy(comm, "<...>");
967 966
968 __raw_spin_unlock(&trace_cmdline_lock); 967 arch_spin_unlock(&trace_cmdline_lock);
969 preempt_enable(); 968 preempt_enable();
970} 969}
971 970
@@ -1151,6 +1150,22 @@ void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1151 __ftrace_trace_stack(tr->buffer, flags, skip, pc); 1150 __ftrace_trace_stack(tr->buffer, flags, skip, pc);
1152} 1151}
1153 1152
1153/**
1154 * trace_dump_stack - record a stack back trace in the trace buffer
1155 */
1156void trace_dump_stack(void)
1157{
1158 unsigned long flags;
1159
1160 if (tracing_disabled || tracing_selftest_running)
1161 return;
1162
1163 local_save_flags(flags);
1164
1165 /* skipping 3 traces, seems to get us at the caller of this function */
1166 __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count());
1167}
1168
1154void 1169void
1155ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) 1170ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1156{ 1171{
@@ -1251,8 +1266,8 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
1251 */ 1266 */
1252int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) 1267int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1253{ 1268{
1254 static raw_spinlock_t trace_buf_lock = 1269 static arch_spinlock_t trace_buf_lock =
1255 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 1270 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
1256 static u32 trace_buf[TRACE_BUF_SIZE]; 1271 static u32 trace_buf[TRACE_BUF_SIZE];
1257 1272
1258 struct ftrace_event_call *call = &event_bprint; 1273 struct ftrace_event_call *call = &event_bprint;
@@ -1283,7 +1298,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1283 1298
1284 /* Lockdep uses trace_printk for lock tracing */ 1299 /* Lockdep uses trace_printk for lock tracing */
1285 local_irq_save(flags); 1300 local_irq_save(flags);
1286 __raw_spin_lock(&trace_buf_lock); 1301 arch_spin_lock(&trace_buf_lock);
1287 len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args); 1302 len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
1288 1303
1289 if (len > TRACE_BUF_SIZE || len < 0) 1304 if (len > TRACE_BUF_SIZE || len < 0)
@@ -1304,7 +1319,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1304 ring_buffer_unlock_commit(buffer, event); 1319 ring_buffer_unlock_commit(buffer, event);
1305 1320
1306out_unlock: 1321out_unlock:
1307 __raw_spin_unlock(&trace_buf_lock); 1322 arch_spin_unlock(&trace_buf_lock);
1308 local_irq_restore(flags); 1323 local_irq_restore(flags);
1309 1324
1310out: 1325out:
@@ -1334,7 +1349,7 @@ int trace_array_printk(struct trace_array *tr,
1334int trace_array_vprintk(struct trace_array *tr, 1349int trace_array_vprintk(struct trace_array *tr,
1335 unsigned long ip, const char *fmt, va_list args) 1350 unsigned long ip, const char *fmt, va_list args)
1336{ 1351{
1337 static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED; 1352 static arch_spinlock_t trace_buf_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1338 static char trace_buf[TRACE_BUF_SIZE]; 1353 static char trace_buf[TRACE_BUF_SIZE];
1339 1354
1340 struct ftrace_event_call *call = &event_print; 1355 struct ftrace_event_call *call = &event_print;
@@ -1360,7 +1375,7 @@ int trace_array_vprintk(struct trace_array *tr,
1360 1375
1361 pause_graph_tracing(); 1376 pause_graph_tracing();
1362 raw_local_irq_save(irq_flags); 1377 raw_local_irq_save(irq_flags);
1363 __raw_spin_lock(&trace_buf_lock); 1378 arch_spin_lock(&trace_buf_lock);
1364 len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args); 1379 len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
1365 1380
1366 size = sizeof(*entry) + len + 1; 1381 size = sizeof(*entry) + len + 1;
@@ -1378,7 +1393,7 @@ int trace_array_vprintk(struct trace_array *tr,
1378 ring_buffer_unlock_commit(buffer, event); 1393 ring_buffer_unlock_commit(buffer, event);
1379 1394
1380 out_unlock: 1395 out_unlock:
1381 __raw_spin_unlock(&trace_buf_lock); 1396 arch_spin_unlock(&trace_buf_lock);
1382 raw_local_irq_restore(irq_flags); 1397 raw_local_irq_restore(irq_flags);
1383 unpause_graph_tracing(); 1398 unpause_graph_tracing();
1384 out: 1399 out:
@@ -2279,7 +2294,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2279 mutex_lock(&tracing_cpumask_update_lock); 2294 mutex_lock(&tracing_cpumask_update_lock);
2280 2295
2281 local_irq_disable(); 2296 local_irq_disable();
2282 __raw_spin_lock(&ftrace_max_lock); 2297 arch_spin_lock(&ftrace_max_lock);
2283 for_each_tracing_cpu(cpu) { 2298 for_each_tracing_cpu(cpu) {
2284 /* 2299 /*
2285 * Increase/decrease the disabled counter if we are 2300 * Increase/decrease the disabled counter if we are
@@ -2294,7 +2309,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2294 atomic_dec(&global_trace.data[cpu]->disabled); 2309 atomic_dec(&global_trace.data[cpu]->disabled);
2295 } 2310 }
2296 } 2311 }
2297 __raw_spin_unlock(&ftrace_max_lock); 2312 arch_spin_unlock(&ftrace_max_lock);
2298 local_irq_enable(); 2313 local_irq_enable();
2299 2314
2300 cpumask_copy(tracing_cpumask, tracing_cpumask_new); 2315 cpumask_copy(tracing_cpumask, tracing_cpumask_new);
@@ -2316,67 +2331,49 @@ static const struct file_operations tracing_cpumask_fops = {
2316 .write = tracing_cpumask_write, 2331 .write = tracing_cpumask_write,
2317}; 2332};
2318 2333
2319static ssize_t 2334static int tracing_trace_options_show(struct seq_file *m, void *v)
2320tracing_trace_options_read(struct file *filp, char __user *ubuf,
2321 size_t cnt, loff_t *ppos)
2322{ 2335{
2323 struct tracer_opt *trace_opts; 2336 struct tracer_opt *trace_opts;
2324 u32 tracer_flags; 2337 u32 tracer_flags;
2325 int len = 0;
2326 char *buf;
2327 int r = 0;
2328 int i; 2338 int i;
2329 2339
2330
2331 /* calculate max size */
2332 for (i = 0; trace_options[i]; i++) {
2333 len += strlen(trace_options[i]);
2334 len += 3; /* "no" and newline */
2335 }
2336
2337 mutex_lock(&trace_types_lock); 2340 mutex_lock(&trace_types_lock);
2338 tracer_flags = current_trace->flags->val; 2341 tracer_flags = current_trace->flags->val;
2339 trace_opts = current_trace->flags->opts; 2342 trace_opts = current_trace->flags->opts;
2340 2343
2341 /*
2342 * Increase the size with names of options specific
2343 * of the current tracer.
2344 */
2345 for (i = 0; trace_opts[i].name; i++) {
2346 len += strlen(trace_opts[i].name);
2347 len += 3; /* "no" and newline */
2348 }
2349
2350 /* +1 for \0 */
2351 buf = kmalloc(len + 1, GFP_KERNEL);
2352 if (!buf) {
2353 mutex_unlock(&trace_types_lock);
2354 return -ENOMEM;
2355 }
2356
2357 for (i = 0; trace_options[i]; i++) { 2344 for (i = 0; trace_options[i]; i++) {
2358 if (trace_flags & (1 << i)) 2345 if (trace_flags & (1 << i))
2359 r += sprintf(buf + r, "%s\n", trace_options[i]); 2346 seq_printf(m, "%s\n", trace_options[i]);
2360 else 2347 else
2361 r += sprintf(buf + r, "no%s\n", trace_options[i]); 2348 seq_printf(m, "no%s\n", trace_options[i]);
2362 } 2349 }
2363 2350
2364 for (i = 0; trace_opts[i].name; i++) { 2351 for (i = 0; trace_opts[i].name; i++) {
2365 if (tracer_flags & trace_opts[i].bit) 2352 if (tracer_flags & trace_opts[i].bit)
2366 r += sprintf(buf + r, "%s\n", 2353 seq_printf(m, "%s\n", trace_opts[i].name);
2367 trace_opts[i].name);
2368 else 2354 else
2369 r += sprintf(buf + r, "no%s\n", 2355 seq_printf(m, "no%s\n", trace_opts[i].name);
2370 trace_opts[i].name);
2371 } 2356 }
2372 mutex_unlock(&trace_types_lock); 2357 mutex_unlock(&trace_types_lock);
2373 2358
2374 WARN_ON(r >= len + 1); 2359 return 0;
2360}
2375 2361
2376 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2362static int __set_tracer_option(struct tracer *trace,
2363 struct tracer_flags *tracer_flags,
2364 struct tracer_opt *opts, int neg)
2365{
2366 int ret;
2377 2367
2378 kfree(buf); 2368 ret = trace->set_flag(tracer_flags->val, opts->bit, !neg);
2379 return r; 2369 if (ret)
2370 return ret;
2371
2372 if (neg)
2373 tracer_flags->val &= ~opts->bit;
2374 else
2375 tracer_flags->val |= opts->bit;
2376 return 0;
2380} 2377}
2381 2378
2382/* Try to assign a tracer specific option */ 2379/* Try to assign a tracer specific option */
@@ -2384,33 +2381,17 @@ static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
2384{ 2381{
2385 struct tracer_flags *tracer_flags = trace->flags; 2382 struct tracer_flags *tracer_flags = trace->flags;
2386 struct tracer_opt *opts = NULL; 2383 struct tracer_opt *opts = NULL;
2387 int ret = 0, i = 0; 2384 int i;
2388 int len;
2389 2385
2390 for (i = 0; tracer_flags->opts[i].name; i++) { 2386 for (i = 0; tracer_flags->opts[i].name; i++) {
2391 opts = &tracer_flags->opts[i]; 2387 opts = &tracer_flags->opts[i];
2392 len = strlen(opts->name);
2393 2388
2394 if (strncmp(cmp, opts->name, len) == 0) { 2389 if (strcmp(cmp, opts->name) == 0)
2395 ret = trace->set_flag(tracer_flags->val, 2390 return __set_tracer_option(trace, trace->flags,
2396 opts->bit, !neg); 2391 opts, neg);
2397 break;
2398 }
2399 } 2392 }
2400 /* Not found */
2401 if (!tracer_flags->opts[i].name)
2402 return -EINVAL;
2403
2404 /* Refused to handle */
2405 if (ret)
2406 return ret;
2407
2408 if (neg)
2409 tracer_flags->val &= ~opts->bit;
2410 else
2411 tracer_flags->val |= opts->bit;
2412 2393
2413 return 0; 2394 return -EINVAL;
2414} 2395}
2415 2396
2416static void set_tracer_flags(unsigned int mask, int enabled) 2397static void set_tracer_flags(unsigned int mask, int enabled)
@@ -2430,7 +2411,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2430 size_t cnt, loff_t *ppos) 2411 size_t cnt, loff_t *ppos)
2431{ 2412{
2432 char buf[64]; 2413 char buf[64];
2433 char *cmp = buf; 2414 char *cmp;
2434 int neg = 0; 2415 int neg = 0;
2435 int ret; 2416 int ret;
2436 int i; 2417 int i;
@@ -2442,16 +2423,15 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2442 return -EFAULT; 2423 return -EFAULT;
2443 2424
2444 buf[cnt] = 0; 2425 buf[cnt] = 0;
2426 cmp = strstrip(buf);
2445 2427
2446 if (strncmp(buf, "no", 2) == 0) { 2428 if (strncmp(cmp, "no", 2) == 0) {
2447 neg = 1; 2429 neg = 1;
2448 cmp += 2; 2430 cmp += 2;
2449 } 2431 }
2450 2432
2451 for (i = 0; trace_options[i]; i++) { 2433 for (i = 0; trace_options[i]; i++) {
2452 int len = strlen(trace_options[i]); 2434 if (strcmp(cmp, trace_options[i]) == 0) {
2453
2454 if (strncmp(cmp, trace_options[i], len) == 0) {
2455 set_tracer_flags(1 << i, !neg); 2435 set_tracer_flags(1 << i, !neg);
2456 break; 2436 break;
2457 } 2437 }
@@ -2471,9 +2451,18 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2471 return cnt; 2451 return cnt;
2472} 2452}
2473 2453
2454static int tracing_trace_options_open(struct inode *inode, struct file *file)
2455{
2456 if (tracing_disabled)
2457 return -ENODEV;
2458 return single_open(file, tracing_trace_options_show, NULL);
2459}
2460
2474static const struct file_operations tracing_iter_fops = { 2461static const struct file_operations tracing_iter_fops = {
2475 .open = tracing_open_generic, 2462 .open = tracing_trace_options_open,
2476 .read = tracing_trace_options_read, 2463 .read = seq_read,
2464 .llseek = seq_lseek,
2465 .release = single_release,
2477 .write = tracing_trace_options_write, 2466 .write = tracing_trace_options_write,
2478}; 2467};
2479 2468
@@ -3133,7 +3122,7 @@ static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
3133 __free_page(spd->pages[idx]); 3122 __free_page(spd->pages[idx]);
3134} 3123}
3135 3124
3136static struct pipe_buf_operations tracing_pipe_buf_ops = { 3125static const struct pipe_buf_operations tracing_pipe_buf_ops = {
3137 .can_merge = 0, 3126 .can_merge = 0,
3138 .map = generic_pipe_buf_map, 3127 .map = generic_pipe_buf_map,
3139 .unmap = generic_pipe_buf_unmap, 3128 .unmap = generic_pipe_buf_unmap,
@@ -3392,21 +3381,18 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
3392 return cnt; 3381 return cnt;
3393} 3382}
3394 3383
3395static ssize_t tracing_clock_read(struct file *filp, char __user *ubuf, 3384static int tracing_clock_show(struct seq_file *m, void *v)
3396 size_t cnt, loff_t *ppos)
3397{ 3385{
3398 char buf[64];
3399 int bufiter = 0;
3400 int i; 3386 int i;
3401 3387
3402 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) 3388 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
3403 bufiter += snprintf(buf + bufiter, sizeof(buf) - bufiter, 3389 seq_printf(m,
3404 "%s%s%s%s", i ? " " : "", 3390 "%s%s%s%s", i ? " " : "",
3405 i == trace_clock_id ? "[" : "", trace_clocks[i].name, 3391 i == trace_clock_id ? "[" : "", trace_clocks[i].name,
3406 i == trace_clock_id ? "]" : ""); 3392 i == trace_clock_id ? "]" : "");
3407 bufiter += snprintf(buf + bufiter, sizeof(buf) - bufiter, "\n"); 3393 seq_putc(m, '\n');
3408 3394
3409 return simple_read_from_buffer(ubuf, cnt, ppos, buf, bufiter); 3395 return 0;
3410} 3396}
3411 3397
3412static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, 3398static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
@@ -3448,6 +3434,13 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
3448 return cnt; 3434 return cnt;
3449} 3435}
3450 3436
3437static int tracing_clock_open(struct inode *inode, struct file *file)
3438{
3439 if (tracing_disabled)
3440 return -ENODEV;
3441 return single_open(file, tracing_clock_show, NULL);
3442}
3443
3451static const struct file_operations tracing_max_lat_fops = { 3444static const struct file_operations tracing_max_lat_fops = {
3452 .open = tracing_open_generic, 3445 .open = tracing_open_generic,
3453 .read = tracing_max_lat_read, 3446 .read = tracing_max_lat_read,
@@ -3486,8 +3479,10 @@ static const struct file_operations tracing_mark_fops = {
3486}; 3479};
3487 3480
3488static const struct file_operations trace_clock_fops = { 3481static const struct file_operations trace_clock_fops = {
3489 .open = tracing_open_generic, 3482 .open = tracing_clock_open,
3490 .read = tracing_clock_read, 3483 .read = seq_read,
3484 .llseek = seq_lseek,
3485 .release = single_release,
3491 .write = tracing_clock_write, 3486 .write = tracing_clock_write,
3492}; 3487};
3493 3488
@@ -3617,7 +3612,7 @@ static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
3617} 3612}
3618 3613
3619/* Pipe buffer operations for a buffer. */ 3614/* Pipe buffer operations for a buffer. */
3620static struct pipe_buf_operations buffer_pipe_buf_ops = { 3615static const struct pipe_buf_operations buffer_pipe_buf_ops = {
3621 .can_merge = 0, 3616 .can_merge = 0,
3622 .map = generic_pipe_buf_map, 3617 .map = generic_pipe_buf_map,
3623 .unmap = generic_pipe_buf_unmap, 3618 .unmap = generic_pipe_buf_unmap,
@@ -3948,39 +3943,16 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
3948 if (ret < 0) 3943 if (ret < 0)
3949 return ret; 3944 return ret;
3950 3945
3951 ret = 0; 3946 if (val != 0 && val != 1)
3952 switch (val) { 3947 return -EINVAL;
3953 case 0:
3954 /* do nothing if already cleared */
3955 if (!(topt->flags->val & topt->opt->bit))
3956 break;
3957
3958 mutex_lock(&trace_types_lock);
3959 if (current_trace->set_flag)
3960 ret = current_trace->set_flag(topt->flags->val,
3961 topt->opt->bit, 0);
3962 mutex_unlock(&trace_types_lock);
3963 if (ret)
3964 return ret;
3965 topt->flags->val &= ~topt->opt->bit;
3966 break;
3967 case 1:
3968 /* do nothing if already set */
3969 if (topt->flags->val & topt->opt->bit)
3970 break;
3971 3948
3949 if (!!(topt->flags->val & topt->opt->bit) != val) {
3972 mutex_lock(&trace_types_lock); 3950 mutex_lock(&trace_types_lock);
3973 if (current_trace->set_flag) 3951 ret = __set_tracer_option(current_trace, topt->flags,
3974 ret = current_trace->set_flag(topt->flags->val, 3952 topt->opt, !val);
3975 topt->opt->bit, 1);
3976 mutex_unlock(&trace_types_lock); 3953 mutex_unlock(&trace_types_lock);
3977 if (ret) 3954 if (ret)
3978 return ret; 3955 return ret;
3979 topt->flags->val |= topt->opt->bit;
3980 break;
3981
3982 default:
3983 return -EINVAL;
3984 } 3956 }
3985 3957
3986 *ppos += cnt; 3958 *ppos += cnt;
@@ -4307,8 +4279,8 @@ trace_printk_seq(struct trace_seq *s)
4307 4279
4308static void __ftrace_dump(bool disable_tracing) 4280static void __ftrace_dump(bool disable_tracing)
4309{ 4281{
4310 static raw_spinlock_t ftrace_dump_lock = 4282 static arch_spinlock_t ftrace_dump_lock =
4311 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 4283 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
4312 /* use static because iter can be a bit big for the stack */ 4284 /* use static because iter can be a bit big for the stack */
4313 static struct trace_iterator iter; 4285 static struct trace_iterator iter;
4314 unsigned int old_userobj; 4286 unsigned int old_userobj;
@@ -4318,7 +4290,7 @@ static void __ftrace_dump(bool disable_tracing)
4318 4290
4319 /* only one dump */ 4291 /* only one dump */
4320 local_irq_save(flags); 4292 local_irq_save(flags);
4321 __raw_spin_lock(&ftrace_dump_lock); 4293 arch_spin_lock(&ftrace_dump_lock);
4322 if (dump_ran) 4294 if (dump_ran)
4323 goto out; 4295 goto out;
4324 4296
@@ -4393,7 +4365,7 @@ static void __ftrace_dump(bool disable_tracing)
4393 } 4365 }
4394 4366
4395 out: 4367 out:
4396 __raw_spin_unlock(&ftrace_dump_lock); 4368 arch_spin_unlock(&ftrace_dump_lock);
4397 local_irq_restore(flags); 4369 local_irq_restore(flags);
4398} 4370}
4399 4371
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index a52bed2eedd8..4df6a77eb196 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -597,18 +597,17 @@ enum trace_iterator_flags {
597 TRACE_ITER_BIN = 0x40, 597 TRACE_ITER_BIN = 0x40,
598 TRACE_ITER_BLOCK = 0x80, 598 TRACE_ITER_BLOCK = 0x80,
599 TRACE_ITER_STACKTRACE = 0x100, 599 TRACE_ITER_STACKTRACE = 0x100,
600 TRACE_ITER_SCHED_TREE = 0x200, 600 TRACE_ITER_PRINTK = 0x200,
601 TRACE_ITER_PRINTK = 0x400, 601 TRACE_ITER_PREEMPTONLY = 0x400,
602 TRACE_ITER_PREEMPTONLY = 0x800, 602 TRACE_ITER_BRANCH = 0x800,
603 TRACE_ITER_BRANCH = 0x1000, 603 TRACE_ITER_ANNOTATE = 0x1000,
604 TRACE_ITER_ANNOTATE = 0x2000, 604 TRACE_ITER_USERSTACKTRACE = 0x2000,
605 TRACE_ITER_USERSTACKTRACE = 0x4000, 605 TRACE_ITER_SYM_USEROBJ = 0x4000,
606 TRACE_ITER_SYM_USEROBJ = 0x8000, 606 TRACE_ITER_PRINTK_MSGONLY = 0x8000,
607 TRACE_ITER_PRINTK_MSGONLY = 0x10000, 607 TRACE_ITER_CONTEXT_INFO = 0x10000, /* Print pid/cpu/time */
608 TRACE_ITER_CONTEXT_INFO = 0x20000, /* Print pid/cpu/time */ 608 TRACE_ITER_LATENCY_FMT = 0x20000,
609 TRACE_ITER_LATENCY_FMT = 0x40000, 609 TRACE_ITER_SLEEP_TIME = 0x40000,
610 TRACE_ITER_SLEEP_TIME = 0x80000, 610 TRACE_ITER_GRAPH_TIME = 0x80000,
611 TRACE_ITER_GRAPH_TIME = 0x100000,
612}; 611};
613 612
614/* 613/*
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 878c03f386ba..84a3a7ba072a 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -71,10 +71,10 @@ u64 notrace trace_clock(void)
71/* keep prev_time and lock in the same cacheline. */ 71/* keep prev_time and lock in the same cacheline. */
72static struct { 72static struct {
73 u64 prev_time; 73 u64 prev_time;
74 raw_spinlock_t lock; 74 arch_spinlock_t lock;
75} trace_clock_struct ____cacheline_aligned_in_smp = 75} trace_clock_struct ____cacheline_aligned_in_smp =
76 { 76 {
77 .lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED, 77 .lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED,
78 }; 78 };
79 79
80u64 notrace trace_clock_global(void) 80u64 notrace trace_clock_global(void)
@@ -94,7 +94,7 @@ u64 notrace trace_clock_global(void)
94 if (unlikely(in_nmi())) 94 if (unlikely(in_nmi()))
95 goto out; 95 goto out;
96 96
97 __raw_spin_lock(&trace_clock_struct.lock); 97 arch_spin_lock(&trace_clock_struct.lock);
98 98
99 /* 99 /*
100 * TODO: if this happens often then maybe we should reset 100 * TODO: if this happens often then maybe we should reset
@@ -106,7 +106,7 @@ u64 notrace trace_clock_global(void)
106 106
107 trace_clock_struct.prev_time = now; 107 trace_clock_struct.prev_time = now;
108 108
109 __raw_spin_unlock(&trace_clock_struct.lock); 109 arch_spin_unlock(&trace_clock_struct.lock);
110 110
111 out: 111 out:
112 raw_local_irq_restore(flags); 112 raw_local_irq_restore(flags);
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index d9c60f80aa0d..9e25573242cf 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -25,7 +25,7 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event)
25 char *buf; 25 char *buf;
26 int ret = -ENOMEM; 26 int ret = -ENOMEM;
27 27
28 if (atomic_inc_return(&event->profile_count)) 28 if (event->profile_count++ > 0)
29 return 0; 29 return 0;
30 30
31 if (!total_profile_count) { 31 if (!total_profile_count) {
@@ -56,7 +56,7 @@ fail_buf_nmi:
56 perf_trace_buf = NULL; 56 perf_trace_buf = NULL;
57 } 57 }
58fail_buf: 58fail_buf:
59 atomic_dec(&event->profile_count); 59 event->profile_count--;
60 60
61 return ret; 61 return ret;
62} 62}
@@ -83,7 +83,7 @@ static void ftrace_profile_disable_event(struct ftrace_event_call *event)
83{ 83{
84 char *buf, *nmi_buf; 84 char *buf, *nmi_buf;
85 85
86 if (!atomic_add_negative(-1, &event->profile_count)) 86 if (--event->profile_count > 0)
87 return; 87 return;
88 88
89 event->profile_disable(event); 89 event->profile_disable(event);
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 1d18315dc836..189b09baf4fb 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -78,7 +78,7 @@ EXPORT_SYMBOL_GPL(trace_define_field);
78 if (ret) \ 78 if (ret) \
79 return ret; 79 return ret;
80 80
81int trace_define_common_fields(struct ftrace_event_call *call) 81static int trace_define_common_fields(struct ftrace_event_call *call)
82{ 82{
83 int ret; 83 int ret;
84 struct trace_entry ent; 84 struct trace_entry ent;
@@ -91,7 +91,6 @@ int trace_define_common_fields(struct ftrace_event_call *call)
91 91
92 return ret; 92 return ret;
93} 93}
94EXPORT_SYMBOL_GPL(trace_define_common_fields);
95 94
96void trace_destroy_fields(struct ftrace_event_call *call) 95void trace_destroy_fields(struct ftrace_event_call *call)
97{ 96{
@@ -105,9 +104,25 @@ void trace_destroy_fields(struct ftrace_event_call *call)
105 } 104 }
106} 105}
107 106
108static void ftrace_event_enable_disable(struct ftrace_event_call *call, 107int trace_event_raw_init(struct ftrace_event_call *call)
108{
109 int id;
110
111 id = register_ftrace_event(call->event);
112 if (!id)
113 return -ENODEV;
114 call->id = id;
115 INIT_LIST_HEAD(&call->fields);
116
117 return 0;
118}
119EXPORT_SYMBOL_GPL(trace_event_raw_init);
120
121static int ftrace_event_enable_disable(struct ftrace_event_call *call,
109 int enable) 122 int enable)
110{ 123{
124 int ret = 0;
125
111 switch (enable) { 126 switch (enable) {
112 case 0: 127 case 0:
113 if (call->enabled) { 128 if (call->enabled) {
@@ -118,12 +133,20 @@ static void ftrace_event_enable_disable(struct ftrace_event_call *call,
118 break; 133 break;
119 case 1: 134 case 1:
120 if (!call->enabled) { 135 if (!call->enabled) {
121 call->enabled = 1;
122 tracing_start_cmdline_record(); 136 tracing_start_cmdline_record();
123 call->regfunc(call); 137 ret = call->regfunc(call);
138 if (ret) {
139 tracing_stop_cmdline_record();
140 pr_info("event trace: Could not enable event "
141 "%s\n", call->name);
142 break;
143 }
144 call->enabled = 1;
124 } 145 }
125 break; 146 break;
126 } 147 }
148
149 return ret;
127} 150}
128 151
129static void ftrace_clear_events(void) 152static void ftrace_clear_events(void)
@@ -402,7 +425,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
402 case 0: 425 case 0:
403 case 1: 426 case 1:
404 mutex_lock(&event_mutex); 427 mutex_lock(&event_mutex);
405 ftrace_event_enable_disable(call, val); 428 ret = ftrace_event_enable_disable(call, val);
406 mutex_unlock(&event_mutex); 429 mutex_unlock(&event_mutex);
407 break; 430 break;
408 431
@@ -412,7 +435,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
412 435
413 *ppos += cnt; 436 *ppos += cnt;
414 437
415 return cnt; 438 return ret ? ret : cnt;
416} 439}
417 440
418static ssize_t 441static ssize_t
@@ -913,7 +936,9 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
913 id); 936 id);
914 937
915 if (call->define_fields) { 938 if (call->define_fields) {
916 ret = call->define_fields(call); 939 ret = trace_define_common_fields(call);
940 if (!ret)
941 ret = call->define_fields(call);
917 if (ret < 0) { 942 if (ret < 0) {
918 pr_warning("Could not initialize trace point" 943 pr_warning("Could not initialize trace point"
919 " events/%s\n", call->name); 944 " events/%s\n", call->name);
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 50504cb228de..e42af9aad69f 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -211,8 +211,9 @@ static int filter_pred_pchar(struct filter_pred *pred, void *event,
211{ 211{
212 char **addr = (char **)(event + pred->offset); 212 char **addr = (char **)(event + pred->offset);
213 int cmp, match; 213 int cmp, match;
214 int len = strlen(*addr) + 1; /* including tailing '\0' */
214 215
215 cmp = pred->regex.match(*addr, &pred->regex, pred->regex.field_len); 216 cmp = pred->regex.match(*addr, &pred->regex, len);
216 217
217 match = cmp ^ pred->not; 218 match = cmp ^ pred->not;
218 219
@@ -251,7 +252,18 @@ static int filter_pred_none(struct filter_pred *pred, void *event,
251 return 0; 252 return 0;
252} 253}
253 254
254/* Basic regex callbacks */ 255/*
256 * regex_match_foo - Basic regex callbacks
257 *
258 * @str: the string to be searched
259 * @r: the regex structure containing the pattern string
260 * @len: the length of the string to be searched (including '\0')
261 *
262 * Note:
263 * - @str might not be NULL-terminated if it's of type DYN_STRING
264 * or STATIC_STRING
265 */
266
255static int regex_match_full(char *str, struct regex *r, int len) 267static int regex_match_full(char *str, struct regex *r, int len)
256{ 268{
257 if (strncmp(str, r->pattern, len) == 0) 269 if (strncmp(str, r->pattern, len) == 0)
@@ -261,23 +273,24 @@ static int regex_match_full(char *str, struct regex *r, int len)
261 273
262static int regex_match_front(char *str, struct regex *r, int len) 274static int regex_match_front(char *str, struct regex *r, int len)
263{ 275{
264 if (strncmp(str, r->pattern, len) == 0) 276 if (strncmp(str, r->pattern, r->len) == 0)
265 return 1; 277 return 1;
266 return 0; 278 return 0;
267} 279}
268 280
269static int regex_match_middle(char *str, struct regex *r, int len) 281static int regex_match_middle(char *str, struct regex *r, int len)
270{ 282{
271 if (strstr(str, r->pattern)) 283 if (strnstr(str, r->pattern, len))
272 return 1; 284 return 1;
273 return 0; 285 return 0;
274} 286}
275 287
276static int regex_match_end(char *str, struct regex *r, int len) 288static int regex_match_end(char *str, struct regex *r, int len)
277{ 289{
278 char *ptr = strstr(str, r->pattern); 290 int strlen = len - 1;
279 291
280 if (ptr && (ptr[r->len] == 0)) 292 if (strlen >= r->len &&
293 memcmp(str + strlen - r->len, r->pattern, r->len) == 0)
281 return 1; 294 return 1;
282 return 0; 295 return 0;
283} 296}
@@ -781,10 +794,8 @@ static int filter_add_pred(struct filter_parse_state *ps,
781 pred->regex.field_len = field->size; 794 pred->regex.field_len = field->size;
782 } else if (field->filter_type == FILTER_DYN_STRING) 795 } else if (field->filter_type == FILTER_DYN_STRING)
783 fn = filter_pred_strloc; 796 fn = filter_pred_strloc;
784 else { 797 else
785 fn = filter_pred_pchar; 798 fn = filter_pred_pchar;
786 pred->regex.field_len = strlen(pred->regex.pattern);
787 }
788 } else { 799 } else {
789 if (field->is_signed) 800 if (field->is_signed)
790 ret = strict_strtoll(pred->regex.pattern, 0, &val); 801 ret = strict_strtoll(pred->regex.pattern, 0, &val);
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index dff8c84ddf17..d4fa5dc1ee4e 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -158,7 +158,8 @@ ftrace_format_##name(struct ftrace_event_call *unused, \
158 BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ 158 BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \
159 ret = trace_define_field(event_call, #type "[" #len "]", #item, \ 159 ret = trace_define_field(event_call, #type "[" #len "]", #item, \
160 offsetof(typeof(field), item), \ 160 offsetof(typeof(field), item), \
161 sizeof(field.item), 0, FILTER_OTHER); \ 161 sizeof(field.item), \
162 is_signed_type(type), FILTER_OTHER); \
162 if (ret) \ 163 if (ret) \
163 return ret; 164 return ret;
164 165
@@ -168,8 +169,8 @@ ftrace_format_##name(struct ftrace_event_call *unused, \
168 ret = trace_define_field(event_call, #type "[" #len "]", #item, \ 169 ret = trace_define_field(event_call, #type "[" #len "]", #item, \
169 offsetof(typeof(field), \ 170 offsetof(typeof(field), \
170 container.item), \ 171 container.item), \
171 sizeof(field.container.item), 0, \ 172 sizeof(field.container.item), \
172 FILTER_OTHER); \ 173 is_signed_type(type), FILTER_OTHER); \
173 if (ret) \ 174 if (ret) \
174 return ret; 175 return ret;
175 176
@@ -184,10 +185,6 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
184 struct struct_name field; \ 185 struct struct_name field; \
185 int ret; \ 186 int ret; \
186 \ 187 \
187 ret = trace_define_common_fields(event_call); \
188 if (ret) \
189 return ret; \
190 \
191 tstruct; \ 188 tstruct; \
192 \ 189 \
193 return ret; \ 190 return ret; \
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 3aa7eaa2114c..2974bc7538c7 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -151,6 +151,8 @@ check_critical_timing(struct trace_array *tr,
151 goto out_unlock; 151 goto out_unlock;
152 152
153 trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); 153 trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
154 /* Skip 5 functions to get to the irq/preempt enable function */
155 __trace_stack(tr, flags, 5, pc);
154 156
155 if (data->critical_sequence != max_sequence) 157 if (data->critical_sequence != max_sequence)
156 goto out_unlock; 158 goto out_unlock;
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index b52d397e57eb..6ea90c0e2c96 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -282,6 +282,18 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
282static int kretprobe_dispatcher(struct kretprobe_instance *ri, 282static int kretprobe_dispatcher(struct kretprobe_instance *ri,
283 struct pt_regs *regs); 283 struct pt_regs *regs);
284 284
285/* Check the name is good for event/group */
286static int check_event_name(const char *name)
287{
288 if (!isalpha(*name) && *name != '_')
289 return 0;
290 while (*++name != '\0') {
291 if (!isalpha(*name) && !isdigit(*name) && *name != '_')
292 return 0;
293 }
294 return 1;
295}
296
285/* 297/*
286 * Allocate new trace_probe and initialize it (including kprobes). 298 * Allocate new trace_probe and initialize it (including kprobes).
287 */ 299 */
@@ -293,10 +305,11 @@ static struct trace_probe *alloc_trace_probe(const char *group,
293 int nargs, int is_return) 305 int nargs, int is_return)
294{ 306{
295 struct trace_probe *tp; 307 struct trace_probe *tp;
308 int ret = -ENOMEM;
296 309
297 tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL); 310 tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL);
298 if (!tp) 311 if (!tp)
299 return ERR_PTR(-ENOMEM); 312 return ERR_PTR(ret);
300 313
301 if (symbol) { 314 if (symbol) {
302 tp->symbol = kstrdup(symbol, GFP_KERNEL); 315 tp->symbol = kstrdup(symbol, GFP_KERNEL);
@@ -312,14 +325,20 @@ static struct trace_probe *alloc_trace_probe(const char *group,
312 else 325 else
313 tp->rp.kp.pre_handler = kprobe_dispatcher; 326 tp->rp.kp.pre_handler = kprobe_dispatcher;
314 327
315 if (!event) 328 if (!event || !check_event_name(event)) {
329 ret = -EINVAL;
316 goto error; 330 goto error;
331 }
332
317 tp->call.name = kstrdup(event, GFP_KERNEL); 333 tp->call.name = kstrdup(event, GFP_KERNEL);
318 if (!tp->call.name) 334 if (!tp->call.name)
319 goto error; 335 goto error;
320 336
321 if (!group) 337 if (!group || !check_event_name(group)) {
338 ret = -EINVAL;
322 goto error; 339 goto error;
340 }
341
323 tp->call.system = kstrdup(group, GFP_KERNEL); 342 tp->call.system = kstrdup(group, GFP_KERNEL);
324 if (!tp->call.system) 343 if (!tp->call.system)
325 goto error; 344 goto error;
@@ -330,7 +349,7 @@ error:
330 kfree(tp->call.name); 349 kfree(tp->call.name);
331 kfree(tp->symbol); 350 kfree(tp->symbol);
332 kfree(tp); 351 kfree(tp);
333 return ERR_PTR(-ENOMEM); 352 return ERR_PTR(ret);
334} 353}
335 354
336static void free_probe_arg(struct probe_arg *arg) 355static void free_probe_arg(struct probe_arg *arg)
@@ -695,10 +714,10 @@ static int create_trace_probe(int argc, char **argv)
695 if (!event) { 714 if (!event) {
696 /* Make a new event name */ 715 /* Make a new event name */
697 if (symbol) 716 if (symbol)
698 snprintf(buf, MAX_EVENT_NAME_LEN, "%c@%s%+ld", 717 snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld",
699 is_return ? 'r' : 'p', symbol, offset); 718 is_return ? 'r' : 'p', symbol, offset);
700 else 719 else
701 snprintf(buf, MAX_EVENT_NAME_LEN, "%c@0x%p", 720 snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p",
702 is_return ? 'r' : 'p', addr); 721 is_return ? 'r' : 'p', addr);
703 event = buf; 722 event = buf;
704 } 723 }
@@ -1132,10 +1151,6 @@ static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
1132 struct kprobe_trace_entry field; 1151 struct kprobe_trace_entry field;
1133 struct trace_probe *tp = (struct trace_probe *)event_call->data; 1152 struct trace_probe *tp = (struct trace_probe *)event_call->data;
1134 1153
1135 ret = trace_define_common_fields(event_call);
1136 if (ret)
1137 return ret;
1138
1139 DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0); 1154 DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
1140 DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1); 1155 DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
1141 /* Set argument names as fields */ 1156 /* Set argument names as fields */
@@ -1150,10 +1165,6 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
1150 struct kretprobe_trace_entry field; 1165 struct kretprobe_trace_entry field;
1151 struct trace_probe *tp = (struct trace_probe *)event_call->data; 1166 struct trace_probe *tp = (struct trace_probe *)event_call->data;
1152 1167
1153 ret = trace_define_common_fields(event_call);
1154 if (ret)
1155 return ret;
1156
1157 DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0); 1168 DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
1158 DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0); 1169 DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
1159 DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1); 1170 DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
@@ -1190,10 +1201,11 @@ static int __probe_event_show_format(struct trace_seq *s,
1190#undef SHOW_FIELD 1201#undef SHOW_FIELD
1191#define SHOW_FIELD(type, item, name) \ 1202#define SHOW_FIELD(type, item, name) \
1192 do { \ 1203 do { \
1193 ret = trace_seq_printf(s, "\tfield: " #type " %s;\t" \ 1204 ret = trace_seq_printf(s, "\tfield:" #type " %s;\t" \
1194 "offset:%u;\tsize:%u;\n", name, \ 1205 "offset:%u;\tsize:%u;\tsigned:%d;\n", name,\
1195 (unsigned int)offsetof(typeof(field), item),\ 1206 (unsigned int)offsetof(typeof(field), item),\
1196 (unsigned int)sizeof(type)); \ 1207 (unsigned int)sizeof(type), \
1208 is_signed_type(type)); \
1197 if (!ret) \ 1209 if (!ret) \
1198 return 0; \ 1210 return 0; \
1199 } while (0) 1211 } while (0)
@@ -1453,7 +1465,6 @@ static int register_probe_event(struct trace_probe *tp)
1453 call->unregfunc = probe_event_disable; 1465 call->unregfunc = probe_event_disable;
1454 1466
1455#ifdef CONFIG_EVENT_PROFILE 1467#ifdef CONFIG_EVENT_PROFILE
1456 atomic_set(&call->profile_count, -1);
1457 call->profile_enable = probe_profile_enable; 1468 call->profile_enable = probe_profile_enable;
1458 call->profile_disable = probe_profile_disable; 1469 call->profile_disable = probe_profile_disable;
1459#endif 1470#endif
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
index acb87d4a4ac1..94103cdcf9d8 100644
--- a/kernel/trace/trace_ksym.c
+++ b/kernel/trace/trace_ksym.c
@@ -26,12 +26,13 @@
26#include <linux/fs.h> 26#include <linux/fs.h>
27 27
28#include "trace_output.h" 28#include "trace_output.h"
29#include "trace_stat.h"
30#include "trace.h" 29#include "trace.h"
31 30
32#include <linux/hw_breakpoint.h> 31#include <linux/hw_breakpoint.h>
33#include <asm/hw_breakpoint.h> 32#include <asm/hw_breakpoint.h>
34 33
34#include <asm/atomic.h>
35
35/* 36/*
36 * For now, let us restrict the no. of symbols traced simultaneously to number 37 * For now, let us restrict the no. of symbols traced simultaneously to number
37 * of available hardware breakpoint registers. 38 * of available hardware breakpoint registers.
@@ -44,7 +45,7 @@ struct trace_ksym {
44 struct perf_event **ksym_hbp; 45 struct perf_event **ksym_hbp;
45 struct perf_event_attr attr; 46 struct perf_event_attr attr;
46#ifdef CONFIG_PROFILE_KSYM_TRACER 47#ifdef CONFIG_PROFILE_KSYM_TRACER
47 unsigned long counter; 48 atomic64_t counter;
48#endif 49#endif
49 struct hlist_node ksym_hlist; 50 struct hlist_node ksym_hlist;
50}; 51};
@@ -69,9 +70,8 @@ void ksym_collect_stats(unsigned long hbp_hit_addr)
69 70
70 rcu_read_lock(); 71 rcu_read_lock();
71 hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) { 72 hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
72 if ((entry->attr.bp_addr == hbp_hit_addr) && 73 if (entry->attr.bp_addr == hbp_hit_addr) {
73 (entry->counter <= MAX_UL_INT)) { 74 atomic64_inc(&entry->counter);
74 entry->counter++;
75 break; 75 break;
76 } 76 }
77 } 77 }
@@ -197,7 +197,6 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
197 entry->attr.bp_addr = addr; 197 entry->attr.bp_addr = addr;
198 entry->attr.bp_len = HW_BREAKPOINT_LEN_4; 198 entry->attr.bp_len = HW_BREAKPOINT_LEN_4;
199 199
200 ret = -EAGAIN;
201 entry->ksym_hbp = register_wide_hw_breakpoint(&entry->attr, 200 entry->ksym_hbp = register_wide_hw_breakpoint(&entry->attr,
202 ksym_hbp_handler); 201 ksym_hbp_handler);
203 202
@@ -236,7 +235,8 @@ static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
236 mutex_lock(&ksym_tracer_mutex); 235 mutex_lock(&ksym_tracer_mutex);
237 236
238 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { 237 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
239 ret = trace_seq_printf(s, "%pS:", (void *)entry->attr.bp_addr); 238 ret = trace_seq_printf(s, "%pS:",
239 (void *)(unsigned long)entry->attr.bp_addr);
240 if (entry->attr.bp_type == HW_BREAKPOINT_R) 240 if (entry->attr.bp_type == HW_BREAKPOINT_R)
241 ret = trace_seq_puts(s, "r--\n"); 241 ret = trace_seq_puts(s, "r--\n");
242 else if (entry->attr.bp_type == HW_BREAKPOINT_W) 242 else if (entry->attr.bp_type == HW_BREAKPOINT_W)
@@ -278,21 +278,20 @@ static ssize_t ksym_trace_filter_write(struct file *file,
278{ 278{
279 struct trace_ksym *entry; 279 struct trace_ksym *entry;
280 struct hlist_node *node; 280 struct hlist_node *node;
281 char *input_string, *ksymname = NULL; 281 char *buf, *input_string, *ksymname = NULL;
282 unsigned long ksym_addr = 0; 282 unsigned long ksym_addr = 0;
283 int ret, op, changed = 0; 283 int ret, op, changed = 0;
284 284
285 input_string = kzalloc(count + 1, GFP_KERNEL); 285 buf = kzalloc(count + 1, GFP_KERNEL);
286 if (!input_string) 286 if (!buf)
287 return -ENOMEM; 287 return -ENOMEM;
288 288
289 if (copy_from_user(input_string, buffer, count)) { 289 ret = -EFAULT;
290 kfree(input_string); 290 if (copy_from_user(buf, buffer, count))
291 return -EFAULT; 291 goto out;
292 }
293 input_string[count] = '\0';
294 292
295 strstrip(input_string); 293 buf[count] = '\0';
294 input_string = strstrip(buf);
296 295
297 /* 296 /*
298 * Clear all breakpoints if: 297 * Clear all breakpoints if:
@@ -303,15 +302,13 @@ static ssize_t ksym_trace_filter_write(struct file *file,
303 if (!input_string[0] || !strcmp(input_string, "0") || 302 if (!input_string[0] || !strcmp(input_string, "0") ||
304 !strcmp(input_string, "*:---")) { 303 !strcmp(input_string, "*:---")) {
305 __ksym_trace_reset(); 304 __ksym_trace_reset();
306 kfree(input_string); 305 ret = 0;
307 return count; 306 goto out;
308 } 307 }
309 308
310 ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr); 309 ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
311 if (ret < 0) { 310 if (ret < 0)
312 kfree(input_string); 311 goto out;
313 return ret;
314 }
315 312
316 mutex_lock(&ksym_tracer_mutex); 313 mutex_lock(&ksym_tracer_mutex);
317 314
@@ -322,7 +319,7 @@ static ssize_t ksym_trace_filter_write(struct file *file,
322 if (entry->attr.bp_type != op) 319 if (entry->attr.bp_type != op)
323 changed = 1; 320 changed = 1;
324 else 321 else
325 goto out; 322 goto out_unlock;
326 break; 323 break;
327 } 324 }
328 } 325 }
@@ -337,28 +334,24 @@ static ssize_t ksym_trace_filter_write(struct file *file,
337 if (IS_ERR(entry->ksym_hbp)) 334 if (IS_ERR(entry->ksym_hbp))
338 ret = PTR_ERR(entry->ksym_hbp); 335 ret = PTR_ERR(entry->ksym_hbp);
339 else 336 else
340 goto out; 337 goto out_unlock;
341 } 338 }
342 /* Error or "symbol:---" case: drop it */ 339 /* Error or "symbol:---" case: drop it */
343 ksym_filter_entry_count--; 340 ksym_filter_entry_count--;
344 hlist_del_rcu(&(entry->ksym_hlist)); 341 hlist_del_rcu(&(entry->ksym_hlist));
345 synchronize_rcu(); 342 synchronize_rcu();
346 kfree(entry); 343 kfree(entry);
347 goto out; 344 goto out_unlock;
348 } else { 345 } else {
349 /* Check for malformed request: (4) */ 346 /* Check for malformed request: (4) */
350 if (op == 0) 347 if (op)
351 goto out; 348 ret = process_new_ksym_entry(ksymname, op, ksym_addr);
352 ret = process_new_ksym_entry(ksymname, op, ksym_addr);
353 } 349 }
354out: 350out_unlock:
355 mutex_unlock(&ksym_tracer_mutex); 351 mutex_unlock(&ksym_tracer_mutex);
356 352out:
357 kfree(input_string); 353 kfree(buf);
358 354 return !ret ? count : ret;
359 if (!ret)
360 ret = count;
361 return ret;
362} 355}
363 356
364static const struct file_operations ksym_tracing_fops = { 357static const struct file_operations ksym_tracing_fops = {
@@ -450,102 +443,77 @@ struct tracer ksym_tracer __read_mostly =
450 .print_line = ksym_trace_output 443 .print_line = ksym_trace_output
451}; 444};
452 445
453__init static int init_ksym_trace(void)
454{
455 struct dentry *d_tracer;
456 struct dentry *entry;
457
458 d_tracer = tracing_init_dentry();
459 ksym_filter_entry_count = 0;
460
461 entry = debugfs_create_file("ksym_trace_filter", 0644, d_tracer,
462 NULL, &ksym_tracing_fops);
463 if (!entry)
464 pr_warning("Could not create debugfs "
465 "'ksym_trace_filter' file\n");
466
467 return register_tracer(&ksym_tracer);
468}
469device_initcall(init_ksym_trace);
470
471
472#ifdef CONFIG_PROFILE_KSYM_TRACER 446#ifdef CONFIG_PROFILE_KSYM_TRACER
473static int ksym_tracer_stat_headers(struct seq_file *m) 447static int ksym_profile_show(struct seq_file *m, void *v)
474{ 448{
449 struct hlist_node *node;
450 struct trace_ksym *entry;
451 int access_type = 0;
452 char fn_name[KSYM_NAME_LEN];
453
475 seq_puts(m, " Access Type "); 454 seq_puts(m, " Access Type ");
476 seq_puts(m, " Symbol Counter\n"); 455 seq_puts(m, " Symbol Counter\n");
477 seq_puts(m, " ----------- "); 456 seq_puts(m, " ----------- ");
478 seq_puts(m, " ------ -------\n"); 457 seq_puts(m, " ------ -------\n");
479 return 0;
480}
481 458
482static int ksym_tracer_stat_show(struct seq_file *m, void *v) 459 rcu_read_lock();
483{ 460 hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
484 struct hlist_node *stat = v;
485 struct trace_ksym *entry;
486 int access_type = 0;
487 char fn_name[KSYM_NAME_LEN];
488 461
489 entry = hlist_entry(stat, struct trace_ksym, ksym_hlist); 462 access_type = entry->attr.bp_type;
490 463
491 access_type = entry->attr.bp_type; 464 switch (access_type) {
465 case HW_BREAKPOINT_R:
466 seq_puts(m, " R ");
467 break;
468 case HW_BREAKPOINT_W:
469 seq_puts(m, " W ");
470 break;
471 case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
472 seq_puts(m, " RW ");
473 break;
474 default:
475 seq_puts(m, " NA ");
476 }
492 477
493 switch (access_type) { 478 if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0)
494 case HW_BREAKPOINT_R: 479 seq_printf(m, " %-36s", fn_name);
495 seq_puts(m, " R "); 480 else
496 break; 481 seq_printf(m, " %-36s", "<NA>");
497 case HW_BREAKPOINT_W: 482 seq_printf(m, " %15llu\n",
498 seq_puts(m, " W "); 483 (unsigned long long)atomic64_read(&entry->counter));
499 break;
500 case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
501 seq_puts(m, " RW ");
502 break;
503 default:
504 seq_puts(m, " NA ");
505 } 484 }
506 485 rcu_read_unlock();
507 if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0)
508 seq_printf(m, " %-36s", fn_name);
509 else
510 seq_printf(m, " %-36s", "<NA>");
511 seq_printf(m, " %15lu\n", entry->counter);
512 486
513 return 0; 487 return 0;
514} 488}
515 489
516static void *ksym_tracer_stat_start(struct tracer_stat *trace) 490static int ksym_profile_open(struct inode *node, struct file *file)
517{ 491{
518 return ksym_filter_head.first; 492 return single_open(file, ksym_profile_show, NULL);
519} 493}
520 494
521static void * 495static const struct file_operations ksym_profile_fops = {
522ksym_tracer_stat_next(void *v, int idx) 496 .open = ksym_profile_open,
523{ 497 .read = seq_read,
524 struct hlist_node *stat = v; 498 .llseek = seq_lseek,
525 499 .release = single_release,
526 return stat->next;
527}
528
529static struct tracer_stat ksym_tracer_stats = {
530 .name = "ksym_tracer",
531 .stat_start = ksym_tracer_stat_start,
532 .stat_next = ksym_tracer_stat_next,
533 .stat_headers = ksym_tracer_stat_headers,
534 .stat_show = ksym_tracer_stat_show
535}; 500};
501#endif /* CONFIG_PROFILE_KSYM_TRACER */
536 502
537__init static int ksym_tracer_stat_init(void) 503__init static int init_ksym_trace(void)
538{ 504{
539 int ret; 505 struct dentry *d_tracer;
540 506
541 ret = register_stat_tracer(&ksym_tracer_stats); 507 d_tracer = tracing_init_dentry();
542 if (ret) {
543 printk(KERN_WARNING "Warning: could not register "
544 "ksym tracer stats\n");
545 return 1;
546 }
547 508
548 return 0; 509 trace_create_file("ksym_trace_filter", 0644, d_tracer,
510 NULL, &ksym_tracing_fops);
511
512#ifdef CONFIG_PROFILE_KSYM_TRACER
513 trace_create_file("ksym_profile", 0444, d_tracer,
514 NULL, &ksym_profile_fops);
515#endif
516
517 return register_tracer(&ksym_tracer);
549} 518}
550fs_initcall(ksym_tracer_stat_init); 519device_initcall(init_ksym_trace);
551#endif /* CONFIG_PROFILE_KSYM_TRACER */
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 26185d727676..0271742abb8d 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -28,8 +28,8 @@ static int wakeup_current_cpu;
28static unsigned wakeup_prio = -1; 28static unsigned wakeup_prio = -1;
29static int wakeup_rt; 29static int wakeup_rt;
30 30
31static raw_spinlock_t wakeup_lock = 31static arch_spinlock_t wakeup_lock =
32 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 32 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
33 33
34static void __wakeup_reset(struct trace_array *tr); 34static void __wakeup_reset(struct trace_array *tr);
35 35
@@ -143,7 +143,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
143 goto out; 143 goto out;
144 144
145 local_irq_save(flags); 145 local_irq_save(flags);
146 __raw_spin_lock(&wakeup_lock); 146 arch_spin_lock(&wakeup_lock);
147 147
148 /* We could race with grabbing wakeup_lock */ 148 /* We could race with grabbing wakeup_lock */
149 if (unlikely(!tracer_enabled || next != wakeup_task)) 149 if (unlikely(!tracer_enabled || next != wakeup_task))
@@ -169,7 +169,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
169 169
170out_unlock: 170out_unlock:
171 __wakeup_reset(wakeup_trace); 171 __wakeup_reset(wakeup_trace);
172 __raw_spin_unlock(&wakeup_lock); 172 arch_spin_unlock(&wakeup_lock);
173 local_irq_restore(flags); 173 local_irq_restore(flags);
174out: 174out:
175 atomic_dec(&wakeup_trace->data[cpu]->disabled); 175 atomic_dec(&wakeup_trace->data[cpu]->disabled);
@@ -193,9 +193,9 @@ static void wakeup_reset(struct trace_array *tr)
193 tracing_reset_online_cpus(tr); 193 tracing_reset_online_cpus(tr);
194 194
195 local_irq_save(flags); 195 local_irq_save(flags);
196 __raw_spin_lock(&wakeup_lock); 196 arch_spin_lock(&wakeup_lock);
197 __wakeup_reset(tr); 197 __wakeup_reset(tr);
198 __raw_spin_unlock(&wakeup_lock); 198 arch_spin_unlock(&wakeup_lock);
199 local_irq_restore(flags); 199 local_irq_restore(flags);
200} 200}
201 201
@@ -225,7 +225,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
225 goto out; 225 goto out;
226 226
227 /* interrupts should be off from try_to_wake_up */ 227 /* interrupts should be off from try_to_wake_up */
228 __raw_spin_lock(&wakeup_lock); 228 arch_spin_lock(&wakeup_lock);
229 229
230 /* check for races. */ 230 /* check for races. */
231 if (!tracer_enabled || p->prio >= wakeup_prio) 231 if (!tracer_enabled || p->prio >= wakeup_prio)
@@ -255,7 +255,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
255 trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); 255 trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
256 256
257out_locked: 257out_locked:
258 __raw_spin_unlock(&wakeup_lock); 258 arch_spin_unlock(&wakeup_lock);
259out: 259out:
260 atomic_dec(&wakeup_trace->data[cpu]->disabled); 260 atomic_dec(&wakeup_trace->data[cpu]->disabled);
261} 261}
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index dc98309e839a..280fea470d67 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -67,7 +67,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
67 67
68 /* Don't allow flipping of max traces now */ 68 /* Don't allow flipping of max traces now */
69 local_irq_save(flags); 69 local_irq_save(flags);
70 __raw_spin_lock(&ftrace_max_lock); 70 arch_spin_lock(&ftrace_max_lock);
71 71
72 cnt = ring_buffer_entries(tr->buffer); 72 cnt = ring_buffer_entries(tr->buffer);
73 73
@@ -85,7 +85,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
85 break; 85 break;
86 } 86 }
87 tracing_on(); 87 tracing_on();
88 __raw_spin_unlock(&ftrace_max_lock); 88 arch_spin_unlock(&ftrace_max_lock);
89 local_irq_restore(flags); 89 local_irq_restore(flags);
90 90
91 if (count) 91 if (count)
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 8504ac71e4e8..678a5120ee30 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -27,8 +27,8 @@ static struct stack_trace max_stack_trace = {
27}; 27};
28 28
29static unsigned long max_stack_size; 29static unsigned long max_stack_size;
30static raw_spinlock_t max_stack_lock = 30static arch_spinlock_t max_stack_lock =
31 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 31 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
32 32
33static int stack_trace_disabled __read_mostly; 33static int stack_trace_disabled __read_mostly;
34static DEFINE_PER_CPU(int, trace_active); 34static DEFINE_PER_CPU(int, trace_active);
@@ -54,7 +54,7 @@ static inline void check_stack(void)
54 return; 54 return;
55 55
56 local_irq_save(flags); 56 local_irq_save(flags);
57 __raw_spin_lock(&max_stack_lock); 57 arch_spin_lock(&max_stack_lock);
58 58
59 /* a race could have already updated it */ 59 /* a race could have already updated it */
60 if (this_size <= max_stack_size) 60 if (this_size <= max_stack_size)
@@ -103,7 +103,7 @@ static inline void check_stack(void)
103 } 103 }
104 104
105 out: 105 out:
106 __raw_spin_unlock(&max_stack_lock); 106 arch_spin_unlock(&max_stack_lock);
107 local_irq_restore(flags); 107 local_irq_restore(flags);
108} 108}
109 109
@@ -171,9 +171,9 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
171 return ret; 171 return ret;
172 172
173 local_irq_save(flags); 173 local_irq_save(flags);
174 __raw_spin_lock(&max_stack_lock); 174 arch_spin_lock(&max_stack_lock);
175 *ptr = val; 175 *ptr = val;
176 __raw_spin_unlock(&max_stack_lock); 176 arch_spin_unlock(&max_stack_lock);
177 local_irq_restore(flags); 177 local_irq_restore(flags);
178 178
179 return count; 179 return count;
@@ -207,7 +207,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
207static void *t_start(struct seq_file *m, loff_t *pos) 207static void *t_start(struct seq_file *m, loff_t *pos)
208{ 208{
209 local_irq_disable(); 209 local_irq_disable();
210 __raw_spin_lock(&max_stack_lock); 210 arch_spin_lock(&max_stack_lock);
211 211
212 if (*pos == 0) 212 if (*pos == 0)
213 return SEQ_START_TOKEN; 213 return SEQ_START_TOKEN;
@@ -217,7 +217,7 @@ static void *t_start(struct seq_file *m, loff_t *pos)
217 217
218static void t_stop(struct seq_file *m, void *p) 218static void t_stop(struct seq_file *m, void *p)
219{ 219{
220 __raw_spin_unlock(&max_stack_lock); 220 arch_spin_unlock(&max_stack_lock);
221 local_irq_enable(); 221 local_irq_enable();
222} 222}
223 223
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 57501d90096a..75289f372dd2 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -217,10 +217,6 @@ int syscall_enter_define_fields(struct ftrace_event_call *call)
217 int i; 217 int i;
218 int offset = offsetof(typeof(trace), args); 218 int offset = offsetof(typeof(trace), args);
219 219
220 ret = trace_define_common_fields(call);
221 if (ret)
222 return ret;
223
224 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER); 220 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
225 if (ret) 221 if (ret)
226 return ret; 222 return ret;
@@ -241,10 +237,6 @@ int syscall_exit_define_fields(struct ftrace_event_call *call)
241 struct syscall_trace_exit trace; 237 struct syscall_trace_exit trace;
242 int ret; 238 int ret;
243 239
244 ret = trace_define_common_fields(call);
245 if (ret)
246 return ret;
247
248 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER); 240 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
249 if (ret) 241 if (ret)
250 return ret; 242 return ret;
@@ -333,10 +325,7 @@ int reg_event_syscall_enter(struct ftrace_event_call *call)
333 mutex_lock(&syscall_trace_lock); 325 mutex_lock(&syscall_trace_lock);
334 if (!sys_refcount_enter) 326 if (!sys_refcount_enter)
335 ret = register_trace_sys_enter(ftrace_syscall_enter); 327 ret = register_trace_sys_enter(ftrace_syscall_enter);
336 if (ret) { 328 if (!ret) {
337 pr_info("event trace: Could not activate"
338 "syscall entry trace point");
339 } else {
340 set_bit(num, enabled_enter_syscalls); 329 set_bit(num, enabled_enter_syscalls);
341 sys_refcount_enter++; 330 sys_refcount_enter++;
342 } 331 }
@@ -370,10 +359,7 @@ int reg_event_syscall_exit(struct ftrace_event_call *call)
370 mutex_lock(&syscall_trace_lock); 359 mutex_lock(&syscall_trace_lock);
371 if (!sys_refcount_exit) 360 if (!sys_refcount_exit)
372 ret = register_trace_sys_exit(ftrace_syscall_exit); 361 ret = register_trace_sys_exit(ftrace_syscall_exit);
373 if (ret) { 362 if (!ret) {
374 pr_info("event trace: Could not activate"
375 "syscall exit trace point");
376 } else {
377 set_bit(num, enabled_exit_syscalls); 363 set_bit(num, enabled_exit_syscalls);
378 sys_refcount_exit++; 364 sys_refcount_exit++;
379 } 365 }
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index f6693969287d..a7974a552ca9 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -93,6 +93,7 @@ static const struct stacktrace_ops backtrace_ops = {
93 .warning_symbol = backtrace_warning_symbol, 93 .warning_symbol = backtrace_warning_symbol,
94 .stack = backtrace_stack, 94 .stack = backtrace_stack,
95 .address = backtrace_address, 95 .address = backtrace_address,
96 .walk_stack = print_context_stack,
96}; 97};
97 98
98static int 99static int