aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile5
-rw-r--r--kernel/audit.c2
-rw-r--r--kernel/cgroup.c55
-rw-r--r--kernel/debug/kdb/kdb_main.c2
-rw-r--r--kernel/exit.c16
-rw-r--r--kernel/fork.c50
-rw-r--r--kernel/freezer.c9
-rw-r--r--kernel/futex.c62
-rw-r--r--kernel/hrtimer.c4
-rw-r--r--kernel/irq/irqdesc.c40
-rw-r--r--kernel/irq/manage.c2
-rw-r--r--kernel/irq_work.c18
-rw-r--r--kernel/kexec.c2
-rw-r--r--kernel/kprobes.c8
-rw-r--r--kernel/kthread.c2
-rw-r--r--kernel/latencytop.c23
-rw-r--r--kernel/panic.c1
-rw-r--r--kernel/perf_event.c84
-rw-r--r--kernel/power/Kconfig5
-rw-r--r--kernel/power/Makefile6
-rw-r--r--kernel/power/hibernate.c11
-rw-r--r--kernel/power/nvs.c136
-rw-r--r--kernel/power/process.c8
-rw-r--r--kernel/power/suspend.c6
-rw-r--r--kernel/power/swap.c7
-rw-r--r--kernel/printk.c52
-rw-r--r--kernel/rcutiny.c3
-rw-r--r--kernel/rcutree.c4
-rw-r--r--kernel/sched.c47
-rw-r--r--kernel/sched_autogroup.c8
-rw-r--r--kernel/smp.c19
-rw-r--r--kernel/softirq.c63
-rw-r--r--kernel/srcu.c15
-rw-r--r--kernel/sys.c6
-rw-r--r--kernel/sysctl.c31
-rw-r--r--kernel/sysctl_binary.c2
-rw-r--r--kernel/taskstats.c7
-rw-r--r--kernel/time.c4
-rw-r--r--kernel/time/clocksource.c3
-rw-r--r--kernel/time/ntp.c425
-rw-r--r--kernel/time/tick-common.c2
-rw-r--r--kernel/time/tick-oneshot.c4
-rw-r--r--kernel/time/timekeeping.c43
-rw-r--r--kernel/trace/Makefile2
-rw-r--r--kernel/trace/blktrace.c37
-rw-r--r--kernel/trace/trace.c6
-rw-r--r--kernel/trace/trace_entries.h2
-rw-r--r--kernel/trace/trace_selftest.c2
-rw-r--r--kernel/trace/trace_syscalls.c33
-rw-r--r--kernel/user_namespace.c15
-rw-r--r--kernel/watchdog.c36
-rw-r--r--kernel/workqueue.c60
52 files changed, 969 insertions, 526 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 0b5ff083fa22..353d3fe8ba33 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -43,7 +43,7 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
43obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o 43obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
44obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o 44obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
45obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o 45obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
46obj-$(CONFIG_USE_GENERIC_SMP_HELPERS) += smp.o 46obj-$(CONFIG_SMP) += smp.o
47ifneq ($(CONFIG_SMP),y) 47ifneq ($(CONFIG_SMP),y)
48obj-y += up.o 48obj-y += up.o
49endif 49endif
@@ -100,6 +100,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += trace/
100obj-$(CONFIG_TRACING) += trace/ 100obj-$(CONFIG_TRACING) += trace/
101obj-$(CONFIG_X86_DS) += trace/ 101obj-$(CONFIG_X86_DS) += trace/
102obj-$(CONFIG_RING_BUFFER) += trace/ 102obj-$(CONFIG_RING_BUFFER) += trace/
103obj-$(CONFIG_TRACEPOINTS) += trace/
103obj-$(CONFIG_SMP) += sched_cpupri.o 104obj-$(CONFIG_SMP) += sched_cpupri.o
104obj-$(CONFIG_IRQ_WORK) += irq_work.o 105obj-$(CONFIG_IRQ_WORK) += irq_work.o
105obj-$(CONFIG_PERF_EVENTS) += perf_event.o 106obj-$(CONFIG_PERF_EVENTS) += perf_event.o
@@ -121,7 +122,7 @@ $(obj)/configs.o: $(obj)/config_data.h
121# config_data.h contains the same information as ikconfig.h but gzipped. 122# config_data.h contains the same information as ikconfig.h but gzipped.
122# Info from config_data can be extracted from /proc/config* 123# Info from config_data can be extracted from /proc/config*
123targets += config_data.gz 124targets += config_data.gz
124$(obj)/config_data.gz: .config FORCE 125$(obj)/config_data.gz: $(KCONFIG_CONFIG) FORCE
125 $(call if_changed,gzip) 126 $(call if_changed,gzip)
126 127
127quiet_cmd_ikconfiggz = IKCFG $@ 128quiet_cmd_ikconfiggz = IKCFG $@
diff --git a/kernel/audit.c b/kernel/audit.c
index 77770a034d59..e4956244ae50 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -400,7 +400,7 @@ static void kauditd_send_skb(struct sk_buff *skb)
400 if (err < 0) { 400 if (err < 0) {
401 BUG_ON(err != -ECONNREFUSED); /* Shouldn't happen */ 401 BUG_ON(err != -ECONNREFUSED); /* Shouldn't happen */
402 printk(KERN_ERR "audit: *NO* daemon at audit_pid=%d\n", audit_pid); 402 printk(KERN_ERR "audit: *NO* daemon at audit_pid=%d\n", audit_pid);
403 audit_log_lost("auditd dissapeared\n"); 403 audit_log_lost("auditd disappeared\n");
404 audit_pid = 0; 404 audit_pid = 0;
405 /* we might get lucky and get this in the next auditd */ 405 /* we might get lucky and get this in the next auditd */
406 audit_hold_skb(skb); 406 audit_hold_skb(skb);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 66a416b42c18..b24d7027b83c 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -764,6 +764,7 @@ EXPORT_SYMBOL_GPL(cgroup_unlock);
764 */ 764 */
765 765
766static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode); 766static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode);
767static struct dentry *cgroup_lookup(struct inode *, struct dentry *, struct nameidata *);
767static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry); 768static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
768static int cgroup_populate_dir(struct cgroup *cgrp); 769static int cgroup_populate_dir(struct cgroup *cgrp);
769static const struct inode_operations cgroup_dir_inode_operations; 770static const struct inode_operations cgroup_dir_inode_operations;
@@ -860,6 +861,11 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
860 iput(inode); 861 iput(inode);
861} 862}
862 863
864static int cgroup_delete(const struct dentry *d)
865{
866 return 1;
867}
868
863static void remove_dir(struct dentry *d) 869static void remove_dir(struct dentry *d)
864{ 870{
865 struct dentry *parent = dget(d->d_parent); 871 struct dentry *parent = dget(d->d_parent);
@@ -874,25 +880,29 @@ static void cgroup_clear_directory(struct dentry *dentry)
874 struct list_head *node; 880 struct list_head *node;
875 881
876 BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); 882 BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
877 spin_lock(&dcache_lock); 883 spin_lock(&dentry->d_lock);
878 node = dentry->d_subdirs.next; 884 node = dentry->d_subdirs.next;
879 while (node != &dentry->d_subdirs) { 885 while (node != &dentry->d_subdirs) {
880 struct dentry *d = list_entry(node, struct dentry, d_u.d_child); 886 struct dentry *d = list_entry(node, struct dentry, d_u.d_child);
887
888 spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
881 list_del_init(node); 889 list_del_init(node);
882 if (d->d_inode) { 890 if (d->d_inode) {
883 /* This should never be called on a cgroup 891 /* This should never be called on a cgroup
884 * directory with child cgroups */ 892 * directory with child cgroups */
885 BUG_ON(d->d_inode->i_mode & S_IFDIR); 893 BUG_ON(d->d_inode->i_mode & S_IFDIR);
886 d = dget_locked(d); 894 dget_dlock(d);
887 spin_unlock(&dcache_lock); 895 spin_unlock(&d->d_lock);
896 spin_unlock(&dentry->d_lock);
888 d_delete(d); 897 d_delete(d);
889 simple_unlink(dentry->d_inode, d); 898 simple_unlink(dentry->d_inode, d);
890 dput(d); 899 dput(d);
891 spin_lock(&dcache_lock); 900 spin_lock(&dentry->d_lock);
892 } 901 } else
902 spin_unlock(&d->d_lock);
893 node = dentry->d_subdirs.next; 903 node = dentry->d_subdirs.next;
894 } 904 }
895 spin_unlock(&dcache_lock); 905 spin_unlock(&dentry->d_lock);
896} 906}
897 907
898/* 908/*
@@ -900,11 +910,16 @@ static void cgroup_clear_directory(struct dentry *dentry)
900 */ 910 */
901static void cgroup_d_remove_dir(struct dentry *dentry) 911static void cgroup_d_remove_dir(struct dentry *dentry)
902{ 912{
913 struct dentry *parent;
914
903 cgroup_clear_directory(dentry); 915 cgroup_clear_directory(dentry);
904 916
905 spin_lock(&dcache_lock); 917 parent = dentry->d_parent;
918 spin_lock(&parent->d_lock);
919 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
906 list_del_init(&dentry->d_u.d_child); 920 list_del_init(&dentry->d_u.d_child);
907 spin_unlock(&dcache_lock); 921 spin_unlock(&dentry->d_lock);
922 spin_unlock(&parent->d_lock);
908 remove_dir(dentry); 923 remove_dir(dentry);
909} 924}
910 925
@@ -1440,6 +1455,11 @@ static int cgroup_set_super(struct super_block *sb, void *data)
1440 1455
1441static int cgroup_get_rootdir(struct super_block *sb) 1456static int cgroup_get_rootdir(struct super_block *sb)
1442{ 1457{
1458 static const struct dentry_operations cgroup_dops = {
1459 .d_iput = cgroup_diput,
1460 .d_delete = cgroup_delete,
1461 };
1462
1443 struct inode *inode = 1463 struct inode *inode =
1444 cgroup_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb); 1464 cgroup_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb);
1445 struct dentry *dentry; 1465 struct dentry *dentry;
@@ -1457,6 +1477,8 @@ static int cgroup_get_rootdir(struct super_block *sb)
1457 return -ENOMEM; 1477 return -ENOMEM;
1458 } 1478 }
1459 sb->s_root = dentry; 1479 sb->s_root = dentry;
1480 /* for everything else we want ->d_op set */
1481 sb->s_d_op = &cgroup_dops;
1460 return 0; 1482 return 0;
1461} 1483}
1462 1484
@@ -2180,12 +2202,20 @@ static const struct file_operations cgroup_file_operations = {
2180}; 2202};
2181 2203
2182static const struct inode_operations cgroup_dir_inode_operations = { 2204static const struct inode_operations cgroup_dir_inode_operations = {
2183 .lookup = simple_lookup, 2205 .lookup = cgroup_lookup,
2184 .mkdir = cgroup_mkdir, 2206 .mkdir = cgroup_mkdir,
2185 .rmdir = cgroup_rmdir, 2207 .rmdir = cgroup_rmdir,
2186 .rename = cgroup_rename, 2208 .rename = cgroup_rename,
2187}; 2209};
2188 2210
2211static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
2212{
2213 if (dentry->d_name.len > NAME_MAX)
2214 return ERR_PTR(-ENAMETOOLONG);
2215 d_add(dentry, NULL);
2216 return NULL;
2217}
2218
2189/* 2219/*
2190 * Check if a file is a control file 2220 * Check if a file is a control file
2191 */ 2221 */
@@ -2199,10 +2229,6 @@ static inline struct cftype *__file_cft(struct file *file)
2199static int cgroup_create_file(struct dentry *dentry, mode_t mode, 2229static int cgroup_create_file(struct dentry *dentry, mode_t mode,
2200 struct super_block *sb) 2230 struct super_block *sb)
2201{ 2231{
2202 static const struct dentry_operations cgroup_dops = {
2203 .d_iput = cgroup_diput,
2204 };
2205
2206 struct inode *inode; 2232 struct inode *inode;
2207 2233
2208 if (!dentry) 2234 if (!dentry)
@@ -2228,7 +2254,6 @@ static int cgroup_create_file(struct dentry *dentry, mode_t mode,
2228 inode->i_size = 0; 2254 inode->i_size = 0;
2229 inode->i_fop = &cgroup_file_operations; 2255 inode->i_fop = &cgroup_file_operations;
2230 } 2256 }
2231 dentry->d_op = &cgroup_dops;
2232 d_instantiate(dentry, inode); 2257 d_instantiate(dentry, inode);
2233 dget(dentry); /* Extra count - pin the dentry in core */ 2258 dget(dentry); /* Extra count - pin the dentry in core */
2234 return 0; 2259 return 0;
@@ -3638,9 +3663,7 @@ again:
3638 list_del(&cgrp->sibling); 3663 list_del(&cgrp->sibling);
3639 cgroup_unlock_hierarchy(cgrp->root); 3664 cgroup_unlock_hierarchy(cgrp->root);
3640 3665
3641 spin_lock(&cgrp->dentry->d_lock);
3642 d = dget(cgrp->dentry); 3666 d = dget(cgrp->dentry);
3643 spin_unlock(&d->d_lock);
3644 3667
3645 cgroup_d_remove_dir(d); 3668 cgroup_d_remove_dir(d);
3646 dput(d); 3669 dput(d);
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index a6e729766821..bd3e8e29caa3 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -2914,7 +2914,7 @@ static void __init kdb_cmd_init(void)
2914 } 2914 }
2915} 2915}
2916 2916
2917/* Intialize kdb_printf, breakpoint tables and kdb state */ 2917/* Initialize kdb_printf, breakpoint tables and kdb state */
2918void __init kdb_init(int lvl) 2918void __init kdb_init(int lvl)
2919{ 2919{
2920 static int kdb_init_lvl = KDB_NOT_INITIALIZED; 2920 static int kdb_init_lvl = KDB_NOT_INITIALIZED;
diff --git a/kernel/exit.c b/kernel/exit.c
index 676149a4ac5f..f9a45ebcc7b1 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -69,7 +69,7 @@ static void __unhash_process(struct task_struct *p, bool group_dead)
69 69
70 list_del_rcu(&p->tasks); 70 list_del_rcu(&p->tasks);
71 list_del_init(&p->sibling); 71 list_del_init(&p->sibling);
72 __get_cpu_var(process_counts)--; 72 __this_cpu_dec(process_counts);
73 } 73 }
74 list_del_rcu(&p->thread_group); 74 list_del_rcu(&p->thread_group);
75} 75}
@@ -994,6 +994,15 @@ NORET_TYPE void do_exit(long code)
994 exit_fs(tsk); 994 exit_fs(tsk);
995 check_stack_usage(); 995 check_stack_usage();
996 exit_thread(); 996 exit_thread();
997
998 /*
999 * Flush inherited counters to the parent - before the parent
1000 * gets woken up by child-exit notifications.
1001 *
1002 * because of cgroup mode, must be called before cgroup_exit()
1003 */
1004 perf_event_exit_task(tsk);
1005
997 cgroup_exit(tsk, 1); 1006 cgroup_exit(tsk, 1);
998 1007
999 if (group_dead) 1008 if (group_dead)
@@ -1007,11 +1016,6 @@ NORET_TYPE void do_exit(long code)
1007 * FIXME: do that only when needed, using sched_exit tracepoint 1016 * FIXME: do that only when needed, using sched_exit tracepoint
1008 */ 1017 */
1009 flush_ptrace_hw_breakpoint(tsk); 1018 flush_ptrace_hw_breakpoint(tsk);
1010 /*
1011 * Flush inherited counters to the parent - before the parent
1012 * gets woken up by child-exit notifications.
1013 */
1014 perf_event_exit_task(tsk);
1015 1019
1016 exit_notify(tsk, group_dead); 1020 exit_notify(tsk, group_dead);
1017#ifdef CONFIG_NUMA 1021#ifdef CONFIG_NUMA
diff --git a/kernel/fork.c b/kernel/fork.c
index 7d164e25b0f0..25e429152ddc 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -66,6 +66,7 @@
66#include <linux/posix-timers.h> 66#include <linux/posix-timers.h>
67#include <linux/user-return-notifier.h> 67#include <linux/user-return-notifier.h>
68#include <linux/oom.h> 68#include <linux/oom.h>
69#include <linux/khugepaged.h>
69 70
70#include <asm/pgtable.h> 71#include <asm/pgtable.h>
71#include <asm/pgalloc.h> 72#include <asm/pgalloc.h>
@@ -169,15 +170,14 @@ EXPORT_SYMBOL(free_task);
169static inline void free_signal_struct(struct signal_struct *sig) 170static inline void free_signal_struct(struct signal_struct *sig)
170{ 171{
171 taskstats_tgid_free(sig); 172 taskstats_tgid_free(sig);
173 sched_autogroup_exit(sig);
172 kmem_cache_free(signal_cachep, sig); 174 kmem_cache_free(signal_cachep, sig);
173} 175}
174 176
175static inline void put_signal_struct(struct signal_struct *sig) 177static inline void put_signal_struct(struct signal_struct *sig)
176{ 178{
177 if (atomic_dec_and_test(&sig->sigcnt)) { 179 if (atomic_dec_and_test(&sig->sigcnt))
178 sched_autogroup_exit(sig);
179 free_signal_struct(sig); 180 free_signal_struct(sig);
180 }
181} 181}
182 182
183void __put_task_struct(struct task_struct *tsk) 183void __put_task_struct(struct task_struct *tsk)
@@ -331,6 +331,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
331 retval = ksm_fork(mm, oldmm); 331 retval = ksm_fork(mm, oldmm);
332 if (retval) 332 if (retval)
333 goto out; 333 goto out;
334 retval = khugepaged_fork(mm, oldmm);
335 if (retval)
336 goto out;
334 337
335 prev = NULL; 338 prev = NULL;
336 for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) { 339 for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
@@ -530,6 +533,9 @@ void __mmdrop(struct mm_struct *mm)
530 mm_free_pgd(mm); 533 mm_free_pgd(mm);
531 destroy_context(mm); 534 destroy_context(mm);
532 mmu_notifier_mm_destroy(mm); 535 mmu_notifier_mm_destroy(mm);
536#ifdef CONFIG_TRANSPARENT_HUGEPAGE
537 VM_BUG_ON(mm->pmd_huge_pte);
538#endif
533 free_mm(mm); 539 free_mm(mm);
534} 540}
535EXPORT_SYMBOL_GPL(__mmdrop); 541EXPORT_SYMBOL_GPL(__mmdrop);
@@ -544,6 +550,7 @@ void mmput(struct mm_struct *mm)
544 if (atomic_dec_and_test(&mm->mm_users)) { 550 if (atomic_dec_and_test(&mm->mm_users)) {
545 exit_aio(mm); 551 exit_aio(mm);
546 ksm_exit(mm); 552 ksm_exit(mm);
553 khugepaged_exit(mm); /* must run before exit_mmap */
547 exit_mmap(mm); 554 exit_mmap(mm);
548 set_mm_exe_file(mm, NULL); 555 set_mm_exe_file(mm, NULL);
549 if (!list_empty(&mm->mmlist)) { 556 if (!list_empty(&mm->mmlist)) {
@@ -670,6 +677,10 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
670 mm->token_priority = 0; 677 mm->token_priority = 0;
671 mm->last_interval = 0; 678 mm->last_interval = 0;
672 679
680#ifdef CONFIG_TRANSPARENT_HUGEPAGE
681 mm->pmd_huge_pte = NULL;
682#endif
683
673 if (!mm_init(mm, tsk)) 684 if (!mm_init(mm, tsk))
674 goto fail_nomem; 685 goto fail_nomem;
675 686
@@ -911,6 +922,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
911 922
912 sig->oom_adj = current->signal->oom_adj; 923 sig->oom_adj = current->signal->oom_adj;
913 sig->oom_score_adj = current->signal->oom_score_adj; 924 sig->oom_score_adj = current->signal->oom_score_adj;
925 sig->oom_score_adj_min = current->signal->oom_score_adj_min;
914 926
915 mutex_init(&sig->cred_guard_mutex); 927 mutex_init(&sig->cred_guard_mutex);
916 928
@@ -1286,7 +1298,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1286 attach_pid(p, PIDTYPE_SID, task_session(current)); 1298 attach_pid(p, PIDTYPE_SID, task_session(current));
1287 list_add_tail(&p->sibling, &p->real_parent->children); 1299 list_add_tail(&p->sibling, &p->real_parent->children);
1288 list_add_tail_rcu(&p->tasks, &init_task.tasks); 1300 list_add_tail_rcu(&p->tasks, &init_task.tasks);
1289 __get_cpu_var(process_counts)++; 1301 __this_cpu_inc(process_counts);
1290 } 1302 }
1291 attach_pid(p, PIDTYPE_PID, pid); 1303 attach_pid(p, PIDTYPE_PID, pid);
1292 nr_threads++; 1304 nr_threads++;
@@ -1318,7 +1330,7 @@ bad_fork_cleanup_mm:
1318 } 1330 }
1319bad_fork_cleanup_signal: 1331bad_fork_cleanup_signal:
1320 if (!(clone_flags & CLONE_THREAD)) 1332 if (!(clone_flags & CLONE_THREAD))
1321 put_signal_struct(p->signal); 1333 free_signal_struct(p->signal);
1322bad_fork_cleanup_sighand: 1334bad_fork_cleanup_sighand:
1323 __cleanup_sighand(p->sighand); 1335 __cleanup_sighand(p->sighand);
1324bad_fork_cleanup_fs: 1336bad_fork_cleanup_fs:
@@ -1411,23 +1423,6 @@ long do_fork(unsigned long clone_flags,
1411 } 1423 }
1412 1424
1413 /* 1425 /*
1414 * We hope to recycle these flags after 2.6.26
1415 */
1416 if (unlikely(clone_flags & CLONE_STOPPED)) {
1417 static int __read_mostly count = 100;
1418
1419 if (count > 0 && printk_ratelimit()) {
1420 char comm[TASK_COMM_LEN];
1421
1422 count--;
1423 printk(KERN_INFO "fork(): process `%s' used deprecated "
1424 "clone flags 0x%lx\n",
1425 get_task_comm(comm, current),
1426 clone_flags & CLONE_STOPPED);
1427 }
1428 }
1429
1430 /*
1431 * When called from kernel_thread, don't do user tracing stuff. 1426 * When called from kernel_thread, don't do user tracing stuff.
1432 */ 1427 */
1433 if (likely(user_mode(regs))) 1428 if (likely(user_mode(regs)))
@@ -1465,16 +1460,7 @@ long do_fork(unsigned long clone_flags,
1465 */ 1460 */
1466 p->flags &= ~PF_STARTING; 1461 p->flags &= ~PF_STARTING;
1467 1462
1468 if (unlikely(clone_flags & CLONE_STOPPED)) { 1463 wake_up_new_task(p, clone_flags);
1469 /*
1470 * We'll start up with an immediate SIGSTOP.
1471 */
1472 sigaddset(&p->pending.signal, SIGSTOP);
1473 set_tsk_thread_flag(p, TIF_SIGPENDING);
1474 __set_task_state(p, TASK_STOPPED);
1475 } else {
1476 wake_up_new_task(p, clone_flags);
1477 }
1478 1464
1479 tracehook_report_clone_complete(trace, regs, 1465 tracehook_report_clone_complete(trace, regs,
1480 clone_flags, nr, p); 1466 clone_flags, nr, p);
diff --git a/kernel/freezer.c b/kernel/freezer.c
index bd1d42b17cb2..66ecd2ead215 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -104,8 +104,13 @@ bool freeze_task(struct task_struct *p, bool sig_only)
104 } 104 }
105 105
106 if (should_send_signal(p)) { 106 if (should_send_signal(p)) {
107 if (!signal_pending(p)) 107 fake_signal_wake_up(p);
108 fake_signal_wake_up(p); 108 /*
109 * fake_signal_wake_up() goes through p's scheduler
110 * lock and guarantees that TASK_STOPPED/TRACED ->
111 * TASK_RUNNING transition can't race with task state
112 * testing in try_to_freeze_tasks().
113 */
109 } else if (sig_only) { 114 } else if (sig_only) {
110 return false; 115 return false;
111 } else { 116 } else {
diff --git a/kernel/futex.c b/kernel/futex.c
index 3019b92e6917..b766d28accd6 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -233,7 +233,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
233{ 233{
234 unsigned long address = (unsigned long)uaddr; 234 unsigned long address = (unsigned long)uaddr;
235 struct mm_struct *mm = current->mm; 235 struct mm_struct *mm = current->mm;
236 struct page *page; 236 struct page *page, *page_head;
237 int err; 237 int err;
238 238
239 /* 239 /*
@@ -265,11 +265,46 @@ again:
265 if (err < 0) 265 if (err < 0)
266 return err; 266 return err;
267 267
268 page = compound_head(page); 268#ifdef CONFIG_TRANSPARENT_HUGEPAGE
269 lock_page(page); 269 page_head = page;
270 if (!page->mapping) { 270 if (unlikely(PageTail(page))) {
271 unlock_page(page);
272 put_page(page); 271 put_page(page);
272 /* serialize against __split_huge_page_splitting() */
273 local_irq_disable();
274 if (likely(__get_user_pages_fast(address, 1, 1, &page) == 1)) {
275 page_head = compound_head(page);
276 /*
277 * page_head is valid pointer but we must pin
278 * it before taking the PG_lock and/or
279 * PG_compound_lock. The moment we re-enable
280 * irqs __split_huge_page_splitting() can
281 * return and the head page can be freed from
282 * under us. We can't take the PG_lock and/or
283 * PG_compound_lock on a page that could be
284 * freed from under us.
285 */
286 if (page != page_head) {
287 get_page(page_head);
288 put_page(page);
289 }
290 local_irq_enable();
291 } else {
292 local_irq_enable();
293 goto again;
294 }
295 }
296#else
297 page_head = compound_head(page);
298 if (page != page_head) {
299 get_page(page_head);
300 put_page(page);
301 }
302#endif
303
304 lock_page(page_head);
305 if (!page_head->mapping) {
306 unlock_page(page_head);
307 put_page(page_head);
273 goto again; 308 goto again;
274 } 309 }
275 310
@@ -280,20 +315,20 @@ again:
280 * it's a read-only handle, it's expected that futexes attach to 315 * it's a read-only handle, it's expected that futexes attach to
281 * the object not the particular process. 316 * the object not the particular process.
282 */ 317 */
283 if (PageAnon(page)) { 318 if (PageAnon(page_head)) {
284 key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */ 319 key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
285 key->private.mm = mm; 320 key->private.mm = mm;
286 key->private.address = address; 321 key->private.address = address;
287 } else { 322 } else {
288 key->both.offset |= FUT_OFF_INODE; /* inode-based key */ 323 key->both.offset |= FUT_OFF_INODE; /* inode-based key */
289 key->shared.inode = page->mapping->host; 324 key->shared.inode = page_head->mapping->host;
290 key->shared.pgoff = page->index; 325 key->shared.pgoff = page_head->index;
291 } 326 }
292 327
293 get_futex_key_refs(key); 328 get_futex_key_refs(key);
294 329
295 unlock_page(page); 330 unlock_page(page_head);
296 put_page(page); 331 put_page(page_head);
297 return 0; 332 return 0;
298} 333}
299 334
@@ -791,10 +826,9 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
791 new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); 826 new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
792 827
793 /* 828 /*
794 * This happens when we have stolen the lock and the original 829 * It is possible that the next waiter (the one that brought
795 * pending owner did not enqueue itself back on the rt_mutex. 830 * this owner to the kernel) timed out and is no longer
796 * Thats not a tragedy. We know that way, that a lock waiter 831 * waiting on the lock.
797 * is on the fly. We make the futex_q waiter the pending owner.
798 */ 832 */
799 if (!new_owner) 833 if (!new_owner)
800 new_owner = this->task; 834 new_owner = this->task;
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index f2429fc3438c..0c8d7c048615 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -497,7 +497,7 @@ static inline int hrtimer_is_hres_enabled(void)
497 */ 497 */
498static inline int hrtimer_hres_active(void) 498static inline int hrtimer_hres_active(void)
499{ 499{
500 return __get_cpu_var(hrtimer_bases).hres_active; 500 return __this_cpu_read(hrtimer_bases.hres_active);
501} 501}
502 502
503/* 503/*
@@ -1745,7 +1745,7 @@ schedule_hrtimeout_range_clock(ktime_t *expires, unsigned long delta,
1745 } 1745 }
1746 1746
1747 /* 1747 /*
1748 * A NULL parameter means "inifinte" 1748 * A NULL parameter means "infinite"
1749 */ 1749 */
1750 if (!expires) { 1750 if (!expires) {
1751 schedule(); 1751 schedule();
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 9988d03797f5..282f20230e67 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -72,6 +72,8 @@ static inline int desc_node(struct irq_desc *desc) { return 0; }
72 72
73static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node) 73static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node)
74{ 74{
75 int cpu;
76
75 desc->irq_data.irq = irq; 77 desc->irq_data.irq = irq;
76 desc->irq_data.chip = &no_irq_chip; 78 desc->irq_data.chip = &no_irq_chip;
77 desc->irq_data.chip_data = NULL; 79 desc->irq_data.chip_data = NULL;
@@ -83,7 +85,8 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node)
83 desc->irq_count = 0; 85 desc->irq_count = 0;
84 desc->irqs_unhandled = 0; 86 desc->irqs_unhandled = 0;
85 desc->name = NULL; 87 desc->name = NULL;
86 memset(desc->kstat_irqs, 0, nr_cpu_ids * sizeof(*(desc->kstat_irqs))); 88 for_each_possible_cpu(cpu)
89 *per_cpu_ptr(desc->kstat_irqs, cpu) = 0;
87 desc_smp_init(desc, node); 90 desc_smp_init(desc, node);
88} 91}
89 92
@@ -133,8 +136,7 @@ static struct irq_desc *alloc_desc(int irq, int node)
133 if (!desc) 136 if (!desc)
134 return NULL; 137 return NULL;
135 /* allocate based on nr_cpu_ids */ 138 /* allocate based on nr_cpu_ids */
136 desc->kstat_irqs = kzalloc_node(nr_cpu_ids * sizeof(*desc->kstat_irqs), 139 desc->kstat_irqs = alloc_percpu(unsigned int);
137 gfp, node);
138 if (!desc->kstat_irqs) 140 if (!desc->kstat_irqs)
139 goto err_desc; 141 goto err_desc;
140 142
@@ -149,7 +151,7 @@ static struct irq_desc *alloc_desc(int irq, int node)
149 return desc; 151 return desc;
150 152
151err_kstat: 153err_kstat:
152 kfree(desc->kstat_irqs); 154 free_percpu(desc->kstat_irqs);
153err_desc: 155err_desc:
154 kfree(desc); 156 kfree(desc);
155 return NULL; 157 return NULL;
@@ -166,7 +168,7 @@ static void free_desc(unsigned int irq)
166 mutex_unlock(&sparse_irq_lock); 168 mutex_unlock(&sparse_irq_lock);
167 169
168 free_masks(desc); 170 free_masks(desc);
169 kfree(desc->kstat_irqs); 171 free_percpu(desc->kstat_irqs);
170 kfree(desc); 172 kfree(desc);
171} 173}
172 174
@@ -234,7 +236,6 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
234 } 236 }
235}; 237};
236 238
237static unsigned int kstat_irqs_all[NR_IRQS][NR_CPUS];
238int __init early_irq_init(void) 239int __init early_irq_init(void)
239{ 240{
240 int count, i, node = first_online_node; 241 int count, i, node = first_online_node;
@@ -250,7 +251,8 @@ int __init early_irq_init(void)
250 for (i = 0; i < count; i++) { 251 for (i = 0; i < count; i++) {
251 desc[i].irq_data.irq = i; 252 desc[i].irq_data.irq = i;
252 desc[i].irq_data.chip = &no_irq_chip; 253 desc[i].irq_data.chip = &no_irq_chip;
253 desc[i].kstat_irqs = kstat_irqs_all[i]; 254 /* TODO : do this allocation on-demand ... */
255 desc[i].kstat_irqs = alloc_percpu(unsigned int);
254 alloc_masks(desc + i, GFP_KERNEL, node); 256 alloc_masks(desc + i, GFP_KERNEL, node);
255 desc_smp_init(desc + i, node); 257 desc_smp_init(desc + i, node);
256 lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); 258 lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
@@ -275,6 +277,22 @@ static void free_desc(unsigned int irq)
275 277
276static inline int alloc_descs(unsigned int start, unsigned int cnt, int node) 278static inline int alloc_descs(unsigned int start, unsigned int cnt, int node)
277{ 279{
280#if defined(CONFIG_KSTAT_IRQS_ONDEMAND)
281 struct irq_desc *desc;
282 unsigned int i;
283
284 for (i = 0; i < cnt; i++) {
285 desc = irq_to_desc(start + i);
286 if (desc && !desc->kstat_irqs) {
287 unsigned int __percpu *stats = alloc_percpu(unsigned int);
288
289 if (!stats)
290 return -1;
291 if (cmpxchg(&desc->kstat_irqs, NULL, stats) != NULL)
292 free_percpu(stats);
293 }
294 }
295#endif
278 return start; 296 return start;
279} 297}
280#endif /* !CONFIG_SPARSE_IRQ */ 298#endif /* !CONFIG_SPARSE_IRQ */
@@ -391,7 +409,9 @@ void dynamic_irq_cleanup(unsigned int irq)
391unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) 409unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
392{ 410{
393 struct irq_desc *desc = irq_to_desc(irq); 411 struct irq_desc *desc = irq_to_desc(irq);
394 return desc ? desc->kstat_irqs[cpu] : 0; 412
413 return desc && desc->kstat_irqs ?
414 *per_cpu_ptr(desc->kstat_irqs, cpu) : 0;
395} 415}
396 416
397#ifdef CONFIG_GENERIC_HARDIRQS 417#ifdef CONFIG_GENERIC_HARDIRQS
@@ -401,10 +421,10 @@ unsigned int kstat_irqs(unsigned int irq)
401 int cpu; 421 int cpu;
402 int sum = 0; 422 int sum = 0;
403 423
404 if (!desc) 424 if (!desc || !desc->kstat_irqs)
405 return 0; 425 return 0;
406 for_each_possible_cpu(cpu) 426 for_each_possible_cpu(cpu)
407 sum += desc->kstat_irqs[cpu]; 427 sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
408 return sum; 428 return sum;
409} 429}
410#endif /* CONFIG_GENERIC_HARDIRQS */ 430#endif /* CONFIG_GENERIC_HARDIRQS */
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 91a5fa25054e..0caa59f747dd 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -577,7 +577,7 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { }
577 */ 577 */
578static int irq_thread(void *data) 578static int irq_thread(void *data)
579{ 579{
580 static struct sched_param param = { 580 static const struct sched_param param = {
581 .sched_priority = MAX_USER_RT_PRIO/2, 581 .sched_priority = MAX_USER_RT_PRIO/2,
582 }; 582 };
583 struct irqaction *action = data; 583 struct irqaction *action = data;
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index 90f881904bb1..c58fa7da8aef 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -77,21 +77,21 @@ void __weak arch_irq_work_raise(void)
77 */ 77 */
78static void __irq_work_queue(struct irq_work *entry) 78static void __irq_work_queue(struct irq_work *entry)
79{ 79{
80 struct irq_work **head, *next; 80 struct irq_work *next;
81 81
82 head = &get_cpu_var(irq_work_list); 82 preempt_disable();
83 83
84 do { 84 do {
85 next = *head; 85 next = __this_cpu_read(irq_work_list);
86 /* Can assign non-atomic because we keep the flags set. */ 86 /* Can assign non-atomic because we keep the flags set. */
87 entry->next = next_flags(next, IRQ_WORK_FLAGS); 87 entry->next = next_flags(next, IRQ_WORK_FLAGS);
88 } while (cmpxchg(head, next, entry) != next); 88 } while (this_cpu_cmpxchg(irq_work_list, next, entry) != next);
89 89
90 /* The list was empty, raise self-interrupt to start processing. */ 90 /* The list was empty, raise self-interrupt to start processing. */
91 if (!irq_work_next(entry)) 91 if (!irq_work_next(entry))
92 arch_irq_work_raise(); 92 arch_irq_work_raise();
93 93
94 put_cpu_var(irq_work_list); 94 preempt_enable();
95} 95}
96 96
97/* 97/*
@@ -120,16 +120,16 @@ EXPORT_SYMBOL_GPL(irq_work_queue);
120 */ 120 */
121void irq_work_run(void) 121void irq_work_run(void)
122{ 122{
123 struct irq_work *list, **head; 123 struct irq_work *list;
124 124
125 head = &__get_cpu_var(irq_work_list); 125 if (this_cpu_read(irq_work_list) == NULL)
126 if (*head == NULL)
127 return; 126 return;
128 127
129 BUG_ON(!in_irq()); 128 BUG_ON(!in_irq());
130 BUG_ON(!irqs_disabled()); 129 BUG_ON(!irqs_disabled());
131 130
132 list = xchg(head, NULL); 131 list = this_cpu_xchg(irq_work_list, NULL);
132
133 while (list != NULL) { 133 while (list != NULL) {
134 struct irq_work *entry = list; 134 struct irq_work *entry = list;
135 135
diff --git a/kernel/kexec.c b/kernel/kexec.c
index b55045bc7563..ec19b92c7ebd 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -163,7 +163,7 @@ static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
163 * just verifies it is an address we can use. 163 * just verifies it is an address we can use.
164 * 164 *
165 * Since the kernel does everything in page size chunks ensure 165 * Since the kernel does everything in page size chunks ensure
166 * the destination addreses are page aligned. Too many 166 * the destination addresses are page aligned. Too many
167 * special cases crop of when we don't do this. The most 167 * special cases crop of when we don't do this. The most
168 * insidious is getting overlapping destination addresses 168 * insidious is getting overlapping destination addresses
169 * simply because addresses are changed to page size 169 * simply because addresses are changed to page size
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 7663e5df0e6f..77981813a1e7 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -317,12 +317,12 @@ void __kprobes free_optinsn_slot(kprobe_opcode_t * slot, int dirty)
317/* We have preemption disabled.. so it is safe to use __ versions */ 317/* We have preemption disabled.. so it is safe to use __ versions */
318static inline void set_kprobe_instance(struct kprobe *kp) 318static inline void set_kprobe_instance(struct kprobe *kp)
319{ 319{
320 __get_cpu_var(kprobe_instance) = kp; 320 __this_cpu_write(kprobe_instance, kp);
321} 321}
322 322
323static inline void reset_kprobe_instance(void) 323static inline void reset_kprobe_instance(void)
324{ 324{
325 __get_cpu_var(kprobe_instance) = NULL; 325 __this_cpu_write(kprobe_instance, NULL);
326} 326}
327 327
328/* 328/*
@@ -965,7 +965,7 @@ static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
965static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, 965static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
966 int trapnr) 966 int trapnr)
967{ 967{
968 struct kprobe *cur = __get_cpu_var(kprobe_instance); 968 struct kprobe *cur = __this_cpu_read(kprobe_instance);
969 969
970 /* 970 /*
971 * if we faulted "during" the execution of a user specified 971 * if we faulted "during" the execution of a user specified
@@ -980,7 +980,7 @@ static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
980 980
981static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs) 981static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
982{ 982{
983 struct kprobe *cur = __get_cpu_var(kprobe_instance); 983 struct kprobe *cur = __this_cpu_read(kprobe_instance);
984 int ret = 0; 984 int ret = 0;
985 985
986 if (cur && cur->break_handler) { 986 if (cur && cur->break_handler) {
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 5355cfd44a3f..c55afba990a3 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -148,7 +148,7 @@ struct task_struct *kthread_create(int (*threadfn)(void *data),
148 wait_for_completion(&create.done); 148 wait_for_completion(&create.done);
149 149
150 if (!IS_ERR(create.result)) { 150 if (!IS_ERR(create.result)) {
151 static struct sched_param param = { .sched_priority = 0 }; 151 static const struct sched_param param = { .sched_priority = 0 };
152 va_list args; 152 va_list args;
153 153
154 va_start(args, namefmt); 154 va_start(args, namefmt);
diff --git a/kernel/latencytop.c b/kernel/latencytop.c
index 17110a4a4fc2..ee74b35e528d 100644
--- a/kernel/latencytop.c
+++ b/kernel/latencytop.c
@@ -241,24 +241,19 @@ static int lstats_show(struct seq_file *m, void *v)
241 seq_puts(m, "Latency Top version : v0.1\n"); 241 seq_puts(m, "Latency Top version : v0.1\n");
242 242
243 for (i = 0; i < MAXLR; i++) { 243 for (i = 0; i < MAXLR; i++) {
244 if (latency_record[i].backtrace[0]) { 244 struct latency_record *lr = &latency_record[i];
245
246 if (lr->backtrace[0]) {
245 int q; 247 int q;
246 seq_printf(m, "%i %lu %lu ", 248 seq_printf(m, "%i %lu %lu",
247 latency_record[i].count, 249 lr->count, lr->time, lr->max);
248 latency_record[i].time,
249 latency_record[i].max);
250 for (q = 0; q < LT_BACKTRACEDEPTH; q++) { 250 for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
251 char sym[KSYM_SYMBOL_LEN]; 251 unsigned long bt = lr->backtrace[q];
252 char *c; 252 if (!bt)
253 if (!latency_record[i].backtrace[q])
254 break; 253 break;
255 if (latency_record[i].backtrace[q] == ULONG_MAX) 254 if (bt == ULONG_MAX)
256 break; 255 break;
257 sprint_symbol(sym, latency_record[i].backtrace[q]); 256 seq_printf(m, " %ps", (void *)bt);
258 c = strchr(sym, '+');
259 if (c)
260 *c = 0;
261 seq_printf(m, "%s ", sym);
262 } 257 }
263 seq_printf(m, "\n"); 258 seq_printf(m, "\n");
264 } 259 }
diff --git a/kernel/panic.c b/kernel/panic.c
index 4c13b1a88ebb..991bb87a1704 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -34,6 +34,7 @@ static int pause_on_oops_flag;
34static DEFINE_SPINLOCK(pause_on_oops_lock); 34static DEFINE_SPINLOCK(pause_on_oops_lock);
35 35
36int panic_timeout; 36int panic_timeout;
37EXPORT_SYMBOL_GPL(panic_timeout);
37 38
38ATOMIC_NOTIFIER_HEAD(panic_notifier_list); 39ATOMIC_NOTIFIER_HEAD(panic_notifier_list);
39 40
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 11847bf1e8cc..05ebe841270b 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -38,6 +38,12 @@
38 38
39#include <asm/irq_regs.h> 39#include <asm/irq_regs.h>
40 40
41enum event_type_t {
42 EVENT_FLEXIBLE = 0x1,
43 EVENT_PINNED = 0x2,
44 EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
45};
46
41atomic_t perf_task_events __read_mostly; 47atomic_t perf_task_events __read_mostly;
42static atomic_t nr_mmap_events __read_mostly; 48static atomic_t nr_mmap_events __read_mostly;
43static atomic_t nr_comm_events __read_mostly; 49static atomic_t nr_comm_events __read_mostly;
@@ -65,6 +71,12 @@ int sysctl_perf_event_sample_rate __read_mostly = 100000;
65 71
66static atomic64_t perf_event_id; 72static atomic64_t perf_event_id;
67 73
74static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
75 enum event_type_t event_type);
76
77static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
78 enum event_type_t event_type);
79
68void __weak perf_event_print_debug(void) { } 80void __weak perf_event_print_debug(void) { }
69 81
70extern __weak const char *perf_pmu_name(void) 82extern __weak const char *perf_pmu_name(void)
@@ -72,6 +84,11 @@ extern __weak const char *perf_pmu_name(void)
72 return "pmu"; 84 return "pmu";
73} 85}
74 86
87static inline u64 perf_clock(void)
88{
89 return local_clock();
90}
91
75void perf_pmu_disable(struct pmu *pmu) 92void perf_pmu_disable(struct pmu *pmu)
76{ 93{
77 int *count = this_cpu_ptr(pmu->pmu_disable_count); 94 int *count = this_cpu_ptr(pmu->pmu_disable_count);
@@ -240,11 +257,6 @@ static void perf_unpin_context(struct perf_event_context *ctx)
240 put_ctx(ctx); 257 put_ctx(ctx);
241} 258}
242 259
243static inline u64 perf_clock(void)
244{
245 return local_clock();
246}
247
248/* 260/*
249 * Update the record of the current time in a context. 261 * Update the record of the current time in a context.
250 */ 262 */
@@ -256,6 +268,12 @@ static void update_context_time(struct perf_event_context *ctx)
256 ctx->timestamp = now; 268 ctx->timestamp = now;
257} 269}
258 270
271static u64 perf_event_time(struct perf_event *event)
272{
273 struct perf_event_context *ctx = event->ctx;
274 return ctx ? ctx->time : 0;
275}
276
259/* 277/*
260 * Update the total_time_enabled and total_time_running fields for a event. 278 * Update the total_time_enabled and total_time_running fields for a event.
261 */ 279 */
@@ -269,7 +287,7 @@ static void update_event_times(struct perf_event *event)
269 return; 287 return;
270 288
271 if (ctx->is_active) 289 if (ctx->is_active)
272 run_end = ctx->time; 290 run_end = perf_event_time(event);
273 else 291 else
274 run_end = event->tstamp_stopped; 292 run_end = event->tstamp_stopped;
275 293
@@ -278,7 +296,7 @@ static void update_event_times(struct perf_event *event)
278 if (event->state == PERF_EVENT_STATE_INACTIVE) 296 if (event->state == PERF_EVENT_STATE_INACTIVE)
279 run_end = event->tstamp_stopped; 297 run_end = event->tstamp_stopped;
280 else 298 else
281 run_end = ctx->time; 299 run_end = perf_event_time(event);
282 300
283 event->total_time_running = run_end - event->tstamp_running; 301 event->total_time_running = run_end - event->tstamp_running;
284} 302}
@@ -534,6 +552,7 @@ event_sched_out(struct perf_event *event,
534 struct perf_cpu_context *cpuctx, 552 struct perf_cpu_context *cpuctx,
535 struct perf_event_context *ctx) 553 struct perf_event_context *ctx)
536{ 554{
555 u64 tstamp = perf_event_time(event);
537 u64 delta; 556 u64 delta;
538 /* 557 /*
539 * An event which could not be activated because of 558 * An event which could not be activated because of
@@ -545,7 +564,7 @@ event_sched_out(struct perf_event *event,
545 && !event_filter_match(event)) { 564 && !event_filter_match(event)) {
546 delta = ctx->time - event->tstamp_stopped; 565 delta = ctx->time - event->tstamp_stopped;
547 event->tstamp_running += delta; 566 event->tstamp_running += delta;
548 event->tstamp_stopped = ctx->time; 567 event->tstamp_stopped = tstamp;
549 } 568 }
550 569
551 if (event->state != PERF_EVENT_STATE_ACTIVE) 570 if (event->state != PERF_EVENT_STATE_ACTIVE)
@@ -556,7 +575,7 @@ event_sched_out(struct perf_event *event,
556 event->pending_disable = 0; 575 event->pending_disable = 0;
557 event->state = PERF_EVENT_STATE_OFF; 576 event->state = PERF_EVENT_STATE_OFF;
558 } 577 }
559 event->tstamp_stopped = ctx->time; 578 event->tstamp_stopped = tstamp;
560 event->pmu->del(event, 0); 579 event->pmu->del(event, 0);
561 event->oncpu = -1; 580 event->oncpu = -1;
562 581
@@ -768,6 +787,8 @@ event_sched_in(struct perf_event *event,
768 struct perf_cpu_context *cpuctx, 787 struct perf_cpu_context *cpuctx,
769 struct perf_event_context *ctx) 788 struct perf_event_context *ctx)
770{ 789{
790 u64 tstamp = perf_event_time(event);
791
771 if (event->state <= PERF_EVENT_STATE_OFF) 792 if (event->state <= PERF_EVENT_STATE_OFF)
772 return 0; 793 return 0;
773 794
@@ -784,9 +805,9 @@ event_sched_in(struct perf_event *event,
784 return -EAGAIN; 805 return -EAGAIN;
785 } 806 }
786 807
787 event->tstamp_running += ctx->time - event->tstamp_stopped; 808 event->tstamp_running += tstamp - event->tstamp_stopped;
788 809
789 event->shadow_ctx_time = ctx->time - ctx->timestamp; 810 event->shadow_ctx_time = tstamp - ctx->timestamp;
790 811
791 if (!is_software_event(event)) 812 if (!is_software_event(event))
792 cpuctx->active_oncpu++; 813 cpuctx->active_oncpu++;
@@ -898,11 +919,13 @@ static int group_can_go_on(struct perf_event *event,
898static void add_event_to_ctx(struct perf_event *event, 919static void add_event_to_ctx(struct perf_event *event,
899 struct perf_event_context *ctx) 920 struct perf_event_context *ctx)
900{ 921{
922 u64 tstamp = perf_event_time(event);
923
901 list_add_event(event, ctx); 924 list_add_event(event, ctx);
902 perf_group_attach(event); 925 perf_group_attach(event);
903 event->tstamp_enabled = ctx->time; 926 event->tstamp_enabled = tstamp;
904 event->tstamp_running = ctx->time; 927 event->tstamp_running = tstamp;
905 event->tstamp_stopped = ctx->time; 928 event->tstamp_stopped = tstamp;
906} 929}
907 930
908/* 931/*
@@ -937,7 +960,7 @@ static void __perf_install_in_context(void *info)
937 960
938 add_event_to_ctx(event, ctx); 961 add_event_to_ctx(event, ctx);
939 962
940 if (event->cpu != -1 && event->cpu != smp_processor_id()) 963 if (!event_filter_match(event))
941 goto unlock; 964 goto unlock;
942 965
943 /* 966 /*
@@ -1042,14 +1065,13 @@ static void __perf_event_mark_enabled(struct perf_event *event,
1042 struct perf_event_context *ctx) 1065 struct perf_event_context *ctx)
1043{ 1066{
1044 struct perf_event *sub; 1067 struct perf_event *sub;
1068 u64 tstamp = perf_event_time(event);
1045 1069
1046 event->state = PERF_EVENT_STATE_INACTIVE; 1070 event->state = PERF_EVENT_STATE_INACTIVE;
1047 event->tstamp_enabled = ctx->time - event->total_time_enabled; 1071 event->tstamp_enabled = tstamp - event->total_time_enabled;
1048 list_for_each_entry(sub, &event->sibling_list, group_entry) { 1072 list_for_each_entry(sub, &event->sibling_list, group_entry) {
1049 if (sub->state >= PERF_EVENT_STATE_INACTIVE) { 1073 if (sub->state >= PERF_EVENT_STATE_INACTIVE)
1050 sub->tstamp_enabled = 1074 sub->tstamp_enabled = tstamp - sub->total_time_enabled;
1051 ctx->time - sub->total_time_enabled;
1052 }
1053 } 1075 }
1054} 1076}
1055 1077
@@ -1082,7 +1104,7 @@ static void __perf_event_enable(void *info)
1082 goto unlock; 1104 goto unlock;
1083 __perf_event_mark_enabled(event, ctx); 1105 __perf_event_mark_enabled(event, ctx);
1084 1106
1085 if (event->cpu != -1 && event->cpu != smp_processor_id()) 1107 if (!event_filter_match(event))
1086 goto unlock; 1108 goto unlock;
1087 1109
1088 /* 1110 /*
@@ -1193,12 +1215,6 @@ static int perf_event_refresh(struct perf_event *event, int refresh)
1193 return 0; 1215 return 0;
1194} 1216}
1195 1217
1196enum event_type_t {
1197 EVENT_FLEXIBLE = 0x1,
1198 EVENT_PINNED = 0x2,
1199 EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
1200};
1201
1202static void ctx_sched_out(struct perf_event_context *ctx, 1218static void ctx_sched_out(struct perf_event_context *ctx,
1203 struct perf_cpu_context *cpuctx, 1219 struct perf_cpu_context *cpuctx,
1204 enum event_type_t event_type) 1220 enum event_type_t event_type)
@@ -1435,7 +1451,7 @@ ctx_pinned_sched_in(struct perf_event_context *ctx,
1435 list_for_each_entry(event, &ctx->pinned_groups, group_entry) { 1451 list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
1436 if (event->state <= PERF_EVENT_STATE_OFF) 1452 if (event->state <= PERF_EVENT_STATE_OFF)
1437 continue; 1453 continue;
1438 if (event->cpu != -1 && event->cpu != smp_processor_id()) 1454 if (!event_filter_match(event))
1439 continue; 1455 continue;
1440 1456
1441 if (group_can_go_on(event, cpuctx, 1)) 1457 if (group_can_go_on(event, cpuctx, 1))
@@ -1467,7 +1483,7 @@ ctx_flexible_sched_in(struct perf_event_context *ctx,
1467 * Listen to the 'cpu' scheduling filter constraint 1483 * Listen to the 'cpu' scheduling filter constraint
1468 * of events: 1484 * of events:
1469 */ 1485 */
1470 if (event->cpu != -1 && event->cpu != smp_processor_id()) 1486 if (!event_filter_match(event))
1471 continue; 1487 continue;
1472 1488
1473 if (group_can_go_on(event, cpuctx, can_add_hw)) { 1489 if (group_can_go_on(event, cpuctx, can_add_hw)) {
@@ -1694,7 +1710,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
1694 if (event->state != PERF_EVENT_STATE_ACTIVE) 1710 if (event->state != PERF_EVENT_STATE_ACTIVE)
1695 continue; 1711 continue;
1696 1712
1697 if (event->cpu != -1 && event->cpu != smp_processor_id()) 1713 if (!event_filter_match(event))
1698 continue; 1714 continue;
1699 1715
1700 hwc = &event->hw; 1716 hwc = &event->hw;
@@ -3893,7 +3909,7 @@ static int perf_event_task_match(struct perf_event *event)
3893 if (event->state < PERF_EVENT_STATE_INACTIVE) 3909 if (event->state < PERF_EVENT_STATE_INACTIVE)
3894 return 0; 3910 return 0;
3895 3911
3896 if (event->cpu != -1 && event->cpu != smp_processor_id()) 3912 if (!event_filter_match(event))
3897 return 0; 3913 return 0;
3898 3914
3899 if (event->attr.comm || event->attr.mmap || 3915 if (event->attr.comm || event->attr.mmap ||
@@ -4030,7 +4046,7 @@ static int perf_event_comm_match(struct perf_event *event)
4030 if (event->state < PERF_EVENT_STATE_INACTIVE) 4046 if (event->state < PERF_EVENT_STATE_INACTIVE)
4031 return 0; 4047 return 0;
4032 4048
4033 if (event->cpu != -1 && event->cpu != smp_processor_id()) 4049 if (!event_filter_match(event))
4034 return 0; 4050 return 0;
4035 4051
4036 if (event->attr.comm) 4052 if (event->attr.comm)
@@ -4178,7 +4194,7 @@ static int perf_event_mmap_match(struct perf_event *event,
4178 if (event->state < PERF_EVENT_STATE_INACTIVE) 4194 if (event->state < PERF_EVENT_STATE_INACTIVE)
4179 return 0; 4195 return 0;
4180 4196
4181 if (event->cpu != -1 && event->cpu != smp_processor_id()) 4197 if (!event_filter_match(event))
4182 return 0; 4198 return 0;
4183 4199
4184 if ((!executable && event->attr.mmap_data) || 4200 if ((!executable && event->attr.mmap_data) ||
@@ -4648,7 +4664,7 @@ int perf_swevent_get_recursion_context(void)
4648} 4664}
4649EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); 4665EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);
4650 4666
4651void inline perf_swevent_put_recursion_context(int rctx) 4667inline void perf_swevent_put_recursion_context(int rctx)
4652{ 4668{
4653 struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); 4669 struct swevent_htable *swhash = &__get_cpu_var(swevent_htable);
4654 4670
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index a5aff3ebad38..265729966ece 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -100,13 +100,9 @@ config PM_SLEEP_ADVANCED_DEBUG
100 depends on PM_ADVANCED_DEBUG 100 depends on PM_ADVANCED_DEBUG
101 default n 101 default n
102 102
103config SUSPEND_NVS
104 bool
105
106config SUSPEND 103config SUSPEND
107 bool "Suspend to RAM and standby" 104 bool "Suspend to RAM and standby"
108 depends on PM && ARCH_SUSPEND_POSSIBLE 105 depends on PM && ARCH_SUSPEND_POSSIBLE
109 select SUSPEND_NVS if HAS_IOMEM
110 default y 106 default y
111 ---help--- 107 ---help---
112 Allow the system to enter sleep states in which main memory is 108 Allow the system to enter sleep states in which main memory is
@@ -140,7 +136,6 @@ config HIBERNATION
140 depends on PM && SWAP && ARCH_HIBERNATION_POSSIBLE 136 depends on PM && SWAP && ARCH_HIBERNATION_POSSIBLE
141 select LZO_COMPRESS 137 select LZO_COMPRESS
142 select LZO_DECOMPRESS 138 select LZO_DECOMPRESS
143 select SUSPEND_NVS if HAS_IOMEM
144 ---help--- 139 ---help---
145 Enable the suspend to disk (STD) functionality, which is usually 140 Enable the suspend to disk (STD) functionality, which is usually
146 called "hibernation" in user interfaces. STD checkpoints the 141 called "hibernation" in user interfaces. STD checkpoints the
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index f9063c6b185d..c350e18b53e3 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -1,7 +1,4 @@
1 1ccflags-$(CONFIG_PM_DEBUG) := -DDEBUG
2ifeq ($(CONFIG_PM_DEBUG),y)
3EXTRA_CFLAGS += -DDEBUG
4endif
5 2
6obj-$(CONFIG_PM) += main.o 3obj-$(CONFIG_PM) += main.o
7obj-$(CONFIG_PM_SLEEP) += console.o 4obj-$(CONFIG_PM_SLEEP) += console.o
@@ -10,6 +7,5 @@ obj-$(CONFIG_SUSPEND) += suspend.o
10obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o 7obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o
11obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \ 8obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \
12 block_io.o 9 block_io.o
13obj-$(CONFIG_SUSPEND_NVS) += nvs.o
14 10
15obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o 11obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 048d0b514831..1832bd264219 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -51,18 +51,18 @@ enum {
51 51
52static int hibernation_mode = HIBERNATION_SHUTDOWN; 52static int hibernation_mode = HIBERNATION_SHUTDOWN;
53 53
54static struct platform_hibernation_ops *hibernation_ops; 54static const struct platform_hibernation_ops *hibernation_ops;
55 55
56/** 56/**
57 * hibernation_set_ops - set the global hibernate operations 57 * hibernation_set_ops - set the global hibernate operations
58 * @ops: the hibernation operations to use in subsequent hibernation transitions 58 * @ops: the hibernation operations to use in subsequent hibernation transitions
59 */ 59 */
60 60
61void hibernation_set_ops(struct platform_hibernation_ops *ops) 61void hibernation_set_ops(const struct platform_hibernation_ops *ops)
62{ 62{
63 if (ops && !(ops->begin && ops->end && ops->pre_snapshot 63 if (ops && !(ops->begin && ops->end && ops->pre_snapshot
64 && ops->prepare && ops->finish && ops->enter && ops->pre_restore 64 && ops->prepare && ops->finish && ops->enter && ops->pre_restore
65 && ops->restore_cleanup)) { 65 && ops->restore_cleanup && ops->leave)) {
66 WARN_ON(1); 66 WARN_ON(1);
67 return; 67 return;
68 } 68 }
@@ -278,7 +278,7 @@ static int create_image(int platform_mode)
278 goto Enable_irqs; 278 goto Enable_irqs;
279 } 279 }
280 280
281 if (hibernation_test(TEST_CORE) || !pm_check_wakeup_events()) 281 if (hibernation_test(TEST_CORE) || pm_wakeup_pending())
282 goto Power_up; 282 goto Power_up;
283 283
284 in_suspend = 1; 284 in_suspend = 1;
@@ -516,7 +516,7 @@ int hibernation_platform_enter(void)
516 516
517 local_irq_disable(); 517 local_irq_disable();
518 sysdev_suspend(PMSG_HIBERNATE); 518 sysdev_suspend(PMSG_HIBERNATE);
519 if (!pm_check_wakeup_events()) { 519 if (pm_wakeup_pending()) {
520 error = -EAGAIN; 520 error = -EAGAIN;
521 goto Power_up; 521 goto Power_up;
522 } 522 }
@@ -647,6 +647,7 @@ int hibernate(void)
647 swsusp_free(); 647 swsusp_free();
648 if (!error) 648 if (!error)
649 power_down(); 649 power_down();
650 in_suspend = 0;
650 pm_restore_gfp_mask(); 651 pm_restore_gfp_mask();
651 } else { 652 } else {
652 pr_debug("PM: Image restored successfully.\n"); 653 pr_debug("PM: Image restored successfully.\n");
diff --git a/kernel/power/nvs.c b/kernel/power/nvs.c
deleted file mode 100644
index 1836db60bbb6..000000000000
--- a/kernel/power/nvs.c
+++ /dev/null
@@ -1,136 +0,0 @@
1/*
2 * linux/kernel/power/hibernate_nvs.c - Routines for handling NVS memory
3 *
4 * Copyright (C) 2008,2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
5 *
6 * This file is released under the GPLv2.
7 */
8
9#include <linux/io.h>
10#include <linux/kernel.h>
11#include <linux/list.h>
12#include <linux/mm.h>
13#include <linux/slab.h>
14#include <linux/suspend.h>
15
16/*
17 * Platforms, like ACPI, may want us to save some memory used by them during
18 * suspend and to restore the contents of this memory during the subsequent
19 * resume. The code below implements a mechanism allowing us to do that.
20 */
21
22struct nvs_page {
23 unsigned long phys_start;
24 unsigned int size;
25 void *kaddr;
26 void *data;
27 struct list_head node;
28};
29
30static LIST_HEAD(nvs_list);
31
32/**
33 * suspend_nvs_register - register platform NVS memory region to save
34 * @start - physical address of the region
35 * @size - size of the region
36 *
37 * The NVS region need not be page-aligned (both ends) and we arrange
38 * things so that the data from page-aligned addresses in this region will
39 * be copied into separate RAM pages.
40 */
41int suspend_nvs_register(unsigned long start, unsigned long size)
42{
43 struct nvs_page *entry, *next;
44
45 while (size > 0) {
46 unsigned int nr_bytes;
47
48 entry = kzalloc(sizeof(struct nvs_page), GFP_KERNEL);
49 if (!entry)
50 goto Error;
51
52 list_add_tail(&entry->node, &nvs_list);
53 entry->phys_start = start;
54 nr_bytes = PAGE_SIZE - (start & ~PAGE_MASK);
55 entry->size = (size < nr_bytes) ? size : nr_bytes;
56
57 start += entry->size;
58 size -= entry->size;
59 }
60 return 0;
61
62 Error:
63 list_for_each_entry_safe(entry, next, &nvs_list, node) {
64 list_del(&entry->node);
65 kfree(entry);
66 }
67 return -ENOMEM;
68}
69
70/**
71 * suspend_nvs_free - free data pages allocated for saving NVS regions
72 */
73void suspend_nvs_free(void)
74{
75 struct nvs_page *entry;
76
77 list_for_each_entry(entry, &nvs_list, node)
78 if (entry->data) {
79 free_page((unsigned long)entry->data);
80 entry->data = NULL;
81 if (entry->kaddr) {
82 iounmap(entry->kaddr);
83 entry->kaddr = NULL;
84 }
85 }
86}
87
88/**
89 * suspend_nvs_alloc - allocate memory necessary for saving NVS regions
90 */
91int suspend_nvs_alloc(void)
92{
93 struct nvs_page *entry;
94
95 list_for_each_entry(entry, &nvs_list, node) {
96 entry->data = (void *)__get_free_page(GFP_KERNEL);
97 if (!entry->data) {
98 suspend_nvs_free();
99 return -ENOMEM;
100 }
101 }
102 return 0;
103}
104
105/**
106 * suspend_nvs_save - save NVS memory regions
107 */
108void suspend_nvs_save(void)
109{
110 struct nvs_page *entry;
111
112 printk(KERN_INFO "PM: Saving platform NVS memory\n");
113
114 list_for_each_entry(entry, &nvs_list, node)
115 if (entry->data) {
116 entry->kaddr = ioremap(entry->phys_start, entry->size);
117 memcpy(entry->data, entry->kaddr, entry->size);
118 }
119}
120
121/**
122 * suspend_nvs_restore - restore NVS memory regions
123 *
124 * This function is going to be called with interrupts disabled, so it
125 * cannot iounmap the virtual addresses used to access the NVS region.
126 */
127void suspend_nvs_restore(void)
128{
129 struct nvs_page *entry;
130
131 printk(KERN_INFO "PM: Restoring platform NVS memory\n");
132
133 list_for_each_entry(entry, &nvs_list, node)
134 if (entry->data)
135 memcpy(entry->kaddr, entry->data, entry->size);
136}
diff --git a/kernel/power/process.c b/kernel/power/process.c
index e50b4c1b2a0f..d6d2a10320e0 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -64,6 +64,12 @@ static int try_to_freeze_tasks(bool sig_only)
64 * perturb a task in TASK_STOPPED or TASK_TRACED. 64 * perturb a task in TASK_STOPPED or TASK_TRACED.
65 * It is "frozen enough". If the task does wake 65 * It is "frozen enough". If the task does wake
66 * up, it will immediately call try_to_freeze. 66 * up, it will immediately call try_to_freeze.
67 *
68 * Because freeze_task() goes through p's
69 * scheduler lock after setting TIF_FREEZE, it's
70 * guaranteed that either we see TASK_RUNNING or
71 * try_to_stop() after schedule() in ptrace/signal
72 * stop sees TIF_FREEZE.
67 */ 73 */
68 if (!task_is_stopped_or_traced(p) && 74 if (!task_is_stopped_or_traced(p) &&
69 !freezer_should_skip(p)) 75 !freezer_should_skip(p))
@@ -79,7 +85,7 @@ static int try_to_freeze_tasks(bool sig_only)
79 if (!todo || time_after(jiffies, end_time)) 85 if (!todo || time_after(jiffies, end_time))
80 break; 86 break;
81 87
82 if (!pm_check_wakeup_events()) { 88 if (pm_wakeup_pending()) {
83 wakeup = true; 89 wakeup = true;
84 break; 90 break;
85 } 91 }
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 031d5e3a6197..de6f86bfa303 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -31,13 +31,13 @@ const char *const pm_states[PM_SUSPEND_MAX] = {
31 [PM_SUSPEND_MEM] = "mem", 31 [PM_SUSPEND_MEM] = "mem",
32}; 32};
33 33
34static struct platform_suspend_ops *suspend_ops; 34static const struct platform_suspend_ops *suspend_ops;
35 35
36/** 36/**
37 * suspend_set_ops - Set the global suspend method table. 37 * suspend_set_ops - Set the global suspend method table.
38 * @ops: Pointer to ops structure. 38 * @ops: Pointer to ops structure.
39 */ 39 */
40void suspend_set_ops(struct platform_suspend_ops *ops) 40void suspend_set_ops(const struct platform_suspend_ops *ops)
41{ 41{
42 mutex_lock(&pm_mutex); 42 mutex_lock(&pm_mutex);
43 suspend_ops = ops; 43 suspend_ops = ops;
@@ -164,7 +164,7 @@ static int suspend_enter(suspend_state_t state)
164 164
165 error = sysdev_suspend(PMSG_SUSPEND); 165 error = sysdev_suspend(PMSG_SUSPEND);
166 if (!error) { 166 if (!error) {
167 if (!suspend_test(TEST_CORE) && pm_check_wakeup_events()) { 167 if (!(suspend_test(TEST_CORE) || pm_wakeup_pending())) {
168 error = suspend_ops->enter(state); 168 error = suspend_ops->enter(state);
169 events_check_enabled = false; 169 events_check_enabled = false;
170 } 170 }
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 8c7e4832b9be..7c97c3a0eee3 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -224,7 +224,7 @@ static int swsusp_swap_check(void)
224 return res; 224 return res;
225 225
226 root_swap = res; 226 root_swap = res;
227 res = blkdev_get(hib_resume_bdev, FMODE_WRITE); 227 res = blkdev_get(hib_resume_bdev, FMODE_WRITE, NULL);
228 if (res) 228 if (res)
229 return res; 229 return res;
230 230
@@ -888,7 +888,7 @@ out_finish:
888/** 888/**
889 * swsusp_read - read the hibernation image. 889 * swsusp_read - read the hibernation image.
890 * @flags_p: flags passed by the "frozen" kernel in the image header should 890 * @flags_p: flags passed by the "frozen" kernel in the image header should
891 * be written into this memeory location 891 * be written into this memory location
892 */ 892 */
893 893
894int swsusp_read(unsigned int *flags_p) 894int swsusp_read(unsigned int *flags_p)
@@ -930,7 +930,8 @@ int swsusp_check(void)
930{ 930{
931 int error; 931 int error;
932 932
933 hib_resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ); 933 hib_resume_bdev = blkdev_get_by_dev(swsusp_resume_device,
934 FMODE_READ, NULL);
934 if (!IS_ERR(hib_resume_bdev)) { 935 if (!IS_ERR(hib_resume_bdev)) {
935 set_blocksize(hib_resume_bdev, PAGE_SIZE); 936 set_blocksize(hib_resume_bdev, PAGE_SIZE);
936 clear_page(swsusp_header); 937 clear_page(swsusp_header);
diff --git a/kernel/printk.c b/kernel/printk.c
index ab3ffc5b3b64..53d9a9ec88e6 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -39,16 +39,11 @@
39#include <linux/syslog.h> 39#include <linux/syslog.h>
40#include <linux/cpu.h> 40#include <linux/cpu.h>
41#include <linux/notifier.h> 41#include <linux/notifier.h>
42#include <linux/rculist.h>
42 43
43#include <asm/uaccess.h> 44#include <asm/uaccess.h>
44 45
45/* 46/*
46 * for_each_console() allows you to iterate on each console
47 */
48#define for_each_console(con) \
49 for (con = console_drivers; con != NULL; con = con->next)
50
51/*
52 * Architectures can override it: 47 * Architectures can override it:
53 */ 48 */
54void asmlinkage __attribute__((weak)) early_printk(const char *fmt, ...) 49void asmlinkage __attribute__((weak)) early_printk(const char *fmt, ...)
@@ -279,12 +274,12 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
279 * at open time. 274 * at open time.
280 */ 275 */
281 if (type == SYSLOG_ACTION_OPEN || !from_file) { 276 if (type == SYSLOG_ACTION_OPEN || !from_file) {
282 if (dmesg_restrict && !capable(CAP_SYS_ADMIN)) 277 if (dmesg_restrict && !capable(CAP_SYSLOG))
283 return -EPERM; 278 goto warn; /* switch to return -EPERM after 2.6.39 */
284 if ((type != SYSLOG_ACTION_READ_ALL && 279 if ((type != SYSLOG_ACTION_READ_ALL &&
285 type != SYSLOG_ACTION_SIZE_BUFFER) && 280 type != SYSLOG_ACTION_SIZE_BUFFER) &&
286 !capable(CAP_SYS_ADMIN)) 281 !capable(CAP_SYSLOG))
287 return -EPERM; 282 goto warn; /* switch to return -EPERM after 2.6.39 */
288 } 283 }
289 284
290 error = security_syslog(type); 285 error = security_syslog(type);
@@ -428,6 +423,12 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
428 } 423 }
429out: 424out:
430 return error; 425 return error;
426warn:
427 /* remove after 2.6.39 */
428 if (capable(CAP_SYS_ADMIN))
429 WARN_ONCE(1, "Attempt to access syslog with CAP_SYS_ADMIN "
430 "but no CAP_SYSLOG (deprecated and denied).\n");
431 return -EPERM;
431} 432}
432 433
433SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) 434SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len)
@@ -1359,6 +1360,7 @@ void register_console(struct console *newcon)
1359 spin_unlock_irqrestore(&logbuf_lock, flags); 1360 spin_unlock_irqrestore(&logbuf_lock, flags);
1360 } 1361 }
1361 release_console_sem(); 1362 release_console_sem();
1363 console_sysfs_notify();
1362 1364
1363 /* 1365 /*
1364 * By unregistering the bootconsoles after we enable the real console 1366 * By unregistering the bootconsoles after we enable the real console
@@ -1417,6 +1419,7 @@ int unregister_console(struct console *console)
1417 console_drivers->flags |= CON_CONSDEV; 1419 console_drivers->flags |= CON_CONSDEV;
1418 1420
1419 release_console_sem(); 1421 release_console_sem();
1422 console_sysfs_notify();
1420 return res; 1423 return res;
1421} 1424}
1422EXPORT_SYMBOL(unregister_console); 1425EXPORT_SYMBOL(unregister_console);
@@ -1500,7 +1503,7 @@ int kmsg_dump_register(struct kmsg_dumper *dumper)
1500 /* Don't allow registering multiple times */ 1503 /* Don't allow registering multiple times */
1501 if (!dumper->registered) { 1504 if (!dumper->registered) {
1502 dumper->registered = 1; 1505 dumper->registered = 1;
1503 list_add_tail(&dumper->list, &dump_list); 1506 list_add_tail_rcu(&dumper->list, &dump_list);
1504 err = 0; 1507 err = 0;
1505 } 1508 }
1506 spin_unlock_irqrestore(&dump_list_lock, flags); 1509 spin_unlock_irqrestore(&dump_list_lock, flags);
@@ -1524,29 +1527,16 @@ int kmsg_dump_unregister(struct kmsg_dumper *dumper)
1524 spin_lock_irqsave(&dump_list_lock, flags); 1527 spin_lock_irqsave(&dump_list_lock, flags);
1525 if (dumper->registered) { 1528 if (dumper->registered) {
1526 dumper->registered = 0; 1529 dumper->registered = 0;
1527 list_del(&dumper->list); 1530 list_del_rcu(&dumper->list);
1528 err = 0; 1531 err = 0;
1529 } 1532 }
1530 spin_unlock_irqrestore(&dump_list_lock, flags); 1533 spin_unlock_irqrestore(&dump_list_lock, flags);
1534 synchronize_rcu();
1531 1535
1532 return err; 1536 return err;
1533} 1537}
1534EXPORT_SYMBOL_GPL(kmsg_dump_unregister); 1538EXPORT_SYMBOL_GPL(kmsg_dump_unregister);
1535 1539
1536static const char * const kmsg_reasons[] = {
1537 [KMSG_DUMP_OOPS] = "oops",
1538 [KMSG_DUMP_PANIC] = "panic",
1539 [KMSG_DUMP_KEXEC] = "kexec",
1540};
1541
1542static const char *kmsg_to_str(enum kmsg_dump_reason reason)
1543{
1544 if (reason >= ARRAY_SIZE(kmsg_reasons) || reason < 0)
1545 return "unknown";
1546
1547 return kmsg_reasons[reason];
1548}
1549
1550/** 1540/**
1551 * kmsg_dump - dump kernel log to kernel message dumpers. 1541 * kmsg_dump - dump kernel log to kernel message dumpers.
1552 * @reason: the reason (oops, panic etc) for dumping 1542 * @reason: the reason (oops, panic etc) for dumping
@@ -1585,13 +1575,9 @@ void kmsg_dump(enum kmsg_dump_reason reason)
1585 l2 = chars; 1575 l2 = chars;
1586 } 1576 }
1587 1577
1588 if (!spin_trylock_irqsave(&dump_list_lock, flags)) { 1578 rcu_read_lock();
1589 printk(KERN_ERR "dump_kmsg: dump list lock is held during %s, skipping dump\n", 1579 list_for_each_entry_rcu(dumper, &dump_list, list)
1590 kmsg_to_str(reason));
1591 return;
1592 }
1593 list_for_each_entry(dumper, &dump_list, list)
1594 dumper->dump(dumper, reason, s1, l1, s2, l2); 1580 dumper->dump(dumper, reason, s1, l1, s2, l2);
1595 spin_unlock_irqrestore(&dump_list_lock, flags); 1581 rcu_read_unlock();
1596} 1582}
1597#endif 1583#endif
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index 034493724749..0c343b9a46d5 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -189,7 +189,8 @@ static int rcu_kthread(void *arg)
189 unsigned long flags; 189 unsigned long flags;
190 190
191 for (;;) { 191 for (;;) {
192 wait_event(rcu_kthread_wq, have_rcu_kthread_work != 0); 192 wait_event_interruptible(rcu_kthread_wq,
193 have_rcu_kthread_work != 0);
193 morework = rcu_boost(); 194 morework = rcu_boost();
194 local_irq_save(flags); 195 local_irq_save(flags);
195 work = have_rcu_kthread_work; 196 work = have_rcu_kthread_work;
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index d0ddfea6579d..dd4aea806f8e 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -364,8 +364,8 @@ void rcu_irq_exit(void)
364 WARN_ON_ONCE(rdtp->dynticks & 0x1); 364 WARN_ON_ONCE(rdtp->dynticks & 0x1);
365 365
366 /* If the interrupt queued a callback, get out of dyntick mode. */ 366 /* If the interrupt queued a callback, get out of dyntick mode. */
367 if (__get_cpu_var(rcu_sched_data).nxtlist || 367 if (__this_cpu_read(rcu_sched_data.nxtlist) ||
368 __get_cpu_var(rcu_bh_data).nxtlist) 368 __this_cpu_read(rcu_bh_data.nxtlist))
369 set_need_resched(); 369 set_need_resched();
370} 370}
371 371
diff --git a/kernel/sched.c b/kernel/sched.c
index 04949089e760..ea3e5eff3878 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -278,14 +278,12 @@ struct task_group {
278#endif 278#endif
279}; 279};
280 280
281#define root_task_group init_task_group
282
283/* task_group_lock serializes the addition/removal of task groups */ 281/* task_group_lock serializes the addition/removal of task groups */
284static DEFINE_SPINLOCK(task_group_lock); 282static DEFINE_SPINLOCK(task_group_lock);
285 283
286#ifdef CONFIG_FAIR_GROUP_SCHED 284#ifdef CONFIG_FAIR_GROUP_SCHED
287 285
288# define INIT_TASK_GROUP_LOAD NICE_0_LOAD 286# define ROOT_TASK_GROUP_LOAD NICE_0_LOAD
289 287
290/* 288/*
291 * A weight of 0 or 1 can cause arithmetics problems. 289 * A weight of 0 or 1 can cause arithmetics problems.
@@ -298,13 +296,13 @@ static DEFINE_SPINLOCK(task_group_lock);
298#define MIN_SHARES 2 296#define MIN_SHARES 2
299#define MAX_SHARES (1UL << 18) 297#define MAX_SHARES (1UL << 18)
300 298
301static int init_task_group_load = INIT_TASK_GROUP_LOAD; 299static int root_task_group_load = ROOT_TASK_GROUP_LOAD;
302#endif 300#endif
303 301
304/* Default task group. 302/* Default task group.
305 * Every task in system belong to this group at bootup. 303 * Every task in system belong to this group at bootup.
306 */ 304 */
307struct task_group init_task_group; 305struct task_group root_task_group;
308 306
309#endif /* CONFIG_CGROUP_SCHED */ 307#endif /* CONFIG_CGROUP_SCHED */
310 308
@@ -743,7 +741,7 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
743 buf[cnt] = 0; 741 buf[cnt] = 0;
744 cmp = strstrip(buf); 742 cmp = strstrip(buf);
745 743
746 if (strncmp(buf, "NO_", 3) == 0) { 744 if (strncmp(cmp, "NO_", 3) == 0) {
747 neg = 1; 745 neg = 1;
748 cmp += 3; 746 cmp += 3;
749 } 747 }
@@ -2507,7 +2505,7 @@ out:
2507 * try_to_wake_up_local - try to wake up a local task with rq lock held 2505 * try_to_wake_up_local - try to wake up a local task with rq lock held
2508 * @p: the thread to be awakened 2506 * @p: the thread to be awakened
2509 * 2507 *
2510 * Put @p on the run-queue if it's not alredy there. The caller must 2508 * Put @p on the run-queue if it's not already there. The caller must
2511 * ensure that this_rq() is locked, @p is bound to this_rq() and not 2509 * ensure that this_rq() is locked, @p is bound to this_rq() and not
2512 * the current task. this_rq() stays locked over invocation. 2510 * the current task. this_rq() stays locked over invocation.
2513 */ 2511 */
@@ -7848,7 +7846,7 @@ static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
7848 cfs_rq->tg = tg; 7846 cfs_rq->tg = tg;
7849 7847
7850 tg->se[cpu] = se; 7848 tg->se[cpu] = se;
7851 /* se could be NULL for init_task_group */ 7849 /* se could be NULL for root_task_group */
7852 if (!se) 7850 if (!se)
7853 return; 7851 return;
7854 7852
@@ -7908,18 +7906,18 @@ void __init sched_init(void)
7908 ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT); 7906 ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT);
7909 7907
7910#ifdef CONFIG_FAIR_GROUP_SCHED 7908#ifdef CONFIG_FAIR_GROUP_SCHED
7911 init_task_group.se = (struct sched_entity **)ptr; 7909 root_task_group.se = (struct sched_entity **)ptr;
7912 ptr += nr_cpu_ids * sizeof(void **); 7910 ptr += nr_cpu_ids * sizeof(void **);
7913 7911
7914 init_task_group.cfs_rq = (struct cfs_rq **)ptr; 7912 root_task_group.cfs_rq = (struct cfs_rq **)ptr;
7915 ptr += nr_cpu_ids * sizeof(void **); 7913 ptr += nr_cpu_ids * sizeof(void **);
7916 7914
7917#endif /* CONFIG_FAIR_GROUP_SCHED */ 7915#endif /* CONFIG_FAIR_GROUP_SCHED */
7918#ifdef CONFIG_RT_GROUP_SCHED 7916#ifdef CONFIG_RT_GROUP_SCHED
7919 init_task_group.rt_se = (struct sched_rt_entity **)ptr; 7917 root_task_group.rt_se = (struct sched_rt_entity **)ptr;
7920 ptr += nr_cpu_ids * sizeof(void **); 7918 ptr += nr_cpu_ids * sizeof(void **);
7921 7919
7922 init_task_group.rt_rq = (struct rt_rq **)ptr; 7920 root_task_group.rt_rq = (struct rt_rq **)ptr;
7923 ptr += nr_cpu_ids * sizeof(void **); 7921 ptr += nr_cpu_ids * sizeof(void **);
7924 7922
7925#endif /* CONFIG_RT_GROUP_SCHED */ 7923#endif /* CONFIG_RT_GROUP_SCHED */
@@ -7939,13 +7937,13 @@ void __init sched_init(void)
7939 global_rt_period(), global_rt_runtime()); 7937 global_rt_period(), global_rt_runtime());
7940 7938
7941#ifdef CONFIG_RT_GROUP_SCHED 7939#ifdef CONFIG_RT_GROUP_SCHED
7942 init_rt_bandwidth(&init_task_group.rt_bandwidth, 7940 init_rt_bandwidth(&root_task_group.rt_bandwidth,
7943 global_rt_period(), global_rt_runtime()); 7941 global_rt_period(), global_rt_runtime());
7944#endif /* CONFIG_RT_GROUP_SCHED */ 7942#endif /* CONFIG_RT_GROUP_SCHED */
7945 7943
7946#ifdef CONFIG_CGROUP_SCHED 7944#ifdef CONFIG_CGROUP_SCHED
7947 list_add(&init_task_group.list, &task_groups); 7945 list_add(&root_task_group.list, &task_groups);
7948 INIT_LIST_HEAD(&init_task_group.children); 7946 INIT_LIST_HEAD(&root_task_group.children);
7949 autogroup_init(&init_task); 7947 autogroup_init(&init_task);
7950#endif /* CONFIG_CGROUP_SCHED */ 7948#endif /* CONFIG_CGROUP_SCHED */
7951 7949
@@ -7960,34 +7958,34 @@ void __init sched_init(void)
7960 init_cfs_rq(&rq->cfs, rq); 7958 init_cfs_rq(&rq->cfs, rq);
7961 init_rt_rq(&rq->rt, rq); 7959 init_rt_rq(&rq->rt, rq);
7962#ifdef CONFIG_FAIR_GROUP_SCHED 7960#ifdef CONFIG_FAIR_GROUP_SCHED
7963 init_task_group.shares = init_task_group_load; 7961 root_task_group.shares = root_task_group_load;
7964 INIT_LIST_HEAD(&rq->leaf_cfs_rq_list); 7962 INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
7965 /* 7963 /*
7966 * How much cpu bandwidth does init_task_group get? 7964 * How much cpu bandwidth does root_task_group get?
7967 * 7965 *
7968 * In case of task-groups formed thr' the cgroup filesystem, it 7966 * In case of task-groups formed thr' the cgroup filesystem, it
7969 * gets 100% of the cpu resources in the system. This overall 7967 * gets 100% of the cpu resources in the system. This overall
7970 * system cpu resource is divided among the tasks of 7968 * system cpu resource is divided among the tasks of
7971 * init_task_group and its child task-groups in a fair manner, 7969 * root_task_group and its child task-groups in a fair manner,
7972 * based on each entity's (task or task-group's) weight 7970 * based on each entity's (task or task-group's) weight
7973 * (se->load.weight). 7971 * (se->load.weight).
7974 * 7972 *
7975 * In other words, if init_task_group has 10 tasks of weight 7973 * In other words, if root_task_group has 10 tasks of weight
7976 * 1024) and two child groups A0 and A1 (of weight 1024 each), 7974 * 1024) and two child groups A0 and A1 (of weight 1024 each),
7977 * then A0's share of the cpu resource is: 7975 * then A0's share of the cpu resource is:
7978 * 7976 *
7979 * A0's bandwidth = 1024 / (10*1024 + 1024 + 1024) = 8.33% 7977 * A0's bandwidth = 1024 / (10*1024 + 1024 + 1024) = 8.33%
7980 * 7978 *
7981 * We achieve this by letting init_task_group's tasks sit 7979 * We achieve this by letting root_task_group's tasks sit
7982 * directly in rq->cfs (i.e init_task_group->se[] = NULL). 7980 * directly in rq->cfs (i.e root_task_group->se[] = NULL).
7983 */ 7981 */
7984 init_tg_cfs_entry(&init_task_group, &rq->cfs, NULL, i, NULL); 7982 init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL);
7985#endif /* CONFIG_FAIR_GROUP_SCHED */ 7983#endif /* CONFIG_FAIR_GROUP_SCHED */
7986 7984
7987 rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime; 7985 rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime;
7988#ifdef CONFIG_RT_GROUP_SCHED 7986#ifdef CONFIG_RT_GROUP_SCHED
7989 INIT_LIST_HEAD(&rq->leaf_rt_rq_list); 7987 INIT_LIST_HEAD(&rq->leaf_rt_rq_list);
7990 init_tg_rt_entry(&init_task_group, &rq->rt, NULL, i, NULL); 7988 init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL);
7991#endif 7989#endif
7992 7990
7993 for (j = 0; j < CPU_LOAD_IDX_MAX; j++) 7991 for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
@@ -8379,6 +8377,7 @@ static void free_sched_group(struct task_group *tg)
8379{ 8377{
8380 free_fair_sched_group(tg); 8378 free_fair_sched_group(tg);
8381 free_rt_sched_group(tg); 8379 free_rt_sched_group(tg);
8380 autogroup_free(tg);
8382 kfree(tg); 8381 kfree(tg);
8383} 8382}
8384 8383
@@ -8812,7 +8811,7 @@ cpu_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cgrp)
8812 8811
8813 if (!cgrp->parent) { 8812 if (!cgrp->parent) {
8814 /* This is early initialization for the top cgroup */ 8813 /* This is early initialization for the top cgroup */
8815 return &init_task_group.css; 8814 return &root_task_group.css;
8816 } 8815 }
8817 8816
8818 parent = cgroup_tg(cgrp->parent); 8817 parent = cgroup_tg(cgrp->parent);
diff --git a/kernel/sched_autogroup.c b/kernel/sched_autogroup.c
index c80fedcd476b..32a723b8f84c 100644
--- a/kernel/sched_autogroup.c
+++ b/kernel/sched_autogroup.c
@@ -9,10 +9,10 @@ unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
9static struct autogroup autogroup_default; 9static struct autogroup autogroup_default;
10static atomic_t autogroup_seq_nr; 10static atomic_t autogroup_seq_nr;
11 11
12static void autogroup_init(struct task_struct *init_task) 12static void __init autogroup_init(struct task_struct *init_task)
13{ 13{
14 autogroup_default.tg = &init_task_group; 14 autogroup_default.tg = &root_task_group;
15 init_task_group.autogroup = &autogroup_default; 15 root_task_group.autogroup = &autogroup_default;
16 kref_init(&autogroup_default.kref); 16 kref_init(&autogroup_default.kref);
17 init_rwsem(&autogroup_default.lock); 17 init_rwsem(&autogroup_default.lock);
18 init_task->signal->autogroup = &autogroup_default; 18 init_task->signal->autogroup = &autogroup_default;
@@ -63,7 +63,7 @@ static inline struct autogroup *autogroup_create(void)
63 if (!ag) 63 if (!ag)
64 goto out_fail; 64 goto out_fail;
65 65
66 tg = sched_create_group(&init_task_group); 66 tg = sched_create_group(&root_task_group);
67 67
68 if (IS_ERR(tg)) 68 if (IS_ERR(tg))
69 goto out_free; 69 goto out_free;
diff --git a/kernel/smp.c b/kernel/smp.c
index 12ed8b013e2d..4ec30e069987 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -13,6 +13,7 @@
13#include <linux/smp.h> 13#include <linux/smp.h>
14#include <linux/cpu.h> 14#include <linux/cpu.h>
15 15
16#ifdef CONFIG_USE_GENERIC_SMP_HELPERS
16static struct { 17static struct {
17 struct list_head queue; 18 struct list_head queue;
18 raw_spinlock_t lock; 19 raw_spinlock_t lock;
@@ -529,3 +530,21 @@ void ipi_call_unlock_irq(void)
529{ 530{
530 raw_spin_unlock_irq(&call_function.lock); 531 raw_spin_unlock_irq(&call_function.lock);
531} 532}
533#endif /* USE_GENERIC_SMP_HELPERS */
534
535/*
536 * Call a function on all processors
537 */
538int on_each_cpu(void (*func) (void *info), void *info, int wait)
539{
540 int ret = 0;
541
542 preempt_disable();
543 ret = smp_call_function(func, info, wait);
544 local_irq_disable();
545 func(info);
546 local_irq_enable();
547 preempt_enable();
548 return ret;
549}
550EXPORT_SYMBOL(on_each_cpu);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index d4d918a91881..68eb5efec388 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -70,7 +70,7 @@ char *softirq_to_name[NR_SOFTIRQS] = {
70static void wakeup_softirqd(void) 70static void wakeup_softirqd(void)
71{ 71{
72 /* Interrupts are disabled: no need to stop preemption */ 72 /* Interrupts are disabled: no need to stop preemption */
73 struct task_struct *tsk = __get_cpu_var(ksoftirqd); 73 struct task_struct *tsk = __this_cpu_read(ksoftirqd);
74 74
75 if (tsk && tsk->state != TASK_RUNNING) 75 if (tsk && tsk->state != TASK_RUNNING)
76 wake_up_process(tsk); 76 wake_up_process(tsk);
@@ -388,8 +388,8 @@ void __tasklet_schedule(struct tasklet_struct *t)
388 388
389 local_irq_save(flags); 389 local_irq_save(flags);
390 t->next = NULL; 390 t->next = NULL;
391 *__get_cpu_var(tasklet_vec).tail = t; 391 *__this_cpu_read(tasklet_vec.tail) = t;
392 __get_cpu_var(tasklet_vec).tail = &(t->next); 392 __this_cpu_write(tasklet_vec.tail, &(t->next));
393 raise_softirq_irqoff(TASKLET_SOFTIRQ); 393 raise_softirq_irqoff(TASKLET_SOFTIRQ);
394 local_irq_restore(flags); 394 local_irq_restore(flags);
395} 395}
@@ -402,8 +402,8 @@ void __tasklet_hi_schedule(struct tasklet_struct *t)
402 402
403 local_irq_save(flags); 403 local_irq_save(flags);
404 t->next = NULL; 404 t->next = NULL;
405 *__get_cpu_var(tasklet_hi_vec).tail = t; 405 *__this_cpu_read(tasklet_hi_vec.tail) = t;
406 __get_cpu_var(tasklet_hi_vec).tail = &(t->next); 406 __this_cpu_write(tasklet_hi_vec.tail, &(t->next));
407 raise_softirq_irqoff(HI_SOFTIRQ); 407 raise_softirq_irqoff(HI_SOFTIRQ);
408 local_irq_restore(flags); 408 local_irq_restore(flags);
409} 409}
@@ -414,8 +414,8 @@ void __tasklet_hi_schedule_first(struct tasklet_struct *t)
414{ 414{
415 BUG_ON(!irqs_disabled()); 415 BUG_ON(!irqs_disabled());
416 416
417 t->next = __get_cpu_var(tasklet_hi_vec).head; 417 t->next = __this_cpu_read(tasklet_hi_vec.head);
418 __get_cpu_var(tasklet_hi_vec).head = t; 418 __this_cpu_write(tasklet_hi_vec.head, t);
419 __raise_softirq_irqoff(HI_SOFTIRQ); 419 __raise_softirq_irqoff(HI_SOFTIRQ);
420} 420}
421 421
@@ -426,9 +426,9 @@ static void tasklet_action(struct softirq_action *a)
426 struct tasklet_struct *list; 426 struct tasklet_struct *list;
427 427
428 local_irq_disable(); 428 local_irq_disable();
429 list = __get_cpu_var(tasklet_vec).head; 429 list = __this_cpu_read(tasklet_vec.head);
430 __get_cpu_var(tasklet_vec).head = NULL; 430 __this_cpu_write(tasklet_vec.head, NULL);
431 __get_cpu_var(tasklet_vec).tail = &__get_cpu_var(tasklet_vec).head; 431 __this_cpu_write(tasklet_vec.tail, &__get_cpu_var(tasklet_vec).head);
432 local_irq_enable(); 432 local_irq_enable();
433 433
434 while (list) { 434 while (list) {
@@ -449,8 +449,8 @@ static void tasklet_action(struct softirq_action *a)
449 449
450 local_irq_disable(); 450 local_irq_disable();
451 t->next = NULL; 451 t->next = NULL;
452 *__get_cpu_var(tasklet_vec).tail = t; 452 *__this_cpu_read(tasklet_vec.tail) = t;
453 __get_cpu_var(tasklet_vec).tail = &(t->next); 453 __this_cpu_write(tasklet_vec.tail, &(t->next));
454 __raise_softirq_irqoff(TASKLET_SOFTIRQ); 454 __raise_softirq_irqoff(TASKLET_SOFTIRQ);
455 local_irq_enable(); 455 local_irq_enable();
456 } 456 }
@@ -461,9 +461,9 @@ static void tasklet_hi_action(struct softirq_action *a)
461 struct tasklet_struct *list; 461 struct tasklet_struct *list;
462 462
463 local_irq_disable(); 463 local_irq_disable();
464 list = __get_cpu_var(tasklet_hi_vec).head; 464 list = __this_cpu_read(tasklet_hi_vec.head);
465 __get_cpu_var(tasklet_hi_vec).head = NULL; 465 __this_cpu_write(tasklet_hi_vec.head, NULL);
466 __get_cpu_var(tasklet_hi_vec).tail = &__get_cpu_var(tasklet_hi_vec).head; 466 __this_cpu_write(tasklet_hi_vec.tail, &__get_cpu_var(tasklet_hi_vec).head);
467 local_irq_enable(); 467 local_irq_enable();
468 468
469 while (list) { 469 while (list) {
@@ -484,8 +484,8 @@ static void tasklet_hi_action(struct softirq_action *a)
484 484
485 local_irq_disable(); 485 local_irq_disable();
486 t->next = NULL; 486 t->next = NULL;
487 *__get_cpu_var(tasklet_hi_vec).tail = t; 487 *__this_cpu_read(tasklet_hi_vec.tail) = t;
488 __get_cpu_var(tasklet_hi_vec).tail = &(t->next); 488 __this_cpu_write(tasklet_hi_vec.tail, &(t->next));
489 __raise_softirq_irqoff(HI_SOFTIRQ); 489 __raise_softirq_irqoff(HI_SOFTIRQ);
490 local_irq_enable(); 490 local_irq_enable();
491 } 491 }
@@ -802,16 +802,16 @@ static void takeover_tasklets(unsigned int cpu)
802 802
803 /* Find end, append list for that CPU. */ 803 /* Find end, append list for that CPU. */
804 if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) { 804 if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
805 *(__get_cpu_var(tasklet_vec).tail) = per_cpu(tasklet_vec, cpu).head; 805 *__this_cpu_read(tasklet_vec.tail) = per_cpu(tasklet_vec, cpu).head;
806 __get_cpu_var(tasklet_vec).tail = per_cpu(tasklet_vec, cpu).tail; 806 this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail);
807 per_cpu(tasklet_vec, cpu).head = NULL; 807 per_cpu(tasklet_vec, cpu).head = NULL;
808 per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head; 808 per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
809 } 809 }
810 raise_softirq_irqoff(TASKLET_SOFTIRQ); 810 raise_softirq_irqoff(TASKLET_SOFTIRQ);
811 811
812 if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) { 812 if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
813 *__get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).head; 813 *__this_cpu_read(tasklet_hi_vec.tail) = per_cpu(tasklet_hi_vec, cpu).head;
814 __get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).tail; 814 __this_cpu_write(tasklet_hi_vec.tail, per_cpu(tasklet_hi_vec, cpu).tail);
815 per_cpu(tasklet_hi_vec, cpu).head = NULL; 815 per_cpu(tasklet_hi_vec, cpu).head = NULL;
816 per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head; 816 per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
817 } 817 }
@@ -853,7 +853,7 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb,
853 cpumask_any(cpu_online_mask)); 853 cpumask_any(cpu_online_mask));
854 case CPU_DEAD: 854 case CPU_DEAD:
855 case CPU_DEAD_FROZEN: { 855 case CPU_DEAD_FROZEN: {
856 static struct sched_param param = { 856 static const struct sched_param param = {
857 .sched_priority = MAX_RT_PRIO-1 857 .sched_priority = MAX_RT_PRIO-1
858 }; 858 };
859 859
@@ -885,25 +885,6 @@ static __init int spawn_ksoftirqd(void)
885} 885}
886early_initcall(spawn_ksoftirqd); 886early_initcall(spawn_ksoftirqd);
887 887
888#ifdef CONFIG_SMP
889/*
890 * Call a function on all processors
891 */
892int on_each_cpu(void (*func) (void *info), void *info, int wait)
893{
894 int ret = 0;
895
896 preempt_disable();
897 ret = smp_call_function(func, info, wait);
898 local_irq_disable();
899 func(info);
900 local_irq_enable();
901 preempt_enable();
902 return ret;
903}
904EXPORT_SYMBOL(on_each_cpu);
905#endif
906
907/* 888/*
908 * [ These __weak aliases are kept in a separate compilation unit, so that 889 * [ These __weak aliases are kept in a separate compilation unit, so that
909 * GCC does not inline them incorrectly. ] 890 * GCC does not inline them incorrectly. ]
diff --git a/kernel/srcu.c b/kernel/srcu.c
index 98d8c1e80edb..73ce23feaea9 100644
--- a/kernel/srcu.c
+++ b/kernel/srcu.c
@@ -156,6 +156,16 @@ void __srcu_read_unlock(struct srcu_struct *sp, int idx)
156EXPORT_SYMBOL_GPL(__srcu_read_unlock); 156EXPORT_SYMBOL_GPL(__srcu_read_unlock);
157 157
158/* 158/*
159 * We use an adaptive strategy for synchronize_srcu() and especially for
160 * synchronize_srcu_expedited(). We spin for a fixed time period
161 * (defined below) to allow SRCU readers to exit their read-side critical
162 * sections. If there are still some readers after 10 microseconds,
163 * we repeatedly block for 1-millisecond time periods. This approach
164 * has done well in testing, so there is no need for a config parameter.
165 */
166#define SYNCHRONIZE_SRCU_READER_DELAY 10
167
168/*
159 * Helper function for synchronize_srcu() and synchronize_srcu_expedited(). 169 * Helper function for synchronize_srcu() and synchronize_srcu_expedited().
160 */ 170 */
161static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void)) 171static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void))
@@ -207,11 +217,12 @@ static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void))
207 * will have finished executing. We initially give readers 217 * will have finished executing. We initially give readers
208 * an arbitrarily chosen 10 microseconds to get out of their 218 * an arbitrarily chosen 10 microseconds to get out of their
209 * SRCU read-side critical sections, then loop waiting 1/HZ 219 * SRCU read-side critical sections, then loop waiting 1/HZ
210 * seconds per iteration. 220 * seconds per iteration. The 10-microsecond value has done
221 * very well in testing.
211 */ 222 */
212 223
213 if (srcu_readers_active_idx(sp, idx)) 224 if (srcu_readers_active_idx(sp, idx))
214 udelay(CONFIG_SRCU_SYNCHRONIZE_DELAY); 225 udelay(SYNCHRONIZE_SRCU_READER_DELAY);
215 while (srcu_readers_active_idx(sp, idx)) 226 while (srcu_readers_active_idx(sp, idx))
216 schedule_timeout_interruptible(1); 227 schedule_timeout_interruptible(1);
217 228
diff --git a/kernel/sys.c b/kernel/sys.c
index 2745dcdb6c6c..31b71a276b40 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -43,6 +43,8 @@
43#include <linux/kprobes.h> 43#include <linux/kprobes.h>
44#include <linux/user_namespace.h> 44#include <linux/user_namespace.h>
45 45
46#include <linux/kmsg_dump.h>
47
46#include <asm/uaccess.h> 48#include <asm/uaccess.h>
47#include <asm/io.h> 49#include <asm/io.h>
48#include <asm/unistd.h> 50#include <asm/unistd.h>
@@ -285,6 +287,7 @@ out_unlock:
285 */ 287 */
286void emergency_restart(void) 288void emergency_restart(void)
287{ 289{
290 kmsg_dump(KMSG_DUMP_EMERG);
288 machine_emergency_restart(); 291 machine_emergency_restart();
289} 292}
290EXPORT_SYMBOL_GPL(emergency_restart); 293EXPORT_SYMBOL_GPL(emergency_restart);
@@ -312,6 +315,7 @@ void kernel_restart(char *cmd)
312 printk(KERN_EMERG "Restarting system.\n"); 315 printk(KERN_EMERG "Restarting system.\n");
313 else 316 else
314 printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd); 317 printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd);
318 kmsg_dump(KMSG_DUMP_RESTART);
315 machine_restart(cmd); 319 machine_restart(cmd);
316} 320}
317EXPORT_SYMBOL_GPL(kernel_restart); 321EXPORT_SYMBOL_GPL(kernel_restart);
@@ -333,6 +337,7 @@ void kernel_halt(void)
333 kernel_shutdown_prepare(SYSTEM_HALT); 337 kernel_shutdown_prepare(SYSTEM_HALT);
334 sysdev_shutdown(); 338 sysdev_shutdown();
335 printk(KERN_EMERG "System halted.\n"); 339 printk(KERN_EMERG "System halted.\n");
340 kmsg_dump(KMSG_DUMP_HALT);
336 machine_halt(); 341 machine_halt();
337} 342}
338 343
@@ -351,6 +356,7 @@ void kernel_power_off(void)
351 disable_nonboot_cpus(); 356 disable_nonboot_cpus();
352 sysdev_shutdown(); 357 sysdev_shutdown();
353 printk(KERN_EMERG "Power down.\n"); 358 printk(KERN_EMERG "Power down.\n");
359 kmsg_dump(KMSG_DUMP_POWEROFF);
354 machine_power_off(); 360 machine_power_off();
355} 361}
356EXPORT_SYMBOL_GPL(kernel_power_off); 362EXPORT_SYMBOL_GPL(kernel_power_off);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ae5cbb1e3ced..bc86bb32e126 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -24,6 +24,7 @@
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/sysctl.h> 25#include <linux/sysctl.h>
26#include <linux/signal.h> 26#include <linux/signal.h>
27#include <linux/printk.h>
27#include <linux/proc_fs.h> 28#include <linux/proc_fs.h>
28#include <linux/security.h> 29#include <linux/security.h>
29#include <linux/ctype.h> 30#include <linux/ctype.h>
@@ -245,10 +246,6 @@ static struct ctl_table root_table[] = {
245 .mode = 0555, 246 .mode = 0555,
246 .child = dev_table, 247 .child = dev_table,
247 }, 248 },
248/*
249 * NOTE: do not add new entries to this table unless you have read
250 * Documentation/sysctl/ctl_unnumbered.txt
251 */
252 { } 249 { }
253}; 250};
254 251
@@ -710,6 +707,15 @@ static struct ctl_table kern_table[] = {
710 .extra1 = &zero, 707 .extra1 = &zero,
711 .extra2 = &one, 708 .extra2 = &one,
712 }, 709 },
710 {
711 .procname = "kptr_restrict",
712 .data = &kptr_restrict,
713 .maxlen = sizeof(int),
714 .mode = 0644,
715 .proc_handler = proc_dointvec_minmax,
716 .extra1 = &zero,
717 .extra2 = &two,
718 },
713#endif 719#endif
714 { 720 {
715 .procname = "ngroups_max", 721 .procname = "ngroups_max",
@@ -962,10 +968,6 @@ static struct ctl_table kern_table[] = {
962 .proc_handler = proc_dointvec, 968 .proc_handler = proc_dointvec,
963 }, 969 },
964#endif 970#endif
965/*
966 * NOTE: do not add new entries to this table unless you have read
967 * Documentation/sysctl/ctl_unnumbered.txt
968 */
969 { } 971 { }
970}; 972};
971 973
@@ -1326,11 +1328,6 @@ static struct ctl_table vm_table[] = {
1326 .extra2 = &one, 1328 .extra2 = &one,
1327 }, 1329 },
1328#endif 1330#endif
1329
1330/*
1331 * NOTE: do not add new entries to this table unless you have read
1332 * Documentation/sysctl/ctl_unnumbered.txt
1333 */
1334 { } 1331 { }
1335}; 1332};
1336 1333
@@ -1486,10 +1483,6 @@ static struct ctl_table fs_table[] = {
1486 .proc_handler = &pipe_proc_fn, 1483 .proc_handler = &pipe_proc_fn,
1487 .extra1 = &pipe_min_size, 1484 .extra1 = &pipe_min_size,
1488 }, 1485 },
1489/*
1490 * NOTE: do not add new entries to this table unless you have read
1491 * Documentation/sysctl/ctl_unnumbered.txt
1492 */
1493 { } 1486 { }
1494}; 1487};
1495 1488
@@ -2899,7 +2892,7 @@ int proc_do_large_bitmap(struct ctl_table *table, int write,
2899 } 2892 }
2900} 2893}
2901 2894
2902#else /* CONFIG_PROC_FS */ 2895#else /* CONFIG_PROC_SYSCTL */
2903 2896
2904int proc_dostring(struct ctl_table *table, int write, 2897int proc_dostring(struct ctl_table *table, int write,
2905 void __user *buffer, size_t *lenp, loff_t *ppos) 2898 void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -2951,7 +2944,7 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2951} 2944}
2952 2945
2953 2946
2954#endif /* CONFIG_PROC_FS */ 2947#endif /* CONFIG_PROC_SYSCTL */
2955 2948
2956/* 2949/*
2957 * No sense putting this after each symbol definition, twice, 2950 * No sense putting this after each symbol definition, twice,
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 4b2545a136ff..b875bedf7c9a 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -1192,7 +1192,7 @@ static ssize_t bin_dn_node_address(struct file *file,
1192 1192
1193 buf[result] = '\0'; 1193 buf[result] = '\0';
1194 1194
1195 /* Convert the decnet addresss to binary */ 1195 /* Convert the decnet address to binary */
1196 result = -EIO; 1196 result = -EIO;
1197 nodep = strchr(buf, '.') + 1; 1197 nodep = strchr(buf, '.') + 1;
1198 if (!nodep) 1198 if (!nodep)
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 3308fd7f1b52..3971c6b9d58d 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -89,8 +89,7 @@ static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp,
89 return -ENOMEM; 89 return -ENOMEM;
90 90
91 if (!info) { 91 if (!info) {
92 int seq = get_cpu_var(taskstats_seqnum)++; 92 int seq = this_cpu_inc_return(taskstats_seqnum) - 1;
93 put_cpu_var(taskstats_seqnum);
94 93
95 reply = genlmsg_put(skb, 0, seq, &family, 0, cmd); 94 reply = genlmsg_put(skb, 0, seq, &family, 0, cmd);
96 } else 95 } else
@@ -349,7 +348,7 @@ static int parse(struct nlattr *na, struct cpumask *mask)
349 return ret; 348 return ret;
350} 349}
351 350
352#ifdef CONFIG_IA64 351#if defined(CONFIG_64BIT) && !defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
353#define TASKSTATS_NEEDS_PADDING 1 352#define TASKSTATS_NEEDS_PADDING 1
354#endif 353#endif
355 354
@@ -612,7 +611,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead)
612 fill_tgid_exit(tsk); 611 fill_tgid_exit(tsk);
613 } 612 }
614 613
615 listeners = &__raw_get_cpu_var(listener_array); 614 listeners = __this_cpu_ptr(&listener_array);
616 if (list_empty(&listeners->list)) 615 if (list_empty(&listeners->list))
617 return; 616 return;
618 617
diff --git a/kernel/time.c b/kernel/time.c
index ba9b338d1835..32174359576f 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -238,7 +238,7 @@ EXPORT_SYMBOL(current_fs_time);
238 * Avoid unnecessary multiplications/divisions in the 238 * Avoid unnecessary multiplications/divisions in the
239 * two most common HZ cases: 239 * two most common HZ cases:
240 */ 240 */
241unsigned int inline jiffies_to_msecs(const unsigned long j) 241inline unsigned int jiffies_to_msecs(const unsigned long j)
242{ 242{
243#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) 243#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
244 return (MSEC_PER_SEC / HZ) * j; 244 return (MSEC_PER_SEC / HZ) * j;
@@ -254,7 +254,7 @@ unsigned int inline jiffies_to_msecs(const unsigned long j)
254} 254}
255EXPORT_SYMBOL(jiffies_to_msecs); 255EXPORT_SYMBOL(jiffies_to_msecs);
256 256
257unsigned int inline jiffies_to_usecs(const unsigned long j) 257inline unsigned int jiffies_to_usecs(const unsigned long j)
258{ 258{
259#if HZ <= USEC_PER_SEC && !(USEC_PER_SEC % HZ) 259#if HZ <= USEC_PER_SEC && !(USEC_PER_SEC % HZ)
260 return (USEC_PER_SEC / HZ) * j; 260 return (USEC_PER_SEC / HZ) * j;
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 8588abcac07b..6519cf62d9cd 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -152,6 +152,7 @@ clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 maxsec)
152 */ 152 */
153 for (sft = 32; sft > 0; sft--) { 153 for (sft = 32; sft > 0; sft--) {
154 tmp = (u64) to << sft; 154 tmp = (u64) to << sft;
155 tmp += from / 2;
155 do_div(tmp, from); 156 do_div(tmp, from);
156 if ((tmp >> sftacc) == 0) 157 if ((tmp >> sftacc) == 0)
157 break; 158 break;
@@ -678,7 +679,7 @@ EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale);
678int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq) 679int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
679{ 680{
680 681
681 /* Intialize mult/shift and max_idle_ns */ 682 /* Initialize mult/shift and max_idle_ns */
682 __clocksource_updatefreq_scale(cs, scale, freq); 683 __clocksource_updatefreq_scale(cs, scale, freq);
683 684
684 /* Add clocksource to the clcoksource list */ 685 /* Add clocksource to the clcoksource list */
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index d2321891538f..5c00242fa921 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -14,6 +14,7 @@
14#include <linux/timex.h> 14#include <linux/timex.h>
15#include <linux/time.h> 15#include <linux/time.h>
16#include <linux/mm.h> 16#include <linux/mm.h>
17#include <linux/module.h>
17 18
18/* 19/*
19 * NTP timekeeping variables: 20 * NTP timekeeping variables:
@@ -74,6 +75,162 @@ static long time_adjust;
74/* constant (boot-param configurable) NTP tick adjustment (upscaled) */ 75/* constant (boot-param configurable) NTP tick adjustment (upscaled) */
75static s64 ntp_tick_adj; 76static s64 ntp_tick_adj;
76 77
78#ifdef CONFIG_NTP_PPS
79
80/*
81 * The following variables are used when a pulse-per-second (PPS) signal
82 * is available. They establish the engineering parameters of the clock
83 * discipline loop when controlled by the PPS signal.
84 */
85#define PPS_VALID 10 /* PPS signal watchdog max (s) */
86#define PPS_POPCORN 4 /* popcorn spike threshold (shift) */
87#define PPS_INTMIN 2 /* min freq interval (s) (shift) */
88#define PPS_INTMAX 8 /* max freq interval (s) (shift) */
89#define PPS_INTCOUNT 4 /* number of consecutive good intervals to
90 increase pps_shift or consecutive bad
91 intervals to decrease it */
92#define PPS_MAXWANDER 100000 /* max PPS freq wander (ns/s) */
93
94static int pps_valid; /* signal watchdog counter */
95static long pps_tf[3]; /* phase median filter */
96static long pps_jitter; /* current jitter (ns) */
97static struct timespec pps_fbase; /* beginning of the last freq interval */
98static int pps_shift; /* current interval duration (s) (shift) */
99static int pps_intcnt; /* interval counter */
100static s64 pps_freq; /* frequency offset (scaled ns/s) */
101static long pps_stabil; /* current stability (scaled ns/s) */
102
103/*
104 * PPS signal quality monitors
105 */
106static long pps_calcnt; /* calibration intervals */
107static long pps_jitcnt; /* jitter limit exceeded */
108static long pps_stbcnt; /* stability limit exceeded */
109static long pps_errcnt; /* calibration errors */
110
111
112/* PPS kernel consumer compensates the whole phase error immediately.
113 * Otherwise, reduce the offset by a fixed factor times the time constant.
114 */
115static inline s64 ntp_offset_chunk(s64 offset)
116{
117 if (time_status & STA_PPSTIME && time_status & STA_PPSSIGNAL)
118 return offset;
119 else
120 return shift_right(offset, SHIFT_PLL + time_constant);
121}
122
123static inline void pps_reset_freq_interval(void)
124{
125 /* the PPS calibration interval may end
126 surprisingly early */
127 pps_shift = PPS_INTMIN;
128 pps_intcnt = 0;
129}
130
131/**
132 * pps_clear - Clears the PPS state variables
133 *
134 * Must be called while holding a write on the xtime_lock
135 */
136static inline void pps_clear(void)
137{
138 pps_reset_freq_interval();
139 pps_tf[0] = 0;
140 pps_tf[1] = 0;
141 pps_tf[2] = 0;
142 pps_fbase.tv_sec = pps_fbase.tv_nsec = 0;
143 pps_freq = 0;
144}
145
146/* Decrease pps_valid to indicate that another second has passed since
147 * the last PPS signal. When it reaches 0, indicate that PPS signal is
148 * missing.
149 *
150 * Must be called while holding a write on the xtime_lock
151 */
152static inline void pps_dec_valid(void)
153{
154 if (pps_valid > 0)
155 pps_valid--;
156 else {
157 time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
158 STA_PPSWANDER | STA_PPSERROR);
159 pps_clear();
160 }
161}
162
163static inline void pps_set_freq(s64 freq)
164{
165 pps_freq = freq;
166}
167
168static inline int is_error_status(int status)
169{
170 return (time_status & (STA_UNSYNC|STA_CLOCKERR))
171 /* PPS signal lost when either PPS time or
172 * PPS frequency synchronization requested
173 */
174 || ((time_status & (STA_PPSFREQ|STA_PPSTIME))
175 && !(time_status & STA_PPSSIGNAL))
176 /* PPS jitter exceeded when
177 * PPS time synchronization requested */
178 || ((time_status & (STA_PPSTIME|STA_PPSJITTER))
179 == (STA_PPSTIME|STA_PPSJITTER))
180 /* PPS wander exceeded or calibration error when
181 * PPS frequency synchronization requested
182 */
183 || ((time_status & STA_PPSFREQ)
184 && (time_status & (STA_PPSWANDER|STA_PPSERROR)));
185}
186
187static inline void pps_fill_timex(struct timex *txc)
188{
189 txc->ppsfreq = shift_right((pps_freq >> PPM_SCALE_INV_SHIFT) *
190 PPM_SCALE_INV, NTP_SCALE_SHIFT);
191 txc->jitter = pps_jitter;
192 if (!(time_status & STA_NANO))
193 txc->jitter /= NSEC_PER_USEC;
194 txc->shift = pps_shift;
195 txc->stabil = pps_stabil;
196 txc->jitcnt = pps_jitcnt;
197 txc->calcnt = pps_calcnt;
198 txc->errcnt = pps_errcnt;
199 txc->stbcnt = pps_stbcnt;
200}
201
202#else /* !CONFIG_NTP_PPS */
203
204static inline s64 ntp_offset_chunk(s64 offset)
205{
206 return shift_right(offset, SHIFT_PLL + time_constant);
207}
208
209static inline void pps_reset_freq_interval(void) {}
210static inline void pps_clear(void) {}
211static inline void pps_dec_valid(void) {}
212static inline void pps_set_freq(s64 freq) {}
213
214static inline int is_error_status(int status)
215{
216 return status & (STA_UNSYNC|STA_CLOCKERR);
217}
218
219static inline void pps_fill_timex(struct timex *txc)
220{
221 /* PPS is not implemented, so these are zero */
222 txc->ppsfreq = 0;
223 txc->jitter = 0;
224 txc->shift = 0;
225 txc->stabil = 0;
226 txc->jitcnt = 0;
227 txc->calcnt = 0;
228 txc->errcnt = 0;
229 txc->stbcnt = 0;
230}
231
232#endif /* CONFIG_NTP_PPS */
233
77/* 234/*
78 * NTP methods: 235 * NTP methods:
79 */ 236 */
@@ -185,6 +342,9 @@ void ntp_clear(void)
185 342
186 tick_length = tick_length_base; 343 tick_length = tick_length_base;
187 time_offset = 0; 344 time_offset = 0;
345
346 /* Clear PPS state variables */
347 pps_clear();
188} 348}
189 349
190/* 350/*
@@ -250,16 +410,16 @@ void second_overflow(void)
250 time_status |= STA_UNSYNC; 410 time_status |= STA_UNSYNC;
251 } 411 }
252 412
253 /* 413 /* Compute the phase adjustment for the next second */
254 * Compute the phase adjustment for the next second. The offset is
255 * reduced by a fixed factor times the time constant.
256 */
257 tick_length = tick_length_base; 414 tick_length = tick_length_base;
258 415
259 delta = shift_right(time_offset, SHIFT_PLL + time_constant); 416 delta = ntp_offset_chunk(time_offset);
260 time_offset -= delta; 417 time_offset -= delta;
261 tick_length += delta; 418 tick_length += delta;
262 419
420 /* Check PPS signal */
421 pps_dec_valid();
422
263 if (!time_adjust) 423 if (!time_adjust)
264 return; 424 return;
265 425
@@ -369,6 +529,8 @@ static inline void process_adj_status(struct timex *txc, struct timespec *ts)
369 if ((time_status & STA_PLL) && !(txc->status & STA_PLL)) { 529 if ((time_status & STA_PLL) && !(txc->status & STA_PLL)) {
370 time_state = TIME_OK; 530 time_state = TIME_OK;
371 time_status = STA_UNSYNC; 531 time_status = STA_UNSYNC;
532 /* restart PPS frequency calibration */
533 pps_reset_freq_interval();
372 } 534 }
373 535
374 /* 536 /*
@@ -418,6 +580,8 @@ static inline void process_adjtimex_modes(struct timex *txc, struct timespec *ts
418 time_freq = txc->freq * PPM_SCALE; 580 time_freq = txc->freq * PPM_SCALE;
419 time_freq = min(time_freq, MAXFREQ_SCALED); 581 time_freq = min(time_freq, MAXFREQ_SCALED);
420 time_freq = max(time_freq, -MAXFREQ_SCALED); 582 time_freq = max(time_freq, -MAXFREQ_SCALED);
583 /* update pps_freq */
584 pps_set_freq(time_freq);
421 } 585 }
422 586
423 if (txc->modes & ADJ_MAXERROR) 587 if (txc->modes & ADJ_MAXERROR)
@@ -508,7 +672,8 @@ int do_adjtimex(struct timex *txc)
508 } 672 }
509 673
510 result = time_state; /* mostly `TIME_OK' */ 674 result = time_state; /* mostly `TIME_OK' */
511 if (time_status & (STA_UNSYNC|STA_CLOCKERR)) 675 /* check for errors */
676 if (is_error_status(time_status))
512 result = TIME_ERROR; 677 result = TIME_ERROR;
513 678
514 txc->freq = shift_right((time_freq >> PPM_SCALE_INV_SHIFT) * 679 txc->freq = shift_right((time_freq >> PPM_SCALE_INV_SHIFT) *
@@ -522,15 +687,8 @@ int do_adjtimex(struct timex *txc)
522 txc->tick = tick_usec; 687 txc->tick = tick_usec;
523 txc->tai = time_tai; 688 txc->tai = time_tai;
524 689
525 /* PPS is not implemented, so these are zero */ 690 /* fill PPS status fields */
526 txc->ppsfreq = 0; 691 pps_fill_timex(txc);
527 txc->jitter = 0;
528 txc->shift = 0;
529 txc->stabil = 0;
530 txc->jitcnt = 0;
531 txc->calcnt = 0;
532 txc->errcnt = 0;
533 txc->stbcnt = 0;
534 692
535 write_sequnlock_irq(&xtime_lock); 693 write_sequnlock_irq(&xtime_lock);
536 694
@@ -544,6 +702,243 @@ int do_adjtimex(struct timex *txc)
544 return result; 702 return result;
545} 703}
546 704
705#ifdef CONFIG_NTP_PPS
706
707/* actually struct pps_normtime is good old struct timespec, but it is
708 * semantically different (and it is the reason why it was invented):
709 * pps_normtime.nsec has a range of ( -NSEC_PER_SEC / 2, NSEC_PER_SEC / 2 ]
710 * while timespec.tv_nsec has a range of [0, NSEC_PER_SEC) */
711struct pps_normtime {
712 __kernel_time_t sec; /* seconds */
713 long nsec; /* nanoseconds */
714};
715
716/* normalize the timestamp so that nsec is in the
717 ( -NSEC_PER_SEC / 2, NSEC_PER_SEC / 2 ] interval */
718static inline struct pps_normtime pps_normalize_ts(struct timespec ts)
719{
720 struct pps_normtime norm = {
721 .sec = ts.tv_sec,
722 .nsec = ts.tv_nsec
723 };
724
725 if (norm.nsec > (NSEC_PER_SEC >> 1)) {
726 norm.nsec -= NSEC_PER_SEC;
727 norm.sec++;
728 }
729
730 return norm;
731}
732
733/* get current phase correction and jitter */
734static inline long pps_phase_filter_get(long *jitter)
735{
736 *jitter = pps_tf[0] - pps_tf[1];
737 if (*jitter < 0)
738 *jitter = -*jitter;
739
740 /* TODO: test various filters */
741 return pps_tf[0];
742}
743
744/* add the sample to the phase filter */
745static inline void pps_phase_filter_add(long err)
746{
747 pps_tf[2] = pps_tf[1];
748 pps_tf[1] = pps_tf[0];
749 pps_tf[0] = err;
750}
751
752/* decrease frequency calibration interval length.
753 * It is halved after four consecutive unstable intervals.
754 */
755static inline void pps_dec_freq_interval(void)
756{
757 if (--pps_intcnt <= -PPS_INTCOUNT) {
758 pps_intcnt = -PPS_INTCOUNT;
759 if (pps_shift > PPS_INTMIN) {
760 pps_shift--;
761 pps_intcnt = 0;
762 }
763 }
764}
765
766/* increase frequency calibration interval length.
767 * It is doubled after four consecutive stable intervals.
768 */
769static inline void pps_inc_freq_interval(void)
770{
771 if (++pps_intcnt >= PPS_INTCOUNT) {
772 pps_intcnt = PPS_INTCOUNT;
773 if (pps_shift < PPS_INTMAX) {
774 pps_shift++;
775 pps_intcnt = 0;
776 }
777 }
778}
779
780/* update clock frequency based on MONOTONIC_RAW clock PPS signal
781 * timestamps
782 *
783 * At the end of the calibration interval the difference between the
784 * first and last MONOTONIC_RAW clock timestamps divided by the length
785 * of the interval becomes the frequency update. If the interval was
786 * too long, the data are discarded.
787 * Returns the difference between old and new frequency values.
788 */
789static long hardpps_update_freq(struct pps_normtime freq_norm)
790{
791 long delta, delta_mod;
792 s64 ftemp;
793
794 /* check if the frequency interval was too long */
795 if (freq_norm.sec > (2 << pps_shift)) {
796 time_status |= STA_PPSERROR;
797 pps_errcnt++;
798 pps_dec_freq_interval();
799 pr_err("hardpps: PPSERROR: interval too long - %ld s\n",
800 freq_norm.sec);
801 return 0;
802 }
803
804 /* here the raw frequency offset and wander (stability) is
805 * calculated. If the wander is less than the wander threshold
806 * the interval is increased; otherwise it is decreased.
807 */
808 ftemp = div_s64(((s64)(-freq_norm.nsec)) << NTP_SCALE_SHIFT,
809 freq_norm.sec);
810 delta = shift_right(ftemp - pps_freq, NTP_SCALE_SHIFT);
811 pps_freq = ftemp;
812 if (delta > PPS_MAXWANDER || delta < -PPS_MAXWANDER) {
813 pr_warning("hardpps: PPSWANDER: change=%ld\n", delta);
814 time_status |= STA_PPSWANDER;
815 pps_stbcnt++;
816 pps_dec_freq_interval();
817 } else { /* good sample */
818 pps_inc_freq_interval();
819 }
820
821 /* the stability metric is calculated as the average of recent
822 * frequency changes, but is used only for performance
823 * monitoring
824 */
825 delta_mod = delta;
826 if (delta_mod < 0)
827 delta_mod = -delta_mod;
828 pps_stabil += (div_s64(((s64)delta_mod) <<
829 (NTP_SCALE_SHIFT - SHIFT_USEC),
830 NSEC_PER_USEC) - pps_stabil) >> PPS_INTMIN;
831
832 /* if enabled, the system clock frequency is updated */
833 if ((time_status & STA_PPSFREQ) != 0 &&
834 (time_status & STA_FREQHOLD) == 0) {
835 time_freq = pps_freq;
836 ntp_update_frequency();
837 }
838
839 return delta;
840}
841
842/* correct REALTIME clock phase error against PPS signal */
843static void hardpps_update_phase(long error)
844{
845 long correction = -error;
846 long jitter;
847
848 /* add the sample to the median filter */
849 pps_phase_filter_add(correction);
850 correction = pps_phase_filter_get(&jitter);
851
852 /* Nominal jitter is due to PPS signal noise. If it exceeds the
853 * threshold, the sample is discarded; otherwise, if so enabled,
854 * the time offset is updated.
855 */
856 if (jitter > (pps_jitter << PPS_POPCORN)) {
857 pr_warning("hardpps: PPSJITTER: jitter=%ld, limit=%ld\n",
858 jitter, (pps_jitter << PPS_POPCORN));
859 time_status |= STA_PPSJITTER;
860 pps_jitcnt++;
861 } else if (time_status & STA_PPSTIME) {
862 /* correct the time using the phase offset */
863 time_offset = div_s64(((s64)correction) << NTP_SCALE_SHIFT,
864 NTP_INTERVAL_FREQ);
865 /* cancel running adjtime() */
866 time_adjust = 0;
867 }
868 /* update jitter */
869 pps_jitter += (jitter - pps_jitter) >> PPS_INTMIN;
870}
871
872/*
873 * hardpps() - discipline CPU clock oscillator to external PPS signal
874 *
875 * This routine is called at each PPS signal arrival in order to
876 * discipline the CPU clock oscillator to the PPS signal. It takes two
877 * parameters: REALTIME and MONOTONIC_RAW clock timestamps. The former
878 * is used to correct clock phase error and the latter is used to
879 * correct the frequency.
880 *
881 * This code is based on David Mills's reference nanokernel
882 * implementation. It was mostly rewritten but keeps the same idea.
883 */
884void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
885{
886 struct pps_normtime pts_norm, freq_norm;
887 unsigned long flags;
888
889 pts_norm = pps_normalize_ts(*phase_ts);
890
891 write_seqlock_irqsave(&xtime_lock, flags);
892
893 /* clear the error bits, they will be set again if needed */
894 time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR);
895
896 /* indicate signal presence */
897 time_status |= STA_PPSSIGNAL;
898 pps_valid = PPS_VALID;
899
900 /* when called for the first time,
901 * just start the frequency interval */
902 if (unlikely(pps_fbase.tv_sec == 0)) {
903 pps_fbase = *raw_ts;
904 write_sequnlock_irqrestore(&xtime_lock, flags);
905 return;
906 }
907
908 /* ok, now we have a base for frequency calculation */
909 freq_norm = pps_normalize_ts(timespec_sub(*raw_ts, pps_fbase));
910
911 /* check that the signal is in the range
912 * [1s - MAXFREQ us, 1s + MAXFREQ us], otherwise reject it */
913 if ((freq_norm.sec == 0) ||
914 (freq_norm.nsec > MAXFREQ * freq_norm.sec) ||
915 (freq_norm.nsec < -MAXFREQ * freq_norm.sec)) {
916 time_status |= STA_PPSJITTER;
917 /* restart the frequency calibration interval */
918 pps_fbase = *raw_ts;
919 write_sequnlock_irqrestore(&xtime_lock, flags);
920 pr_err("hardpps: PPSJITTER: bad pulse\n");
921 return;
922 }
923
924 /* signal is ok */
925
926 /* check if the current frequency interval is finished */
927 if (freq_norm.sec >= (1 << pps_shift)) {
928 pps_calcnt++;
929 /* restart the frequency calibration interval */
930 pps_fbase = *raw_ts;
931 hardpps_update_freq(freq_norm);
932 }
933
934 hardpps_update_phase(pts_norm.nsec);
935
936 write_sequnlock_irqrestore(&xtime_lock, flags);
937}
938EXPORT_SYMBOL(hardpps);
939
940#endif /* CONFIG_NTP_PPS */
941
547static int __init ntp_tick_adj_setup(char *str) 942static int __init ntp_tick_adj_setup(char *str)
548{ 943{
549 ntp_tick_adj = simple_strtol(str, NULL, 0); 944 ntp_tick_adj = simple_strtol(str, NULL, 0);
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index b6b898d2eeef..051bc80a0c43 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -49,7 +49,7 @@ struct tick_device *tick_get_device(int cpu)
49 */ 49 */
50int tick_is_oneshot_available(void) 50int tick_is_oneshot_available(void)
51{ 51{
52 struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; 52 struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
53 53
54 return dev && (dev->features & CLOCK_EVT_FEAT_ONESHOT); 54 return dev && (dev->features & CLOCK_EVT_FEAT_ONESHOT);
55} 55}
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c
index aada0e52680a..5cbc101f908b 100644
--- a/kernel/time/tick-oneshot.c
+++ b/kernel/time/tick-oneshot.c
@@ -95,7 +95,7 @@ int tick_dev_program_event(struct clock_event_device *dev, ktime_t expires,
95 */ 95 */
96int tick_program_event(ktime_t expires, int force) 96int tick_program_event(ktime_t expires, int force)
97{ 97{
98 struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; 98 struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
99 99
100 return tick_dev_program_event(dev, expires, force); 100 return tick_dev_program_event(dev, expires, force);
101} 101}
@@ -167,7 +167,7 @@ int tick_oneshot_mode_active(void)
167 int ret; 167 int ret;
168 168
169 local_irq_save(flags); 169 local_irq_save(flags);
170 ret = __get_cpu_var(tick_cpu_device).mode == TICKDEV_MODE_ONESHOT; 170 ret = __this_cpu_read(tick_cpu_device.mode) == TICKDEV_MODE_ONESHOT;
171 local_irq_restore(flags); 171 local_irq_restore(flags);
172 172
173 return ret; 173 return ret;
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index eef7452bd8a9..d27c7562902c 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -288,6 +288,49 @@ void ktime_get_ts(struct timespec *ts)
288} 288}
289EXPORT_SYMBOL_GPL(ktime_get_ts); 289EXPORT_SYMBOL_GPL(ktime_get_ts);
290 290
291#ifdef CONFIG_NTP_PPS
292
293/**
294 * getnstime_raw_and_real - get day and raw monotonic time in timespec format
295 * @ts_raw: pointer to the timespec to be set to raw monotonic time
296 * @ts_real: pointer to the timespec to be set to the time of day
297 *
298 * This function reads both the time of day and raw monotonic time at the
299 * same time atomically and stores the resulting timestamps in timespec
300 * format.
301 */
302void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
303{
304 unsigned long seq;
305 s64 nsecs_raw, nsecs_real;
306
307 WARN_ON_ONCE(timekeeping_suspended);
308
309 do {
310 u32 arch_offset;
311
312 seq = read_seqbegin(&xtime_lock);
313
314 *ts_raw = raw_time;
315 *ts_real = xtime;
316
317 nsecs_raw = timekeeping_get_ns_raw();
318 nsecs_real = timekeeping_get_ns();
319
320 /* If arch requires, add in gettimeoffset() */
321 arch_offset = arch_gettimeoffset();
322 nsecs_raw += arch_offset;
323 nsecs_real += arch_offset;
324
325 } while (read_seqretry(&xtime_lock, seq));
326
327 timespec_add_ns(ts_raw, nsecs_raw);
328 timespec_add_ns(ts_real, nsecs_real);
329}
330EXPORT_SYMBOL(getnstime_raw_and_real);
331
332#endif /* CONFIG_NTP_PPS */
333
291/** 334/**
292 * do_gettimeofday - Returns the time of day in a timeval 335 * do_gettimeofday - Returns the time of day in a timeval
293 * @tv: pointer to the timeval to be set 336 * @tv: pointer to the timeval to be set
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 53f338190b26..761c510a06c5 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -52,7 +52,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o
52endif 52endif
53obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o 53obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
54obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o 54obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
55obj-$(CONFIG_EVENT_TRACING) += power-traces.o 55obj-$(CONFIG_TRACEPOINTS) += power-traces.o
56ifeq ($(CONFIG_TRACING),y) 56ifeq ($(CONFIG_TRACING),y)
57obj-$(CONFIG_KGDB_KDB) += trace_kdb.o 57obj-$(CONFIG_KGDB_KDB) += trace_kdb.o
58endif 58endif
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 7b8ec0281548..153562d0b93c 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -758,53 +758,58 @@ static void blk_add_trace_rq_complete(void *ignore,
758 * @q: queue the io is for 758 * @q: queue the io is for
759 * @bio: the source bio 759 * @bio: the source bio
760 * @what: the action 760 * @what: the action
761 * @error: error, if any
761 * 762 *
762 * Description: 763 * Description:
763 * Records an action against a bio. Will log the bio offset + size. 764 * Records an action against a bio. Will log the bio offset + size.
764 * 765 *
765 **/ 766 **/
766static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, 767static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
767 u32 what) 768 u32 what, int error)
768{ 769{
769 struct blk_trace *bt = q->blk_trace; 770 struct blk_trace *bt = q->blk_trace;
770 771
771 if (likely(!bt)) 772 if (likely(!bt))
772 return; 773 return;
773 774
775 if (!error && !bio_flagged(bio, BIO_UPTODATE))
776 error = EIO;
777
774 __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, 778 __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what,
775 !bio_flagged(bio, BIO_UPTODATE), 0, NULL); 779 error, 0, NULL);
776} 780}
777 781
778static void blk_add_trace_bio_bounce(void *ignore, 782static void blk_add_trace_bio_bounce(void *ignore,
779 struct request_queue *q, struct bio *bio) 783 struct request_queue *q, struct bio *bio)
780{ 784{
781 blk_add_trace_bio(q, bio, BLK_TA_BOUNCE); 785 blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0);
782} 786}
783 787
784static void blk_add_trace_bio_complete(void *ignore, 788static void blk_add_trace_bio_complete(void *ignore,
785 struct request_queue *q, struct bio *bio) 789 struct request_queue *q, struct bio *bio,
790 int error)
786{ 791{
787 blk_add_trace_bio(q, bio, BLK_TA_COMPLETE); 792 blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error);
788} 793}
789 794
790static void blk_add_trace_bio_backmerge(void *ignore, 795static void blk_add_trace_bio_backmerge(void *ignore,
791 struct request_queue *q, 796 struct request_queue *q,
792 struct bio *bio) 797 struct bio *bio)
793{ 798{
794 blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); 799 blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0);
795} 800}
796 801
797static void blk_add_trace_bio_frontmerge(void *ignore, 802static void blk_add_trace_bio_frontmerge(void *ignore,
798 struct request_queue *q, 803 struct request_queue *q,
799 struct bio *bio) 804 struct bio *bio)
800{ 805{
801 blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); 806 blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0);
802} 807}
803 808
804static void blk_add_trace_bio_queue(void *ignore, 809static void blk_add_trace_bio_queue(void *ignore,
805 struct request_queue *q, struct bio *bio) 810 struct request_queue *q, struct bio *bio)
806{ 811{
807 blk_add_trace_bio(q, bio, BLK_TA_QUEUE); 812 blk_add_trace_bio(q, bio, BLK_TA_QUEUE, 0);
808} 813}
809 814
810static void blk_add_trace_getrq(void *ignore, 815static void blk_add_trace_getrq(void *ignore,
@@ -812,7 +817,7 @@ static void blk_add_trace_getrq(void *ignore,
812 struct bio *bio, int rw) 817 struct bio *bio, int rw)
813{ 818{
814 if (bio) 819 if (bio)
815 blk_add_trace_bio(q, bio, BLK_TA_GETRQ); 820 blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0);
816 else { 821 else {
817 struct blk_trace *bt = q->blk_trace; 822 struct blk_trace *bt = q->blk_trace;
818 823
@@ -827,7 +832,7 @@ static void blk_add_trace_sleeprq(void *ignore,
827 struct bio *bio, int rw) 832 struct bio *bio, int rw)
828{ 833{
829 if (bio) 834 if (bio)
830 blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ); 835 blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0);
831 else { 836 else {
832 struct blk_trace *bt = q->blk_trace; 837 struct blk_trace *bt = q->blk_trace;
833 838
@@ -887,7 +892,7 @@ static void blk_add_trace_split(void *ignore,
887} 892}
888 893
889/** 894/**
890 * blk_add_trace_remap - Add a trace for a remap operation 895 * blk_add_trace_bio_remap - Add a trace for a bio-remap operation
891 * @ignore: trace callback data parameter (not used) 896 * @ignore: trace callback data parameter (not used)
892 * @q: queue the io is for 897 * @q: queue the io is for
893 * @bio: the source bio 898 * @bio: the source bio
@@ -899,9 +904,9 @@ static void blk_add_trace_split(void *ignore,
899 * it spans a stripe (or similar). Add a trace for that action. 904 * it spans a stripe (or similar). Add a trace for that action.
900 * 905 *
901 **/ 906 **/
902static void blk_add_trace_remap(void *ignore, 907static void blk_add_trace_bio_remap(void *ignore,
903 struct request_queue *q, struct bio *bio, 908 struct request_queue *q, struct bio *bio,
904 dev_t dev, sector_t from) 909 dev_t dev, sector_t from)
905{ 910{
906 struct blk_trace *bt = q->blk_trace; 911 struct blk_trace *bt = q->blk_trace;
907 struct blk_io_trace_remap r; 912 struct blk_io_trace_remap r;
@@ -1016,7 +1021,7 @@ static void blk_register_tracepoints(void)
1016 WARN_ON(ret); 1021 WARN_ON(ret);
1017 ret = register_trace_block_split(blk_add_trace_split, NULL); 1022 ret = register_trace_block_split(blk_add_trace_split, NULL);
1018 WARN_ON(ret); 1023 WARN_ON(ret);
1019 ret = register_trace_block_remap(blk_add_trace_remap, NULL); 1024 ret = register_trace_block_bio_remap(blk_add_trace_bio_remap, NULL);
1020 WARN_ON(ret); 1025 WARN_ON(ret);
1021 ret = register_trace_block_rq_remap(blk_add_trace_rq_remap, NULL); 1026 ret = register_trace_block_rq_remap(blk_add_trace_rq_remap, NULL);
1022 WARN_ON(ret); 1027 WARN_ON(ret);
@@ -1025,7 +1030,7 @@ static void blk_register_tracepoints(void)
1025static void blk_unregister_tracepoints(void) 1030static void blk_unregister_tracepoints(void)
1026{ 1031{
1027 unregister_trace_block_rq_remap(blk_add_trace_rq_remap, NULL); 1032 unregister_trace_block_rq_remap(blk_add_trace_rq_remap, NULL);
1028 unregister_trace_block_remap(blk_add_trace_remap, NULL); 1033 unregister_trace_block_bio_remap(blk_add_trace_bio_remap, NULL);
1029 unregister_trace_block_split(blk_add_trace_split, NULL); 1034 unregister_trace_block_split(blk_add_trace_split, NULL);
1030 unregister_trace_block_unplug_io(blk_add_trace_unplug_io, NULL); 1035 unregister_trace_block_unplug_io(blk_add_trace_unplug_io, NULL);
1031 unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL); 1036 unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index f8cf959bad45..dc53ecb80589 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1313,12 +1313,10 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1313 1313
1314 __this_cpu_inc(user_stack_count); 1314 __this_cpu_inc(user_stack_count);
1315 1315
1316
1317
1318 event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, 1316 event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1319 sizeof(*entry), flags, pc); 1317 sizeof(*entry), flags, pc);
1320 if (!event) 1318 if (!event)
1321 return; 1319 goto out_drop_count;
1322 entry = ring_buffer_event_data(event); 1320 entry = ring_buffer_event_data(event);
1323 1321
1324 entry->tgid = current->tgid; 1322 entry->tgid = current->tgid;
@@ -1333,8 +1331,8 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1333 if (!filter_check_discard(call, entry, buffer, event)) 1331 if (!filter_check_discard(call, entry, buffer, event))
1334 ring_buffer_unlock_commit(buffer, event); 1332 ring_buffer_unlock_commit(buffer, event);
1335 1333
1334 out_drop_count:
1336 __this_cpu_dec(user_stack_count); 1335 __this_cpu_dec(user_stack_count);
1337
1338 out: 1336 out:
1339 preempt_enable(); 1337 preempt_enable();
1340} 1338}
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index e3dfecaf13e6..6cf223764be8 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -53,7 +53,7 @@
53 */ 53 */
54 54
55/* 55/*
56 * Function trace entry - function address and parent function addres: 56 * Function trace entry - function address and parent function address:
57 */ 57 */
58FTRACE_ENTRY(function, ftrace_entry, 58FTRACE_ENTRY(function, ftrace_entry,
59 59
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 562c56e048fd..659732eba07c 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -558,7 +558,7 @@ trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr)
558static int trace_wakeup_test_thread(void *data) 558static int trace_wakeup_test_thread(void *data)
559{ 559{
560 /* Make this a RT thread, doesn't need to be too high */ 560 /* Make this a RT thread, doesn't need to be too high */
561 static struct sched_param param = { .sched_priority = 5 }; 561 static const struct sched_param param = { .sched_priority = 5 };
562 struct completion *x = data; 562 struct completion *x = data;
563 563
564 sched_setscheduler(current, SCHED_FIFO, &param); 564 sched_setscheduler(current, SCHED_FIFO, &param);
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index bac752f0cfb5..b706529b4fc7 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -23,9 +23,6 @@ static int syscall_exit_register(struct ftrace_event_call *event,
23static int syscall_enter_define_fields(struct ftrace_event_call *call); 23static int syscall_enter_define_fields(struct ftrace_event_call *call);
24static int syscall_exit_define_fields(struct ftrace_event_call *call); 24static int syscall_exit_define_fields(struct ftrace_event_call *call);
25 25
26/* All syscall exit events have the same fields */
27static LIST_HEAD(syscall_exit_fields);
28
29static struct list_head * 26static struct list_head *
30syscall_get_enter_fields(struct ftrace_event_call *call) 27syscall_get_enter_fields(struct ftrace_event_call *call)
31{ 28{
@@ -34,34 +31,28 @@ syscall_get_enter_fields(struct ftrace_event_call *call)
34 return &entry->enter_fields; 31 return &entry->enter_fields;
35} 32}
36 33
37static struct list_head *
38syscall_get_exit_fields(struct ftrace_event_call *call)
39{
40 return &syscall_exit_fields;
41}
42
43struct trace_event_functions enter_syscall_print_funcs = { 34struct trace_event_functions enter_syscall_print_funcs = {
44 .trace = print_syscall_enter, 35 .trace = print_syscall_enter,
45}; 36};
46 37
47struct trace_event_functions exit_syscall_print_funcs = { 38struct trace_event_functions exit_syscall_print_funcs = {
48 .trace = print_syscall_exit, 39 .trace = print_syscall_exit,
49}; 40};
50 41
51struct ftrace_event_class event_class_syscall_enter = { 42struct ftrace_event_class event_class_syscall_enter = {
52 .system = "syscalls", 43 .system = "syscalls",
53 .reg = syscall_enter_register, 44 .reg = syscall_enter_register,
54 .define_fields = syscall_enter_define_fields, 45 .define_fields = syscall_enter_define_fields,
55 .get_fields = syscall_get_enter_fields, 46 .get_fields = syscall_get_enter_fields,
56 .raw_init = init_syscall_trace, 47 .raw_init = init_syscall_trace,
57}; 48};
58 49
59struct ftrace_event_class event_class_syscall_exit = { 50struct ftrace_event_class event_class_syscall_exit = {
60 .system = "syscalls", 51 .system = "syscalls",
61 .reg = syscall_exit_register, 52 .reg = syscall_exit_register,
62 .define_fields = syscall_exit_define_fields, 53 .define_fields = syscall_exit_define_fields,
63 .get_fields = syscall_get_exit_fields, 54 .fields = LIST_HEAD_INIT(event_class_syscall_exit.fields),
64 .raw_init = init_syscall_trace, 55 .raw_init = init_syscall_trace,
65}; 56};
66 57
67extern unsigned long __start_syscalls_metadata[]; 58extern unsigned long __start_syscalls_metadata[];
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 25915832291a..9da289c34f22 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -12,6 +12,8 @@
12#include <linux/highuid.h> 12#include <linux/highuid.h>
13#include <linux/cred.h> 13#include <linux/cred.h>
14 14
15static struct kmem_cache *user_ns_cachep __read_mostly;
16
15/* 17/*
16 * Create a new user namespace, deriving the creator from the user in the 18 * Create a new user namespace, deriving the creator from the user in the
17 * passed credentials, and replacing that user with the new root user for the 19 * passed credentials, and replacing that user with the new root user for the
@@ -26,7 +28,7 @@ int create_user_ns(struct cred *new)
26 struct user_struct *root_user; 28 struct user_struct *root_user;
27 int n; 29 int n;
28 30
29 ns = kmalloc(sizeof(struct user_namespace), GFP_KERNEL); 31 ns = kmem_cache_alloc(user_ns_cachep, GFP_KERNEL);
30 if (!ns) 32 if (!ns)
31 return -ENOMEM; 33 return -ENOMEM;
32 34
@@ -38,7 +40,7 @@ int create_user_ns(struct cred *new)
38 /* Alloc new root user. */ 40 /* Alloc new root user. */
39 root_user = alloc_uid(ns, 0); 41 root_user = alloc_uid(ns, 0);
40 if (!root_user) { 42 if (!root_user) {
41 kfree(ns); 43 kmem_cache_free(user_ns_cachep, ns);
42 return -ENOMEM; 44 return -ENOMEM;
43 } 45 }
44 46
@@ -71,7 +73,7 @@ static void free_user_ns_work(struct work_struct *work)
71 struct user_namespace *ns = 73 struct user_namespace *ns =
72 container_of(work, struct user_namespace, destroyer); 74 container_of(work, struct user_namespace, destroyer);
73 free_uid(ns->creator); 75 free_uid(ns->creator);
74 kfree(ns); 76 kmem_cache_free(user_ns_cachep, ns);
75} 77}
76 78
77void free_user_ns(struct kref *kref) 79void free_user_ns(struct kref *kref)
@@ -126,3 +128,10 @@ gid_t user_ns_map_gid(struct user_namespace *to, const struct cred *cred, gid_t
126 /* No useful relationship so no mapping */ 128 /* No useful relationship so no mapping */
127 return overflowgid; 129 return overflowgid;
128} 130}
131
132static __init int user_namespaces_init(void)
133{
134 user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC);
135 return 0;
136}
137module_init(user_namespaces_init);
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 6e7b575ac33c..d7ebdf4cea98 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -118,12 +118,12 @@ static void __touch_watchdog(void)
118{ 118{
119 int this_cpu = smp_processor_id(); 119 int this_cpu = smp_processor_id();
120 120
121 __get_cpu_var(watchdog_touch_ts) = get_timestamp(this_cpu); 121 __this_cpu_write(watchdog_touch_ts, get_timestamp(this_cpu));
122} 122}
123 123
124void touch_softlockup_watchdog(void) 124void touch_softlockup_watchdog(void)
125{ 125{
126 __raw_get_cpu_var(watchdog_touch_ts) = 0; 126 __this_cpu_write(watchdog_touch_ts, 0);
127} 127}
128EXPORT_SYMBOL(touch_softlockup_watchdog); 128EXPORT_SYMBOL(touch_softlockup_watchdog);
129 129
@@ -167,12 +167,12 @@ void touch_softlockup_watchdog_sync(void)
167/* watchdog detector functions */ 167/* watchdog detector functions */
168static int is_hardlockup(void) 168static int is_hardlockup(void)
169{ 169{
170 unsigned long hrint = __get_cpu_var(hrtimer_interrupts); 170 unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
171 171
172 if (__get_cpu_var(hrtimer_interrupts_saved) == hrint) 172 if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
173 return 1; 173 return 1;
174 174
175 __get_cpu_var(hrtimer_interrupts_saved) = hrint; 175 __this_cpu_write(hrtimer_interrupts_saved, hrint);
176 return 0; 176 return 0;
177} 177}
178#endif 178#endif
@@ -205,8 +205,8 @@ static void watchdog_overflow_callback(struct perf_event *event, int nmi,
205 /* Ensure the watchdog never gets throttled */ 205 /* Ensure the watchdog never gets throttled */
206 event->hw.interrupts = 0; 206 event->hw.interrupts = 0;
207 207
208 if (__get_cpu_var(watchdog_nmi_touch) == true) { 208 if (__this_cpu_read(watchdog_nmi_touch) == true) {
209 __get_cpu_var(watchdog_nmi_touch) = false; 209 __this_cpu_write(watchdog_nmi_touch, false);
210 return; 210 return;
211 } 211 }
212 212
@@ -220,7 +220,7 @@ static void watchdog_overflow_callback(struct perf_event *event, int nmi,
220 int this_cpu = smp_processor_id(); 220 int this_cpu = smp_processor_id();
221 221
222 /* only print hardlockups once */ 222 /* only print hardlockups once */
223 if (__get_cpu_var(hard_watchdog_warn) == true) 223 if (__this_cpu_read(hard_watchdog_warn) == true)
224 return; 224 return;
225 225
226 if (hardlockup_panic) 226 if (hardlockup_panic)
@@ -228,16 +228,16 @@ static void watchdog_overflow_callback(struct perf_event *event, int nmi,
228 else 228 else
229 WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu); 229 WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu);
230 230
231 __get_cpu_var(hard_watchdog_warn) = true; 231 __this_cpu_write(hard_watchdog_warn, true);
232 return; 232 return;
233 } 233 }
234 234
235 __get_cpu_var(hard_watchdog_warn) = false; 235 __this_cpu_write(hard_watchdog_warn, false);
236 return; 236 return;
237} 237}
238static void watchdog_interrupt_count(void) 238static void watchdog_interrupt_count(void)
239{ 239{
240 __get_cpu_var(hrtimer_interrupts)++; 240 __this_cpu_inc(hrtimer_interrupts);
241} 241}
242#else 242#else
243static inline void watchdog_interrupt_count(void) { return; } 243static inline void watchdog_interrupt_count(void) { return; }
@@ -246,7 +246,7 @@ static inline void watchdog_interrupt_count(void) { return; }
246/* watchdog kicker functions */ 246/* watchdog kicker functions */
247static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) 247static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
248{ 248{
249 unsigned long touch_ts = __get_cpu_var(watchdog_touch_ts); 249 unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
250 struct pt_regs *regs = get_irq_regs(); 250 struct pt_regs *regs = get_irq_regs();
251 int duration; 251 int duration;
252 252
@@ -254,18 +254,18 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
254 watchdog_interrupt_count(); 254 watchdog_interrupt_count();
255 255
256 /* kick the softlockup detector */ 256 /* kick the softlockup detector */
257 wake_up_process(__get_cpu_var(softlockup_watchdog)); 257 wake_up_process(__this_cpu_read(softlockup_watchdog));
258 258
259 /* .. and repeat */ 259 /* .. and repeat */
260 hrtimer_forward_now(hrtimer, ns_to_ktime(get_sample_period())); 260 hrtimer_forward_now(hrtimer, ns_to_ktime(get_sample_period()));
261 261
262 if (touch_ts == 0) { 262 if (touch_ts == 0) {
263 if (unlikely(__get_cpu_var(softlockup_touch_sync))) { 263 if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
264 /* 264 /*
265 * If the time stamp was touched atomically 265 * If the time stamp was touched atomically
266 * make sure the scheduler tick is up to date. 266 * make sure the scheduler tick is up to date.
267 */ 267 */
268 __get_cpu_var(softlockup_touch_sync) = false; 268 __this_cpu_write(softlockup_touch_sync, false);
269 sched_clock_tick(); 269 sched_clock_tick();
270 } 270 }
271 __touch_watchdog(); 271 __touch_watchdog();
@@ -281,7 +281,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
281 duration = is_softlockup(touch_ts); 281 duration = is_softlockup(touch_ts);
282 if (unlikely(duration)) { 282 if (unlikely(duration)) {
283 /* only warn once */ 283 /* only warn once */
284 if (__get_cpu_var(soft_watchdog_warn) == true) 284 if (__this_cpu_read(soft_watchdog_warn) == true)
285 return HRTIMER_RESTART; 285 return HRTIMER_RESTART;
286 286
287 printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n", 287 printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
@@ -296,9 +296,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
296 296
297 if (softlockup_panic) 297 if (softlockup_panic)
298 panic("softlockup: hung tasks"); 298 panic("softlockup: hung tasks");
299 __get_cpu_var(soft_watchdog_warn) = true; 299 __this_cpu_write(soft_watchdog_warn, true);
300 } else 300 } else
301 __get_cpu_var(soft_watchdog_warn) = false; 301 __this_cpu_write(soft_watchdog_warn, false);
302 302
303 return HRTIMER_RESTART; 303 return HRTIMER_RESTART;
304} 304}
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index e785b0f2aea5..8ee6ec82f88a 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -932,6 +932,38 @@ static void insert_work(struct cpu_workqueue_struct *cwq,
932 wake_up_worker(gcwq); 932 wake_up_worker(gcwq);
933} 933}
934 934
935/*
936 * Test whether @work is being queued from another work executing on the
937 * same workqueue. This is rather expensive and should only be used from
938 * cold paths.
939 */
940static bool is_chained_work(struct workqueue_struct *wq)
941{
942 unsigned long flags;
943 unsigned int cpu;
944
945 for_each_gcwq_cpu(cpu) {
946 struct global_cwq *gcwq = get_gcwq(cpu);
947 struct worker *worker;
948 struct hlist_node *pos;
949 int i;
950
951 spin_lock_irqsave(&gcwq->lock, flags);
952 for_each_busy_worker(worker, i, pos, gcwq) {
953 if (worker->task != current)
954 continue;
955 spin_unlock_irqrestore(&gcwq->lock, flags);
956 /*
957 * I'm @worker, no locking necessary. See if @work
958 * is headed to the same workqueue.
959 */
960 return worker->current_cwq->wq == wq;
961 }
962 spin_unlock_irqrestore(&gcwq->lock, flags);
963 }
964 return false;
965}
966
935static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, 967static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
936 struct work_struct *work) 968 struct work_struct *work)
937{ 969{
@@ -943,7 +975,9 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
943 975
944 debug_work_activate(work); 976 debug_work_activate(work);
945 977
946 if (WARN_ON_ONCE(wq->flags & WQ_DYING)) 978 /* if dying, only works from the same workqueue are allowed */
979 if (unlikely(wq->flags & WQ_DYING) &&
980 WARN_ON_ONCE(!is_chained_work(wq)))
947 return; 981 return;
948 982
949 /* determine gcwq to use */ 983 /* determine gcwq to use */
@@ -2936,11 +2970,35 @@ EXPORT_SYMBOL_GPL(__alloc_workqueue_key);
2936 */ 2970 */
2937void destroy_workqueue(struct workqueue_struct *wq) 2971void destroy_workqueue(struct workqueue_struct *wq)
2938{ 2972{
2973 unsigned int flush_cnt = 0;
2939 unsigned int cpu; 2974 unsigned int cpu;
2940 2975
2976 /*
2977 * Mark @wq dying and drain all pending works. Once WQ_DYING is
2978 * set, only chain queueing is allowed. IOW, only currently
2979 * pending or running work items on @wq can queue further work
2980 * items on it. @wq is flushed repeatedly until it becomes empty.
2981 * The number of flushing is detemined by the depth of chaining and
2982 * should be relatively short. Whine if it takes too long.
2983 */
2941 wq->flags |= WQ_DYING; 2984 wq->flags |= WQ_DYING;
2985reflush:
2942 flush_workqueue(wq); 2986 flush_workqueue(wq);
2943 2987
2988 for_each_cwq_cpu(cpu, wq) {
2989 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
2990
2991 if (!cwq->nr_active && list_empty(&cwq->delayed_works))
2992 continue;
2993
2994 if (++flush_cnt == 10 ||
2995 (flush_cnt % 100 == 0 && flush_cnt <= 1000))
2996 printk(KERN_WARNING "workqueue %s: flush on "
2997 "destruction isn't complete after %u tries\n",
2998 wq->name, flush_cnt);
2999 goto reflush;
3000 }
3001
2944 /* 3002 /*
2945 * wq list is used to freeze wq, remove from list after 3003 * wq list is used to freeze wq, remove from list after
2946 * flushing is complete in case freeze races us. 3004 * flushing is complete in case freeze races us.