Diffstat (limited to 'kernel')
52 files changed, 969 insertions, 526 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 0b5ff083fa22..353d3fe8ba33 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -43,7 +43,7 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o | |||
43 | obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o | 43 | obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o |
44 | obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o | 44 | obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o |
45 | obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o | 45 | obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o |
46 | obj-$(CONFIG_USE_GENERIC_SMP_HELPERS) += smp.o | 46 | obj-$(CONFIG_SMP) += smp.o |
47 | ifneq ($(CONFIG_SMP),y) | 47 | ifneq ($(CONFIG_SMP),y) |
48 | obj-y += up.o | 48 | obj-y += up.o |
49 | endif | 49 | endif |
@@ -100,6 +100,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += trace/ | |||
100 | obj-$(CONFIG_TRACING) += trace/ | 100 | obj-$(CONFIG_TRACING) += trace/ |
101 | obj-$(CONFIG_X86_DS) += trace/ | 101 | obj-$(CONFIG_X86_DS) += trace/ |
102 | obj-$(CONFIG_RING_BUFFER) += trace/ | 102 | obj-$(CONFIG_RING_BUFFER) += trace/ |
103 | obj-$(CONFIG_TRACEPOINTS) += trace/ | ||
103 | obj-$(CONFIG_SMP) += sched_cpupri.o | 104 | obj-$(CONFIG_SMP) += sched_cpupri.o |
104 | obj-$(CONFIG_IRQ_WORK) += irq_work.o | 105 | obj-$(CONFIG_IRQ_WORK) += irq_work.o |
105 | obj-$(CONFIG_PERF_EVENTS) += perf_event.o | 106 | obj-$(CONFIG_PERF_EVENTS) += perf_event.o |
@@ -121,7 +122,7 @@ $(obj)/configs.o: $(obj)/config_data.h | |||
121 | # config_data.h contains the same information as ikconfig.h but gzipped. | 122 | # config_data.h contains the same information as ikconfig.h but gzipped. |
122 | # Info from config_data can be extracted from /proc/config* | 123 | # Info from config_data can be extracted from /proc/config* |
123 | targets += config_data.gz | 124 | targets += config_data.gz |
124 | $(obj)/config_data.gz: .config FORCE | 125 | $(obj)/config_data.gz: $(KCONFIG_CONFIG) FORCE |
125 | $(call if_changed,gzip) | 126 | $(call if_changed,gzip) |
126 | 127 | ||
127 | quiet_cmd_ikconfiggz = IKCFG $@ | 128 | quiet_cmd_ikconfiggz = IKCFG $@ |
diff --git a/kernel/audit.c b/kernel/audit.c
index 77770a034d59..e4956244ae50 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -400,7 +400,7 @@ static void kauditd_send_skb(struct sk_buff *skb) | |||
400 | if (err < 0) { | 400 | if (err < 0) { |
401 | BUG_ON(err != -ECONNREFUSED); /* Shouldn't happen */ | 401 | BUG_ON(err != -ECONNREFUSED); /* Shouldn't happen */ |
402 | printk(KERN_ERR "audit: *NO* daemon at audit_pid=%d\n", audit_pid); | 402 | printk(KERN_ERR "audit: *NO* daemon at audit_pid=%d\n", audit_pid); |
403 | audit_log_lost("auditd dissapeared\n"); | 403 | audit_log_lost("auditd disappeared\n"); |
404 | audit_pid = 0; | 404 | audit_pid = 0; |
405 | /* we might get lucky and get this in the next auditd */ | 405 | /* we might get lucky and get this in the next auditd */ |
406 | audit_hold_skb(skb); | 406 | audit_hold_skb(skb); |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 66a416b42c18..b24d7027b83c 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -764,6 +764,7 @@ EXPORT_SYMBOL_GPL(cgroup_unlock); | |||
764 | */ | 764 | */ |
765 | 765 | ||
766 | static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode); | 766 | static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode); |
767 | static struct dentry *cgroup_lookup(struct inode *, struct dentry *, struct nameidata *); | ||
767 | static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry); | 768 | static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry); |
768 | static int cgroup_populate_dir(struct cgroup *cgrp); | 769 | static int cgroup_populate_dir(struct cgroup *cgrp); |
769 | static const struct inode_operations cgroup_dir_inode_operations; | 770 | static const struct inode_operations cgroup_dir_inode_operations; |
@@ -860,6 +861,11 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode) | |||
860 | iput(inode); | 861 | iput(inode); |
861 | } | 862 | } |
862 | 863 | ||
864 | static int cgroup_delete(const struct dentry *d) | ||
865 | { | ||
866 | return 1; | ||
867 | } | ||
868 | |||
863 | static void remove_dir(struct dentry *d) | 869 | static void remove_dir(struct dentry *d) |
864 | { | 870 | { |
865 | struct dentry *parent = dget(d->d_parent); | 871 | struct dentry *parent = dget(d->d_parent); |
@@ -874,25 +880,29 @@ static void cgroup_clear_directory(struct dentry *dentry) | |||
874 | struct list_head *node; | 880 | struct list_head *node; |
875 | 881 | ||
876 | BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); | 882 | BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); |
877 | spin_lock(&dcache_lock); | 883 | spin_lock(&dentry->d_lock); |
878 | node = dentry->d_subdirs.next; | 884 | node = dentry->d_subdirs.next; |
879 | while (node != &dentry->d_subdirs) { | 885 | while (node != &dentry->d_subdirs) { |
880 | struct dentry *d = list_entry(node, struct dentry, d_u.d_child); | 886 | struct dentry *d = list_entry(node, struct dentry, d_u.d_child); |
887 | |||
888 | spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED); | ||
881 | list_del_init(node); | 889 | list_del_init(node); |
882 | if (d->d_inode) { | 890 | if (d->d_inode) { |
883 | /* This should never be called on a cgroup | 891 | /* This should never be called on a cgroup |
884 | * directory with child cgroups */ | 892 | * directory with child cgroups */ |
885 | BUG_ON(d->d_inode->i_mode & S_IFDIR); | 893 | BUG_ON(d->d_inode->i_mode & S_IFDIR); |
886 | d = dget_locked(d); | 894 | dget_dlock(d); |
887 | spin_unlock(&dcache_lock); | 895 | spin_unlock(&d->d_lock); |
896 | spin_unlock(&dentry->d_lock); | ||
888 | d_delete(d); | 897 | d_delete(d); |
889 | simple_unlink(dentry->d_inode, d); | 898 | simple_unlink(dentry->d_inode, d); |
890 | dput(d); | 899 | dput(d); |
891 | spin_lock(&dcache_lock); | 900 | spin_lock(&dentry->d_lock); |
892 | } | 901 | } else |
902 | spin_unlock(&d->d_lock); | ||
893 | node = dentry->d_subdirs.next; | 903 | node = dentry->d_subdirs.next; |
894 | } | 904 | } |
895 | spin_unlock(&dcache_lock); | 905 | spin_unlock(&dentry->d_lock); |
896 | } | 906 | } |
897 | 907 | ||
898 | /* | 908 | /* |
@@ -900,11 +910,16 @@ static void cgroup_clear_directory(struct dentry *dentry) | |||
900 | */ | 910 | */ |
901 | static void cgroup_d_remove_dir(struct dentry *dentry) | 911 | static void cgroup_d_remove_dir(struct dentry *dentry) |
902 | { | 912 | { |
913 | struct dentry *parent; | ||
914 | |||
903 | cgroup_clear_directory(dentry); | 915 | cgroup_clear_directory(dentry); |
904 | 916 | ||
905 | spin_lock(&dcache_lock); | 917 | parent = dentry->d_parent; |
918 | spin_lock(&parent->d_lock); | ||
919 | spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); | ||
906 | list_del_init(&dentry->d_u.d_child); | 920 | list_del_init(&dentry->d_u.d_child); |
907 | spin_unlock(&dcache_lock); | 921 | spin_unlock(&dentry->d_lock); |
922 | spin_unlock(&parent->d_lock); | ||
908 | remove_dir(dentry); | 923 | remove_dir(dentry); |
909 | } | 924 | } |
910 | 925 | ||
@@ -1440,6 +1455,11 @@ static int cgroup_set_super(struct super_block *sb, void *data) | |||
1440 | 1455 | ||
1441 | static int cgroup_get_rootdir(struct super_block *sb) | 1456 | static int cgroup_get_rootdir(struct super_block *sb) |
1442 | { | 1457 | { |
1458 | static const struct dentry_operations cgroup_dops = { | ||
1459 | .d_iput = cgroup_diput, | ||
1460 | .d_delete = cgroup_delete, | ||
1461 | }; | ||
1462 | |||
1443 | struct inode *inode = | 1463 | struct inode *inode = |
1444 | cgroup_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb); | 1464 | cgroup_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb); |
1445 | struct dentry *dentry; | 1465 | struct dentry *dentry; |
@@ -1457,6 +1477,8 @@ static int cgroup_get_rootdir(struct super_block *sb) | |||
1457 | return -ENOMEM; | 1477 | return -ENOMEM; |
1458 | } | 1478 | } |
1459 | sb->s_root = dentry; | 1479 | sb->s_root = dentry; |
1480 | /* for everything else we want ->d_op set */ | ||
1481 | sb->s_d_op = &cgroup_dops; | ||
1460 | return 0; | 1482 | return 0; |
1461 | } | 1483 | } |
1462 | 1484 | ||
@@ -2180,12 +2202,20 @@ static const struct file_operations cgroup_file_operations = { | |||
2180 | }; | 2202 | }; |
2181 | 2203 | ||
2182 | static const struct inode_operations cgroup_dir_inode_operations = { | 2204 | static const struct inode_operations cgroup_dir_inode_operations = { |
2183 | .lookup = simple_lookup, | 2205 | .lookup = cgroup_lookup, |
2184 | .mkdir = cgroup_mkdir, | 2206 | .mkdir = cgroup_mkdir, |
2185 | .rmdir = cgroup_rmdir, | 2207 | .rmdir = cgroup_rmdir, |
2186 | .rename = cgroup_rename, | 2208 | .rename = cgroup_rename, |
2187 | }; | 2209 | }; |
2188 | 2210 | ||
2211 | static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) | ||
2212 | { | ||
2213 | if (dentry->d_name.len > NAME_MAX) | ||
2214 | return ERR_PTR(-ENAMETOOLONG); | ||
2215 | d_add(dentry, NULL); | ||
2216 | return NULL; | ||
2217 | } | ||
2218 | |||
2189 | /* | 2219 | /* |
2190 | * Check if a file is a control file | 2220 | * Check if a file is a control file |
2191 | */ | 2221 | */ |
@@ -2199,10 +2229,6 @@ static inline struct cftype *__file_cft(struct file *file) | |||
2199 | static int cgroup_create_file(struct dentry *dentry, mode_t mode, | 2229 | static int cgroup_create_file(struct dentry *dentry, mode_t mode, |
2200 | struct super_block *sb) | 2230 | struct super_block *sb) |
2201 | { | 2231 | { |
2202 | static const struct dentry_operations cgroup_dops = { | ||
2203 | .d_iput = cgroup_diput, | ||
2204 | }; | ||
2205 | |||
2206 | struct inode *inode; | 2232 | struct inode *inode; |
2207 | 2233 | ||
2208 | if (!dentry) | 2234 | if (!dentry) |
@@ -2228,7 +2254,6 @@ static int cgroup_create_file(struct dentry *dentry, mode_t mode, | |||
2228 | inode->i_size = 0; | 2254 | inode->i_size = 0; |
2229 | inode->i_fop = &cgroup_file_operations; | 2255 | inode->i_fop = &cgroup_file_operations; |
2230 | } | 2256 | } |
2231 | dentry->d_op = &cgroup_dops; | ||
2232 | d_instantiate(dentry, inode); | 2257 | d_instantiate(dentry, inode); |
2233 | dget(dentry); /* Extra count - pin the dentry in core */ | 2258 | dget(dentry); /* Extra count - pin the dentry in core */ |
2234 | return 0; | 2259 | return 0; |
@@ -3638,9 +3663,7 @@ again: | |||
3638 | list_del(&cgrp->sibling); | 3663 | list_del(&cgrp->sibling); |
3639 | cgroup_unlock_hierarchy(cgrp->root); | 3664 | cgroup_unlock_hierarchy(cgrp->root); |
3640 | 3665 | ||
3641 | spin_lock(&cgrp->dentry->d_lock); | ||
3642 | d = dget(cgrp->dentry); | 3666 | d = dget(cgrp->dentry); |
3643 | spin_unlock(&d->d_lock); | ||
3644 | 3667 | ||
3645 | cgroup_d_remove_dir(d); | 3668 | cgroup_d_remove_dir(d); |
3646 | dput(d); | 3669 | dput(d); |
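Editorial aside on the cgroup.c hunks above: the patch stops assigning dentry->d_op per dentry in cgroup_create_file() and instead installs the operations once on the superblock through sb->s_d_op, with cgroup_delete() returning 1 so unused dentries are not kept in the dcache. A minimal sketch of that idiom follows; the names demo_dops, demo_delete_dentry and demo_fill_super are invented for illustration and are not part of the patch.

#include <linux/dcache.h>
#include <linux/fs.h>

/* mirror of cgroup_delete(): tell the dcache not to keep unused dentries */
static int demo_delete_dentry(const struct dentry *dentry)
{
        return 1;
}

static const struct dentry_operations demo_dops = {
        .d_delete       = demo_delete_dentry,
};

static int demo_fill_super(struct super_block *sb)
{
        /*
         * One assignment here replaces setting dentry->d_op by hand on
         * every dentry the filesystem creates later; dentries allocated
         * on this superblock pick up demo_dops automatically.
         */
        sb->s_d_op = &demo_dops;
        return 0;
}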
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index a6e729766821..bd3e8e29caa3 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -2914,7 +2914,7 @@ static void __init kdb_cmd_init(void) | |||
2914 | } | 2914 | } |
2915 | } | 2915 | } |
2916 | 2916 | ||
2917 | /* Intialize kdb_printf, breakpoint tables and kdb state */ | 2917 | /* Initialize kdb_printf, breakpoint tables and kdb state */ |
2918 | void __init kdb_init(int lvl) | 2918 | void __init kdb_init(int lvl) |
2919 | { | 2919 | { |
2920 | static int kdb_init_lvl = KDB_NOT_INITIALIZED; | 2920 | static int kdb_init_lvl = KDB_NOT_INITIALIZED; |
diff --git a/kernel/exit.c b/kernel/exit.c
index 676149a4ac5f..f9a45ebcc7b1 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -69,7 +69,7 @@ static void __unhash_process(struct task_struct *p, bool group_dead) | |||
69 | 69 | ||
70 | list_del_rcu(&p->tasks); | 70 | list_del_rcu(&p->tasks); |
71 | list_del_init(&p->sibling); | 71 | list_del_init(&p->sibling); |
72 | __get_cpu_var(process_counts)--; | 72 | __this_cpu_dec(process_counts); |
73 | } | 73 | } |
74 | list_del_rcu(&p->thread_group); | 74 | list_del_rcu(&p->thread_group); |
75 | } | 75 | } |
@@ -994,6 +994,15 @@ NORET_TYPE void do_exit(long code) | |||
994 | exit_fs(tsk); | 994 | exit_fs(tsk); |
995 | check_stack_usage(); | 995 | check_stack_usage(); |
996 | exit_thread(); | 996 | exit_thread(); |
997 | |||
998 | /* | ||
999 | * Flush inherited counters to the parent - before the parent | ||
1000 | * gets woken up by child-exit notifications. | ||
1001 | * | ||
1002 | * because of cgroup mode, must be called before cgroup_exit() | ||
1003 | */ | ||
1004 | perf_event_exit_task(tsk); | ||
1005 | |||
997 | cgroup_exit(tsk, 1); | 1006 | cgroup_exit(tsk, 1); |
998 | 1007 | ||
999 | if (group_dead) | 1008 | if (group_dead) |
@@ -1007,11 +1016,6 @@ NORET_TYPE void do_exit(long code) | |||
1007 | * FIXME: do that only when needed, using sched_exit tracepoint | 1016 | * FIXME: do that only when needed, using sched_exit tracepoint |
1008 | */ | 1017 | */ |
1009 | flush_ptrace_hw_breakpoint(tsk); | 1018 | flush_ptrace_hw_breakpoint(tsk); |
1010 | /* | ||
1011 | * Flush inherited counters to the parent - before the parent | ||
1012 | * gets woken up by child-exit notifications. | ||
1013 | */ | ||
1014 | perf_event_exit_task(tsk); | ||
1015 | 1019 | ||
1016 | exit_notify(tsk, group_dead); | 1020 | exit_notify(tsk, group_dead); |
1017 | #ifdef CONFIG_NUMA | 1021 | #ifdef CONFIG_NUMA |
diff --git a/kernel/fork.c b/kernel/fork.c
index 7d164e25b0f0..25e429152ddc 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -66,6 +66,7 @@ | |||
66 | #include <linux/posix-timers.h> | 66 | #include <linux/posix-timers.h> |
67 | #include <linux/user-return-notifier.h> | 67 | #include <linux/user-return-notifier.h> |
68 | #include <linux/oom.h> | 68 | #include <linux/oom.h> |
69 | #include <linux/khugepaged.h> | ||
69 | 70 | ||
70 | #include <asm/pgtable.h> | 71 | #include <asm/pgtable.h> |
71 | #include <asm/pgalloc.h> | 72 | #include <asm/pgalloc.h> |
@@ -169,15 +170,14 @@ EXPORT_SYMBOL(free_task); | |||
169 | static inline void free_signal_struct(struct signal_struct *sig) | 170 | static inline void free_signal_struct(struct signal_struct *sig) |
170 | { | 171 | { |
171 | taskstats_tgid_free(sig); | 172 | taskstats_tgid_free(sig); |
173 | sched_autogroup_exit(sig); | ||
172 | kmem_cache_free(signal_cachep, sig); | 174 | kmem_cache_free(signal_cachep, sig); |
173 | } | 175 | } |
174 | 176 | ||
175 | static inline void put_signal_struct(struct signal_struct *sig) | 177 | static inline void put_signal_struct(struct signal_struct *sig) |
176 | { | 178 | { |
177 | if (atomic_dec_and_test(&sig->sigcnt)) { | 179 | if (atomic_dec_and_test(&sig->sigcnt)) |
178 | sched_autogroup_exit(sig); | ||
179 | free_signal_struct(sig); | 180 | free_signal_struct(sig); |
180 | } | ||
181 | } | 181 | } |
182 | 182 | ||
183 | void __put_task_struct(struct task_struct *tsk) | 183 | void __put_task_struct(struct task_struct *tsk) |
@@ -331,6 +331,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) | |||
331 | retval = ksm_fork(mm, oldmm); | 331 | retval = ksm_fork(mm, oldmm); |
332 | if (retval) | 332 | if (retval) |
333 | goto out; | 333 | goto out; |
334 | retval = khugepaged_fork(mm, oldmm); | ||
335 | if (retval) | ||
336 | goto out; | ||
334 | 337 | ||
335 | prev = NULL; | 338 | prev = NULL; |
336 | for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) { | 339 | for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) { |
@@ -530,6 +533,9 @@ void __mmdrop(struct mm_struct *mm) | |||
530 | mm_free_pgd(mm); | 533 | mm_free_pgd(mm); |
531 | destroy_context(mm); | 534 | destroy_context(mm); |
532 | mmu_notifier_mm_destroy(mm); | 535 | mmu_notifier_mm_destroy(mm); |
536 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
537 | VM_BUG_ON(mm->pmd_huge_pte); | ||
538 | #endif | ||
533 | free_mm(mm); | 539 | free_mm(mm); |
534 | } | 540 | } |
535 | EXPORT_SYMBOL_GPL(__mmdrop); | 541 | EXPORT_SYMBOL_GPL(__mmdrop); |
@@ -544,6 +550,7 @@ void mmput(struct mm_struct *mm) | |||
544 | if (atomic_dec_and_test(&mm->mm_users)) { | 550 | if (atomic_dec_and_test(&mm->mm_users)) { |
545 | exit_aio(mm); | 551 | exit_aio(mm); |
546 | ksm_exit(mm); | 552 | ksm_exit(mm); |
553 | khugepaged_exit(mm); /* must run before exit_mmap */ | ||
547 | exit_mmap(mm); | 554 | exit_mmap(mm); |
548 | set_mm_exe_file(mm, NULL); | 555 | set_mm_exe_file(mm, NULL); |
549 | if (!list_empty(&mm->mmlist)) { | 556 | if (!list_empty(&mm->mmlist)) { |
@@ -670,6 +677,10 @@ struct mm_struct *dup_mm(struct task_struct *tsk) | |||
670 | mm->token_priority = 0; | 677 | mm->token_priority = 0; |
671 | mm->last_interval = 0; | 678 | mm->last_interval = 0; |
672 | 679 | ||
680 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
681 | mm->pmd_huge_pte = NULL; | ||
682 | #endif | ||
683 | |||
673 | if (!mm_init(mm, tsk)) | 684 | if (!mm_init(mm, tsk)) |
674 | goto fail_nomem; | 685 | goto fail_nomem; |
675 | 686 | ||
@@ -911,6 +922,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
911 | 922 | ||
912 | sig->oom_adj = current->signal->oom_adj; | 923 | sig->oom_adj = current->signal->oom_adj; |
913 | sig->oom_score_adj = current->signal->oom_score_adj; | 924 | sig->oom_score_adj = current->signal->oom_score_adj; |
925 | sig->oom_score_adj_min = current->signal->oom_score_adj_min; | ||
914 | 926 | ||
915 | mutex_init(&sig->cred_guard_mutex); | 927 | mutex_init(&sig->cred_guard_mutex); |
916 | 928 | ||
@@ -1286,7 +1298,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1286 | attach_pid(p, PIDTYPE_SID, task_session(current)); | 1298 | attach_pid(p, PIDTYPE_SID, task_session(current)); |
1287 | list_add_tail(&p->sibling, &p->real_parent->children); | 1299 | list_add_tail(&p->sibling, &p->real_parent->children); |
1288 | list_add_tail_rcu(&p->tasks, &init_task.tasks); | 1300 | list_add_tail_rcu(&p->tasks, &init_task.tasks); |
1289 | __get_cpu_var(process_counts)++; | 1301 | __this_cpu_inc(process_counts); |
1290 | } | 1302 | } |
1291 | attach_pid(p, PIDTYPE_PID, pid); | 1303 | attach_pid(p, PIDTYPE_PID, pid); |
1292 | nr_threads++; | 1304 | nr_threads++; |
@@ -1318,7 +1330,7 @@ bad_fork_cleanup_mm: | |||
1318 | } | 1330 | } |
1319 | bad_fork_cleanup_signal: | 1331 | bad_fork_cleanup_signal: |
1320 | if (!(clone_flags & CLONE_THREAD)) | 1332 | if (!(clone_flags & CLONE_THREAD)) |
1321 | put_signal_struct(p->signal); | 1333 | free_signal_struct(p->signal); |
1322 | bad_fork_cleanup_sighand: | 1334 | bad_fork_cleanup_sighand: |
1323 | __cleanup_sighand(p->sighand); | 1335 | __cleanup_sighand(p->sighand); |
1324 | bad_fork_cleanup_fs: | 1336 | bad_fork_cleanup_fs: |
@@ -1411,23 +1423,6 @@ long do_fork(unsigned long clone_flags, | |||
1411 | } | 1423 | } |
1412 | 1424 | ||
1413 | /* | 1425 | /* |
1414 | * We hope to recycle these flags after 2.6.26 | ||
1415 | */ | ||
1416 | if (unlikely(clone_flags & CLONE_STOPPED)) { | ||
1417 | static int __read_mostly count = 100; | ||
1418 | |||
1419 | if (count > 0 && printk_ratelimit()) { | ||
1420 | char comm[TASK_COMM_LEN]; | ||
1421 | |||
1422 | count--; | ||
1423 | printk(KERN_INFO "fork(): process `%s' used deprecated " | ||
1424 | "clone flags 0x%lx\n", | ||
1425 | get_task_comm(comm, current), | ||
1426 | clone_flags & CLONE_STOPPED); | ||
1427 | } | ||
1428 | } | ||
1429 | |||
1430 | /* | ||
1431 | * When called from kernel_thread, don't do user tracing stuff. | 1426 | * When called from kernel_thread, don't do user tracing stuff. |
1432 | */ | 1427 | */ |
1433 | if (likely(user_mode(regs))) | 1428 | if (likely(user_mode(regs))) |
@@ -1465,16 +1460,7 @@ long do_fork(unsigned long clone_flags, | |||
1465 | */ | 1460 | */ |
1466 | p->flags &= ~PF_STARTING; | 1461 | p->flags &= ~PF_STARTING; |
1467 | 1462 | ||
1468 | if (unlikely(clone_flags & CLONE_STOPPED)) { | 1463 | wake_up_new_task(p, clone_flags); |
1469 | /* | ||
1470 | * We'll start up with an immediate SIGSTOP. | ||
1471 | */ | ||
1472 | sigaddset(&p->pending.signal, SIGSTOP); | ||
1473 | set_tsk_thread_flag(p, TIF_SIGPENDING); | ||
1474 | __set_task_state(p, TASK_STOPPED); | ||
1475 | } else { | ||
1476 | wake_up_new_task(p, clone_flags); | ||
1477 | } | ||
1478 | 1464 | ||
1479 | tracehook_report_clone_complete(trace, regs, | 1465 | tracehook_report_clone_complete(trace, regs, |
1480 | clone_flags, nr, p); | 1466 | clone_flags, nr, p); |
diff --git a/kernel/freezer.c b/kernel/freezer.c
index bd1d42b17cb2..66ecd2ead215 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -104,8 +104,13 @@ bool freeze_task(struct task_struct *p, bool sig_only) | |||
104 | } | 104 | } |
105 | 105 | ||
106 | if (should_send_signal(p)) { | 106 | if (should_send_signal(p)) { |
107 | if (!signal_pending(p)) | 107 | fake_signal_wake_up(p); |
108 | fake_signal_wake_up(p); | 108 | /* |
109 | * fake_signal_wake_up() goes through p's scheduler | ||
110 | * lock and guarantees that TASK_STOPPED/TRACED -> | ||
111 | * TASK_RUNNING transition can't race with task state | ||
112 | * testing in try_to_freeze_tasks(). | ||
113 | */ | ||
109 | } else if (sig_only) { | 114 | } else if (sig_only) { |
110 | return false; | 115 | return false; |
111 | } else { | 116 | } else { |
diff --git a/kernel/futex.c b/kernel/futex.c
index 3019b92e6917..b766d28accd6 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -233,7 +233,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key) | |||
233 | { | 233 | { |
234 | unsigned long address = (unsigned long)uaddr; | 234 | unsigned long address = (unsigned long)uaddr; |
235 | struct mm_struct *mm = current->mm; | 235 | struct mm_struct *mm = current->mm; |
236 | struct page *page; | 236 | struct page *page, *page_head; |
237 | int err; | 237 | int err; |
238 | 238 | ||
239 | /* | 239 | /* |
@@ -265,11 +265,46 @@ again: | |||
265 | if (err < 0) | 265 | if (err < 0) |
266 | return err; | 266 | return err; |
267 | 267 | ||
268 | page = compound_head(page); | 268 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
269 | lock_page(page); | 269 | page_head = page; |
270 | if (!page->mapping) { | 270 | if (unlikely(PageTail(page))) { |
271 | unlock_page(page); | ||
272 | put_page(page); | 271 | put_page(page); |
272 | /* serialize against __split_huge_page_splitting() */ | ||
273 | local_irq_disable(); | ||
274 | if (likely(__get_user_pages_fast(address, 1, 1, &page) == 1)) { | ||
275 | page_head = compound_head(page); | ||
276 | /* | ||
277 | * page_head is valid pointer but we must pin | ||
278 | * it before taking the PG_lock and/or | ||
279 | * PG_compound_lock. The moment we re-enable | ||
280 | * irqs __split_huge_page_splitting() can | ||
281 | * return and the head page can be freed from | ||
282 | * under us. We can't take the PG_lock and/or | ||
283 | * PG_compound_lock on a page that could be | ||
284 | * freed from under us. | ||
285 | */ | ||
286 | if (page != page_head) { | ||
287 | get_page(page_head); | ||
288 | put_page(page); | ||
289 | } | ||
290 | local_irq_enable(); | ||
291 | } else { | ||
292 | local_irq_enable(); | ||
293 | goto again; | ||
294 | } | ||
295 | } | ||
296 | #else | ||
297 | page_head = compound_head(page); | ||
298 | if (page != page_head) { | ||
299 | get_page(page_head); | ||
300 | put_page(page); | ||
301 | } | ||
302 | #endif | ||
303 | |||
304 | lock_page(page_head); | ||
305 | if (!page_head->mapping) { | ||
306 | unlock_page(page_head); | ||
307 | put_page(page_head); | ||
273 | goto again; | 308 | goto again; |
274 | } | 309 | } |
275 | 310 | ||
@@ -280,20 +315,20 @@ again: | |||
280 | * it's a read-only handle, it's expected that futexes attach to | 315 | * it's a read-only handle, it's expected that futexes attach to |
281 | * the object not the particular process. | 316 | * the object not the particular process. |
282 | */ | 317 | */ |
283 | if (PageAnon(page)) { | 318 | if (PageAnon(page_head)) { |
284 | key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */ | 319 | key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */ |
285 | key->private.mm = mm; | 320 | key->private.mm = mm; |
286 | key->private.address = address; | 321 | key->private.address = address; |
287 | } else { | 322 | } else { |
288 | key->both.offset |= FUT_OFF_INODE; /* inode-based key */ | 323 | key->both.offset |= FUT_OFF_INODE; /* inode-based key */ |
289 | key->shared.inode = page->mapping->host; | 324 | key->shared.inode = page_head->mapping->host; |
290 | key->shared.pgoff = page->index; | 325 | key->shared.pgoff = page_head->index; |
291 | } | 326 | } |
292 | 327 | ||
293 | get_futex_key_refs(key); | 328 | get_futex_key_refs(key); |
294 | 329 | ||
295 | unlock_page(page); | 330 | unlock_page(page_head); |
296 | put_page(page); | 331 | put_page(page_head); |
297 | return 0; | 332 | return 0; |
298 | } | 333 | } |
299 | 334 | ||
@@ -791,10 +826,9 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) | |||
791 | new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); | 826 | new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); |
792 | 827 | ||
793 | /* | 828 | /* |
794 | * This happens when we have stolen the lock and the original | 829 | * It is possible that the next waiter (the one that brought |
795 | * pending owner did not enqueue itself back on the rt_mutex. | 830 | * this owner to the kernel) timed out and is no longer |
796 | * Thats not a tragedy. We know that way, that a lock waiter | 831 | * waiting on the lock. |
797 | * is on the fly. We make the futex_q waiter the pending owner. | ||
798 | */ | 832 | */ |
799 | if (!new_owner) | 833 | if (!new_owner) |
800 | new_owner = this->task; | 834 | new_owner = this->task; |
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index f2429fc3438c..0c8d7c048615 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -497,7 +497,7 @@ static inline int hrtimer_is_hres_enabled(void) | |||
497 | */ | 497 | */ |
498 | static inline int hrtimer_hres_active(void) | 498 | static inline int hrtimer_hres_active(void) |
499 | { | 499 | { |
500 | return __get_cpu_var(hrtimer_bases).hres_active; | 500 | return __this_cpu_read(hrtimer_bases.hres_active); |
501 | } | 501 | } |
502 | 502 | ||
503 | /* | 503 | /* |
@@ -1745,7 +1745,7 @@ schedule_hrtimeout_range_clock(ktime_t *expires, unsigned long delta, | |||
1745 | } | 1745 | } |
1746 | 1746 | ||
1747 | /* | 1747 | /* |
1748 | * A NULL parameter means "inifinte" | 1748 | * A NULL parameter means "infinite" |
1749 | */ | 1749 | */ |
1750 | if (!expires) { | 1750 | if (!expires) { |
1751 | schedule(); | 1751 | schedule(); |
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 9988d03797f5..282f20230e67 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -72,6 +72,8 @@ static inline int desc_node(struct irq_desc *desc) { return 0; } | |||
72 | 72 | ||
73 | static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node) | 73 | static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node) |
74 | { | 74 | { |
75 | int cpu; | ||
76 | |||
75 | desc->irq_data.irq = irq; | 77 | desc->irq_data.irq = irq; |
76 | desc->irq_data.chip = &no_irq_chip; | 78 | desc->irq_data.chip = &no_irq_chip; |
77 | desc->irq_data.chip_data = NULL; | 79 | desc->irq_data.chip_data = NULL; |
@@ -83,7 +85,8 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node) | |||
83 | desc->irq_count = 0; | 85 | desc->irq_count = 0; |
84 | desc->irqs_unhandled = 0; | 86 | desc->irqs_unhandled = 0; |
85 | desc->name = NULL; | 87 | desc->name = NULL; |
86 | memset(desc->kstat_irqs, 0, nr_cpu_ids * sizeof(*(desc->kstat_irqs))); | 88 | for_each_possible_cpu(cpu) |
89 | *per_cpu_ptr(desc->kstat_irqs, cpu) = 0; | ||
87 | desc_smp_init(desc, node); | 90 | desc_smp_init(desc, node); |
88 | } | 91 | } |
89 | 92 | ||
@@ -133,8 +136,7 @@ static struct irq_desc *alloc_desc(int irq, int node) | |||
133 | if (!desc) | 136 | if (!desc) |
134 | return NULL; | 137 | return NULL; |
135 | /* allocate based on nr_cpu_ids */ | 138 | /* allocate based on nr_cpu_ids */ |
136 | desc->kstat_irqs = kzalloc_node(nr_cpu_ids * sizeof(*desc->kstat_irqs), | 139 | desc->kstat_irqs = alloc_percpu(unsigned int); |
137 | gfp, node); | ||
138 | if (!desc->kstat_irqs) | 140 | if (!desc->kstat_irqs) |
139 | goto err_desc; | 141 | goto err_desc; |
140 | 142 | ||
@@ -149,7 +151,7 @@ static struct irq_desc *alloc_desc(int irq, int node) | |||
149 | return desc; | 151 | return desc; |
150 | 152 | ||
151 | err_kstat: | 153 | err_kstat: |
152 | kfree(desc->kstat_irqs); | 154 | free_percpu(desc->kstat_irqs); |
153 | err_desc: | 155 | err_desc: |
154 | kfree(desc); | 156 | kfree(desc); |
155 | return NULL; | 157 | return NULL; |
@@ -166,7 +168,7 @@ static void free_desc(unsigned int irq) | |||
166 | mutex_unlock(&sparse_irq_lock); | 168 | mutex_unlock(&sparse_irq_lock); |
167 | 169 | ||
168 | free_masks(desc); | 170 | free_masks(desc); |
169 | kfree(desc->kstat_irqs); | 171 | free_percpu(desc->kstat_irqs); |
170 | kfree(desc); | 172 | kfree(desc); |
171 | } | 173 | } |
172 | 174 | ||
@@ -234,7 +236,6 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = { | |||
234 | } | 236 | } |
235 | }; | 237 | }; |
236 | 238 | ||
237 | static unsigned int kstat_irqs_all[NR_IRQS][NR_CPUS]; | ||
238 | int __init early_irq_init(void) | 239 | int __init early_irq_init(void) |
239 | { | 240 | { |
240 | int count, i, node = first_online_node; | 241 | int count, i, node = first_online_node; |
@@ -250,7 +251,8 @@ int __init early_irq_init(void) | |||
250 | for (i = 0; i < count; i++) { | 251 | for (i = 0; i < count; i++) { |
251 | desc[i].irq_data.irq = i; | 252 | desc[i].irq_data.irq = i; |
252 | desc[i].irq_data.chip = &no_irq_chip; | 253 | desc[i].irq_data.chip = &no_irq_chip; |
253 | desc[i].kstat_irqs = kstat_irqs_all[i]; | 254 | /* TODO : do this allocation on-demand ... */ |
255 | desc[i].kstat_irqs = alloc_percpu(unsigned int); | ||
254 | alloc_masks(desc + i, GFP_KERNEL, node); | 256 | alloc_masks(desc + i, GFP_KERNEL, node); |
255 | desc_smp_init(desc + i, node); | 257 | desc_smp_init(desc + i, node); |
256 | lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); | 258 | lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); |
@@ -275,6 +277,22 @@ static void free_desc(unsigned int irq) | |||
275 | 277 | ||
276 | static inline int alloc_descs(unsigned int start, unsigned int cnt, int node) | 278 | static inline int alloc_descs(unsigned int start, unsigned int cnt, int node) |
277 | { | 279 | { |
280 | #if defined(CONFIG_KSTAT_IRQS_ONDEMAND) | ||
281 | struct irq_desc *desc; | ||
282 | unsigned int i; | ||
283 | |||
284 | for (i = 0; i < cnt; i++) { | ||
285 | desc = irq_to_desc(start + i); | ||
286 | if (desc && !desc->kstat_irqs) { | ||
287 | unsigned int __percpu *stats = alloc_percpu(unsigned int); | ||
288 | |||
289 | if (!stats) | ||
290 | return -1; | ||
291 | if (cmpxchg(&desc->kstat_irqs, NULL, stats) != NULL) | ||
292 | free_percpu(stats); | ||
293 | } | ||
294 | } | ||
295 | #endif | ||
278 | return start; | 296 | return start; |
279 | } | 297 | } |
280 | #endif /* !CONFIG_SPARSE_IRQ */ | 298 | #endif /* !CONFIG_SPARSE_IRQ */ |
@@ -391,7 +409,9 @@ void dynamic_irq_cleanup(unsigned int irq) | |||
391 | unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) | 409 | unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) |
392 | { | 410 | { |
393 | struct irq_desc *desc = irq_to_desc(irq); | 411 | struct irq_desc *desc = irq_to_desc(irq); |
394 | return desc ? desc->kstat_irqs[cpu] : 0; | 412 | |
413 | return desc && desc->kstat_irqs ? | ||
414 | *per_cpu_ptr(desc->kstat_irqs, cpu) : 0; | ||
395 | } | 415 | } |
396 | 416 | ||
397 | #ifdef CONFIG_GENERIC_HARDIRQS | 417 | #ifdef CONFIG_GENERIC_HARDIRQS |
@@ -401,10 +421,10 @@ unsigned int kstat_irqs(unsigned int irq) | |||
401 | int cpu; | 421 | int cpu; |
402 | int sum = 0; | 422 | int sum = 0; |
403 | 423 | ||
404 | if (!desc) | 424 | if (!desc || !desc->kstat_irqs) |
405 | return 0; | 425 | return 0; |
406 | for_each_possible_cpu(cpu) | 426 | for_each_possible_cpu(cpu) |
407 | sum += desc->kstat_irqs[cpu]; | 427 | sum += *per_cpu_ptr(desc->kstat_irqs, cpu); |
408 | return sum; | 428 | return sum; |
409 | } | 429 | } |
410 | #endif /* CONFIG_GENERIC_HARDIRQS */ | 430 | #endif /* CONFIG_GENERIC_HARDIRQS */ |
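For readers less familiar with the per-CPU API used above: the irqdesc.c hunks replace the static kstat_irqs_all[NR_IRQS][NR_CPUS] array with counters obtained from alloc_percpu(), cleared with for_each_possible_cpu()/per_cpu_ptr() and summed the same way in kstat_irqs(). Below is a hedged, self-contained sketch of that allocate/update/sum/free cycle; demo_counter and the demo_* helpers are invented names, not part of the patch.

#include <linux/errno.h>
#include <linux/percpu.h>

static unsigned int __percpu *demo_counter;

static int demo_counter_init(void)
{
        demo_counter = alloc_percpu(unsigned int);      /* one counter per possible CPU */
        return demo_counter ? 0 : -ENOMEM;
}

static void demo_counter_hit(void)
{
        this_cpu_inc(*demo_counter);                    /* cheap, preemption-safe local update */
}

static unsigned int demo_counter_sum(void)
{
        unsigned int sum = 0;
        int cpu;

        for_each_possible_cpu(cpu)                      /* same loop shape as kstat_irqs() above */
                sum += *per_cpu_ptr(demo_counter, cpu);
        return sum;
}

static void demo_counter_exit(void)
{
        free_percpu(demo_counter);
}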
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 91a5fa25054e..0caa59f747dd 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -577,7 +577,7 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { } | |||
577 | */ | 577 | */ |
578 | static int irq_thread(void *data) | 578 | static int irq_thread(void *data) |
579 | { | 579 | { |
580 | static struct sched_param param = { | 580 | static const struct sched_param param = { |
581 | .sched_priority = MAX_USER_RT_PRIO/2, | 581 | .sched_priority = MAX_USER_RT_PRIO/2, |
582 | }; | 582 | }; |
583 | struct irqaction *action = data; | 583 | struct irqaction *action = data; |
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index 90f881904bb1..c58fa7da8aef 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -77,21 +77,21 @@ void __weak arch_irq_work_raise(void) | |||
77 | */ | 77 | */ |
78 | static void __irq_work_queue(struct irq_work *entry) | 78 | static void __irq_work_queue(struct irq_work *entry) |
79 | { | 79 | { |
80 | struct irq_work **head, *next; | 80 | struct irq_work *next; |
81 | 81 | ||
82 | head = &get_cpu_var(irq_work_list); | 82 | preempt_disable(); |
83 | 83 | ||
84 | do { | 84 | do { |
85 | next = *head; | 85 | next = __this_cpu_read(irq_work_list); |
86 | /* Can assign non-atomic because we keep the flags set. */ | 86 | /* Can assign non-atomic because we keep the flags set. */ |
87 | entry->next = next_flags(next, IRQ_WORK_FLAGS); | 87 | entry->next = next_flags(next, IRQ_WORK_FLAGS); |
88 | } while (cmpxchg(head, next, entry) != next); | 88 | } while (this_cpu_cmpxchg(irq_work_list, next, entry) != next); |
89 | 89 | ||
90 | /* The list was empty, raise self-interrupt to start processing. */ | 90 | /* The list was empty, raise self-interrupt to start processing. */ |
91 | if (!irq_work_next(entry)) | 91 | if (!irq_work_next(entry)) |
92 | arch_irq_work_raise(); | 92 | arch_irq_work_raise(); |
93 | 93 | ||
94 | put_cpu_var(irq_work_list); | 94 | preempt_enable(); |
95 | } | 95 | } |
96 | 96 | ||
97 | /* | 97 | /* |
@@ -120,16 +120,16 @@ EXPORT_SYMBOL_GPL(irq_work_queue); | |||
120 | */ | 120 | */ |
121 | void irq_work_run(void) | 121 | void irq_work_run(void) |
122 | { | 122 | { |
123 | struct irq_work *list, **head; | 123 | struct irq_work *list; |
124 | 124 | ||
125 | head = &__get_cpu_var(irq_work_list); | 125 | if (this_cpu_read(irq_work_list) == NULL) |
126 | if (*head == NULL) | ||
127 | return; | 126 | return; |
128 | 127 | ||
129 | BUG_ON(!in_irq()); | 128 | BUG_ON(!in_irq()); |
130 | BUG_ON(!irqs_disabled()); | 129 | BUG_ON(!irqs_disabled()); |
131 | 130 | ||
132 | list = xchg(head, NULL); | 131 | list = this_cpu_xchg(irq_work_list, NULL); |
132 | |||
133 | while (list != NULL) { | 133 | while (list != NULL) { |
134 | struct irq_work *entry = list; | 134 | struct irq_work *entry = list; |
135 | 135 | ||
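The irq_work.c hunks above convert the open-coded get_cpu_var()/cmpxchg() sequence to __this_cpu_read()/this_cpu_cmpxchg()/this_cpu_xchg(), keeping the lockless per-CPU list while letting the per-CPU address be resolved inside the primitives. A simplified sketch of the same push/drain pattern, assuming a hypothetical demo_node type and omitting the flag bits the real irq_work code packs into the next pointer:

#include <linux/percpu.h>
#include <linux/preempt.h>

struct demo_node {
        struct demo_node *next;
};

static DEFINE_PER_CPU(struct demo_node *, demo_list);

static void demo_push(struct demo_node *node)
{
        struct demo_node *head;

        preempt_disable();                      /* stay on one CPU across read + cmpxchg */
        do {
                head = __this_cpu_read(demo_list);
                node->next = head;
        } while (this_cpu_cmpxchg(demo_list, head, node) != head);
        preempt_enable();
}

static void demo_drain(void)                    /* run with IRQs disabled, like irq_work_run() */
{
        struct demo_node *node = this_cpu_xchg(demo_list, NULL);

        while (node) {
                struct demo_node *next = node->next;
                /* process node here */
                node = next;
        }
}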
diff --git a/kernel/kexec.c b/kernel/kexec.c
index b55045bc7563..ec19b92c7ebd 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -163,7 +163,7 @@ static int do_kimage_alloc(struct kimage **rimage, unsigned long entry, | |||
163 | * just verifies it is an address we can use. | 163 | * just verifies it is an address we can use. |
164 | * | 164 | * |
165 | * Since the kernel does everything in page size chunks ensure | 165 | * Since the kernel does everything in page size chunks ensure |
166 | * the destination addreses are page aligned. Too many | 166 | * the destination addresses are page aligned. Too many |
167 | * special cases crop of when we don't do this. The most | 167 | * special cases crop of when we don't do this. The most |
168 | * insidious is getting overlapping destination addresses | 168 | * insidious is getting overlapping destination addresses |
169 | * simply because addresses are changed to page size | 169 | * simply because addresses are changed to page size |
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 7663e5df0e6f..77981813a1e7 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -317,12 +317,12 @@ void __kprobes free_optinsn_slot(kprobe_opcode_t * slot, int dirty) | |||
317 | /* We have preemption disabled.. so it is safe to use __ versions */ | 317 | /* We have preemption disabled.. so it is safe to use __ versions */ |
318 | static inline void set_kprobe_instance(struct kprobe *kp) | 318 | static inline void set_kprobe_instance(struct kprobe *kp) |
319 | { | 319 | { |
320 | __get_cpu_var(kprobe_instance) = kp; | 320 | __this_cpu_write(kprobe_instance, kp); |
321 | } | 321 | } |
322 | 322 | ||
323 | static inline void reset_kprobe_instance(void) | 323 | static inline void reset_kprobe_instance(void) |
324 | { | 324 | { |
325 | __get_cpu_var(kprobe_instance) = NULL; | 325 | __this_cpu_write(kprobe_instance, NULL); |
326 | } | 326 | } |
327 | 327 | ||
328 | /* | 328 | /* |
@@ -965,7 +965,7 @@ static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs, | |||
965 | static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, | 965 | static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, |
966 | int trapnr) | 966 | int trapnr) |
967 | { | 967 | { |
968 | struct kprobe *cur = __get_cpu_var(kprobe_instance); | 968 | struct kprobe *cur = __this_cpu_read(kprobe_instance); |
969 | 969 | ||
970 | /* | 970 | /* |
971 | * if we faulted "during" the execution of a user specified | 971 | * if we faulted "during" the execution of a user specified |
@@ -980,7 +980,7 @@ static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, | |||
980 | 980 | ||
981 | static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs) | 981 | static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs) |
982 | { | 982 | { |
983 | struct kprobe *cur = __get_cpu_var(kprobe_instance); | 983 | struct kprobe *cur = __this_cpu_read(kprobe_instance); |
984 | int ret = 0; | 984 | int ret = 0; |
985 | 985 | ||
986 | if (cur && cur->break_handler) { | 986 | if (cur && cur->break_handler) { |
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 5355cfd44a3f..c55afba990a3 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -148,7 +148,7 @@ struct task_struct *kthread_create(int (*threadfn)(void *data), | |||
148 | wait_for_completion(&create.done); | 148 | wait_for_completion(&create.done); |
149 | 149 | ||
150 | if (!IS_ERR(create.result)) { | 150 | if (!IS_ERR(create.result)) { |
151 | static struct sched_param param = { .sched_priority = 0 }; | 151 | static const struct sched_param param = { .sched_priority = 0 }; |
152 | va_list args; | 152 | va_list args; |
153 | 153 | ||
154 | va_start(args, namefmt); | 154 | va_start(args, namefmt); |
diff --git a/kernel/latencytop.c b/kernel/latencytop.c
index 17110a4a4fc2..ee74b35e528d 100644
--- a/kernel/latencytop.c
+++ b/kernel/latencytop.c
@@ -241,24 +241,19 @@ static int lstats_show(struct seq_file *m, void *v) | |||
241 | seq_puts(m, "Latency Top version : v0.1\n"); | 241 | seq_puts(m, "Latency Top version : v0.1\n"); |
242 | 242 | ||
243 | for (i = 0; i < MAXLR; i++) { | 243 | for (i = 0; i < MAXLR; i++) { |
244 | if (latency_record[i].backtrace[0]) { | 244 | struct latency_record *lr = &latency_record[i]; |
245 | |||
246 | if (lr->backtrace[0]) { | ||
245 | int q; | 247 | int q; |
246 | seq_printf(m, "%i %lu %lu ", | 248 | seq_printf(m, "%i %lu %lu", |
247 | latency_record[i].count, | 249 | lr->count, lr->time, lr->max); |
248 | latency_record[i].time, | ||
249 | latency_record[i].max); | ||
250 | for (q = 0; q < LT_BACKTRACEDEPTH; q++) { | 250 | for (q = 0; q < LT_BACKTRACEDEPTH; q++) { |
251 | char sym[KSYM_SYMBOL_LEN]; | 251 | unsigned long bt = lr->backtrace[q]; |
252 | char *c; | 252 | if (!bt) |
253 | if (!latency_record[i].backtrace[q]) | ||
254 | break; | 253 | break; |
255 | if (latency_record[i].backtrace[q] == ULONG_MAX) | 254 | if (bt == ULONG_MAX) |
256 | break; | 255 | break; |
257 | sprint_symbol(sym, latency_record[i].backtrace[q]); | 256 | seq_printf(m, " %ps", (void *)bt); |
258 | c = strchr(sym, '+'); | ||
259 | if (c) | ||
260 | *c = 0; | ||
261 | seq_printf(m, "%s ", sym); | ||
262 | } | 257 | } |
263 | seq_printf(m, "\n"); | 258 | seq_printf(m, "\n"); |
264 | } | 259 | } |
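The latencytop.c hunk above replaces the manual sprint_symbol() plus '+'-truncation with the %ps format specifier, which prints the symbol name for a text address without the offset. A small before/after sketch of just that conversion; print_sym_old/print_sym_new are invented wrapper names used only for illustration.

#include <linux/kallsyms.h>
#include <linux/seq_file.h>
#include <linux/string.h>

/* old style: resolve the symbol by hand and chop the "+0x..." offset */
static void print_sym_old(struct seq_file *m, unsigned long addr)
{
        char sym[KSYM_SYMBOL_LEN];
        char *c;

        sprint_symbol(sym, addr);
        c = strchr(sym, '+');
        if (c)
                *c = 0;
        seq_printf(m, "%s ", sym);
}

/* new style: let the vsprintf extension do the lookup and drop the offset */
static void print_sym_new(struct seq_file *m, unsigned long addr)
{
        seq_printf(m, " %ps", (void *)addr);
}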
diff --git a/kernel/panic.c b/kernel/panic.c
index 4c13b1a88ebb..991bb87a1704 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -34,6 +34,7 @@ static int pause_on_oops_flag; | |||
34 | static DEFINE_SPINLOCK(pause_on_oops_lock); | 34 | static DEFINE_SPINLOCK(pause_on_oops_lock); |
35 | 35 | ||
36 | int panic_timeout; | 36 | int panic_timeout; |
37 | EXPORT_SYMBOL_GPL(panic_timeout); | ||
37 | 38 | ||
38 | ATOMIC_NOTIFIER_HEAD(panic_notifier_list); | 39 | ATOMIC_NOTIFIER_HEAD(panic_notifier_list); |
39 | 40 | ||
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 11847bf1e8cc..05ebe841270b 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -38,6 +38,12 @@ | |||
38 | 38 | ||
39 | #include <asm/irq_regs.h> | 39 | #include <asm/irq_regs.h> |
40 | 40 | ||
41 | enum event_type_t { | ||
42 | EVENT_FLEXIBLE = 0x1, | ||
43 | EVENT_PINNED = 0x2, | ||
44 | EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED, | ||
45 | }; | ||
46 | |||
41 | atomic_t perf_task_events __read_mostly; | 47 | atomic_t perf_task_events __read_mostly; |
42 | static atomic_t nr_mmap_events __read_mostly; | 48 | static atomic_t nr_mmap_events __read_mostly; |
43 | static atomic_t nr_comm_events __read_mostly; | 49 | static atomic_t nr_comm_events __read_mostly; |
@@ -65,6 +71,12 @@ int sysctl_perf_event_sample_rate __read_mostly = 100000; | |||
65 | 71 | ||
66 | static atomic64_t perf_event_id; | 72 | static atomic64_t perf_event_id; |
67 | 73 | ||
74 | static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx, | ||
75 | enum event_type_t event_type); | ||
76 | |||
77 | static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, | ||
78 | enum event_type_t event_type); | ||
79 | |||
68 | void __weak perf_event_print_debug(void) { } | 80 | void __weak perf_event_print_debug(void) { } |
69 | 81 | ||
70 | extern __weak const char *perf_pmu_name(void) | 82 | extern __weak const char *perf_pmu_name(void) |
@@ -72,6 +84,11 @@ extern __weak const char *perf_pmu_name(void) | |||
72 | return "pmu"; | 84 | return "pmu"; |
73 | } | 85 | } |
74 | 86 | ||
87 | static inline u64 perf_clock(void) | ||
88 | { | ||
89 | return local_clock(); | ||
90 | } | ||
91 | |||
75 | void perf_pmu_disable(struct pmu *pmu) | 92 | void perf_pmu_disable(struct pmu *pmu) |
76 | { | 93 | { |
77 | int *count = this_cpu_ptr(pmu->pmu_disable_count); | 94 | int *count = this_cpu_ptr(pmu->pmu_disable_count); |
@@ -240,11 +257,6 @@ static void perf_unpin_context(struct perf_event_context *ctx) | |||
240 | put_ctx(ctx); | 257 | put_ctx(ctx); |
241 | } | 258 | } |
242 | 259 | ||
243 | static inline u64 perf_clock(void) | ||
244 | { | ||
245 | return local_clock(); | ||
246 | } | ||
247 | |||
248 | /* | 260 | /* |
249 | * Update the record of the current time in a context. | 261 | * Update the record of the current time in a context. |
250 | */ | 262 | */ |
@@ -256,6 +268,12 @@ static void update_context_time(struct perf_event_context *ctx) | |||
256 | ctx->timestamp = now; | 268 | ctx->timestamp = now; |
257 | } | 269 | } |
258 | 270 | ||
271 | static u64 perf_event_time(struct perf_event *event) | ||
272 | { | ||
273 | struct perf_event_context *ctx = event->ctx; | ||
274 | return ctx ? ctx->time : 0; | ||
275 | } | ||
276 | |||
259 | /* | 277 | /* |
260 | * Update the total_time_enabled and total_time_running fields for a event. | 278 | * Update the total_time_enabled and total_time_running fields for a event. |
261 | */ | 279 | */ |
@@ -269,7 +287,7 @@ static void update_event_times(struct perf_event *event) | |||
269 | return; | 287 | return; |
270 | 288 | ||
271 | if (ctx->is_active) | 289 | if (ctx->is_active) |
272 | run_end = ctx->time; | 290 | run_end = perf_event_time(event); |
273 | else | 291 | else |
274 | run_end = event->tstamp_stopped; | 292 | run_end = event->tstamp_stopped; |
275 | 293 | ||
@@ -278,7 +296,7 @@ static void update_event_times(struct perf_event *event) | |||
278 | if (event->state == PERF_EVENT_STATE_INACTIVE) | 296 | if (event->state == PERF_EVENT_STATE_INACTIVE) |
279 | run_end = event->tstamp_stopped; | 297 | run_end = event->tstamp_stopped; |
280 | else | 298 | else |
281 | run_end = ctx->time; | 299 | run_end = perf_event_time(event); |
282 | 300 | ||
283 | event->total_time_running = run_end - event->tstamp_running; | 301 | event->total_time_running = run_end - event->tstamp_running; |
284 | } | 302 | } |
@@ -534,6 +552,7 @@ event_sched_out(struct perf_event *event, | |||
534 | struct perf_cpu_context *cpuctx, | 552 | struct perf_cpu_context *cpuctx, |
535 | struct perf_event_context *ctx) | 553 | struct perf_event_context *ctx) |
536 | { | 554 | { |
555 | u64 tstamp = perf_event_time(event); | ||
537 | u64 delta; | 556 | u64 delta; |
538 | /* | 557 | /* |
539 | * An event which could not be activated because of | 558 | * An event which could not be activated because of |
@@ -545,7 +564,7 @@ event_sched_out(struct perf_event *event, | |||
545 | && !event_filter_match(event)) { | 564 | && !event_filter_match(event)) { |
546 | delta = ctx->time - event->tstamp_stopped; | 565 | delta = ctx->time - event->tstamp_stopped; |
547 | event->tstamp_running += delta; | 566 | event->tstamp_running += delta; |
548 | event->tstamp_stopped = ctx->time; | 567 | event->tstamp_stopped = tstamp; |
549 | } | 568 | } |
550 | 569 | ||
551 | if (event->state != PERF_EVENT_STATE_ACTIVE) | 570 | if (event->state != PERF_EVENT_STATE_ACTIVE) |
@@ -556,7 +575,7 @@ event_sched_out(struct perf_event *event, | |||
556 | event->pending_disable = 0; | 575 | event->pending_disable = 0; |
557 | event->state = PERF_EVENT_STATE_OFF; | 576 | event->state = PERF_EVENT_STATE_OFF; |
558 | } | 577 | } |
559 | event->tstamp_stopped = ctx->time; | 578 | event->tstamp_stopped = tstamp; |
560 | event->pmu->del(event, 0); | 579 | event->pmu->del(event, 0); |
561 | event->oncpu = -1; | 580 | event->oncpu = -1; |
562 | 581 | ||
@@ -768,6 +787,8 @@ event_sched_in(struct perf_event *event, | |||
768 | struct perf_cpu_context *cpuctx, | 787 | struct perf_cpu_context *cpuctx, |
769 | struct perf_event_context *ctx) | 788 | struct perf_event_context *ctx) |
770 | { | 789 | { |
790 | u64 tstamp = perf_event_time(event); | ||
791 | |||
771 | if (event->state <= PERF_EVENT_STATE_OFF) | 792 | if (event->state <= PERF_EVENT_STATE_OFF) |
772 | return 0; | 793 | return 0; |
773 | 794 | ||
@@ -784,9 +805,9 @@ event_sched_in(struct perf_event *event, | |||
784 | return -EAGAIN; | 805 | return -EAGAIN; |
785 | } | 806 | } |
786 | 807 | ||
787 | event->tstamp_running += ctx->time - event->tstamp_stopped; | 808 | event->tstamp_running += tstamp - event->tstamp_stopped; |
788 | 809 | ||
789 | event->shadow_ctx_time = ctx->time - ctx->timestamp; | 810 | event->shadow_ctx_time = tstamp - ctx->timestamp; |
790 | 811 | ||
791 | if (!is_software_event(event)) | 812 | if (!is_software_event(event)) |
792 | cpuctx->active_oncpu++; | 813 | cpuctx->active_oncpu++; |
@@ -898,11 +919,13 @@ static int group_can_go_on(struct perf_event *event, | |||
898 | static void add_event_to_ctx(struct perf_event *event, | 919 | static void add_event_to_ctx(struct perf_event *event, |
899 | struct perf_event_context *ctx) | 920 | struct perf_event_context *ctx) |
900 | { | 921 | { |
922 | u64 tstamp = perf_event_time(event); | ||
923 | |||
901 | list_add_event(event, ctx); | 924 | list_add_event(event, ctx); |
902 | perf_group_attach(event); | 925 | perf_group_attach(event); |
903 | event->tstamp_enabled = ctx->time; | 926 | event->tstamp_enabled = tstamp; |
904 | event->tstamp_running = ctx->time; | 927 | event->tstamp_running = tstamp; |
905 | event->tstamp_stopped = ctx->time; | 928 | event->tstamp_stopped = tstamp; |
906 | } | 929 | } |
907 | 930 | ||
908 | /* | 931 | /* |
@@ -937,7 +960,7 @@ static void __perf_install_in_context(void *info) | |||
937 | 960 | ||
938 | add_event_to_ctx(event, ctx); | 961 | add_event_to_ctx(event, ctx); |
939 | 962 | ||
940 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | 963 | if (!event_filter_match(event)) |
941 | goto unlock; | 964 | goto unlock; |
942 | 965 | ||
943 | /* | 966 | /* |
@@ -1042,14 +1065,13 @@ static void __perf_event_mark_enabled(struct perf_event *event, | |||
1042 | struct perf_event_context *ctx) | 1065 | struct perf_event_context *ctx) |
1043 | { | 1066 | { |
1044 | struct perf_event *sub; | 1067 | struct perf_event *sub; |
1068 | u64 tstamp = perf_event_time(event); | ||
1045 | 1069 | ||
1046 | event->state = PERF_EVENT_STATE_INACTIVE; | 1070 | event->state = PERF_EVENT_STATE_INACTIVE; |
1047 | event->tstamp_enabled = ctx->time - event->total_time_enabled; | 1071 | event->tstamp_enabled = tstamp - event->total_time_enabled; |
1048 | list_for_each_entry(sub, &event->sibling_list, group_entry) { | 1072 | list_for_each_entry(sub, &event->sibling_list, group_entry) { |
1049 | if (sub->state >= PERF_EVENT_STATE_INACTIVE) { | 1073 | if (sub->state >= PERF_EVENT_STATE_INACTIVE) |
1050 | sub->tstamp_enabled = | 1074 | sub->tstamp_enabled = tstamp - sub->total_time_enabled; |
1051 | ctx->time - sub->total_time_enabled; | ||
1052 | } | ||
1053 | } | 1075 | } |
1054 | } | 1076 | } |
1055 | 1077 | ||
@@ -1082,7 +1104,7 @@ static void __perf_event_enable(void *info) | |||
1082 | goto unlock; | 1104 | goto unlock; |
1083 | __perf_event_mark_enabled(event, ctx); | 1105 | __perf_event_mark_enabled(event, ctx); |
1084 | 1106 | ||
1085 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | 1107 | if (!event_filter_match(event)) |
1086 | goto unlock; | 1108 | goto unlock; |
1087 | 1109 | ||
1088 | /* | 1110 | /* |
@@ -1193,12 +1215,6 @@ static int perf_event_refresh(struct perf_event *event, int refresh) | |||
1193 | return 0; | 1215 | return 0; |
1194 | } | 1216 | } |
1195 | 1217 | ||
1196 | enum event_type_t { | ||
1197 | EVENT_FLEXIBLE = 0x1, | ||
1198 | EVENT_PINNED = 0x2, | ||
1199 | EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED, | ||
1200 | }; | ||
1201 | |||
1202 | static void ctx_sched_out(struct perf_event_context *ctx, | 1218 | static void ctx_sched_out(struct perf_event_context *ctx, |
1203 | struct perf_cpu_context *cpuctx, | 1219 | struct perf_cpu_context *cpuctx, |
1204 | enum event_type_t event_type) | 1220 | enum event_type_t event_type) |
@@ -1435,7 +1451,7 @@ ctx_pinned_sched_in(struct perf_event_context *ctx, | |||
1435 | list_for_each_entry(event, &ctx->pinned_groups, group_entry) { | 1451 | list_for_each_entry(event, &ctx->pinned_groups, group_entry) { |
1436 | if (event->state <= PERF_EVENT_STATE_OFF) | 1452 | if (event->state <= PERF_EVENT_STATE_OFF) |
1437 | continue; | 1453 | continue; |
1438 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | 1454 | if (!event_filter_match(event)) |
1439 | continue; | 1455 | continue; |
1440 | 1456 | ||
1441 | if (group_can_go_on(event, cpuctx, 1)) | 1457 | if (group_can_go_on(event, cpuctx, 1)) |
@@ -1467,7 +1483,7 @@ ctx_flexible_sched_in(struct perf_event_context *ctx, | |||
1467 | * Listen to the 'cpu' scheduling filter constraint | 1483 | * Listen to the 'cpu' scheduling filter constraint |
1468 | * of events: | 1484 | * of events: |
1469 | */ | 1485 | */ |
1470 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | 1486 | if (!event_filter_match(event)) |
1471 | continue; | 1487 | continue; |
1472 | 1488 | ||
1473 | if (group_can_go_on(event, cpuctx, can_add_hw)) { | 1489 | if (group_can_go_on(event, cpuctx, can_add_hw)) { |
@@ -1694,7 +1710,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period) | |||
1694 | if (event->state != PERF_EVENT_STATE_ACTIVE) | 1710 | if (event->state != PERF_EVENT_STATE_ACTIVE) |
1695 | continue; | 1711 | continue; |
1696 | 1712 | ||
1697 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | 1713 | if (!event_filter_match(event)) |
1698 | continue; | 1714 | continue; |
1699 | 1715 | ||
1700 | hwc = &event->hw; | 1716 | hwc = &event->hw; |
@@ -3893,7 +3909,7 @@ static int perf_event_task_match(struct perf_event *event) | |||
3893 | if (event->state < PERF_EVENT_STATE_INACTIVE) | 3909 | if (event->state < PERF_EVENT_STATE_INACTIVE) |
3894 | return 0; | 3910 | return 0; |
3895 | 3911 | ||
3896 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | 3912 | if (!event_filter_match(event)) |
3897 | return 0; | 3913 | return 0; |
3898 | 3914 | ||
3899 | if (event->attr.comm || event->attr.mmap || | 3915 | if (event->attr.comm || event->attr.mmap || |
@@ -4030,7 +4046,7 @@ static int perf_event_comm_match(struct perf_event *event) | |||
4030 | if (event->state < PERF_EVENT_STATE_INACTIVE) | 4046 | if (event->state < PERF_EVENT_STATE_INACTIVE) |
4031 | return 0; | 4047 | return 0; |
4032 | 4048 | ||
4033 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | 4049 | if (!event_filter_match(event)) |
4034 | return 0; | 4050 | return 0; |
4035 | 4051 | ||
4036 | if (event->attr.comm) | 4052 | if (event->attr.comm) |
@@ -4178,7 +4194,7 @@ static int perf_event_mmap_match(struct perf_event *event, | |||
4178 | if (event->state < PERF_EVENT_STATE_INACTIVE) | 4194 | if (event->state < PERF_EVENT_STATE_INACTIVE) |
4179 | return 0; | 4195 | return 0; |
4180 | 4196 | ||
4181 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | 4197 | if (!event_filter_match(event)) |
4182 | return 0; | 4198 | return 0; |
4183 | 4199 | ||
4184 | if ((!executable && event->attr.mmap_data) || | 4200 | if ((!executable && event->attr.mmap_data) || |
@@ -4648,7 +4664,7 @@ int perf_swevent_get_recursion_context(void) | |||
4648 | } | 4664 | } |
4649 | EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); | 4665 | EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); |
4650 | 4666 | ||
4651 | void inline perf_swevent_put_recursion_context(int rctx) | 4667 | inline void perf_swevent_put_recursion_context(int rctx) |
4652 | { | 4668 | { |
4653 | struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); | 4669 | struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); |
4654 | 4670 | ||
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index a5aff3ebad38..265729966ece 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -100,13 +100,9 @@ config PM_SLEEP_ADVANCED_DEBUG | |||
100 | depends on PM_ADVANCED_DEBUG | 100 | depends on PM_ADVANCED_DEBUG |
101 | default n | 101 | default n |
102 | 102 | ||
103 | config SUSPEND_NVS | ||
104 | bool | ||
105 | |||
106 | config SUSPEND | 103 | config SUSPEND |
107 | bool "Suspend to RAM and standby" | 104 | bool "Suspend to RAM and standby" |
108 | depends on PM && ARCH_SUSPEND_POSSIBLE | 105 | depends on PM && ARCH_SUSPEND_POSSIBLE |
109 | select SUSPEND_NVS if HAS_IOMEM | ||
110 | default y | 106 | default y |
111 | ---help--- | 107 | ---help--- |
112 | Allow the system to enter sleep states in which main memory is | 108 | Allow the system to enter sleep states in which main memory is |
@@ -140,7 +136,6 @@ config HIBERNATION | |||
140 | depends on PM && SWAP && ARCH_HIBERNATION_POSSIBLE | 136 | depends on PM && SWAP && ARCH_HIBERNATION_POSSIBLE |
141 | select LZO_COMPRESS | 137 | select LZO_COMPRESS |
142 | select LZO_DECOMPRESS | 138 | select LZO_DECOMPRESS |
143 | select SUSPEND_NVS if HAS_IOMEM | ||
144 | ---help--- | 139 | ---help--- |
145 | Enable the suspend to disk (STD) functionality, which is usually | 140 | Enable the suspend to disk (STD) functionality, which is usually |
146 | called "hibernation" in user interfaces. STD checkpoints the | 141 | called "hibernation" in user interfaces. STD checkpoints the |
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index f9063c6b185d..c350e18b53e3 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -1,7 +1,4 @@ | |||
1 | 1 | ccflags-$(CONFIG_PM_DEBUG) := -DDEBUG | |
2 | ifeq ($(CONFIG_PM_DEBUG),y) | ||
3 | EXTRA_CFLAGS += -DDEBUG | ||
4 | endif | ||
5 | 2 | ||
6 | obj-$(CONFIG_PM) += main.o | 3 | obj-$(CONFIG_PM) += main.o |
7 | obj-$(CONFIG_PM_SLEEP) += console.o | 4 | obj-$(CONFIG_PM_SLEEP) += console.o |
@@ -10,6 +7,5 @@ obj-$(CONFIG_SUSPEND) += suspend.o | |||
10 | obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o | 7 | obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o |
11 | obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \ | 8 | obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \ |
12 | block_io.o | 9 | block_io.o |
13 | obj-$(CONFIG_SUSPEND_NVS) += nvs.o | ||
14 | 10 | ||
15 | obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o | 11 | obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o |
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 048d0b514831..1832bd264219 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c | |||
@@ -51,18 +51,18 @@ enum { | |||
51 | 51 | ||
52 | static int hibernation_mode = HIBERNATION_SHUTDOWN; | 52 | static int hibernation_mode = HIBERNATION_SHUTDOWN; |
53 | 53 | ||
54 | static struct platform_hibernation_ops *hibernation_ops; | 54 | static const struct platform_hibernation_ops *hibernation_ops; |
55 | 55 | ||
56 | /** | 56 | /** |
57 | * hibernation_set_ops - set the global hibernate operations | 57 | * hibernation_set_ops - set the global hibernate operations |
58 | * @ops: the hibernation operations to use in subsequent hibernation transitions | 58 | * @ops: the hibernation operations to use in subsequent hibernation transitions |
59 | */ | 59 | */ |
60 | 60 | ||
61 | void hibernation_set_ops(struct platform_hibernation_ops *ops) | 61 | void hibernation_set_ops(const struct platform_hibernation_ops *ops) |
62 | { | 62 | { |
63 | if (ops && !(ops->begin && ops->end && ops->pre_snapshot | 63 | if (ops && !(ops->begin && ops->end && ops->pre_snapshot |
64 | && ops->prepare && ops->finish && ops->enter && ops->pre_restore | 64 | && ops->prepare && ops->finish && ops->enter && ops->pre_restore |
65 | && ops->restore_cleanup)) { | 65 | && ops->restore_cleanup && ops->leave)) { |
66 | WARN_ON(1); | 66 | WARN_ON(1); |
67 | return; | 67 | return; |
68 | } | 68 | } |
@@ -278,7 +278,7 @@ static int create_image(int platform_mode) | |||
278 | goto Enable_irqs; | 278 | goto Enable_irqs; |
279 | } | 279 | } |
280 | 280 | ||
281 | if (hibernation_test(TEST_CORE) || !pm_check_wakeup_events()) | 281 | if (hibernation_test(TEST_CORE) || pm_wakeup_pending()) |
282 | goto Power_up; | 282 | goto Power_up; |
283 | 283 | ||
284 | in_suspend = 1; | 284 | in_suspend = 1; |
@@ -516,7 +516,7 @@ int hibernation_platform_enter(void) | |||
516 | 516 | ||
517 | local_irq_disable(); | 517 | local_irq_disable(); |
518 | sysdev_suspend(PMSG_HIBERNATE); | 518 | sysdev_suspend(PMSG_HIBERNATE); |
519 | if (!pm_check_wakeup_events()) { | 519 | if (pm_wakeup_pending()) { |
520 | error = -EAGAIN; | 520 | error = -EAGAIN; |
521 | goto Power_up; | 521 | goto Power_up; |
522 | } | 522 | } |
@@ -647,6 +647,7 @@ int hibernate(void) | |||
647 | swsusp_free(); | 647 | swsusp_free(); |
648 | if (!error) | 648 | if (!error) |
649 | power_down(); | 649 | power_down(); |
650 | in_suspend = 0; | ||
650 | pm_restore_gfp_mask(); | 651 | pm_restore_gfp_mask(); |
651 | } else { | 652 | } else { |
652 | pr_debug("PM: Image restored successfully.\n"); | 653 | pr_debug("PM: Image restored successfully.\n"); |
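hibernation_set_ops() now takes a const ops table, and its sanity check also insists on a ->leave callback. A minimal registration sketch under those rules; the my_* callbacks are placeholder stubs, not code from this patch:

#include <linux/suspend.h>

static int  my_begin(void)           { return 0; }
static void my_end(void)             { }
static int  my_pre_snapshot(void)    { return 0; }
static void my_finish(void)          { }
static int  my_prepare(void)         { return 0; }
static int  my_enter(void)           { return 0; }
static void my_leave(void)           { }
static int  my_pre_restore(void)     { return 0; }
static void my_restore_cleanup(void) { }

static const struct platform_hibernation_ops my_hibernation_ops = {
	.begin           = my_begin,
	.end             = my_end,
	.pre_snapshot    = my_pre_snapshot,
	.finish          = my_finish,
	.prepare         = my_prepare,
	.enter           = my_enter,
	.leave           = my_leave,           /* now required by the check */
	.pre_restore     = my_pre_restore,
	.restore_cleanup = my_restore_cleanup,
};

static int __init my_pm_init(void)
{
	hibernation_set_ops(&my_hibernation_ops);
	return 0;
}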
diff --git a/kernel/power/nvs.c b/kernel/power/nvs.c deleted file mode 100644 index 1836db60bbb6..000000000000 --- a/kernel/power/nvs.c +++ /dev/null | |||
@@ -1,136 +0,0 @@ | |||
1 | /* | ||
2 | * linux/kernel/power/hibernate_nvs.c - Routines for handling NVS memory | ||
3 | * | ||
4 | * Copyright (C) 2008,2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc. | ||
5 | * | ||
6 | * This file is released under the GPLv2. | ||
7 | */ | ||
8 | |||
9 | #include <linux/io.h> | ||
10 | #include <linux/kernel.h> | ||
11 | #include <linux/list.h> | ||
12 | #include <linux/mm.h> | ||
13 | #include <linux/slab.h> | ||
14 | #include <linux/suspend.h> | ||
15 | |||
16 | /* | ||
17 | * Platforms, like ACPI, may want us to save some memory used by them during | ||
18 | * suspend and to restore the contents of this memory during the subsequent | ||
19 | * resume. The code below implements a mechanism allowing us to do that. | ||
20 | */ | ||
21 | |||
22 | struct nvs_page { | ||
23 | unsigned long phys_start; | ||
24 | unsigned int size; | ||
25 | void *kaddr; | ||
26 | void *data; | ||
27 | struct list_head node; | ||
28 | }; | ||
29 | |||
30 | static LIST_HEAD(nvs_list); | ||
31 | |||
32 | /** | ||
33 | * suspend_nvs_register - register platform NVS memory region to save | ||
34 | * @start - physical address of the region | ||
35 | * @size - size of the region | ||
36 | * | ||
37 | * The NVS region need not be page-aligned (both ends) and we arrange | ||
38 | * things so that the data from page-aligned addresses in this region will | ||
39 | * be copied into separate RAM pages. | ||
40 | */ | ||
41 | int suspend_nvs_register(unsigned long start, unsigned long size) | ||
42 | { | ||
43 | struct nvs_page *entry, *next; | ||
44 | |||
45 | while (size > 0) { | ||
46 | unsigned int nr_bytes; | ||
47 | |||
48 | entry = kzalloc(sizeof(struct nvs_page), GFP_KERNEL); | ||
49 | if (!entry) | ||
50 | goto Error; | ||
51 | |||
52 | list_add_tail(&entry->node, &nvs_list); | ||
53 | entry->phys_start = start; | ||
54 | nr_bytes = PAGE_SIZE - (start & ~PAGE_MASK); | ||
55 | entry->size = (size < nr_bytes) ? size : nr_bytes; | ||
56 | |||
57 | start += entry->size; | ||
58 | size -= entry->size; | ||
59 | } | ||
60 | return 0; | ||
61 | |||
62 | Error: | ||
63 | list_for_each_entry_safe(entry, next, &nvs_list, node) { | ||
64 | list_del(&entry->node); | ||
65 | kfree(entry); | ||
66 | } | ||
67 | return -ENOMEM; | ||
68 | } | ||
69 | |||
70 | /** | ||
71 | * suspend_nvs_free - free data pages allocated for saving NVS regions | ||
72 | */ | ||
73 | void suspend_nvs_free(void) | ||
74 | { | ||
75 | struct nvs_page *entry; | ||
76 | |||
77 | list_for_each_entry(entry, &nvs_list, node) | ||
78 | if (entry->data) { | ||
79 | free_page((unsigned long)entry->data); | ||
80 | entry->data = NULL; | ||
81 | if (entry->kaddr) { | ||
82 | iounmap(entry->kaddr); | ||
83 | entry->kaddr = NULL; | ||
84 | } | ||
85 | } | ||
86 | } | ||
87 | |||
88 | /** | ||
89 | * suspend_nvs_alloc - allocate memory necessary for saving NVS regions | ||
90 | */ | ||
91 | int suspend_nvs_alloc(void) | ||
92 | { | ||
93 | struct nvs_page *entry; | ||
94 | |||
95 | list_for_each_entry(entry, &nvs_list, node) { | ||
96 | entry->data = (void *)__get_free_page(GFP_KERNEL); | ||
97 | if (!entry->data) { | ||
98 | suspend_nvs_free(); | ||
99 | return -ENOMEM; | ||
100 | } | ||
101 | } | ||
102 | return 0; | ||
103 | } | ||
104 | |||
105 | /** | ||
106 | * suspend_nvs_save - save NVS memory regions | ||
107 | */ | ||
108 | void suspend_nvs_save(void) | ||
109 | { | ||
110 | struct nvs_page *entry; | ||
111 | |||
112 | printk(KERN_INFO "PM: Saving platform NVS memory\n"); | ||
113 | |||
114 | list_for_each_entry(entry, &nvs_list, node) | ||
115 | if (entry->data) { | ||
116 | entry->kaddr = ioremap(entry->phys_start, entry->size); | ||
117 | memcpy(entry->data, entry->kaddr, entry->size); | ||
118 | } | ||
119 | } | ||
120 | |||
121 | /** | ||
122 | * suspend_nvs_restore - restore NVS memory regions | ||
123 | * | ||
124 | * This function is going to be called with interrupts disabled, so it | ||
125 | * cannot iounmap the virtual addresses used to access the NVS region. | ||
126 | */ | ||
127 | void suspend_nvs_restore(void) | ||
128 | { | ||
129 | struct nvs_page *entry; | ||
130 | |||
131 | printk(KERN_INFO "PM: Restoring platform NVS memory\n"); | ||
132 | |||
133 | list_for_each_entry(entry, &nvs_list, node) | ||
134 | if (entry->data) | ||
135 | memcpy(entry->kaddr, entry->data, entry->size); | ||
136 | } | ||
diff --git a/kernel/power/process.c b/kernel/power/process.c index e50b4c1b2a0f..d6d2a10320e0 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c | |||
@@ -64,6 +64,12 @@ static int try_to_freeze_tasks(bool sig_only) | |||
64 | * perturb a task in TASK_STOPPED or TASK_TRACED. | 64 | * perturb a task in TASK_STOPPED or TASK_TRACED. |
65 | * It is "frozen enough". If the task does wake | 65 | * It is "frozen enough". If the task does wake |
66 | * up, it will immediately call try_to_freeze. | 66 | * up, it will immediately call try_to_freeze. |
67 | * | ||
68 | * Because freeze_task() goes through p's | ||
69 | * scheduler lock after setting TIF_FREEZE, it's | ||
70 | * guaranteed that either we see TASK_RUNNING or | ||
71 | * try_to_stop() after schedule() in ptrace/signal | ||
72 | * stop sees TIF_FREEZE. | ||
67 | */ | 73 | */ |
68 | if (!task_is_stopped_or_traced(p) && | 74 | if (!task_is_stopped_or_traced(p) && |
69 | !freezer_should_skip(p)) | 75 | !freezer_should_skip(p)) |
@@ -79,7 +85,7 @@ static int try_to_freeze_tasks(bool sig_only) | |||
79 | if (!todo || time_after(jiffies, end_time)) | 85 | if (!todo || time_after(jiffies, end_time)) |
80 | break; | 86 | break; |
81 | 87 | ||
82 | if (!pm_check_wakeup_events()) { | 88 | if (pm_wakeup_pending()) { |
83 | wakeup = true; | 89 | wakeup = true; |
84 | break; | 90 | break; |
85 | } | 91 | } |
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 031d5e3a6197..de6f86bfa303 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c | |||
@@ -31,13 +31,13 @@ const char *const pm_states[PM_SUSPEND_MAX] = { | |||
31 | [PM_SUSPEND_MEM] = "mem", | 31 | [PM_SUSPEND_MEM] = "mem", |
32 | }; | 32 | }; |
33 | 33 | ||
34 | static struct platform_suspend_ops *suspend_ops; | 34 | static const struct platform_suspend_ops *suspend_ops; |
35 | 35 | ||
36 | /** | 36 | /** |
37 | * suspend_set_ops - Set the global suspend method table. | 37 | * suspend_set_ops - Set the global suspend method table. |
38 | * @ops: Pointer to ops structure. | 38 | * @ops: Pointer to ops structure. |
39 | */ | 39 | */ |
40 | void suspend_set_ops(struct platform_suspend_ops *ops) | 40 | void suspend_set_ops(const struct platform_suspend_ops *ops) |
41 | { | 41 | { |
42 | mutex_lock(&pm_mutex); | 42 | mutex_lock(&pm_mutex); |
43 | suspend_ops = ops; | 43 | suspend_ops = ops; |
@@ -164,7 +164,7 @@ static int suspend_enter(suspend_state_t state) | |||
164 | 164 | ||
165 | error = sysdev_suspend(PMSG_SUSPEND); | 165 | error = sysdev_suspend(PMSG_SUSPEND); |
166 | if (!error) { | 166 | if (!error) { |
167 | if (!suspend_test(TEST_CORE) && pm_check_wakeup_events()) { | 167 | if (!(suspend_test(TEST_CORE) || pm_wakeup_pending())) { |
168 | error = suspend_ops->enter(state); | 168 | error = suspend_ops->enter(state); |
169 | events_check_enabled = false; | 169 | events_check_enabled = false; |
170 | } | 170 | } |
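Both the hibernate and suspend paths above replace pm_check_wakeup_events(), which returned true to mean "no wakeup seen, keep going", with pm_wakeup_pending(), which returns true to mean "abort". Every caller's test is therefore inverted; condensed from the suspend_enter() hunk above:

	/* before: predicate says it is still OK to enter the sleep state */
	if (!suspend_test(TEST_CORE) && pm_check_wakeup_events())
		error = suspend_ops->enter(state);

	/* after: predicate says a wakeup event is already pending, so skip */
	if (!(suspend_test(TEST_CORE) || pm_wakeup_pending()))
		error = suspend_ops->enter(state);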
diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 8c7e4832b9be..7c97c3a0eee3 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c | |||
@@ -224,7 +224,7 @@ static int swsusp_swap_check(void) | |||
224 | return res; | 224 | return res; |
225 | 225 | ||
226 | root_swap = res; | 226 | root_swap = res; |
227 | res = blkdev_get(hib_resume_bdev, FMODE_WRITE); | 227 | res = blkdev_get(hib_resume_bdev, FMODE_WRITE, NULL); |
228 | if (res) | 228 | if (res) |
229 | return res; | 229 | return res; |
230 | 230 | ||
@@ -888,7 +888,7 @@ out_finish: | |||
888 | /** | 888 | /** |
889 | * swsusp_read - read the hibernation image. | 889 | * swsusp_read - read the hibernation image. |
890 | * @flags_p: flags passed by the "frozen" kernel in the image header should | 890 | * @flags_p: flags passed by the "frozen" kernel in the image header should |
891 | * be written into this memeory location | 891 | * be written into this memory location |
892 | */ | 892 | */ |
893 | 893 | ||
894 | int swsusp_read(unsigned int *flags_p) | 894 | int swsusp_read(unsigned int *flags_p) |
@@ -930,7 +930,8 @@ int swsusp_check(void) | |||
930 | { | 930 | { |
931 | int error; | 931 | int error; |
932 | 932 | ||
933 | hib_resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ); | 933 | hib_resume_bdev = blkdev_get_by_dev(swsusp_resume_device, |
934 | FMODE_READ, NULL); | ||
934 | if (!IS_ERR(hib_resume_bdev)) { | 935 | if (!IS_ERR(hib_resume_bdev)) { |
935 | set_blocksize(hib_resume_bdev, PAGE_SIZE); | 936 | set_blocksize(hib_resume_bdev, PAGE_SIZE); |
936 | clear_page(swsusp_header); | 937 | clear_page(swsusp_header); |
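swsusp now opens the resume device through blkdev_get()/blkdev_get_by_dev(), which carry an extra "holder" argument (it identifies the owner for exclusive opens and may be NULL when FMODE_EXCL is not requested). A minimal sketch of the calling convention, with error handling trimmed and the function name invented for illustration:

#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/err.h>

static int open_resume_device(dev_t dev)
{
	struct block_device *bdev;

	bdev = blkdev_get_by_dev(dev, FMODE_READ, NULL);
	if (IS_ERR(bdev))
		return PTR_ERR(bdev);

	set_blocksize(bdev, PAGE_SIZE);

	/* ... read the image header ... */

	blkdev_put(bdev, FMODE_READ);
	return 0;
}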
diff --git a/kernel/printk.c b/kernel/printk.c index ab3ffc5b3b64..53d9a9ec88e6 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
@@ -39,16 +39,11 @@ | |||
39 | #include <linux/syslog.h> | 39 | #include <linux/syslog.h> |
40 | #include <linux/cpu.h> | 40 | #include <linux/cpu.h> |
41 | #include <linux/notifier.h> | 41 | #include <linux/notifier.h> |
42 | #include <linux/rculist.h> | ||
42 | 43 | ||
43 | #include <asm/uaccess.h> | 44 | #include <asm/uaccess.h> |
44 | 45 | ||
45 | /* | 46 | /* |
46 | * for_each_console() allows you to iterate on each console | ||
47 | */ | ||
48 | #define for_each_console(con) \ | ||
49 | for (con = console_drivers; con != NULL; con = con->next) | ||
50 | |||
51 | /* | ||
52 | * Architectures can override it: | 47 | * Architectures can override it: |
53 | */ | 48 | */ |
54 | void asmlinkage __attribute__((weak)) early_printk(const char *fmt, ...) | 49 | void asmlinkage __attribute__((weak)) early_printk(const char *fmt, ...) |
@@ -279,12 +274,12 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) | |||
279 | * at open time. | 274 | * at open time. |
280 | */ | 275 | */ |
281 | if (type == SYSLOG_ACTION_OPEN || !from_file) { | 276 | if (type == SYSLOG_ACTION_OPEN || !from_file) { |
282 | if (dmesg_restrict && !capable(CAP_SYS_ADMIN)) | 277 | if (dmesg_restrict && !capable(CAP_SYSLOG)) |
283 | return -EPERM; | 278 | goto warn; /* switch to return -EPERM after 2.6.39 */ |
284 | if ((type != SYSLOG_ACTION_READ_ALL && | 279 | if ((type != SYSLOG_ACTION_READ_ALL && |
285 | type != SYSLOG_ACTION_SIZE_BUFFER) && | 280 | type != SYSLOG_ACTION_SIZE_BUFFER) && |
286 | !capable(CAP_SYS_ADMIN)) | 281 | !capable(CAP_SYSLOG)) |
287 | return -EPERM; | 282 | goto warn; /* switch to return -EPERM after 2.6.39 */ |
288 | } | 283 | } |
289 | 284 | ||
290 | error = security_syslog(type); | 285 | error = security_syslog(type); |
@@ -428,6 +423,12 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) | |||
428 | } | 423 | } |
429 | out: | 424 | out: |
430 | return error; | 425 | return error; |
426 | warn: | ||
427 | /* remove after 2.6.39 */ | ||
428 | if (capable(CAP_SYS_ADMIN)) | ||
429 | WARN_ONCE(1, "Attempt to access syslog with CAP_SYS_ADMIN " | ||
430 | "but no CAP_SYSLOG (deprecated and denied).\n"); | ||
431 | return -EPERM; | ||
431 | } | 432 | } |
432 | 433 | ||
433 | SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) | 434 | SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) |
@@ -1359,6 +1360,7 @@ void register_console(struct console *newcon) | |||
1359 | spin_unlock_irqrestore(&logbuf_lock, flags); | 1360 | spin_unlock_irqrestore(&logbuf_lock, flags); |
1360 | } | 1361 | } |
1361 | release_console_sem(); | 1362 | release_console_sem(); |
1363 | console_sysfs_notify(); | ||
1362 | 1364 | ||
1363 | /* | 1365 | /* |
1364 | * By unregistering the bootconsoles after we enable the real console | 1366 | * By unregistering the bootconsoles after we enable the real console |
@@ -1417,6 +1419,7 @@ int unregister_console(struct console *console) | |||
1417 | console_drivers->flags |= CON_CONSDEV; | 1419 | console_drivers->flags |= CON_CONSDEV; |
1418 | 1420 | ||
1419 | release_console_sem(); | 1421 | release_console_sem(); |
1422 | console_sysfs_notify(); | ||
1420 | return res; | 1423 | return res; |
1421 | } | 1424 | } |
1422 | EXPORT_SYMBOL(unregister_console); | 1425 | EXPORT_SYMBOL(unregister_console); |
@@ -1500,7 +1503,7 @@ int kmsg_dump_register(struct kmsg_dumper *dumper) | |||
1500 | /* Don't allow registering multiple times */ | 1503 | /* Don't allow registering multiple times */ |
1501 | if (!dumper->registered) { | 1504 | if (!dumper->registered) { |
1502 | dumper->registered = 1; | 1505 | dumper->registered = 1; |
1503 | list_add_tail(&dumper->list, &dump_list); | 1506 | list_add_tail_rcu(&dumper->list, &dump_list); |
1504 | err = 0; | 1507 | err = 0; |
1505 | } | 1508 | } |
1506 | spin_unlock_irqrestore(&dump_list_lock, flags); | 1509 | spin_unlock_irqrestore(&dump_list_lock, flags); |
@@ -1524,29 +1527,16 @@ int kmsg_dump_unregister(struct kmsg_dumper *dumper) | |||
1524 | spin_lock_irqsave(&dump_list_lock, flags); | 1527 | spin_lock_irqsave(&dump_list_lock, flags); |
1525 | if (dumper->registered) { | 1528 | if (dumper->registered) { |
1526 | dumper->registered = 0; | 1529 | dumper->registered = 0; |
1527 | list_del(&dumper->list); | 1530 | list_del_rcu(&dumper->list); |
1528 | err = 0; | 1531 | err = 0; |
1529 | } | 1532 | } |
1530 | spin_unlock_irqrestore(&dump_list_lock, flags); | 1533 | spin_unlock_irqrestore(&dump_list_lock, flags); |
1534 | synchronize_rcu(); | ||
1531 | 1535 | ||
1532 | return err; | 1536 | return err; |
1533 | } | 1537 | } |
1534 | EXPORT_SYMBOL_GPL(kmsg_dump_unregister); | 1538 | EXPORT_SYMBOL_GPL(kmsg_dump_unregister); |
1535 | 1539 | ||
1536 | static const char * const kmsg_reasons[] = { | ||
1537 | [KMSG_DUMP_OOPS] = "oops", | ||
1538 | [KMSG_DUMP_PANIC] = "panic", | ||
1539 | [KMSG_DUMP_KEXEC] = "kexec", | ||
1540 | }; | ||
1541 | |||
1542 | static const char *kmsg_to_str(enum kmsg_dump_reason reason) | ||
1543 | { | ||
1544 | if (reason >= ARRAY_SIZE(kmsg_reasons) || reason < 0) | ||
1545 | return "unknown"; | ||
1546 | |||
1547 | return kmsg_reasons[reason]; | ||
1548 | } | ||
1549 | |||
1550 | /** | 1540 | /** |
1551 | * kmsg_dump - dump kernel log to kernel message dumpers. | 1541 | * kmsg_dump - dump kernel log to kernel message dumpers. |
1552 | * @reason: the reason (oops, panic etc) for dumping | 1542 | * @reason: the reason (oops, panic etc) for dumping |
@@ -1585,13 +1575,9 @@ void kmsg_dump(enum kmsg_dump_reason reason) | |||
1585 | l2 = chars; | 1575 | l2 = chars; |
1586 | } | 1576 | } |
1587 | 1577 | ||
1588 | if (!spin_trylock_irqsave(&dump_list_lock, flags)) { | 1578 | rcu_read_lock(); |
1589 | printk(KERN_ERR "dump_kmsg: dump list lock is held during %s, skipping dump\n", | 1579 | list_for_each_entry_rcu(dumper, &dump_list, list) |
1590 | kmsg_to_str(reason)); | ||
1591 | return; | ||
1592 | } | ||
1593 | list_for_each_entry(dumper, &dump_list, list) | ||
1594 | dumper->dump(dumper, reason, s1, l1, s2, l2); | 1580 | dumper->dump(dumper, reason, s1, l1, s2, l2); |
1595 | spin_unlock_irqrestore(&dump_list_lock, flags); | 1581 | rcu_read_unlock(); |
1596 | } | 1582 | } |
1597 | #endif | 1583 | #endif |
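The kmsg_dump() rework replaces the trylock on dump_list_lock with an RCU-protected list: register/unregister still take the spinlock and then synchronize, while the dump path only needs rcu_read_lock(), so it can run from oops/panic context without the old "dump list lock is held, skipping dump" failure mode. The general pattern, sketched outside the printk code with stand-in names:

#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/spinlock.h>
#include <linux/rcupdate.h>

struct dumper {
	struct list_head list;
	void (*dump)(struct dumper *d);
};

static LIST_HEAD(dump_list);
static DEFINE_SPINLOCK(dump_list_lock);

static void add_dumper(struct dumper *d)
{
	unsigned long flags;

	spin_lock_irqsave(&dump_list_lock, flags);
	list_add_tail_rcu(&d->list, &dump_list);	/* publish */
	spin_unlock_irqrestore(&dump_list_lock, flags);
}

static void del_dumper(struct dumper *d)
{
	unsigned long flags;

	spin_lock_irqsave(&dump_list_lock, flags);
	list_del_rcu(&d->list);
	spin_unlock_irqrestore(&dump_list_lock, flags);
	synchronize_rcu();	/* wait for readers before freeing 'd' */
}

static void run_dumpers(void)
{
	struct dumper *d;

	rcu_read_lock();	/* lockless, safe in atomic context */
	list_for_each_entry_rcu(d, &dump_list, list)
		d->dump(d);
	rcu_read_unlock();
}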
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 034493724749..0c343b9a46d5 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c | |||
@@ -189,7 +189,8 @@ static int rcu_kthread(void *arg) | |||
189 | unsigned long flags; | 189 | unsigned long flags; |
190 | 190 | ||
191 | for (;;) { | 191 | for (;;) { |
192 | wait_event(rcu_kthread_wq, have_rcu_kthread_work != 0); | 192 | wait_event_interruptible(rcu_kthread_wq, |
193 | have_rcu_kthread_work != 0); | ||
193 | morework = rcu_boost(); | 194 | morework = rcu_boost(); |
194 | local_irq_save(flags); | 195 | local_irq_save(flags); |
195 | work = have_rcu_kthread_work; | 196 | work = have_rcu_kthread_work; |
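Switching the TINY_RCU kthread from wait_event() to wait_event_interruptible() keeps it out of uninterruptible sleep, so an idle kthread no longer inflates the load average; kernel threads ignore signals, so the loop simply re-checks its condition after any wakeup. The generic shape of such a loop, sketched outside the RCU code with invented names:

#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/sched.h>

static DECLARE_WAIT_QUEUE_HEAD(my_wq);
static int have_work;

static int my_worker(void *unused)
{
	while (!kthread_should_stop()) {
		/* interruptible sleep: not counted in the load average */
		wait_event_interruptible(my_wq,
					 have_work || kthread_should_stop());
		if (!have_work)
			continue;
		have_work = 0;
		/* ... do the actual work ... */
	}
	return 0;
}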
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index d0ddfea6579d..dd4aea806f8e 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
@@ -364,8 +364,8 @@ void rcu_irq_exit(void) | |||
364 | WARN_ON_ONCE(rdtp->dynticks & 0x1); | 364 | WARN_ON_ONCE(rdtp->dynticks & 0x1); |
365 | 365 | ||
366 | /* If the interrupt queued a callback, get out of dyntick mode. */ | 366 | /* If the interrupt queued a callback, get out of dyntick mode. */ |
367 | if (__get_cpu_var(rcu_sched_data).nxtlist || | 367 | if (__this_cpu_read(rcu_sched_data.nxtlist) || |
368 | __get_cpu_var(rcu_bh_data).nxtlist) | 368 | __this_cpu_read(rcu_bh_data.nxtlist)) |
369 | set_need_resched(); | 369 | set_need_resched(); |
370 | } | 370 | } |
371 | 371 | ||
diff --git a/kernel/sched.c b/kernel/sched.c index 04949089e760..ea3e5eff3878 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -278,14 +278,12 @@ struct task_group { | |||
278 | #endif | 278 | #endif |
279 | }; | 279 | }; |
280 | 280 | ||
281 | #define root_task_group init_task_group | ||
282 | |||
283 | /* task_group_lock serializes the addition/removal of task groups */ | 281 | /* task_group_lock serializes the addition/removal of task groups */ |
284 | static DEFINE_SPINLOCK(task_group_lock); | 282 | static DEFINE_SPINLOCK(task_group_lock); |
285 | 283 | ||
286 | #ifdef CONFIG_FAIR_GROUP_SCHED | 284 | #ifdef CONFIG_FAIR_GROUP_SCHED |
287 | 285 | ||
288 | # define INIT_TASK_GROUP_LOAD NICE_0_LOAD | 286 | # define ROOT_TASK_GROUP_LOAD NICE_0_LOAD |
289 | 287 | ||
290 | /* | 288 | /* |
291 | * A weight of 0 or 1 can cause arithmetics problems. | 289 | * A weight of 0 or 1 can cause arithmetics problems. |
@@ -298,13 +296,13 @@ static DEFINE_SPINLOCK(task_group_lock); | |||
298 | #define MIN_SHARES 2 | 296 | #define MIN_SHARES 2 |
299 | #define MAX_SHARES (1UL << 18) | 297 | #define MAX_SHARES (1UL << 18) |
300 | 298 | ||
301 | static int init_task_group_load = INIT_TASK_GROUP_LOAD; | 299 | static int root_task_group_load = ROOT_TASK_GROUP_LOAD; |
302 | #endif | 300 | #endif |
303 | 301 | ||
304 | /* Default task group. | 302 | /* Default task group. |
305 | * Every task in system belong to this group at bootup. | 303 | * Every task in system belong to this group at bootup. |
306 | */ | 304 | */ |
307 | struct task_group init_task_group; | 305 | struct task_group root_task_group; |
308 | 306 | ||
309 | #endif /* CONFIG_CGROUP_SCHED */ | 307 | #endif /* CONFIG_CGROUP_SCHED */ |
310 | 308 | ||
@@ -743,7 +741,7 @@ sched_feat_write(struct file *filp, const char __user *ubuf, | |||
743 | buf[cnt] = 0; | 741 | buf[cnt] = 0; |
744 | cmp = strstrip(buf); | 742 | cmp = strstrip(buf); |
745 | 743 | ||
746 | if (strncmp(buf, "NO_", 3) == 0) { | 744 | if (strncmp(cmp, "NO_", 3) == 0) { |
747 | neg = 1; | 745 | neg = 1; |
748 | cmp += 3; | 746 | cmp += 3; |
749 | } | 747 | } |
@@ -2507,7 +2505,7 @@ out: | |||
2507 | * try_to_wake_up_local - try to wake up a local task with rq lock held | 2505 | * try_to_wake_up_local - try to wake up a local task with rq lock held |
2508 | * @p: the thread to be awakened | 2506 | * @p: the thread to be awakened |
2509 | * | 2507 | * |
2510 | * Put @p on the run-queue if it's not alredy there. The caller must | 2508 | * Put @p on the run-queue if it's not already there. The caller must |
2511 | * ensure that this_rq() is locked, @p is bound to this_rq() and not | 2509 | * ensure that this_rq() is locked, @p is bound to this_rq() and not |
2512 | * the current task. this_rq() stays locked over invocation. | 2510 | * the current task. this_rq() stays locked over invocation. |
2513 | */ | 2511 | */ |
@@ -7848,7 +7846,7 @@ static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, | |||
7848 | cfs_rq->tg = tg; | 7846 | cfs_rq->tg = tg; |
7849 | 7847 | ||
7850 | tg->se[cpu] = se; | 7848 | tg->se[cpu] = se; |
7851 | /* se could be NULL for init_task_group */ | 7849 | /* se could be NULL for root_task_group */ |
7852 | if (!se) | 7850 | if (!se) |
7853 | return; | 7851 | return; |
7854 | 7852 | ||
@@ -7908,18 +7906,18 @@ void __init sched_init(void) | |||
7908 | ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT); | 7906 | ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT); |
7909 | 7907 | ||
7910 | #ifdef CONFIG_FAIR_GROUP_SCHED | 7908 | #ifdef CONFIG_FAIR_GROUP_SCHED |
7911 | init_task_group.se = (struct sched_entity **)ptr; | 7909 | root_task_group.se = (struct sched_entity **)ptr; |
7912 | ptr += nr_cpu_ids * sizeof(void **); | 7910 | ptr += nr_cpu_ids * sizeof(void **); |
7913 | 7911 | ||
7914 | init_task_group.cfs_rq = (struct cfs_rq **)ptr; | 7912 | root_task_group.cfs_rq = (struct cfs_rq **)ptr; |
7915 | ptr += nr_cpu_ids * sizeof(void **); | 7913 | ptr += nr_cpu_ids * sizeof(void **); |
7916 | 7914 | ||
7917 | #endif /* CONFIG_FAIR_GROUP_SCHED */ | 7915 | #endif /* CONFIG_FAIR_GROUP_SCHED */ |
7918 | #ifdef CONFIG_RT_GROUP_SCHED | 7916 | #ifdef CONFIG_RT_GROUP_SCHED |
7919 | init_task_group.rt_se = (struct sched_rt_entity **)ptr; | 7917 | root_task_group.rt_se = (struct sched_rt_entity **)ptr; |
7920 | ptr += nr_cpu_ids * sizeof(void **); | 7918 | ptr += nr_cpu_ids * sizeof(void **); |
7921 | 7919 | ||
7922 | init_task_group.rt_rq = (struct rt_rq **)ptr; | 7920 | root_task_group.rt_rq = (struct rt_rq **)ptr; |
7923 | ptr += nr_cpu_ids * sizeof(void **); | 7921 | ptr += nr_cpu_ids * sizeof(void **); |
7924 | 7922 | ||
7925 | #endif /* CONFIG_RT_GROUP_SCHED */ | 7923 | #endif /* CONFIG_RT_GROUP_SCHED */ |
@@ -7939,13 +7937,13 @@ void __init sched_init(void) | |||
7939 | global_rt_period(), global_rt_runtime()); | 7937 | global_rt_period(), global_rt_runtime()); |
7940 | 7938 | ||
7941 | #ifdef CONFIG_RT_GROUP_SCHED | 7939 | #ifdef CONFIG_RT_GROUP_SCHED |
7942 | init_rt_bandwidth(&init_task_group.rt_bandwidth, | 7940 | init_rt_bandwidth(&root_task_group.rt_bandwidth, |
7943 | global_rt_period(), global_rt_runtime()); | 7941 | global_rt_period(), global_rt_runtime()); |
7944 | #endif /* CONFIG_RT_GROUP_SCHED */ | 7942 | #endif /* CONFIG_RT_GROUP_SCHED */ |
7945 | 7943 | ||
7946 | #ifdef CONFIG_CGROUP_SCHED | 7944 | #ifdef CONFIG_CGROUP_SCHED |
7947 | list_add(&init_task_group.list, &task_groups); | 7945 | list_add(&root_task_group.list, &task_groups); |
7948 | INIT_LIST_HEAD(&init_task_group.children); | 7946 | INIT_LIST_HEAD(&root_task_group.children); |
7949 | autogroup_init(&init_task); | 7947 | autogroup_init(&init_task); |
7950 | #endif /* CONFIG_CGROUP_SCHED */ | 7948 | #endif /* CONFIG_CGROUP_SCHED */ |
7951 | 7949 | ||
@@ -7960,34 +7958,34 @@ void __init sched_init(void) | |||
7960 | init_cfs_rq(&rq->cfs, rq); | 7958 | init_cfs_rq(&rq->cfs, rq); |
7961 | init_rt_rq(&rq->rt, rq); | 7959 | init_rt_rq(&rq->rt, rq); |
7962 | #ifdef CONFIG_FAIR_GROUP_SCHED | 7960 | #ifdef CONFIG_FAIR_GROUP_SCHED |
7963 | init_task_group.shares = init_task_group_load; | 7961 | root_task_group.shares = root_task_group_load; |
7964 | INIT_LIST_HEAD(&rq->leaf_cfs_rq_list); | 7962 | INIT_LIST_HEAD(&rq->leaf_cfs_rq_list); |
7965 | /* | 7963 | /* |
7966 | * How much cpu bandwidth does init_task_group get? | 7964 | * How much cpu bandwidth does root_task_group get? |
7967 | * | 7965 | * |
7968 | * In case of task-groups formed thr' the cgroup filesystem, it | 7966 | * In case of task-groups formed thr' the cgroup filesystem, it |
7969 | * gets 100% of the cpu resources in the system. This overall | 7967 | * gets 100% of the cpu resources in the system. This overall |
7970 | * system cpu resource is divided among the tasks of | 7968 | * system cpu resource is divided among the tasks of |
7971 | * init_task_group and its child task-groups in a fair manner, | 7969 | * root_task_group and its child task-groups in a fair manner, |
7972 | * based on each entity's (task or task-group's) weight | 7970 | * based on each entity's (task or task-group's) weight |
7973 | * (se->load.weight). | 7971 | * (se->load.weight). |
7974 | * | 7972 | * |
7975 | * In other words, if init_task_group has 10 tasks of weight | 7973 | * In other words, if root_task_group has 10 tasks of weight |
7976 | * 1024) and two child groups A0 and A1 (of weight 1024 each), | 7974 | * 1024) and two child groups A0 and A1 (of weight 1024 each), |
7977 | * then A0's share of the cpu resource is: | 7975 | * then A0's share of the cpu resource is: |
7978 | * | 7976 | * |
7979 | * A0's bandwidth = 1024 / (10*1024 + 1024 + 1024) = 8.33% | 7977 | * A0's bandwidth = 1024 / (10*1024 + 1024 + 1024) = 8.33% |
7980 | * | 7978 | * |
7981 | * We achieve this by letting init_task_group's tasks sit | 7979 | * We achieve this by letting root_task_group's tasks sit |
7982 | * directly in rq->cfs (i.e init_task_group->se[] = NULL). | 7980 | * directly in rq->cfs (i.e root_task_group->se[] = NULL). |
7983 | */ | 7981 | */ |
7984 | init_tg_cfs_entry(&init_task_group, &rq->cfs, NULL, i, NULL); | 7982 | init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL); |
7985 | #endif /* CONFIG_FAIR_GROUP_SCHED */ | 7983 | #endif /* CONFIG_FAIR_GROUP_SCHED */ |
7986 | 7984 | ||
7987 | rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime; | 7985 | rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime; |
7988 | #ifdef CONFIG_RT_GROUP_SCHED | 7986 | #ifdef CONFIG_RT_GROUP_SCHED |
7989 | INIT_LIST_HEAD(&rq->leaf_rt_rq_list); | 7987 | INIT_LIST_HEAD(&rq->leaf_rt_rq_list); |
7990 | init_tg_rt_entry(&init_task_group, &rq->rt, NULL, i, NULL); | 7988 | init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL); |
7991 | #endif | 7989 | #endif |
7992 | 7990 | ||
7993 | for (j = 0; j < CPU_LOAD_IDX_MAX; j++) | 7991 | for (j = 0; j < CPU_LOAD_IDX_MAX; j++) |
@@ -8379,6 +8377,7 @@ static void free_sched_group(struct task_group *tg) | |||
8379 | { | 8377 | { |
8380 | free_fair_sched_group(tg); | 8378 | free_fair_sched_group(tg); |
8381 | free_rt_sched_group(tg); | 8379 | free_rt_sched_group(tg); |
8380 | autogroup_free(tg); | ||
8382 | kfree(tg); | 8381 | kfree(tg); |
8383 | } | 8382 | } |
8384 | 8383 | ||
@@ -8812,7 +8811,7 @@ cpu_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cgrp) | |||
8812 | 8811 | ||
8813 | if (!cgrp->parent) { | 8812 | if (!cgrp->parent) { |
8814 | /* This is early initialization for the top cgroup */ | 8813 | /* This is early initialization for the top cgroup */ |
8815 | return &init_task_group.css; | 8814 | return &root_task_group.css; |
8816 | } | 8815 | } |
8817 | 8816 | ||
8818 | parent = cgroup_tg(cgrp->parent); | 8817 | parent = cgroup_tg(cgrp->parent); |
diff --git a/kernel/sched_autogroup.c b/kernel/sched_autogroup.c index c80fedcd476b..32a723b8f84c 100644 --- a/kernel/sched_autogroup.c +++ b/kernel/sched_autogroup.c | |||
@@ -9,10 +9,10 @@ unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1; | |||
9 | static struct autogroup autogroup_default; | 9 | static struct autogroup autogroup_default; |
10 | static atomic_t autogroup_seq_nr; | 10 | static atomic_t autogroup_seq_nr; |
11 | 11 | ||
12 | static void autogroup_init(struct task_struct *init_task) | 12 | static void __init autogroup_init(struct task_struct *init_task) |
13 | { | 13 | { |
14 | autogroup_default.tg = &init_task_group; | 14 | autogroup_default.tg = &root_task_group; |
15 | init_task_group.autogroup = &autogroup_default; | 15 | root_task_group.autogroup = &autogroup_default; |
16 | kref_init(&autogroup_default.kref); | 16 | kref_init(&autogroup_default.kref); |
17 | init_rwsem(&autogroup_default.lock); | 17 | init_rwsem(&autogroup_default.lock); |
18 | init_task->signal->autogroup = &autogroup_default; | 18 | init_task->signal->autogroup = &autogroup_default; |
@@ -63,7 +63,7 @@ static inline struct autogroup *autogroup_create(void) | |||
63 | if (!ag) | 63 | if (!ag) |
64 | goto out_fail; | 64 | goto out_fail; |
65 | 65 | ||
66 | tg = sched_create_group(&init_task_group); | 66 | tg = sched_create_group(&root_task_group); |
67 | 67 | ||
68 | if (IS_ERR(tg)) | 68 | if (IS_ERR(tg)) |
69 | goto out_free; | 69 | goto out_free; |
diff --git a/kernel/smp.c b/kernel/smp.c index 12ed8b013e2d..4ec30e069987 100644 --- a/kernel/smp.c +++ b/kernel/smp.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/smp.h> | 13 | #include <linux/smp.h> |
14 | #include <linux/cpu.h> | 14 | #include <linux/cpu.h> |
15 | 15 | ||
16 | #ifdef CONFIG_USE_GENERIC_SMP_HELPERS | ||
16 | static struct { | 17 | static struct { |
17 | struct list_head queue; | 18 | struct list_head queue; |
18 | raw_spinlock_t lock; | 19 | raw_spinlock_t lock; |
@@ -529,3 +530,21 @@ void ipi_call_unlock_irq(void) | |||
529 | { | 530 | { |
530 | raw_spin_unlock_irq(&call_function.lock); | 531 | raw_spin_unlock_irq(&call_function.lock); |
531 | } | 532 | } |
533 | #endif /* USE_GENERIC_SMP_HELPERS */ | ||
534 | |||
535 | /* | ||
536 | * Call a function on all processors | ||
537 | */ | ||
538 | int on_each_cpu(void (*func) (void *info), void *info, int wait) | ||
539 | { | ||
540 | int ret = 0; | ||
541 | |||
542 | preempt_disable(); | ||
543 | ret = smp_call_function(func, info, wait); | ||
544 | local_irq_disable(); | ||
545 | func(info); | ||
546 | local_irq_enable(); | ||
547 | preempt_enable(); | ||
548 | return ret; | ||
549 | } | ||
550 | EXPORT_SYMBOL(on_each_cpu); | ||
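on_each_cpu() moves here from softirq.c (the matching removal appears further down in this diff) so it is built whenever CONFIG_SMP is set, not only when the generic IPI helpers are. Callers are unchanged; a typical use, with an illustrative per-CPU counter as the payload:

#include <linux/smp.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(unsigned long, poke_count);

static void poke_cpu(void *info)
{
	/* runs on each online CPU with interrupts disabled */
	this_cpu_inc(poke_count);
}

static void poke_everyone(void)
{
	/* wait == 1: return only after every CPU has run poke_cpu() */
	on_each_cpu(poke_cpu, NULL, 1);
}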
diff --git a/kernel/softirq.c b/kernel/softirq.c index d4d918a91881..68eb5efec388 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -70,7 +70,7 @@ char *softirq_to_name[NR_SOFTIRQS] = { | |||
70 | static void wakeup_softirqd(void) | 70 | static void wakeup_softirqd(void) |
71 | { | 71 | { |
72 | /* Interrupts are disabled: no need to stop preemption */ | 72 | /* Interrupts are disabled: no need to stop preemption */ |
73 | struct task_struct *tsk = __get_cpu_var(ksoftirqd); | 73 | struct task_struct *tsk = __this_cpu_read(ksoftirqd); |
74 | 74 | ||
75 | if (tsk && tsk->state != TASK_RUNNING) | 75 | if (tsk && tsk->state != TASK_RUNNING) |
76 | wake_up_process(tsk); | 76 | wake_up_process(tsk); |
@@ -388,8 +388,8 @@ void __tasklet_schedule(struct tasklet_struct *t) | |||
388 | 388 | ||
389 | local_irq_save(flags); | 389 | local_irq_save(flags); |
390 | t->next = NULL; | 390 | t->next = NULL; |
391 | *__get_cpu_var(tasklet_vec).tail = t; | 391 | *__this_cpu_read(tasklet_vec.tail) = t; |
392 | __get_cpu_var(tasklet_vec).tail = &(t->next); | 392 | __this_cpu_write(tasklet_vec.tail, &(t->next)); |
393 | raise_softirq_irqoff(TASKLET_SOFTIRQ); | 393 | raise_softirq_irqoff(TASKLET_SOFTIRQ); |
394 | local_irq_restore(flags); | 394 | local_irq_restore(flags); |
395 | } | 395 | } |
@@ -402,8 +402,8 @@ void __tasklet_hi_schedule(struct tasklet_struct *t) | |||
402 | 402 | ||
403 | local_irq_save(flags); | 403 | local_irq_save(flags); |
404 | t->next = NULL; | 404 | t->next = NULL; |
405 | *__get_cpu_var(tasklet_hi_vec).tail = t; | 405 | *__this_cpu_read(tasklet_hi_vec.tail) = t; |
406 | __get_cpu_var(tasklet_hi_vec).tail = &(t->next); | 406 | __this_cpu_write(tasklet_hi_vec.tail, &(t->next)); |
407 | raise_softirq_irqoff(HI_SOFTIRQ); | 407 | raise_softirq_irqoff(HI_SOFTIRQ); |
408 | local_irq_restore(flags); | 408 | local_irq_restore(flags); |
409 | } | 409 | } |
@@ -414,8 +414,8 @@ void __tasklet_hi_schedule_first(struct tasklet_struct *t) | |||
414 | { | 414 | { |
415 | BUG_ON(!irqs_disabled()); | 415 | BUG_ON(!irqs_disabled()); |
416 | 416 | ||
417 | t->next = __get_cpu_var(tasklet_hi_vec).head; | 417 | t->next = __this_cpu_read(tasklet_hi_vec.head); |
418 | __get_cpu_var(tasklet_hi_vec).head = t; | 418 | __this_cpu_write(tasklet_hi_vec.head, t); |
419 | __raise_softirq_irqoff(HI_SOFTIRQ); | 419 | __raise_softirq_irqoff(HI_SOFTIRQ); |
420 | } | 420 | } |
421 | 421 | ||
@@ -426,9 +426,9 @@ static void tasklet_action(struct softirq_action *a) | |||
426 | struct tasklet_struct *list; | 426 | struct tasklet_struct *list; |
427 | 427 | ||
428 | local_irq_disable(); | 428 | local_irq_disable(); |
429 | list = __get_cpu_var(tasklet_vec).head; | 429 | list = __this_cpu_read(tasklet_vec.head); |
430 | __get_cpu_var(tasklet_vec).head = NULL; | 430 | __this_cpu_write(tasklet_vec.head, NULL); |
431 | __get_cpu_var(tasklet_vec).tail = &__get_cpu_var(tasklet_vec).head; | 431 | __this_cpu_write(tasklet_vec.tail, &__get_cpu_var(tasklet_vec).head); |
432 | local_irq_enable(); | 432 | local_irq_enable(); |
433 | 433 | ||
434 | while (list) { | 434 | while (list) { |
@@ -449,8 +449,8 @@ static void tasklet_action(struct softirq_action *a) | |||
449 | 449 | ||
450 | local_irq_disable(); | 450 | local_irq_disable(); |
451 | t->next = NULL; | 451 | t->next = NULL; |
452 | *__get_cpu_var(tasklet_vec).tail = t; | 452 | *__this_cpu_read(tasklet_vec.tail) = t; |
453 | __get_cpu_var(tasklet_vec).tail = &(t->next); | 453 | __this_cpu_write(tasklet_vec.tail, &(t->next)); |
454 | __raise_softirq_irqoff(TASKLET_SOFTIRQ); | 454 | __raise_softirq_irqoff(TASKLET_SOFTIRQ); |
455 | local_irq_enable(); | 455 | local_irq_enable(); |
456 | } | 456 | } |
@@ -461,9 +461,9 @@ static void tasklet_hi_action(struct softirq_action *a) | |||
461 | struct tasklet_struct *list; | 461 | struct tasklet_struct *list; |
462 | 462 | ||
463 | local_irq_disable(); | 463 | local_irq_disable(); |
464 | list = __get_cpu_var(tasklet_hi_vec).head; | 464 | list = __this_cpu_read(tasklet_hi_vec.head); |
465 | __get_cpu_var(tasklet_hi_vec).head = NULL; | 465 | __this_cpu_write(tasklet_hi_vec.head, NULL); |
466 | __get_cpu_var(tasklet_hi_vec).tail = &__get_cpu_var(tasklet_hi_vec).head; | 466 | __this_cpu_write(tasklet_hi_vec.tail, &__get_cpu_var(tasklet_hi_vec).head); |
467 | local_irq_enable(); | 467 | local_irq_enable(); |
468 | 468 | ||
469 | while (list) { | 469 | while (list) { |
@@ -484,8 +484,8 @@ static void tasklet_hi_action(struct softirq_action *a) | |||
484 | 484 | ||
485 | local_irq_disable(); | 485 | local_irq_disable(); |
486 | t->next = NULL; | 486 | t->next = NULL; |
487 | *__get_cpu_var(tasklet_hi_vec).tail = t; | 487 | *__this_cpu_read(tasklet_hi_vec.tail) = t; |
488 | __get_cpu_var(tasklet_hi_vec).tail = &(t->next); | 488 | __this_cpu_write(tasklet_hi_vec.tail, &(t->next)); |
489 | __raise_softirq_irqoff(HI_SOFTIRQ); | 489 | __raise_softirq_irqoff(HI_SOFTIRQ); |
490 | local_irq_enable(); | 490 | local_irq_enable(); |
491 | } | 491 | } |
@@ -802,16 +802,16 @@ static void takeover_tasklets(unsigned int cpu) | |||
802 | 802 | ||
803 | /* Find end, append list for that CPU. */ | 803 | /* Find end, append list for that CPU. */ |
804 | if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) { | 804 | if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) { |
805 | *(__get_cpu_var(tasklet_vec).tail) = per_cpu(tasklet_vec, cpu).head; | 805 | *__this_cpu_read(tasklet_vec.tail) = per_cpu(tasklet_vec, cpu).head; |
806 | __get_cpu_var(tasklet_vec).tail = per_cpu(tasklet_vec, cpu).tail; | 806 | this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail); |
807 | per_cpu(tasklet_vec, cpu).head = NULL; | 807 | per_cpu(tasklet_vec, cpu).head = NULL; |
808 | per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head; | 808 | per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head; |
809 | } | 809 | } |
810 | raise_softirq_irqoff(TASKLET_SOFTIRQ); | 810 | raise_softirq_irqoff(TASKLET_SOFTIRQ); |
811 | 811 | ||
812 | if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) { | 812 | if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) { |
813 | *__get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).head; | 813 | *__this_cpu_read(tasklet_hi_vec.tail) = per_cpu(tasklet_hi_vec, cpu).head; |
814 | __get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).tail; | 814 | __this_cpu_write(tasklet_hi_vec.tail, per_cpu(tasklet_hi_vec, cpu).tail); |
815 | per_cpu(tasklet_hi_vec, cpu).head = NULL; | 815 | per_cpu(tasklet_hi_vec, cpu).head = NULL; |
816 | per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head; | 816 | per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head; |
817 | } | 817 | } |
@@ -853,7 +853,7 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb, | |||
853 | cpumask_any(cpu_online_mask)); | 853 | cpumask_any(cpu_online_mask)); |
854 | case CPU_DEAD: | 854 | case CPU_DEAD: |
855 | case CPU_DEAD_FROZEN: { | 855 | case CPU_DEAD_FROZEN: { |
856 | static struct sched_param param = { | 856 | static const struct sched_param param = { |
857 | .sched_priority = MAX_RT_PRIO-1 | 857 | .sched_priority = MAX_RT_PRIO-1 |
858 | }; | 858 | }; |
859 | 859 | ||
@@ -885,25 +885,6 @@ static __init int spawn_ksoftirqd(void) | |||
885 | } | 885 | } |
886 | early_initcall(spawn_ksoftirqd); | 886 | early_initcall(spawn_ksoftirqd); |
887 | 887 | ||
888 | #ifdef CONFIG_SMP | ||
889 | /* | ||
890 | * Call a function on all processors | ||
891 | */ | ||
892 | int on_each_cpu(void (*func) (void *info), void *info, int wait) | ||
893 | { | ||
894 | int ret = 0; | ||
895 | |||
896 | preempt_disable(); | ||
897 | ret = smp_call_function(func, info, wait); | ||
898 | local_irq_disable(); | ||
899 | func(info); | ||
900 | local_irq_enable(); | ||
901 | preempt_enable(); | ||
902 | return ret; | ||
903 | } | ||
904 | EXPORT_SYMBOL(on_each_cpu); | ||
905 | #endif | ||
906 | |||
907 | /* | 888 | /* |
908 | * [ These __weak aliases are kept in a separate compilation unit, so that | 889 | * [ These __weak aliases are kept in a separate compilation unit, so that |
909 | * GCC does not inline them incorrectly. ] | 890 | * GCC does not inline them incorrectly. ] |
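The tasklet hunks above replace __get_cpu_var() ("compute the address of this CPU's copy, then dereference it") with the __this_cpu_read()/__this_cpu_write() accessors, which on architectures such as x86 compile down to a single segment-prefixed memory access. Sketched on a stand-in per-CPU structure; note the one spot where an address is genuinely needed still uses &__get_cpu_var(), mirroring the diff:

#include <linux/percpu.h>

struct my_vec {
	struct my_item *head;
	struct my_item **tail;
};
static DEFINE_PER_CPU(struct my_vec, my_vec);

static void reset_local_vec(void)
{
	/* read and clear this CPU's list head without forming its address */
	struct my_item *head = __this_cpu_read(my_vec.head);

	__this_cpu_write(my_vec.head, NULL);
	__this_cpu_write(my_vec.tail, &__get_cpu_var(my_vec).head);

	/* ... process 'head' ... */
	(void)head;
}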
diff --git a/kernel/srcu.c b/kernel/srcu.c index 98d8c1e80edb..73ce23feaea9 100644 --- a/kernel/srcu.c +++ b/kernel/srcu.c | |||
@@ -156,6 +156,16 @@ void __srcu_read_unlock(struct srcu_struct *sp, int idx) | |||
156 | EXPORT_SYMBOL_GPL(__srcu_read_unlock); | 156 | EXPORT_SYMBOL_GPL(__srcu_read_unlock); |
157 | 157 | ||
158 | /* | 158 | /* |
159 | * We use an adaptive strategy for synchronize_srcu() and especially for | ||
160 | * synchronize_srcu_expedited(). We spin for a fixed time period | ||
161 | * (defined below) to allow SRCU readers to exit their read-side critical | ||
162 | * sections. If there are still some readers after 10 microseconds, | ||
163 | * we repeatedly block for 1-millisecond time periods. This approach | ||
164 | * has done well in testing, so there is no need for a config parameter. | ||
165 | */ | ||
166 | #define SYNCHRONIZE_SRCU_READER_DELAY 10 | ||
167 | |||
168 | /* | ||
159 | * Helper function for synchronize_srcu() and synchronize_srcu_expedited(). | 169 | * Helper function for synchronize_srcu() and synchronize_srcu_expedited(). |
160 | */ | 170 | */ |
161 | static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void)) | 171 | static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void)) |
@@ -207,11 +217,12 @@ static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void)) | |||
207 | * will have finished executing. We initially give readers | 217 | * will have finished executing. We initially give readers |
208 | * an arbitrarily chosen 10 microseconds to get out of their | 218 | * an arbitrarily chosen 10 microseconds to get out of their |
209 | * SRCU read-side critical sections, then loop waiting 1/HZ | 219 | * SRCU read-side critical sections, then loop waiting 1/HZ |
210 | * seconds per iteration. | 220 | * seconds per iteration. The 10-microsecond value has done |
221 | * very well in testing. | ||
211 | */ | 222 | */ |
212 | 223 | ||
213 | if (srcu_readers_active_idx(sp, idx)) | 224 | if (srcu_readers_active_idx(sp, idx)) |
214 | udelay(CONFIG_SRCU_SYNCHRONIZE_DELAY); | 225 | udelay(SYNCHRONIZE_SRCU_READER_DELAY); |
215 | while (srcu_readers_active_idx(sp, idx)) | 226 | while (srcu_readers_active_idx(sp, idx)) |
216 | schedule_timeout_interruptible(1); | 227 | schedule_timeout_interruptible(1); |
217 | 228 | ||
diff --git a/kernel/sys.c b/kernel/sys.c index 2745dcdb6c6c..31b71a276b40 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -43,6 +43,8 @@ | |||
43 | #include <linux/kprobes.h> | 43 | #include <linux/kprobes.h> |
44 | #include <linux/user_namespace.h> | 44 | #include <linux/user_namespace.h> |
45 | 45 | ||
46 | #include <linux/kmsg_dump.h> | ||
47 | |||
46 | #include <asm/uaccess.h> | 48 | #include <asm/uaccess.h> |
47 | #include <asm/io.h> | 49 | #include <asm/io.h> |
48 | #include <asm/unistd.h> | 50 | #include <asm/unistd.h> |
@@ -285,6 +287,7 @@ out_unlock: | |||
285 | */ | 287 | */ |
286 | void emergency_restart(void) | 288 | void emergency_restart(void) |
287 | { | 289 | { |
290 | kmsg_dump(KMSG_DUMP_EMERG); | ||
288 | machine_emergency_restart(); | 291 | machine_emergency_restart(); |
289 | } | 292 | } |
290 | EXPORT_SYMBOL_GPL(emergency_restart); | 293 | EXPORT_SYMBOL_GPL(emergency_restart); |
@@ -312,6 +315,7 @@ void kernel_restart(char *cmd) | |||
312 | printk(KERN_EMERG "Restarting system.\n"); | 315 | printk(KERN_EMERG "Restarting system.\n"); |
313 | else | 316 | else |
314 | printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd); | 317 | printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd); |
318 | kmsg_dump(KMSG_DUMP_RESTART); | ||
315 | machine_restart(cmd); | 319 | machine_restart(cmd); |
316 | } | 320 | } |
317 | EXPORT_SYMBOL_GPL(kernel_restart); | 321 | EXPORT_SYMBOL_GPL(kernel_restart); |
@@ -333,6 +337,7 @@ void kernel_halt(void) | |||
333 | kernel_shutdown_prepare(SYSTEM_HALT); | 337 | kernel_shutdown_prepare(SYSTEM_HALT); |
334 | sysdev_shutdown(); | 338 | sysdev_shutdown(); |
335 | printk(KERN_EMERG "System halted.\n"); | 339 | printk(KERN_EMERG "System halted.\n"); |
340 | kmsg_dump(KMSG_DUMP_HALT); | ||
336 | machine_halt(); | 341 | machine_halt(); |
337 | } | 342 | } |
338 | 343 | ||
@@ -351,6 +356,7 @@ void kernel_power_off(void) | |||
351 | disable_nonboot_cpus(); | 356 | disable_nonboot_cpus(); |
352 | sysdev_shutdown(); | 357 | sysdev_shutdown(); |
353 | printk(KERN_EMERG "Power down.\n"); | 358 | printk(KERN_EMERG "Power down.\n"); |
359 | kmsg_dump(KMSG_DUMP_POWEROFF); | ||
354 | machine_power_off(); | 360 | machine_power_off(); |
355 | } | 361 | } |
356 | EXPORT_SYMBOL_GPL(kernel_power_off); | 362 | EXPORT_SYMBOL_GPL(kernel_power_off); |
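With kmsg_dump() now called on restart, halt, power-off and emergency restart, a registered dumper can persist the tail of the kernel log across any controlled shutdown, not just oops/panic. A minimal registration sketch; the mydump names are placeholders, while the callback signature matches the dumper->dump() call shown in the printk.c hunk above:

#include <linux/kmsg_dump.h>
#include <linux/module.h>

static void mydump_do_dump(struct kmsg_dumper *dumper,
			   enum kmsg_dump_reason reason,
			   const char *s1, unsigned long l1,
			   const char *s2, unsigned long l2)
{
	/* s1/l1 and s2/l2 are the two halves of the circular log buffer;
	 * write them to persistent storage here */
}

static struct kmsg_dumper mydumper = {
	.dump = mydump_do_dump,
};

static int __init mydump_init(void)
{
	return kmsg_dump_register(&mydumper);
}

static void __exit mydump_exit(void)
{
	kmsg_dump_unregister(&mydumper);
}

module_init(mydump_init);
module_exit(mydump_exit);
MODULE_LICENSE("GPL");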
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ae5cbb1e3ced..bc86bb32e126 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
25 | #include <linux/sysctl.h> | 25 | #include <linux/sysctl.h> |
26 | #include <linux/signal.h> | 26 | #include <linux/signal.h> |
27 | #include <linux/printk.h> | ||
27 | #include <linux/proc_fs.h> | 28 | #include <linux/proc_fs.h> |
28 | #include <linux/security.h> | 29 | #include <linux/security.h> |
29 | #include <linux/ctype.h> | 30 | #include <linux/ctype.h> |
@@ -245,10 +246,6 @@ static struct ctl_table root_table[] = { | |||
245 | .mode = 0555, | 246 | .mode = 0555, |
246 | .child = dev_table, | 247 | .child = dev_table, |
247 | }, | 248 | }, |
248 | /* | ||
249 | * NOTE: do not add new entries to this table unless you have read | ||
250 | * Documentation/sysctl/ctl_unnumbered.txt | ||
251 | */ | ||
252 | { } | 249 | { } |
253 | }; | 250 | }; |
254 | 251 | ||
@@ -710,6 +707,15 @@ static struct ctl_table kern_table[] = { | |||
710 | .extra1 = &zero, | 707 | .extra1 = &zero, |
711 | .extra2 = &one, | 708 | .extra2 = &one, |
712 | }, | 709 | }, |
710 | { | ||
711 | .procname = "kptr_restrict", | ||
712 | .data = &kptr_restrict, | ||
713 | .maxlen = sizeof(int), | ||
714 | .mode = 0644, | ||
715 | .proc_handler = proc_dointvec_minmax, | ||
716 | .extra1 = &zero, | ||
717 | .extra2 = &two, | ||
718 | }, | ||
713 | #endif | 719 | #endif |
714 | { | 720 | { |
715 | .procname = "ngroups_max", | 721 | .procname = "ngroups_max", |
@@ -962,10 +968,6 @@ static struct ctl_table kern_table[] = { | |||
962 | .proc_handler = proc_dointvec, | 968 | .proc_handler = proc_dointvec, |
963 | }, | 969 | }, |
964 | #endif | 970 | #endif |
965 | /* | ||
966 | * NOTE: do not add new entries to this table unless you have read | ||
967 | * Documentation/sysctl/ctl_unnumbered.txt | ||
968 | */ | ||
969 | { } | 971 | { } |
970 | }; | 972 | }; |
971 | 973 | ||
@@ -1326,11 +1328,6 @@ static struct ctl_table vm_table[] = { | |||
1326 | .extra2 = &one, | 1328 | .extra2 = &one, |
1327 | }, | 1329 | }, |
1328 | #endif | 1330 | #endif |
1329 | |||
1330 | /* | ||
1331 | * NOTE: do not add new entries to this table unless you have read | ||
1332 | * Documentation/sysctl/ctl_unnumbered.txt | ||
1333 | */ | ||
1334 | { } | 1331 | { } |
1335 | }; | 1332 | }; |
1336 | 1333 | ||
@@ -1486,10 +1483,6 @@ static struct ctl_table fs_table[] = { | |||
1486 | .proc_handler = &pipe_proc_fn, | 1483 | .proc_handler = &pipe_proc_fn, |
1487 | .extra1 = &pipe_min_size, | 1484 | .extra1 = &pipe_min_size, |
1488 | }, | 1485 | }, |
1489 | /* | ||
1490 | * NOTE: do not add new entries to this table unless you have read | ||
1491 | * Documentation/sysctl/ctl_unnumbered.txt | ||
1492 | */ | ||
1493 | { } | 1486 | { } |
1494 | }; | 1487 | }; |
1495 | 1488 | ||
@@ -2899,7 +2892,7 @@ int proc_do_large_bitmap(struct ctl_table *table, int write, | |||
2899 | } | 2892 | } |
2900 | } | 2893 | } |
2901 | 2894 | ||
2902 | #else /* CONFIG_PROC_FS */ | 2895 | #else /* CONFIG_PROC_SYSCTL */ |
2903 | 2896 | ||
2904 | int proc_dostring(struct ctl_table *table, int write, | 2897 | int proc_dostring(struct ctl_table *table, int write, |
2905 | void __user *buffer, size_t *lenp, loff_t *ppos) | 2898 | void __user *buffer, size_t *lenp, loff_t *ppos) |
@@ -2951,7 +2944,7 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write, | |||
2951 | } | 2944 | } |
2952 | 2945 | ||
2953 | 2946 | ||
2954 | #endif /* CONFIG_PROC_FS */ | 2947 | #endif /* CONFIG_PROC_SYSCTL */ |
2955 | 2948 | ||
2956 | /* | 2949 | /* |
2957 | * No sense putting this after each symbol definition, twice, | 2950 | * No sense putting this after each symbol definition, twice, |
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index 4b2545a136ff..b875bedf7c9a 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c | |||
@@ -1192,7 +1192,7 @@ static ssize_t bin_dn_node_address(struct file *file, | |||
1192 | 1192 | ||
1193 | buf[result] = '\0'; | 1193 | buf[result] = '\0'; |
1194 | 1194 | ||
1195 | /* Convert the decnet addresss to binary */ | 1195 | /* Convert the decnet address to binary */ |
1196 | result = -EIO; | 1196 | result = -EIO; |
1197 | nodep = strchr(buf, '.') + 1; | 1197 | nodep = strchr(buf, '.') + 1; |
1198 | if (!nodep) | 1198 | if (!nodep) |
diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 3308fd7f1b52..3971c6b9d58d 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c | |||
@@ -89,8 +89,7 @@ static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp, | |||
89 | return -ENOMEM; | 89 | return -ENOMEM; |
90 | 90 | ||
91 | if (!info) { | 91 | if (!info) { |
92 | int seq = get_cpu_var(taskstats_seqnum)++; | 92 | int seq = this_cpu_inc_return(taskstats_seqnum) - 1; |
93 | put_cpu_var(taskstats_seqnum); | ||
94 | 93 | ||
95 | reply = genlmsg_put(skb, 0, seq, &family, 0, cmd); | 94 | reply = genlmsg_put(skb, 0, seq, &family, 0, cmd); |
96 | } else | 95 | } else |
@@ -349,7 +348,7 @@ static int parse(struct nlattr *na, struct cpumask *mask) | |||
349 | return ret; | 348 | return ret; |
350 | } | 349 | } |
351 | 350 | ||
352 | #ifdef CONFIG_IA64 | 351 | #if defined(CONFIG_64BIT) && !defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) |
353 | #define TASKSTATS_NEEDS_PADDING 1 | 352 | #define TASKSTATS_NEEDS_PADDING 1 |
354 | #endif | 353 | #endif |
355 | 354 | ||
@@ -612,7 +611,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead) | |||
612 | fill_tgid_exit(tsk); | 611 | fill_tgid_exit(tsk); |
613 | } | 612 | } |
614 | 613 | ||
615 | listeners = &__raw_get_cpu_var(listener_array); | 614 | listeners = __this_cpu_ptr(&listener_array); |
616 | if (list_empty(&listeners->list)) | 615 | if (list_empty(&listeners->list)) |
617 | return; | 616 | return; |
618 | 617 | ||
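The sequence-number change swaps a get_cpu_var()/put_cpu_var() pair, which brackets the post-increment with explicit preempt disable/enable, for a single this_cpu_inc_return(); subtracting one recovers the pre-increment value the old code yielded. In isolation, on a stand-in counter:

#include <linux/types.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(__u32, my_seqnum);

static int next_seq(void)
{
	/* increments this CPU's counter preemption-safely and returns the
	 * new value; "- 1" gives the value before the increment */
	return this_cpu_inc_return(my_seqnum) - 1;
}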
diff --git a/kernel/time.c b/kernel/time.c index ba9b338d1835..32174359576f 100644 --- a/kernel/time.c +++ b/kernel/time.c | |||
@@ -238,7 +238,7 @@ EXPORT_SYMBOL(current_fs_time); | |||
238 | * Avoid unnecessary multiplications/divisions in the | 238 | * Avoid unnecessary multiplications/divisions in the |
239 | * two most common HZ cases: | 239 | * two most common HZ cases: |
240 | */ | 240 | */ |
241 | unsigned int inline jiffies_to_msecs(const unsigned long j) | 241 | inline unsigned int jiffies_to_msecs(const unsigned long j) |
242 | { | 242 | { |
243 | #if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) | 243 | #if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) |
244 | return (MSEC_PER_SEC / HZ) * j; | 244 | return (MSEC_PER_SEC / HZ) * j; |
@@ -254,7 +254,7 @@ unsigned int inline jiffies_to_msecs(const unsigned long j) | |||
254 | } | 254 | } |
255 | EXPORT_SYMBOL(jiffies_to_msecs); | 255 | EXPORT_SYMBOL(jiffies_to_msecs); |
256 | 256 | ||
257 | unsigned int inline jiffies_to_usecs(const unsigned long j) | 257 | inline unsigned int jiffies_to_usecs(const unsigned long j) |
258 | { | 258 | { |
259 | #if HZ <= USEC_PER_SEC && !(USEC_PER_SEC % HZ) | 259 | #if HZ <= USEC_PER_SEC && !(USEC_PER_SEC % HZ) |
260 | return (USEC_PER_SEC / HZ) * j; | 260 | return (USEC_PER_SEC / HZ) * j; |
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 8588abcac07b..6519cf62d9cd 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c | |||
@@ -152,6 +152,7 @@ clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 maxsec) | |||
152 | */ | 152 | */ |
153 | for (sft = 32; sft > 0; sft--) { | 153 | for (sft = 32; sft > 0; sft--) { |
154 | tmp = (u64) to << sft; | 154 | tmp = (u64) to << sft; |
155 | tmp += from / 2; | ||
155 | do_div(tmp, from); | 156 | do_div(tmp, from); |
156 | if ((tmp >> sftacc) == 0) | 157 | if ((tmp >> sftacc) == 0) |
157 | break; | 158 | break; |
@@ -678,7 +679,7 @@ EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale); | |||
678 | int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq) | 679 | int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq) |
679 | { | 680 | { |
680 | 681 | ||
681 | /* Intialize mult/shift and max_idle_ns */ | 682 | /* Initialize mult/shift and max_idle_ns */ |
682 | __clocksource_updatefreq_scale(cs, scale, freq); | 683 | __clocksource_updatefreq_scale(cs, scale, freq); |
683 | 684 | ||
684 | /* Add clocksource to the clocksource list */ | 685 | /* Add clocksource to the clocksource list */ |
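The clocks_calc_mult_shift() change adds half the divisor before do_div(), so the computed mult is rounded to nearest instead of truncated, roughly halving the worst-case conversion error of the resulting mult/shift pair. A small user-space illustration of the difference (the 24 MHz input is just an example, not taken from this patch):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t from = 24000000;	/* 24 MHz counter */
	uint32_t to = 1000000000;	/* converting to nanoseconds */
	int sft = 24;

	uint64_t truncated = ((uint64_t)to << sft) / from;
	uint64_t rounded = (((uint64_t)to << sft) + from / 2) / from;

	/* exact ratio is 699050666.67; rounding picks 699050667 */
	printf("truncated=%llu rounded=%llu\n",
	       (unsigned long long)truncated,
	       (unsigned long long)rounded);
	return 0;
}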
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index d2321891538f..5c00242fa921 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/timex.h> | 14 | #include <linux/timex.h> |
15 | #include <linux/time.h> | 15 | #include <linux/time.h> |
16 | #include <linux/mm.h> | 16 | #include <linux/mm.h> |
17 | #include <linux/module.h> | ||
17 | 18 | ||
18 | /* | 19 | /* |
19 | * NTP timekeeping variables: | 20 | * NTP timekeeping variables: |
@@ -74,6 +75,162 @@ static long time_adjust; | |||
74 | /* constant (boot-param configurable) NTP tick adjustment (upscaled) */ | 75 | /* constant (boot-param configurable) NTP tick adjustment (upscaled) */ |
75 | static s64 ntp_tick_adj; | 76 | static s64 ntp_tick_adj; |
76 | 77 | ||
78 | #ifdef CONFIG_NTP_PPS | ||
79 | |||
80 | /* | ||
81 | * The following variables are used when a pulse-per-second (PPS) signal | ||
82 | * is available. They establish the engineering parameters of the clock | ||
83 | * discipline loop when controlled by the PPS signal. | ||
84 | */ | ||
85 | #define PPS_VALID 10 /* PPS signal watchdog max (s) */ | ||
86 | #define PPS_POPCORN 4 /* popcorn spike threshold (shift) */ | ||
87 | #define PPS_INTMIN 2 /* min freq interval (s) (shift) */ | ||
88 | #define PPS_INTMAX 8 /* max freq interval (s) (shift) */ | ||
89 | #define PPS_INTCOUNT 4 /* number of consecutive good intervals to | ||
90 | increase pps_shift or consecutive bad | ||
91 | intervals to decrease it */ | ||
92 | #define PPS_MAXWANDER 100000 /* max PPS freq wander (ns/s) */ | ||
93 | |||
94 | static int pps_valid; /* signal watchdog counter */ | ||
95 | static long pps_tf[3]; /* phase median filter */ | ||
96 | static long pps_jitter; /* current jitter (ns) */ | ||
97 | static struct timespec pps_fbase; /* beginning of the last freq interval */ | ||
98 | static int pps_shift; /* current interval duration (s) (shift) */ | ||
99 | static int pps_intcnt; /* interval counter */ | ||
100 | static s64 pps_freq; /* frequency offset (scaled ns/s) */ | ||
101 | static long pps_stabil; /* current stability (scaled ns/s) */ | ||
102 | |||
103 | /* | ||
104 | * PPS signal quality monitors | ||
105 | */ | ||
106 | static long pps_calcnt; /* calibration intervals */ | ||
107 | static long pps_jitcnt; /* jitter limit exceeded */ | ||
108 | static long pps_stbcnt; /* stability limit exceeded */ | ||
109 | static long pps_errcnt; /* calibration errors */ | ||
110 | |||
111 | |||
112 | /* PPS kernel consumer compensates the whole phase error immediately. | ||
113 | * Otherwise, reduce the offset by a fixed factor times the time constant. | ||
114 | */ | ||
115 | static inline s64 ntp_offset_chunk(s64 offset) | ||
116 | { | ||
117 | if (time_status & STA_PPSTIME && time_status & STA_PPSSIGNAL) | ||
118 | return offset; | ||
119 | else | ||
120 | return shift_right(offset, SHIFT_PLL + time_constant); | ||
121 | } | ||
122 | |||
123 | static inline void pps_reset_freq_interval(void) | ||
124 | { | ||
125 | /* the PPS calibration interval may end | ||
126 | surprisingly early */ | ||
127 | pps_shift = PPS_INTMIN; | ||
128 | pps_intcnt = 0; | ||
129 | } | ||
130 | |||
131 | /** | ||
132 | * pps_clear - Clears the PPS state variables | ||
133 | * | ||
134 | * Must be called while holding a write on the xtime_lock | ||
135 | */ | ||
136 | static inline void pps_clear(void) | ||
137 | { | ||
138 | pps_reset_freq_interval(); | ||
139 | pps_tf[0] = 0; | ||
140 | pps_tf[1] = 0; | ||
141 | pps_tf[2] = 0; | ||
142 | pps_fbase.tv_sec = pps_fbase.tv_nsec = 0; | ||
143 | pps_freq = 0; | ||
144 | } | ||
145 | |||
146 | /* Decrease pps_valid to indicate that another second has passed since | ||
147 | * the last PPS signal. When it reaches 0, indicate that the PPS signal is | ||
148 | * missing. | ||
149 | * | ||
150 | * Must be called while holding a write on the xtime_lock | ||
151 | */ | ||
152 | static inline void pps_dec_valid(void) | ||
153 | { | ||
154 | if (pps_valid > 0) | ||
155 | pps_valid--; | ||
156 | else { | ||
157 | time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER | | ||
158 | STA_PPSWANDER | STA_PPSERROR); | ||
159 | pps_clear(); | ||
160 | } | ||
161 | } | ||
162 | |||
163 | static inline void pps_set_freq(s64 freq) | ||
164 | { | ||
165 | pps_freq = freq; | ||
166 | } | ||
167 | |||
168 | static inline int is_error_status(int status) | ||
169 | { | ||
170 | return (time_status & (STA_UNSYNC|STA_CLOCKERR)) | ||
171 | /* PPS signal lost when either PPS time or | ||
172 | * PPS frequency synchronization requested | ||
173 | */ | ||
174 | || ((time_status & (STA_PPSFREQ|STA_PPSTIME)) | ||
175 | && !(time_status & STA_PPSSIGNAL)) | ||
176 | /* PPS jitter exceeded when | ||
177 | * PPS time synchronization requested */ | ||
178 | || ((time_status & (STA_PPSTIME|STA_PPSJITTER)) | ||
179 | == (STA_PPSTIME|STA_PPSJITTER)) | ||
180 | /* PPS wander exceeded or calibration error when | ||
181 | * PPS frequency synchronization requested | ||
182 | */ | ||
183 | || ((time_status & STA_PPSFREQ) | ||
184 | && (time_status & (STA_PPSWANDER|STA_PPSERROR))); | ||
185 | } | ||
186 | |||
187 | static inline void pps_fill_timex(struct timex *txc) | ||
188 | { | ||
189 | txc->ppsfreq = shift_right((pps_freq >> PPM_SCALE_INV_SHIFT) * | ||
190 | PPM_SCALE_INV, NTP_SCALE_SHIFT); | ||
191 | txc->jitter = pps_jitter; | ||
192 | if (!(time_status & STA_NANO)) | ||
193 | txc->jitter /= NSEC_PER_USEC; | ||
194 | txc->shift = pps_shift; | ||
195 | txc->stabil = pps_stabil; | ||
196 | txc->jitcnt = pps_jitcnt; | ||
197 | txc->calcnt = pps_calcnt; | ||
198 | txc->errcnt = pps_errcnt; | ||
199 | txc->stbcnt = pps_stbcnt; | ||
200 | } | ||
201 | |||
202 | #else /* !CONFIG_NTP_PPS */ | ||
203 | |||
204 | static inline s64 ntp_offset_chunk(s64 offset) | ||
205 | { | ||
206 | return shift_right(offset, SHIFT_PLL + time_constant); | ||
207 | } | ||
208 | |||
209 | static inline void pps_reset_freq_interval(void) {} | ||
210 | static inline void pps_clear(void) {} | ||
211 | static inline void pps_dec_valid(void) {} | ||
212 | static inline void pps_set_freq(s64 freq) {} | ||
213 | |||
214 | static inline int is_error_status(int status) | ||
215 | { | ||
216 | return status & (STA_UNSYNC|STA_CLOCKERR); | ||
217 | } | ||
218 | |||
219 | static inline void pps_fill_timex(struct timex *txc) | ||
220 | { | ||
221 | /* PPS is not implemented, so these are zero */ | ||
222 | txc->ppsfreq = 0; | ||
223 | txc->jitter = 0; | ||
224 | txc->shift = 0; | ||
225 | txc->stabil = 0; | ||
226 | txc->jitcnt = 0; | ||
227 | txc->calcnt = 0; | ||
228 | txc->errcnt = 0; | ||
229 | txc->stbcnt = 0; | ||
230 | } | ||
231 | |||
232 | #endif /* CONFIG_NTP_PPS */ | ||
233 | |||
77 | /* | 234 | /* |
78 | * NTP methods: | 235 | * NTP methods: |
79 | */ | 236 | */ |
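
The ntp_offset_chunk() helper introduced above is the heart of this change to the PLL path: with PPS time discipline enabled and a signal present, the whole phase error is handed back in one step, otherwise the classic behaviour of shaving off a shifted fraction per second is kept. Below is a rough userspace sketch of that decision only; SHIFT_PLL, the status bits and shift_right() are re-declared locally (values believed to match the era's <linux/timex.h>, not taken from this patch) so the sketch stands alone.

	/* Userspace sketch of the ntp_offset_chunk() behaviour added above.
	 * Constants and helpers are re-declared locally for illustration only. */
	#include <stdio.h>
	#include <stdint.h>

	#define SHIFT_PLL      2          /* PLL loop gain shift (assumed kernel value) */
	#define STA_PPSSIGNAL  0x0100     /* PPS signal present */
	#define STA_PPSTIME    0x0004     /* PPS time discipline enabled */

	/* arithmetic shift that also works for negative values, like the kernel macro */
	static int64_t shift_right(int64_t x, int s)
	{
		return (x < 0) ? -((-x) >> s) : (x >> s);
	}

	static int64_t ntp_offset_chunk(int64_t offset, int time_status, int time_constant)
	{
		if ((time_status & STA_PPSTIME) && (time_status & STA_PPSSIGNAL))
			return offset;                  /* PPS consumes the whole error */
		return shift_right(offset, SHIFT_PLL + time_constant);
	}

	int main(void)
	{
		int64_t offset = 1000000;               /* 1 ms phase error, in ns */
		int time_constant = 2;

		/* without PPS: the offset decays geometrically, one chunk per second */
		for (int sec = 0; sec < 5; sec++) {
			int64_t delta = ntp_offset_chunk(offset, 0, time_constant);
			offset -= delta;
			printf("second %d: applied %lld ns, %lld ns left\n",
			       sec, (long long)delta, (long long)offset);
		}

		/* with PPS time discipline: the remaining error is applied at once */
		int64_t delta = ntp_offset_chunk(offset, STA_PPSTIME | STA_PPSSIGNAL,
						 time_constant);
		printf("PPS active: applied %lld ns in one step\n", (long long)delta);
		return 0;
	}
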
@@ -185,6 +342,9 @@ void ntp_clear(void) | |||
185 | 342 | ||
186 | tick_length = tick_length_base; | 343 | tick_length = tick_length_base; |
187 | time_offset = 0; | 344 | time_offset = 0; |
345 | |||
346 | /* Clear PPS state variables */ | ||
347 | pps_clear(); | ||
188 | } | 348 | } |
189 | 349 | ||
190 | /* | 350 | /* |
@@ -250,16 +410,16 @@ void second_overflow(void) | |||
250 | time_status |= STA_UNSYNC; | 410 | time_status |= STA_UNSYNC; |
251 | } | 411 | } |
252 | 412 | ||
253 | /* | 413 | /* Compute the phase adjustment for the next second */ |
254 | * Compute the phase adjustment for the next second. The offset is | ||
255 | * reduced by a fixed factor times the time constant. | ||
256 | */ | ||
257 | tick_length = tick_length_base; | 414 | tick_length = tick_length_base; |
258 | 415 | ||
259 | delta = shift_right(time_offset, SHIFT_PLL + time_constant); | 416 | delta = ntp_offset_chunk(time_offset); |
260 | time_offset -= delta; | 417 | time_offset -= delta; |
261 | tick_length += delta; | 418 | tick_length += delta; |
262 | 419 | ||
420 | /* Check PPS signal */ | ||
421 | pps_dec_valid(); | ||
422 | |||
263 | if (!time_adjust) | 423 | if (!time_adjust) |
264 | return; | 424 | return; |
265 | 425 | ||
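
pps_dec_valid(), now called once per second from second_overflow(), is a plain watchdog: each quiet second decrements a counter and, when it runs out, the PPS status bits are dropped and the PPS state is cleared. A toy userspace model of just that timeout is shown below; only STA_PPSSIGNAL is tracked, whereas the real code also clears the jitter/wander/error bits and calls pps_clear().

	/* Toy model of the PPS watchdog driven from second_overflow(). */
	#include <stdio.h>

	#define PPS_VALID      10        /* seconds of silence before PPS is declared lost */
	#define STA_PPSSIGNAL  0x0100

	static int pps_valid;
	static int time_status;

	static void pps_event(void)              /* a pulse arrived (hardpps) */
	{
		time_status |= STA_PPSSIGNAL;
		pps_valid = PPS_VALID;
	}

	static void pps_dec_valid(void)          /* called once per second */
	{
		if (pps_valid > 0)
			pps_valid--;
		else
			time_status &= ~STA_PPSSIGNAL;  /* signal considered lost */
	}

	int main(void)
	{
		pps_event();
		for (int sec = 1; sec <= 12; sec++) {
			pps_dec_valid();
			printf("second %2d: PPS %s\n", sec,
			       (time_status & STA_PPSSIGNAL) ? "present" : "lost");
		}
		return 0;
	}
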
@@ -369,6 +529,8 @@ static inline void process_adj_status(struct timex *txc, struct timespec *ts) | |||
369 | if ((time_status & STA_PLL) && !(txc->status & STA_PLL)) { | 529 | if ((time_status & STA_PLL) && !(txc->status & STA_PLL)) { |
370 | time_state = TIME_OK; | 530 | time_state = TIME_OK; |
371 | time_status = STA_UNSYNC; | 531 | time_status = STA_UNSYNC; |
532 | /* restart PPS frequency calibration */ | ||
533 | pps_reset_freq_interval(); | ||
372 | } | 534 | } |
373 | 535 | ||
374 | /* | 536 | /* |
@@ -418,6 +580,8 @@ static inline void process_adjtimex_modes(struct timex *txc, struct timespec *ts | |||
418 | time_freq = txc->freq * PPM_SCALE; | 580 | time_freq = txc->freq * PPM_SCALE; |
419 | time_freq = min(time_freq, MAXFREQ_SCALED); | 581 | time_freq = min(time_freq, MAXFREQ_SCALED); |
420 | time_freq = max(time_freq, -MAXFREQ_SCALED); | 582 | time_freq = max(time_freq, -MAXFREQ_SCALED); |
583 | /* update pps_freq */ | ||
584 | pps_set_freq(time_freq); | ||
421 | } | 585 | } |
422 | 586 | ||
423 | if (txc->modes & ADJ_MAXERROR) | 587 | if (txc->modes & ADJ_MAXERROR) |
@@ -508,7 +672,8 @@ int do_adjtimex(struct timex *txc) | |||
508 | } | 672 | } |
509 | 673 | ||
510 | result = time_state; /* mostly `TIME_OK' */ | 674 | result = time_state; /* mostly `TIME_OK' */ |
511 | if (time_status & (STA_UNSYNC|STA_CLOCKERR)) | 675 | /* check for errors */ |
676 | if (is_error_status(time_status)) | ||
512 | result = TIME_ERROR; | 677 | result = TIME_ERROR; |
513 | 678 | ||
514 | txc->freq = shift_right((time_freq >> PPM_SCALE_INV_SHIFT) * | 679 | txc->freq = shift_right((time_freq >> PPM_SCALE_INV_SHIFT) * |
@@ -522,15 +687,8 @@ int do_adjtimex(struct timex *txc) | |||
522 | txc->tick = tick_usec; | 687 | txc->tick = tick_usec; |
523 | txc->tai = time_tai; | 688 | txc->tai = time_tai; |
524 | 689 | ||
525 | /* PPS is not implemented, so these are zero */ | 690 | /* fill PPS status fields */ |
526 | txc->ppsfreq = 0; | 691 | pps_fill_timex(txc); |
527 | txc->jitter = 0; | ||
528 | txc->shift = 0; | ||
529 | txc->stabil = 0; | ||
530 | txc->jitcnt = 0; | ||
531 | txc->calcnt = 0; | ||
532 | txc->errcnt = 0; | ||
533 | txc->stbcnt = 0; | ||
534 | 692 | ||
535 | write_sequnlock_irq(&xtime_lock); | 693 | write_sequnlock_irq(&xtime_lock); |
536 | 694 | ||
@@ -544,6 +702,243 @@ int do_adjtimex(struct timex *txc) | |||
544 | return result; | 702 | return result; |
545 | } | 703 | } |
546 | 704 | ||
705 | #ifdef CONFIG_NTP_PPS | ||
706 | |||
707 | /* actually struct pps_normtime is good old struct timespec, but it is | ||
708 | * semantically different (and it is the reason why it was invented): | ||
709 | * pps_normtime.nsec has a range of ( -NSEC_PER_SEC / 2, NSEC_PER_SEC / 2 ] | ||
710 | * while timespec.tv_nsec has a range of [0, NSEC_PER_SEC) */ | ||
711 | struct pps_normtime { | ||
712 | __kernel_time_t sec; /* seconds */ | ||
713 | long nsec; /* nanoseconds */ | ||
714 | }; | ||
715 | |||
716 | /* normalize the timestamp so that nsec is in the | ||
717 | ( -NSEC_PER_SEC / 2, NSEC_PER_SEC / 2 ] interval */ | ||
718 | static inline struct pps_normtime pps_normalize_ts(struct timespec ts) | ||
719 | { | ||
720 | struct pps_normtime norm = { | ||
721 | .sec = ts.tv_sec, | ||
722 | .nsec = ts.tv_nsec | ||
723 | }; | ||
724 | |||
725 | if (norm.nsec > (NSEC_PER_SEC >> 1)) { | ||
726 | norm.nsec -= NSEC_PER_SEC; | ||
727 | norm.sec++; | ||
728 | } | ||
729 | |||
730 | return norm; | ||
731 | } | ||
732 | |||
733 | /* get current phase correction and jitter */ | ||
734 | static inline long pps_phase_filter_get(long *jitter) | ||
735 | { | ||
736 | *jitter = pps_tf[0] - pps_tf[1]; | ||
737 | if (*jitter < 0) | ||
738 | *jitter = -*jitter; | ||
739 | |||
740 | /* TODO: test various filters */ | ||
741 | return pps_tf[0]; | ||
742 | } | ||
743 | |||
744 | /* add the sample to the phase filter */ | ||
745 | static inline void pps_phase_filter_add(long err) | ||
746 | { | ||
747 | pps_tf[2] = pps_tf[1]; | ||
748 | pps_tf[1] = pps_tf[0]; | ||
749 | pps_tf[0] = err; | ||
750 | } | ||
751 | |||
752 | /* decrease frequency calibration interval length. | ||
753 | * It is halved after four consecutive unstable intervals. | ||
754 | */ | ||
755 | static inline void pps_dec_freq_interval(void) | ||
756 | { | ||
757 | if (--pps_intcnt <= -PPS_INTCOUNT) { | ||
758 | pps_intcnt = -PPS_INTCOUNT; | ||
759 | if (pps_shift > PPS_INTMIN) { | ||
760 | pps_shift--; | ||
761 | pps_intcnt = 0; | ||
762 | } | ||
763 | } | ||
764 | } | ||
765 | |||
766 | /* increase frequency calibration interval length. | ||
767 | * It is doubled after four consecutive stable intervals. | ||
768 | */ | ||
769 | static inline void pps_inc_freq_interval(void) | ||
770 | { | ||
771 | if (++pps_intcnt >= PPS_INTCOUNT) { | ||
772 | pps_intcnt = PPS_INTCOUNT; | ||
773 | if (pps_shift < PPS_INTMAX) { | ||
774 | pps_shift++; | ||
775 | pps_intcnt = 0; | ||
776 | } | ||
777 | } | ||
778 | } | ||
779 | |||
780 | /* update clock frequency based on MONOTONIC_RAW clock PPS signal | ||
781 | * timestamps | ||
782 | * | ||
783 | * At the end of the calibration interval the difference between the | ||
784 | * first and last MONOTONIC_RAW clock timestamps divided by the length | ||
785 | * of the interval becomes the frequency update. If the interval was | ||
786 | * too long, the data are discarded. | ||
787 | * Returns the difference between old and new frequency values. | ||
788 | */ | ||
789 | static long hardpps_update_freq(struct pps_normtime freq_norm) | ||
790 | { | ||
791 | long delta, delta_mod; | ||
792 | s64 ftemp; | ||
793 | |||
794 | /* check if the frequency interval was too long */ | ||
795 | if (freq_norm.sec > (2 << pps_shift)) { | ||
796 | time_status |= STA_PPSERROR; | ||
797 | pps_errcnt++; | ||
798 | pps_dec_freq_interval(); | ||
799 | pr_err("hardpps: PPSERROR: interval too long - %ld s\n", | ||
800 | freq_norm.sec); | ||
801 | return 0; | ||
802 | } | ||
803 | |||
804 | /* here the raw frequency offset and wander (stability) are | ||
805 | * calculated. If the wander is less than the wander threshold | ||
806 | * the interval is increased; otherwise it is decreased. | ||
807 | */ | ||
808 | ftemp = div_s64(((s64)(-freq_norm.nsec)) << NTP_SCALE_SHIFT, | ||
809 | freq_norm.sec); | ||
810 | delta = shift_right(ftemp - pps_freq, NTP_SCALE_SHIFT); | ||
811 | pps_freq = ftemp; | ||
812 | if (delta > PPS_MAXWANDER || delta < -PPS_MAXWANDER) { | ||
813 | pr_warning("hardpps: PPSWANDER: change=%ld\n", delta); | ||
814 | time_status |= STA_PPSWANDER; | ||
815 | pps_stbcnt++; | ||
816 | pps_dec_freq_interval(); | ||
817 | } else { /* good sample */ | ||
818 | pps_inc_freq_interval(); | ||
819 | } | ||
820 | |||
821 | /* the stability metric is calculated as the average of recent | ||
822 | * frequency changes, but is used only for performance | ||
823 | * monitoring | ||
824 | */ | ||
825 | delta_mod = delta; | ||
826 | if (delta_mod < 0) | ||
827 | delta_mod = -delta_mod; | ||
828 | pps_stabil += (div_s64(((s64)delta_mod) << | ||
829 | (NTP_SCALE_SHIFT - SHIFT_USEC), | ||
830 | NSEC_PER_USEC) - pps_stabil) >> PPS_INTMIN; | ||
831 | |||
832 | /* if enabled, the system clock frequency is updated */ | ||
833 | if ((time_status & STA_PPSFREQ) != 0 && | ||
834 | (time_status & STA_FREQHOLD) == 0) { | ||
835 | time_freq = pps_freq; | ||
836 | ntp_update_frequency(); | ||
837 | } | ||
838 | |||
839 | return delta; | ||
840 | } | ||
841 | |||
842 | /* correct REALTIME clock phase error against PPS signal */ | ||
843 | static void hardpps_update_phase(long error) | ||
844 | { | ||
845 | long correction = -error; | ||
846 | long jitter; | ||
847 | |||
848 | /* add the sample to the median filter */ | ||
849 | pps_phase_filter_add(correction); | ||
850 | correction = pps_phase_filter_get(&jitter); | ||
851 | |||
852 | /* Nominal jitter is due to PPS signal noise. If it exceeds the | ||
853 | * threshold, the sample is discarded; otherwise, if so enabled, | ||
854 | * the time offset is updated. | ||
855 | */ | ||
856 | if (jitter > (pps_jitter << PPS_POPCORN)) { | ||
857 | pr_warning("hardpps: PPSJITTER: jitter=%ld, limit=%ld\n", | ||
858 | jitter, (pps_jitter << PPS_POPCORN)); | ||
859 | time_status |= STA_PPSJITTER; | ||
860 | pps_jitcnt++; | ||
861 | } else if (time_status & STA_PPSTIME) { | ||
862 | /* correct the time using the phase offset */ | ||
863 | time_offset = div_s64(((s64)correction) << NTP_SCALE_SHIFT, | ||
864 | NTP_INTERVAL_FREQ); | ||
865 | /* cancel running adjtime() */ | ||
866 | time_adjust = 0; | ||
867 | } | ||
868 | /* update jitter */ | ||
869 | pps_jitter += (jitter - pps_jitter) >> PPS_INTMIN; | ||
870 | } | ||
871 | |||
872 | /* | ||
873 | * hardpps() - discipline CPU clock oscillator to external PPS signal | ||
874 | * | ||
875 | * This routine is called at each PPS signal arrival in order to | ||
876 | * discipline the CPU clock oscillator to the PPS signal. It takes two | ||
877 | * parameters: REALTIME and MONOTONIC_RAW clock timestamps. The former | ||
878 | * is used to correct clock phase error and the latter is used to | ||
879 | * correct the frequency. | ||
880 | * | ||
881 | * This code is based on David Mills's reference nanokernel | ||
882 | * implementation. It was mostly rewritten but keeps the same idea. | ||
883 | */ | ||
884 | void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts) | ||
885 | { | ||
886 | struct pps_normtime pts_norm, freq_norm; | ||
887 | unsigned long flags; | ||
888 | |||
889 | pts_norm = pps_normalize_ts(*phase_ts); | ||
890 | |||
891 | write_seqlock_irqsave(&xtime_lock, flags); | ||
892 | |||
893 | /* clear the error bits, they will be set again if needed */ | ||
894 | time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR); | ||
895 | |||
896 | /* indicate signal presence */ | ||
897 | time_status |= STA_PPSSIGNAL; | ||
898 | pps_valid = PPS_VALID; | ||
899 | |||
900 | /* when called for the first time, | ||
901 | * just start the frequency interval */ | ||
902 | if (unlikely(pps_fbase.tv_sec == 0)) { | ||
903 | pps_fbase = *raw_ts; | ||
904 | write_sequnlock_irqrestore(&xtime_lock, flags); | ||
905 | return; | ||
906 | } | ||
907 | |||
908 | /* ok, now we have a base for frequency calculation */ | ||
909 | freq_norm = pps_normalize_ts(timespec_sub(*raw_ts, pps_fbase)); | ||
910 | |||
911 | /* check that the signal is in the range | ||
912 | * [1s - MAXFREQ ns, 1s + MAXFREQ ns], otherwise reject it */ | ||
913 | if ((freq_norm.sec == 0) || | ||
914 | (freq_norm.nsec > MAXFREQ * freq_norm.sec) || | ||
915 | (freq_norm.nsec < -MAXFREQ * freq_norm.sec)) { | ||
916 | time_status |= STA_PPSJITTER; | ||
917 | /* restart the frequency calibration interval */ | ||
918 | pps_fbase = *raw_ts; | ||
919 | write_sequnlock_irqrestore(&xtime_lock, flags); | ||
920 | pr_err("hardpps: PPSJITTER: bad pulse\n"); | ||
921 | return; | ||
922 | } | ||
923 | |||
924 | /* signal is ok */ | ||
925 | |||
926 | /* check if the current frequency interval is finished */ | ||
927 | if (freq_norm.sec >= (1 << pps_shift)) { | ||
928 | pps_calcnt++; | ||
929 | /* restart the frequency calibration interval */ | ||
930 | pps_fbase = *raw_ts; | ||
931 | hardpps_update_freq(freq_norm); | ||
932 | } | ||
933 | |||
934 | hardpps_update_phase(pts_norm.nsec); | ||
935 | |||
936 | write_sequnlock_irqrestore(&xtime_lock, flags); | ||
937 | } | ||
938 | EXPORT_SYMBOL(hardpps); | ||
939 | |||
940 | #endif /* CONFIG_NTP_PPS */ | ||
941 | |||
547 | static int __init ntp_tick_adj_setup(char *str) | 942 | static int __init ntp_tick_adj_setup(char *str) |
548 | { | 943 | { |
549 | ntp_tick_adj = simple_strtol(str, NULL, 0); | 944 | ntp_tick_adj = simple_strtol(str, NULL, 0); |
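
Two details of hardpps() above are easy to check in isolation: the normalization that keeps the nanosecond part centred around zero, and the sanity window that rejects pulses whose spacing strays too far from one second. The sketch below re-declares struct pps_normtime and the constants locally; MAXFREQ is assumed to be the 500000 ns/s tolerance from <linux/timex.h>, so treat it as an illustration rather than kernel code.

	/* Sketch of the pulse normalization and sanity check used by hardpps(). */
	#include <stdio.h>
	#include <time.h>

	#define NSEC_PER_SEC 1000000000L
	#define MAXFREQ      500000L      /* assumed max frequency tolerance (ns/s) */

	struct pps_normtime {
		time_t sec;
		long   nsec;              /* in (-NSEC_PER_SEC/2, NSEC_PER_SEC/2] */
	};

	static struct pps_normtime pps_normalize_ts(struct timespec ts)
	{
		struct pps_normtime norm = { .sec = ts.tv_sec, .nsec = ts.tv_nsec };

		if (norm.nsec > (NSEC_PER_SEC >> 1)) {
			norm.nsec -= NSEC_PER_SEC;
			norm.sec++;
		}
		return norm;
	}

	/* is the interval between two pulses acceptably close to a whole second? */
	static int pulse_in_range(struct pps_normtime freq_norm)
	{
		if (freq_norm.sec == 0)
			return 0;
		if (freq_norm.nsec > MAXFREQ * freq_norm.sec)
			return 0;
		if (freq_norm.nsec < -MAXFREQ * freq_norm.sec)
			return 0;
		return 1;
	}

	int main(void)
	{
		struct timespec good = { .tv_sec = 0, .tv_nsec = 999999900 };  /* ~1 s apart */
		struct timespec bad  = { .tv_sec = 0, .tv_nsec = 990000000 };  /* 10 ms short */

		printf("good pulse in range: %d\n", pulse_in_range(pps_normalize_ts(good)));
		printf("bad pulse in range:  %d\n", pulse_in_range(pps_normalize_ts(bad)));
		return 0;
	}
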
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index b6b898d2eeef..051bc80a0c43 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c | |||
@@ -49,7 +49,7 @@ struct tick_device *tick_get_device(int cpu) | |||
49 | */ | 49 | */ |
50 | int tick_is_oneshot_available(void) | 50 | int tick_is_oneshot_available(void) |
51 | { | 51 | { |
52 | struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; | 52 | struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); |
53 | 53 | ||
54 | return dev && (dev->features & CLOCK_EVT_FEAT_ONESHOT); | 54 | return dev && (dev->features & CLOCK_EVT_FEAT_ONESHOT); |
55 | } | 55 | } |
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c index aada0e52680a..5cbc101f908b 100644 --- a/kernel/time/tick-oneshot.c +++ b/kernel/time/tick-oneshot.c | |||
@@ -95,7 +95,7 @@ int tick_dev_program_event(struct clock_event_device *dev, ktime_t expires, | |||
95 | */ | 95 | */ |
96 | int tick_program_event(ktime_t expires, int force) | 96 | int tick_program_event(ktime_t expires, int force) |
97 | { | 97 | { |
98 | struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; | 98 | struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); |
99 | 99 | ||
100 | return tick_dev_program_event(dev, expires, force); | 100 | return tick_dev_program_event(dev, expires, force); |
101 | } | 101 | } |
@@ -167,7 +167,7 @@ int tick_oneshot_mode_active(void) | |||
167 | int ret; | 167 | int ret; |
168 | 168 | ||
169 | local_irq_save(flags); | 169 | local_irq_save(flags); |
170 | ret = __get_cpu_var(tick_cpu_device).mode == TICKDEV_MODE_ONESHOT; | 170 | ret = __this_cpu_read(tick_cpu_device.mode) == TICKDEV_MODE_ONESHOT; |
171 | local_irq_restore(flags); | 171 | local_irq_restore(flags); |
172 | 172 | ||
173 | return ret; | 173 | return ret; |
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index eef7452bd8a9..d27c7562902c 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
@@ -288,6 +288,49 @@ void ktime_get_ts(struct timespec *ts) | |||
288 | } | 288 | } |
289 | EXPORT_SYMBOL_GPL(ktime_get_ts); | 289 | EXPORT_SYMBOL_GPL(ktime_get_ts); |
290 | 290 | ||
291 | #ifdef CONFIG_NTP_PPS | ||
292 | |||
293 | /** | ||
294 | * getnstime_raw_and_real - get time of day and raw monotonic time in timespec format | ||
295 | * @ts_raw: pointer to the timespec to be set to raw monotonic time | ||
296 | * @ts_real: pointer to the timespec to be set to the time of day | ||
297 | * | ||
298 | * This function reads both the time of day and raw monotonic time at the | ||
299 | * same time atomically and stores the resulting timestamps in timespec | ||
300 | * format. | ||
301 | */ | ||
302 | void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real) | ||
303 | { | ||
304 | unsigned long seq; | ||
305 | s64 nsecs_raw, nsecs_real; | ||
306 | |||
307 | WARN_ON_ONCE(timekeeping_suspended); | ||
308 | |||
309 | do { | ||
310 | u32 arch_offset; | ||
311 | |||
312 | seq = read_seqbegin(&xtime_lock); | ||
313 | |||
314 | *ts_raw = raw_time; | ||
315 | *ts_real = xtime; | ||
316 | |||
317 | nsecs_raw = timekeeping_get_ns_raw(); | ||
318 | nsecs_real = timekeeping_get_ns(); | ||
319 | |||
320 | /* If arch requires, add in gettimeoffset() */ | ||
321 | arch_offset = arch_gettimeoffset(); | ||
322 | nsecs_raw += arch_offset; | ||
323 | nsecs_real += arch_offset; | ||
324 | |||
325 | } while (read_seqretry(&xtime_lock, seq)); | ||
326 | |||
327 | timespec_add_ns(ts_raw, nsecs_raw); | ||
328 | timespec_add_ns(ts_real, nsecs_real); | ||
329 | } | ||
330 | EXPORT_SYMBOL(getnstime_raw_and_real); | ||
331 | |||
332 | #endif /* CONFIG_NTP_PPS */ | ||
333 | |||
291 | /** | 334 | /** |
292 | * do_gettimeofday - Returns the time of day in a timeval | 335 | * do_gettimeofday - Returns the time of day in a timeval |
293 | * @tv: pointer to the timeval to be set | 336 | * @tv: pointer to the timeval to be set |
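
getnstime_raw_and_real() gives hardpps() a consistent pair of REALTIME and MONOTONIC_RAW readings by taking both inside one xtime_lock read section. Userspace has no equivalent atomic read; a common approximation, sketched below purely for illustration, brackets the realtime read with two raw reads and retries until the pair is close enough.

	/* Userspace approximation of getnstime_raw_and_real(): read the raw
	 * monotonic clock and the realtime clock as close together as possible.
	 * Unlike the kernel helper this is not atomic; bracketing the realtime
	 * read with two raw reads and retrying merely bounds the skew. */
	#define _GNU_SOURCE
	#include <stdio.h>
	#include <time.h>

	static void get_raw_and_real(struct timespec *raw, struct timespec *real)
	{
		struct timespec raw_before, raw_after;
		long long delta_ns;

		do {
			clock_gettime(CLOCK_MONOTONIC_RAW, &raw_before);
			clock_gettime(CLOCK_REALTIME, real);
			clock_gettime(CLOCK_MONOTONIC_RAW, &raw_after);

			delta_ns = (long long)(raw_after.tv_sec - raw_before.tv_sec) *
				   1000000000LL +
				   (raw_after.tv_nsec - raw_before.tv_nsec);
		} while (delta_ns > 100000);	/* retry if the reads were >100us apart */

		*raw = raw_before;
	}

	int main(void)
	{
		struct timespec raw, real;

		get_raw_and_real(&raw, &real);
		printf("raw : %ld.%09ld\n", (long)raw.tv_sec, raw.tv_nsec);
		printf("real: %ld.%09ld\n", (long)real.tv_sec, real.tv_nsec);
		return 0;
	}
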
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 53f338190b26..761c510a06c5 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile | |||
@@ -52,7 +52,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o | |||
52 | endif | 52 | endif |
53 | obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o | 53 | obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o |
54 | obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o | 54 | obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o |
55 | obj-$(CONFIG_EVENT_TRACING) += power-traces.o | 55 | obj-$(CONFIG_TRACEPOINTS) += power-traces.o |
56 | ifeq ($(CONFIG_TRACING),y) | 56 | ifeq ($(CONFIG_TRACING),y) |
57 | obj-$(CONFIG_KGDB_KDB) += trace_kdb.o | 57 | obj-$(CONFIG_KGDB_KDB) += trace_kdb.o |
58 | endif | 58 | endif |
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 7b8ec0281548..153562d0b93c 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c | |||
@@ -758,53 +758,58 @@ static void blk_add_trace_rq_complete(void *ignore, | |||
758 | * @q: queue the io is for | 758 | * @q: queue the io is for |
759 | * @bio: the source bio | 759 | * @bio: the source bio |
760 | * @what: the action | 760 | * @what: the action |
761 | * @error: error, if any | ||
761 | * | 762 | * |
762 | * Description: | 763 | * Description: |
763 | * Records an action against a bio. Will log the bio offset + size. | 764 | * Records an action against a bio. Will log the bio offset + size. |
764 | * | 765 | * |
765 | **/ | 766 | **/ |
766 | static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, | 767 | static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, |
767 | u32 what) | 768 | u32 what, int error) |
768 | { | 769 | { |
769 | struct blk_trace *bt = q->blk_trace; | 770 | struct blk_trace *bt = q->blk_trace; |
770 | 771 | ||
771 | if (likely(!bt)) | 772 | if (likely(!bt)) |
772 | return; | 773 | return; |
773 | 774 | ||
775 | if (!error && !bio_flagged(bio, BIO_UPTODATE)) | ||
776 | error = EIO; | ||
777 | |||
774 | __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, | 778 | __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, |
775 | !bio_flagged(bio, BIO_UPTODATE), 0, NULL); | 779 | error, 0, NULL); |
776 | } | 780 | } |
777 | 781 | ||
778 | static void blk_add_trace_bio_bounce(void *ignore, | 782 | static void blk_add_trace_bio_bounce(void *ignore, |
779 | struct request_queue *q, struct bio *bio) | 783 | struct request_queue *q, struct bio *bio) |
780 | { | 784 | { |
781 | blk_add_trace_bio(q, bio, BLK_TA_BOUNCE); | 785 | blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0); |
782 | } | 786 | } |
783 | 787 | ||
784 | static void blk_add_trace_bio_complete(void *ignore, | 788 | static void blk_add_trace_bio_complete(void *ignore, |
785 | struct request_queue *q, struct bio *bio) | 789 | struct request_queue *q, struct bio *bio, |
790 | int error) | ||
786 | { | 791 | { |
787 | blk_add_trace_bio(q, bio, BLK_TA_COMPLETE); | 792 | blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error); |
788 | } | 793 | } |
789 | 794 | ||
790 | static void blk_add_trace_bio_backmerge(void *ignore, | 795 | static void blk_add_trace_bio_backmerge(void *ignore, |
791 | struct request_queue *q, | 796 | struct request_queue *q, |
792 | struct bio *bio) | 797 | struct bio *bio) |
793 | { | 798 | { |
794 | blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); | 799 | blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0); |
795 | } | 800 | } |
796 | 801 | ||
797 | static void blk_add_trace_bio_frontmerge(void *ignore, | 802 | static void blk_add_trace_bio_frontmerge(void *ignore, |
798 | struct request_queue *q, | 803 | struct request_queue *q, |
799 | struct bio *bio) | 804 | struct bio *bio) |
800 | { | 805 | { |
801 | blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); | 806 | blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0); |
802 | } | 807 | } |
803 | 808 | ||
804 | static void blk_add_trace_bio_queue(void *ignore, | 809 | static void blk_add_trace_bio_queue(void *ignore, |
805 | struct request_queue *q, struct bio *bio) | 810 | struct request_queue *q, struct bio *bio) |
806 | { | 811 | { |
807 | blk_add_trace_bio(q, bio, BLK_TA_QUEUE); | 812 | blk_add_trace_bio(q, bio, BLK_TA_QUEUE, 0); |
808 | } | 813 | } |
809 | 814 | ||
810 | static void blk_add_trace_getrq(void *ignore, | 815 | static void blk_add_trace_getrq(void *ignore, |
@@ -812,7 +817,7 @@ static void blk_add_trace_getrq(void *ignore, | |||
812 | struct bio *bio, int rw) | 817 | struct bio *bio, int rw) |
813 | { | 818 | { |
814 | if (bio) | 819 | if (bio) |
815 | blk_add_trace_bio(q, bio, BLK_TA_GETRQ); | 820 | blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0); |
816 | else { | 821 | else { |
817 | struct blk_trace *bt = q->blk_trace; | 822 | struct blk_trace *bt = q->blk_trace; |
818 | 823 | ||
@@ -827,7 +832,7 @@ static void blk_add_trace_sleeprq(void *ignore, | |||
827 | struct bio *bio, int rw) | 832 | struct bio *bio, int rw) |
828 | { | 833 | { |
829 | if (bio) | 834 | if (bio) |
830 | blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ); | 835 | blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0); |
831 | else { | 836 | else { |
832 | struct blk_trace *bt = q->blk_trace; | 837 | struct blk_trace *bt = q->blk_trace; |
833 | 838 | ||
@@ -887,7 +892,7 @@ static void blk_add_trace_split(void *ignore, | |||
887 | } | 892 | } |
888 | 893 | ||
889 | /** | 894 | /** |
890 | * blk_add_trace_remap - Add a trace for a remap operation | 895 | * blk_add_trace_bio_remap - Add a trace for a bio-remap operation |
891 | * @ignore: trace callback data parameter (not used) | 896 | * @ignore: trace callback data parameter (not used) |
892 | * @q: queue the io is for | 897 | * @q: queue the io is for |
893 | * @bio: the source bio | 898 | * @bio: the source bio |
@@ -899,9 +904,9 @@ static void blk_add_trace_split(void *ignore, | |||
899 | * it spans a stripe (or similar). Add a trace for that action. | 904 | * it spans a stripe (or similar). Add a trace for that action. |
900 | * | 905 | * |
901 | **/ | 906 | **/ |
902 | static void blk_add_trace_remap(void *ignore, | 907 | static void blk_add_trace_bio_remap(void *ignore, |
903 | struct request_queue *q, struct bio *bio, | 908 | struct request_queue *q, struct bio *bio, |
904 | dev_t dev, sector_t from) | 909 | dev_t dev, sector_t from) |
905 | { | 910 | { |
906 | struct blk_trace *bt = q->blk_trace; | 911 | struct blk_trace *bt = q->blk_trace; |
907 | struct blk_io_trace_remap r; | 912 | struct blk_io_trace_remap r; |
@@ -1016,7 +1021,7 @@ static void blk_register_tracepoints(void) | |||
1016 | WARN_ON(ret); | 1021 | WARN_ON(ret); |
1017 | ret = register_trace_block_split(blk_add_trace_split, NULL); | 1022 | ret = register_trace_block_split(blk_add_trace_split, NULL); |
1018 | WARN_ON(ret); | 1023 | WARN_ON(ret); |
1019 | ret = register_trace_block_remap(blk_add_trace_remap, NULL); | 1024 | ret = register_trace_block_bio_remap(blk_add_trace_bio_remap, NULL); |
1020 | WARN_ON(ret); | 1025 | WARN_ON(ret); |
1021 | ret = register_trace_block_rq_remap(blk_add_trace_rq_remap, NULL); | 1026 | ret = register_trace_block_rq_remap(blk_add_trace_rq_remap, NULL); |
1022 | WARN_ON(ret); | 1027 | WARN_ON(ret); |
@@ -1025,7 +1030,7 @@ static void blk_register_tracepoints(void) | |||
1025 | static void blk_unregister_tracepoints(void) | 1030 | static void blk_unregister_tracepoints(void) |
1026 | { | 1031 | { |
1027 | unregister_trace_block_rq_remap(blk_add_trace_rq_remap, NULL); | 1032 | unregister_trace_block_rq_remap(blk_add_trace_rq_remap, NULL); |
1028 | unregister_trace_block_remap(blk_add_trace_remap, NULL); | 1033 | unregister_trace_block_bio_remap(blk_add_trace_bio_remap, NULL); |
1029 | unregister_trace_block_split(blk_add_trace_split, NULL); | 1034 | unregister_trace_block_split(blk_add_trace_split, NULL); |
1030 | unregister_trace_block_unplug_io(blk_add_trace_unplug_io, NULL); | 1035 | unregister_trace_block_unplug_io(blk_add_trace_unplug_io, NULL); |
1031 | unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL); | 1036 | unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL); |
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f8cf959bad45..dc53ecb80589 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
@@ -1313,12 +1313,10 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) | |||
1313 | 1313 | ||
1314 | __this_cpu_inc(user_stack_count); | 1314 | __this_cpu_inc(user_stack_count); |
1315 | 1315 | ||
1316 | |||
1317 | |||
1318 | event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, | 1316 | event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, |
1319 | sizeof(*entry), flags, pc); | 1317 | sizeof(*entry), flags, pc); |
1320 | if (!event) | 1318 | if (!event) |
1321 | return; | 1319 | goto out_drop_count; |
1322 | entry = ring_buffer_event_data(event); | 1320 | entry = ring_buffer_event_data(event); |
1323 | 1321 | ||
1324 | entry->tgid = current->tgid; | 1322 | entry->tgid = current->tgid; |
@@ -1333,8 +1331,8 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) | |||
1333 | if (!filter_check_discard(call, entry, buffer, event)) | 1331 | if (!filter_check_discard(call, entry, buffer, event)) |
1334 | ring_buffer_unlock_commit(buffer, event); | 1332 | ring_buffer_unlock_commit(buffer, event); |
1335 | 1333 | ||
1334 | out_drop_count: | ||
1336 | __this_cpu_dec(user_stack_count); | 1335 | __this_cpu_dec(user_stack_count); |
1337 | |||
1338 | out: | 1336 | out: |
1339 | preempt_enable(); | 1337 | preempt_enable(); |
1340 | } | 1338 | } |
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index e3dfecaf13e6..6cf223764be8 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h | |||
@@ -53,7 +53,7 @@ | |||
53 | */ | 53 | */ |
54 | 54 | ||
55 | /* | 55 | /* |
56 | * Function trace entry - function address and parent function addres: | 56 | * Function trace entry - function address and parent function address: |
57 | */ | 57 | */ |
58 | FTRACE_ENTRY(function, ftrace_entry, | 58 | FTRACE_ENTRY(function, ftrace_entry, |
59 | 59 | ||
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 562c56e048fd..659732eba07c 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c | |||
@@ -558,7 +558,7 @@ trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr) | |||
558 | static int trace_wakeup_test_thread(void *data) | 558 | static int trace_wakeup_test_thread(void *data) |
559 | { | 559 | { |
560 | /* Make this a RT thread, doesn't need to be too high */ | 560 | /* Make this a RT thread, doesn't need to be too high */ |
561 | static struct sched_param param = { .sched_priority = 5 }; | 561 | static const struct sched_param param = { .sched_priority = 5 }; |
562 | struct completion *x = data; | 562 | struct completion *x = data; |
563 | 563 | ||
564 | sched_setscheduler(current, SCHED_FIFO, ¶m); | 564 | sched_setscheduler(current, SCHED_FIFO, ¶m); |
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index bac752f0cfb5..b706529b4fc7 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c | |||
@@ -23,9 +23,6 @@ static int syscall_exit_register(struct ftrace_event_call *event, | |||
23 | static int syscall_enter_define_fields(struct ftrace_event_call *call); | 23 | static int syscall_enter_define_fields(struct ftrace_event_call *call); |
24 | static int syscall_exit_define_fields(struct ftrace_event_call *call); | 24 | static int syscall_exit_define_fields(struct ftrace_event_call *call); |
25 | 25 | ||
26 | /* All syscall exit events have the same fields */ | ||
27 | static LIST_HEAD(syscall_exit_fields); | ||
28 | |||
29 | static struct list_head * | 26 | static struct list_head * |
30 | syscall_get_enter_fields(struct ftrace_event_call *call) | 27 | syscall_get_enter_fields(struct ftrace_event_call *call) |
31 | { | 28 | { |
@@ -34,34 +31,28 @@ syscall_get_enter_fields(struct ftrace_event_call *call) | |||
34 | return &entry->enter_fields; | 31 | return &entry->enter_fields; |
35 | } | 32 | } |
36 | 33 | ||
37 | static struct list_head * | ||
38 | syscall_get_exit_fields(struct ftrace_event_call *call) | ||
39 | { | ||
40 | return &syscall_exit_fields; | ||
41 | } | ||
42 | |||
43 | struct trace_event_functions enter_syscall_print_funcs = { | 34 | struct trace_event_functions enter_syscall_print_funcs = { |
44 | .trace = print_syscall_enter, | 35 | .trace = print_syscall_enter, |
45 | }; | 36 | }; |
46 | 37 | ||
47 | struct trace_event_functions exit_syscall_print_funcs = { | 38 | struct trace_event_functions exit_syscall_print_funcs = { |
48 | .trace = print_syscall_exit, | 39 | .trace = print_syscall_exit, |
49 | }; | 40 | }; |
50 | 41 | ||
51 | struct ftrace_event_class event_class_syscall_enter = { | 42 | struct ftrace_event_class event_class_syscall_enter = { |
52 | .system = "syscalls", | 43 | .system = "syscalls", |
53 | .reg = syscall_enter_register, | 44 | .reg = syscall_enter_register, |
54 | .define_fields = syscall_enter_define_fields, | 45 | .define_fields = syscall_enter_define_fields, |
55 | .get_fields = syscall_get_enter_fields, | 46 | .get_fields = syscall_get_enter_fields, |
56 | .raw_init = init_syscall_trace, | 47 | .raw_init = init_syscall_trace, |
57 | }; | 48 | }; |
58 | 49 | ||
59 | struct ftrace_event_class event_class_syscall_exit = { | 50 | struct ftrace_event_class event_class_syscall_exit = { |
60 | .system = "syscalls", | 51 | .system = "syscalls", |
61 | .reg = syscall_exit_register, | 52 | .reg = syscall_exit_register, |
62 | .define_fields = syscall_exit_define_fields, | 53 | .define_fields = syscall_exit_define_fields, |
63 | .get_fields = syscall_get_exit_fields, | 54 | .fields = LIST_HEAD_INIT(event_class_syscall_exit.fields), |
64 | .raw_init = init_syscall_trace, | 55 | .raw_init = init_syscall_trace, |
65 | }; | 56 | }; |
66 | 57 | ||
67 | extern unsigned long __start_syscalls_metadata[]; | 58 | extern unsigned long __start_syscalls_metadata[]; |
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 25915832291a..9da289c34f22 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c | |||
@@ -12,6 +12,8 @@ | |||
12 | #include <linux/highuid.h> | 12 | #include <linux/highuid.h> |
13 | #include <linux/cred.h> | 13 | #include <linux/cred.h> |
14 | 14 | ||
15 | static struct kmem_cache *user_ns_cachep __read_mostly; | ||
16 | |||
15 | /* | 17 | /* |
16 | * Create a new user namespace, deriving the creator from the user in the | 18 | * Create a new user namespace, deriving the creator from the user in the |
17 | * passed credentials, and replacing that user with the new root user for the | 19 | * passed credentials, and replacing that user with the new root user for the |
@@ -26,7 +28,7 @@ int create_user_ns(struct cred *new) | |||
26 | struct user_struct *root_user; | 28 | struct user_struct *root_user; |
27 | int n; | 29 | int n; |
28 | 30 | ||
29 | ns = kmalloc(sizeof(struct user_namespace), GFP_KERNEL); | 31 | ns = kmem_cache_alloc(user_ns_cachep, GFP_KERNEL); |
30 | if (!ns) | 32 | if (!ns) |
31 | return -ENOMEM; | 33 | return -ENOMEM; |
32 | 34 | ||
@@ -38,7 +40,7 @@ int create_user_ns(struct cred *new) | |||
38 | /* Alloc new root user. */ | 40 | /* Alloc new root user. */ |
39 | root_user = alloc_uid(ns, 0); | 41 | root_user = alloc_uid(ns, 0); |
40 | if (!root_user) { | 42 | if (!root_user) { |
41 | kfree(ns); | 43 | kmem_cache_free(user_ns_cachep, ns); |
42 | return -ENOMEM; | 44 | return -ENOMEM; |
43 | } | 45 | } |
44 | 46 | ||
@@ -71,7 +73,7 @@ static void free_user_ns_work(struct work_struct *work) | |||
71 | struct user_namespace *ns = | 73 | struct user_namespace *ns = |
72 | container_of(work, struct user_namespace, destroyer); | 74 | container_of(work, struct user_namespace, destroyer); |
73 | free_uid(ns->creator); | 75 | free_uid(ns->creator); |
74 | kfree(ns); | 76 | kmem_cache_free(user_ns_cachep, ns); |
75 | } | 77 | } |
76 | 78 | ||
77 | void free_user_ns(struct kref *kref) | 79 | void free_user_ns(struct kref *kref) |
@@ -126,3 +128,10 @@ gid_t user_ns_map_gid(struct user_namespace *to, const struct cred *cred, gid_t | |||
126 | /* No useful relationship so no mapping */ | 128 | /* No useful relationship so no mapping */ |
127 | return overflowgid; | 129 | return overflowgid; |
128 | } | 130 | } |
131 | |||
132 | static __init int user_namespaces_init(void) | ||
133 | { | ||
134 | user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC); | ||
135 | return 0; | ||
136 | } | ||
137 | module_init(user_namespaces_init); | ||
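
The user_namespace.c change swaps kmalloc()/kfree() for a dedicated slab cache created once at init. The pattern itself is generic; the sketch below applies it to a made-up struct foo in a throwaway module, so the names and the SLAB_PANIC choice are illustrative rather than taken from this patch, and it only builds against a kernel tree.

	/* Hypothetical module using the same slab-cache pattern as above. */
	#include <linux/module.h>
	#include <linux/slab.h>

	struct foo {
		int value;
	};

	static struct kmem_cache *foo_cachep __read_mostly;

	static struct foo *foo_alloc(void)
	{
		return kmem_cache_alloc(foo_cachep, GFP_KERNEL);
	}

	static void foo_free(struct foo *f)
	{
		kmem_cache_free(foo_cachep, f);
	}

	static int __init foo_init(void)
	{
		/* KMEM_CACHE() derives name, size and alignment from the struct;
		 * SLAB_PANIC makes init fail loudly if the cache cannot be created. */
		foo_cachep = KMEM_CACHE(foo, SLAB_PANIC);

		foo_free(foo_alloc());
		return 0;
	}
	module_init(foo_init);

	MODULE_LICENSE("GPL");
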
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 6e7b575ac33c..d7ebdf4cea98 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
@@ -118,12 +118,12 @@ static void __touch_watchdog(void) | |||
118 | { | 118 | { |
119 | int this_cpu = smp_processor_id(); | 119 | int this_cpu = smp_processor_id(); |
120 | 120 | ||
121 | __get_cpu_var(watchdog_touch_ts) = get_timestamp(this_cpu); | 121 | __this_cpu_write(watchdog_touch_ts, get_timestamp(this_cpu)); |
122 | } | 122 | } |
123 | 123 | ||
124 | void touch_softlockup_watchdog(void) | 124 | void touch_softlockup_watchdog(void) |
125 | { | 125 | { |
126 | __raw_get_cpu_var(watchdog_touch_ts) = 0; | 126 | __this_cpu_write(watchdog_touch_ts, 0); |
127 | } | 127 | } |
128 | EXPORT_SYMBOL(touch_softlockup_watchdog); | 128 | EXPORT_SYMBOL(touch_softlockup_watchdog); |
129 | 129 | ||
@@ -167,12 +167,12 @@ void touch_softlockup_watchdog_sync(void) | |||
167 | /* watchdog detector functions */ | 167 | /* watchdog detector functions */ |
168 | static int is_hardlockup(void) | 168 | static int is_hardlockup(void) |
169 | { | 169 | { |
170 | unsigned long hrint = __get_cpu_var(hrtimer_interrupts); | 170 | unsigned long hrint = __this_cpu_read(hrtimer_interrupts); |
171 | 171 | ||
172 | if (__get_cpu_var(hrtimer_interrupts_saved) == hrint) | 172 | if (__this_cpu_read(hrtimer_interrupts_saved) == hrint) |
173 | return 1; | 173 | return 1; |
174 | 174 | ||
175 | __get_cpu_var(hrtimer_interrupts_saved) = hrint; | 175 | __this_cpu_write(hrtimer_interrupts_saved, hrint); |
176 | return 0; | 176 | return 0; |
177 | } | 177 | } |
178 | #endif | 178 | #endif |
@@ -205,8 +205,8 @@ static void watchdog_overflow_callback(struct perf_event *event, int nmi, | |||
205 | /* Ensure the watchdog never gets throttled */ | 205 | /* Ensure the watchdog never gets throttled */ |
206 | event->hw.interrupts = 0; | 206 | event->hw.interrupts = 0; |
207 | 207 | ||
208 | if (__get_cpu_var(watchdog_nmi_touch) == true) { | 208 | if (__this_cpu_read(watchdog_nmi_touch) == true) { |
209 | __get_cpu_var(watchdog_nmi_touch) = false; | 209 | __this_cpu_write(watchdog_nmi_touch, false); |
210 | return; | 210 | return; |
211 | } | 211 | } |
212 | 212 | ||
@@ -220,7 +220,7 @@ static void watchdog_overflow_callback(struct perf_event *event, int nmi, | |||
220 | int this_cpu = smp_processor_id(); | 220 | int this_cpu = smp_processor_id(); |
221 | 221 | ||
222 | /* only print hardlockups once */ | 222 | /* only print hardlockups once */ |
223 | if (__get_cpu_var(hard_watchdog_warn) == true) | 223 | if (__this_cpu_read(hard_watchdog_warn) == true) |
224 | return; | 224 | return; |
225 | 225 | ||
226 | if (hardlockup_panic) | 226 | if (hardlockup_panic) |
@@ -228,16 +228,16 @@ static void watchdog_overflow_callback(struct perf_event *event, int nmi, | |||
228 | else | 228 | else |
229 | WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu); | 229 | WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu); |
230 | 230 | ||
231 | __get_cpu_var(hard_watchdog_warn) = true; | 231 | __this_cpu_write(hard_watchdog_warn, true); |
232 | return; | 232 | return; |
233 | } | 233 | } |
234 | 234 | ||
235 | __get_cpu_var(hard_watchdog_warn) = false; | 235 | __this_cpu_write(hard_watchdog_warn, false); |
236 | return; | 236 | return; |
237 | } | 237 | } |
238 | static void watchdog_interrupt_count(void) | 238 | static void watchdog_interrupt_count(void) |
239 | { | 239 | { |
240 | __get_cpu_var(hrtimer_interrupts)++; | 240 | __this_cpu_inc(hrtimer_interrupts); |
241 | } | 241 | } |
242 | #else | 242 | #else |
243 | static inline void watchdog_interrupt_count(void) { return; } | 243 | static inline void watchdog_interrupt_count(void) { return; } |
@@ -246,7 +246,7 @@ static inline void watchdog_interrupt_count(void) { return; } | |||
246 | /* watchdog kicker functions */ | 246 | /* watchdog kicker functions */ |
247 | static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) | 247 | static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) |
248 | { | 248 | { |
249 | unsigned long touch_ts = __get_cpu_var(watchdog_touch_ts); | 249 | unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts); |
250 | struct pt_regs *regs = get_irq_regs(); | 250 | struct pt_regs *regs = get_irq_regs(); |
251 | int duration; | 251 | int duration; |
252 | 252 | ||
@@ -254,18 +254,18 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) | |||
254 | watchdog_interrupt_count(); | 254 | watchdog_interrupt_count(); |
255 | 255 | ||
256 | /* kick the softlockup detector */ | 256 | /* kick the softlockup detector */ |
257 | wake_up_process(__get_cpu_var(softlockup_watchdog)); | 257 | wake_up_process(__this_cpu_read(softlockup_watchdog)); |
258 | 258 | ||
259 | /* .. and repeat */ | 259 | /* .. and repeat */ |
260 | hrtimer_forward_now(hrtimer, ns_to_ktime(get_sample_period())); | 260 | hrtimer_forward_now(hrtimer, ns_to_ktime(get_sample_period())); |
261 | 261 | ||
262 | if (touch_ts == 0) { | 262 | if (touch_ts == 0) { |
263 | if (unlikely(__get_cpu_var(softlockup_touch_sync))) { | 263 | if (unlikely(__this_cpu_read(softlockup_touch_sync))) { |
264 | /* | 264 | /* |
265 | * If the time stamp was touched atomically | 265 | * If the time stamp was touched atomically |
266 | * make sure the scheduler tick is up to date. | 266 | * make sure the scheduler tick is up to date. |
267 | */ | 267 | */ |
268 | __get_cpu_var(softlockup_touch_sync) = false; | 268 | __this_cpu_write(softlockup_touch_sync, false); |
269 | sched_clock_tick(); | 269 | sched_clock_tick(); |
270 | } | 270 | } |
271 | __touch_watchdog(); | 271 | __touch_watchdog(); |
@@ -281,7 +281,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) | |||
281 | duration = is_softlockup(touch_ts); | 281 | duration = is_softlockup(touch_ts); |
282 | if (unlikely(duration)) { | 282 | if (unlikely(duration)) { |
283 | /* only warn once */ | 283 | /* only warn once */ |
284 | if (__get_cpu_var(soft_watchdog_warn) == true) | 284 | if (__this_cpu_read(soft_watchdog_warn) == true) |
285 | return HRTIMER_RESTART; | 285 | return HRTIMER_RESTART; |
286 | 286 | ||
287 | printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n", | 287 | printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n", |
@@ -296,9 +296,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) | |||
296 | 296 | ||
297 | if (softlockup_panic) | 297 | if (softlockup_panic) |
298 | panic("softlockup: hung tasks"); | 298 | panic("softlockup: hung tasks"); |
299 | __get_cpu_var(soft_watchdog_warn) = true; | 299 | __this_cpu_write(soft_watchdog_warn, true); |
300 | } else | 300 | } else |
301 | __get_cpu_var(soft_watchdog_warn) = false; | 301 | __this_cpu_write(soft_watchdog_warn, false); |
302 | 302 | ||
303 | return HRTIMER_RESTART; | 303 | return HRTIMER_RESTART; |
304 | } | 304 | } |
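
The watchdog.c (and tick-*) conversions above replace __get_cpu_var(), which computes this CPU's address and then dereferences it, with __this_cpu_read()/__this_cpu_write()/__this_cpu_inc() operations that act on the per-cpu variable in one step. A minimal, hypothetical module showing the two styles side by side is sketched below; example_counter is invented for the demo, and preemption is disabled only to keep the accesses legal in process context.

	/* Kernel-style sketch contrasting the two per-cpu access styles. */
	#include <linux/module.h>
	#include <linux/percpu.h>

	static DEFINE_PER_CPU(unsigned long, example_counter);

	static int __init percpu_demo_init(void)
	{
		preempt_disable();

		/* old style: compute this CPU's address of the variable, dereference it */
		__get_cpu_var(example_counter)++;

		/* new style: a single per-cpu operation, no separate address math;
		 * on x86 this compiles to one segment-prefixed instruction */
		__this_cpu_inc(example_counter);

		pr_info("example_counter on this cpu: %lu\n",
			__this_cpu_read(example_counter));

		preempt_enable();
		return 0;
	}
	module_init(percpu_demo_init);

	MODULE_LICENSE("GPL");
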
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index e785b0f2aea5..8ee6ec82f88a 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -932,6 +932,38 @@ static void insert_work(struct cpu_workqueue_struct *cwq, | |||
932 | wake_up_worker(gcwq); | 932 | wake_up_worker(gcwq); |
933 | } | 933 | } |
934 | 934 | ||
935 | /* | ||
936 | * Test whether @work is being queued from another work executing on the | ||
937 | * same workqueue. This is rather expensive and should only be used from | ||
938 | * cold paths. | ||
939 | */ | ||
940 | static bool is_chained_work(struct workqueue_struct *wq) | ||
941 | { | ||
942 | unsigned long flags; | ||
943 | unsigned int cpu; | ||
944 | |||
945 | for_each_gcwq_cpu(cpu) { | ||
946 | struct global_cwq *gcwq = get_gcwq(cpu); | ||
947 | struct worker *worker; | ||
948 | struct hlist_node *pos; | ||
949 | int i; | ||
950 | |||
951 | spin_lock_irqsave(&gcwq->lock, flags); | ||
952 | for_each_busy_worker(worker, i, pos, gcwq) { | ||
953 | if (worker->task != current) | ||
954 | continue; | ||
955 | spin_unlock_irqrestore(&gcwq->lock, flags); | ||
956 | /* | ||
957 | * I'm @worker, no locking necessary. See if @work | ||
958 | * is headed to the same workqueue. | ||
959 | */ | ||
960 | return worker->current_cwq->wq == wq; | ||
961 | } | ||
962 | spin_unlock_irqrestore(&gcwq->lock, flags); | ||
963 | } | ||
964 | return false; | ||
965 | } | ||
966 | |||
935 | static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, | 967 | static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, |
936 | struct work_struct *work) | 968 | struct work_struct *work) |
937 | { | 969 | { |
@@ -943,7 +975,9 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, | |||
943 | 975 | ||
944 | debug_work_activate(work); | 976 | debug_work_activate(work); |
945 | 977 | ||
946 | if (WARN_ON_ONCE(wq->flags & WQ_DYING)) | 978 | /* if dying, only works from the same workqueue are allowed */ |
979 | if (unlikely(wq->flags & WQ_DYING) && | ||
980 | WARN_ON_ONCE(!is_chained_work(wq))) | ||
947 | return; | 981 | return; |
948 | 982 | ||
949 | /* determine gcwq to use */ | 983 | /* determine gcwq to use */ |
@@ -2936,11 +2970,35 @@ EXPORT_SYMBOL_GPL(__alloc_workqueue_key); | |||
2936 | */ | 2970 | */ |
2937 | void destroy_workqueue(struct workqueue_struct *wq) | 2971 | void destroy_workqueue(struct workqueue_struct *wq) |
2938 | { | 2972 | { |
2973 | unsigned int flush_cnt = 0; | ||
2939 | unsigned int cpu; | 2974 | unsigned int cpu; |
2940 | 2975 | ||
2976 | /* | ||
2977 | * Mark @wq dying and drain all pending works. Once WQ_DYING is | ||
2978 | * set, only chain queueing is allowed. IOW, only currently | ||
2979 | * pending or running work items on @wq can queue further work | ||
2980 | * items on it. @wq is flushed repeatedly until it becomes empty. | ||
2981 | * The number of flushes is determined by the depth of chaining and | ||
2982 | * should be relatively short. Whine if it takes too long. | ||
2983 | */ | ||
2941 | wq->flags |= WQ_DYING; | 2984 | wq->flags |= WQ_DYING; |
2985 | reflush: | ||
2942 | flush_workqueue(wq); | 2986 | flush_workqueue(wq); |
2943 | 2987 | ||
2988 | for_each_cwq_cpu(cpu, wq) { | ||
2989 | struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); | ||
2990 | |||
2991 | if (!cwq->nr_active && list_empty(&cwq->delayed_works)) | ||
2992 | continue; | ||
2993 | |||
2994 | if (++flush_cnt == 10 || | ||
2995 | (flush_cnt % 100 == 0 && flush_cnt <= 1000)) | ||
2996 | printk(KERN_WARNING "workqueue %s: flush on " | ||
2997 | "destruction isn't complete after %u tries\n", | ||
2998 | wq->name, flush_cnt); | ||
2999 | goto reflush; | ||
3000 | } | ||
3001 | |||
2944 | /* | 3002 | /* |
2945 | * wq list is used to freeze wq, remove from list after | 3003 | * wq list is used to freeze wq, remove from list after |
2946 | * flushing is complete in case freeze races us. | 3004 | * flushing is complete in case freeze races us. |
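
The WQ_DYING change above exists for "chained" work items: works that queue further work on the same workqueue while it is being destroyed. A hypothetical module exhibiting exactly that pattern is sketched below; with this patch, destroy_workqueue() drains such a chain by reflushing instead of tripping the old unconditional WARN_ON_ONCE(). Names are made up and the sketch only builds against a kernel tree.

	/* Hypothetical illustration of chained queueing during destruction. */
	#include <linux/module.h>
	#include <linux/workqueue.h>

	static struct workqueue_struct *demo_wq;
	static struct work_struct demo_work;
	static int demo_rounds = 3;

	static void demo_work_fn(struct work_struct *work)
	{
		pr_info("demo work, %d rounds left\n", demo_rounds);

		/* chain: queue further work on the same workqueue from inside a
		 * work item; this is what is_chained_work() detects */
		if (--demo_rounds > 0)
			queue_work(demo_wq, &demo_work);
	}

	static int __init demo_init(void)
	{
		demo_wq = alloc_workqueue("demo_wq", 0, 0);
		if (!demo_wq)
			return -ENOMEM;

		INIT_WORK(&demo_work, demo_work_fn);
		queue_work(demo_wq, &demo_work);

		/* marks WQ_DYING, then flushes repeatedly until the chain ends */
		destroy_workqueue(demo_wq);
		return 0;
	}
	module_init(demo_init);

	MODULE_LICENSE("GPL");
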