author	James Morris <jmorris@namei.org>	2011-03-07 18:55:06 -0500
committer	James Morris <jmorris@namei.org>	2011-03-07 18:55:06 -0500
commit	1cc26bada9f6807814806db2f0d78792eecdac71 (patch)
tree	5509b5139db04af6c13db0a580c84116a4a54039 /kernel
parent	eae61f3c829439f8f9121b5cd48a14be04df451f (diff)
parent	214d93b02c4fe93638ad268613c9702a81ed9192 (diff)
Merge branch 'master'; commit 'v2.6.38-rc7' into next
Diffstat (limited to 'kernel')
73 files changed, 1425 insertions, 885 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 0b5ff083fa22..353d3fe8ba33 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -43,7 +43,7 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
 obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
 obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
 obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
-obj-$(CONFIG_USE_GENERIC_SMP_HELPERS) += smp.o
+obj-$(CONFIG_SMP) += smp.o
 ifneq ($(CONFIG_SMP),y)
 obj-y += up.o
 endif
@@ -100,6 +100,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += trace/
 obj-$(CONFIG_TRACING) += trace/
 obj-$(CONFIG_X86_DS) += trace/
 obj-$(CONFIG_RING_BUFFER) += trace/
+obj-$(CONFIG_TRACEPOINTS) += trace/
 obj-$(CONFIG_SMP) += sched_cpupri.o
 obj-$(CONFIG_IRQ_WORK) += irq_work.o
 obj-$(CONFIG_PERF_EVENTS) += perf_event.o
@@ -121,7 +122,7 @@ $(obj)/configs.o: $(obj)/config_data.h
 # config_data.h contains the same information as ikconfig.h but gzipped.
 # Info from config_data can be extracted from /proc/config*
 targets += config_data.gz
-$(obj)/config_data.gz: .config FORCE
+$(obj)/config_data.gz: $(KCONFIG_CONFIG) FORCE
 	$(call if_changed,gzip)
 
 quiet_cmd_ikconfiggz = IKCFG $@
diff --git a/kernel/audit.c b/kernel/audit.c
index 77770a034d59..e4956244ae50 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -400,7 +400,7 @@ static void kauditd_send_skb(struct sk_buff *skb)
 if (err < 0) {
 BUG_ON(err != -ECONNREFUSED); /* Shouldn't happen */
 printk(KERN_ERR "audit: *NO* daemon at audit_pid=%d\n", audit_pid);
-audit_log_lost("auditd dissapeared\n");
+audit_log_lost("auditd disappeared\n");
 audit_pid = 0;
 /* we might get lucky and get this in the next auditd */
 audit_hold_skb(skb);
diff --git a/kernel/capability.c b/kernel/capability.c
index 2f05303715a5..9e9385f132c8 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -306,7 +306,7 @@ int capable(int cap)
 BUG();
 }
 
-if (security_capable(cap) == 0) {
+if (security_capable(current_cred(), cap) == 0) {
 current->flags |= PF_SUPERPRIV;
 return 1;
 }
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 51cddc11cd85..b24d7027b83c 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -763,9 +763,8 @@ EXPORT_SYMBOL_GPL(cgroup_unlock);
  * -> cgroup_mkdir.
  */
 
-static struct dentry *cgroup_lookup(struct inode *dir,
-struct dentry *dentry, struct nameidata *nd);
 static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode);
+static struct dentry *cgroup_lookup(struct inode *, struct dentry *, struct nameidata *);
 static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
 static int cgroup_populate_dir(struct cgroup *cgrp);
 static const struct inode_operations cgroup_dir_inode_operations;
@@ -862,6 +861,11 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
 iput(inode);
 }
 
+static int cgroup_delete(const struct dentry *d)
+{
+return 1;
+}
+
 static void remove_dir(struct dentry *d)
 {
 struct dentry *parent = dget(d->d_parent);
@@ -912,7 +916,7 @@ static void cgroup_d_remove_dir(struct dentry *dentry)
 
 parent = dentry->d_parent;
 spin_lock(&parent->d_lock);
-spin_lock(&dentry->d_lock);
+spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
 list_del_init(&dentry->d_u.d_child);
 spin_unlock(&dentry->d_lock);
 spin_unlock(&parent->d_lock);
@@ -1451,6 +1455,11 @@ static int cgroup_set_super(struct super_block *sb, void *data)
 
 static int cgroup_get_rootdir(struct super_block *sb)
 {
+static const struct dentry_operations cgroup_dops = {
+.d_iput = cgroup_diput,
+.d_delete = cgroup_delete,
+};
+
 struct inode *inode =
 cgroup_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb);
 struct dentry *dentry;
@@ -1468,6 +1477,8 @@ static int cgroup_get_rootdir(struct super_block *sb)
 return -ENOMEM;
 }
 sb->s_root = dentry;
+/* for everything else we want ->d_op set */
+sb->s_d_op = &cgroup_dops;
 return 0;
 }
 
@@ -2197,6 +2208,14 @@ static const struct inode_operations cgroup_dir_inode_operations = {
 .rename = cgroup_rename,
 };
 
+static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+{
+if (dentry->d_name.len > NAME_MAX)
+return ERR_PTR(-ENAMETOOLONG);
+d_add(dentry, NULL);
+return NULL;
+}
+
 /*
  * Check if a file is a control file
  */
@@ -2207,26 +2226,6 @@ static inline struct cftype *__file_cft(struct file *file)
 return __d_cft(file->f_dentry);
 }
 
-static int cgroup_delete_dentry(const struct dentry *dentry)
-{
-return 1;
-}
-
-static struct dentry *cgroup_lookup(struct inode *dir,
-struct dentry *dentry, struct nameidata *nd)
-{
-static const struct dentry_operations cgroup_dentry_operations = {
-.d_delete = cgroup_delete_dentry,
-.d_iput = cgroup_diput,
-};
-
-if (dentry->d_name.len > NAME_MAX)
-return ERR_PTR(-ENAMETOOLONG);
-d_set_d_op(dentry, &cgroup_dentry_operations);
-d_add(dentry, NULL);
-return NULL;
-}
-
 static int cgroup_create_file(struct dentry *dentry, mode_t mode,
 struct super_block *sb)
 {
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 4349935c2ad8..e92e98189032 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1575,8 +1575,10 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
 return -ENODEV;
 
 trialcs = alloc_trial_cpuset(cs);
-if (!trialcs)
-return -ENOMEM;
+if (!trialcs) {
+retval = -ENOMEM;
+goto out;
+}
 
 switch (cft->private) {
 case FILE_CPULIST:
@@ -1591,6 +1593,7 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
 }
 
 free_trial_cpuset(trialcs);
+out:
 cgroup_unlock();
 return retval;
 }
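Note: the cpuset hunk above turns an early "return -ENOMEM" into a "goto out" so that cgroup_unlock() still runs on the allocation-failure path. The following standalone C sketch shows the same single-exit cleanup pattern in userspace; the mutex and malloc() here are stand-ins for the kernel's cgroup lock and alloc_trial_cpuset(), not the real APIs.

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

/* Single-exit error path: every return taken after the lock is held
 * funnels through "out", so the unlock cannot be skipped. */
static int write_resmask(size_t len)
{
	int retval = 0;
	char *trial;

	pthread_mutex_lock(&lock);

	trial = malloc(len);          /* stand-in for alloc_trial_cpuset() */
	if (!trial) {
		retval = -12;         /* -ENOMEM */
		goto out;             /* unlock still happens below */
	}

	/* ... work on the trial copy ... */

	free(trial);
out:
	pthread_mutex_unlock(&lock);
	return retval;
}

int main(void)
{
	return write_resmask(64) ? 1 : 0;
}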
diff --git a/kernel/cred.c b/kernel/cred.c
index 6a1aa004e376..3a9d6dd53a6c 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -252,13 +252,13 @@ struct cred *cred_alloc_blank(void)
 #endif
 
 atomic_set(&new->usage, 1);
+#ifdef CONFIG_DEBUG_CREDENTIALS
+new->magic = CRED_MAGIC;
+#endif
 
 if (security_cred_alloc_blank(new, GFP_KERNEL) < 0)
 goto error;
 
-#ifdef CONFIG_DEBUG_CREDENTIALS
-new->magic = CRED_MAGIC;
-#endif
 return new;
 
 error:
@@ -657,6 +657,8 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon)
 validate_creds(old);
 
 *new = *old;
+atomic_set(&new->usage, 1);
+set_cred_subscribers(new, 0);
 get_uid(new->user);
 get_group_info(new->group_info);
 
@@ -674,8 +676,6 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon)
 if (security_prepare_creds(new, old, GFP_KERNEL) < 0)
 goto error;
 
-atomic_set(&new->usage, 1);
-set_cred_subscribers(new, 0);
 put_cred(old);
 validate_creds(new);
 return new;
@@ -748,7 +748,11 @@ bool creds_are_invalid(const struct cred *cred)
 if (cred->magic != CRED_MAGIC)
 return true;
 #ifdef CONFIG_SECURITY_SELINUX
-if (selinux_is_enabled()) {
+/*
+ * cred->security == NULL if security_cred_alloc_blank() or
+ * security_prepare_creds() returned an error.
+ */
+if (selinux_is_enabled() && cred->security) {
 if ((unsigned long) cred->security < PAGE_SIZE)
 return true;
 if ((*(u32 *)cred->security & 0xffffff00) ==
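Note: the cred.c hunks above move the debug magic (and, in prepare_kernel_cred(), the usage and subscriber counts) ahead of the security hook, so a partially built cred is already self-consistent if the hook fails and the error path validates it. A small userspace C sketch of that "make the object valid before calling anything that can fail" ordering follows; the struct, the validate() step and the failing hook are invented for illustration only.

#include <assert.h>
#include <stdlib.h>

#define CRED_MAGIC 0x43736564

struct cred {
	unsigned int magic;
	int usage;
};

static void validate(const struct cred *c)
{
	/* The error path runs this, so magic/usage must already be sane. */
	assert(c->magic == CRED_MAGIC);
	assert(c->usage == 1);
}

static int security_hook(struct cred *c)
{
	(void)c;
	return -1;                      /* pretend the LSM allocation failed */
}

static struct cred *cred_alloc(void)
{
	struct cred *c = calloc(1, sizeof(*c));
	if (!c)
		return NULL;

	/* Initialise the debug fields *before* the fallible hook ... */
	c->magic = CRED_MAGIC;
	c->usage = 1;

	if (security_hook(c) < 0)
		goto error;
	return c;

error:
	validate(c);                    /* ... so the error path can check them */
	free(c);
	return NULL;
}

int main(void)
{
	return cred_alloc() == NULL ? 0 : 1;
}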
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index a6e729766821..bd3e8e29caa3 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -2914,7 +2914,7 @@ static void __init kdb_cmd_init(void)
 }
 }
 
-/* Intialize kdb_printf, breakpoint tables and kdb state */
+/* Initialize kdb_printf, breakpoint tables and kdb state */
 void __init kdb_init(int lvl)
 {
 static int kdb_init_lvl = KDB_NOT_INITIALIZED;
diff --git a/kernel/exit.c b/kernel/exit.c
index 89c74861a3da..f9a45ebcc7b1 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -994,6 +994,15 @@ NORET_TYPE void do_exit(long code)
 exit_fs(tsk);
 check_stack_usage();
 exit_thread();
+
+/*
+ * Flush inherited counters to the parent - before the parent
+ * gets woken up by child-exit notifications.
+ *
+ * because of cgroup mode, must be called before cgroup_exit()
+ */
+perf_event_exit_task(tsk);
+
 cgroup_exit(tsk, 1);
 
 if (group_dead)
@@ -1007,11 +1016,6 @@ NORET_TYPE void do_exit(long code)
  * FIXME: do that only when needed, using sched_exit tracepoint
  */
 flush_ptrace_hw_breakpoint(tsk);
-/*
- * Flush inherited counters to the parent - before the parent
- * gets woken up by child-exit notifications.
- */
-perf_event_exit_task(tsk);
 
 exit_notify(tsk, group_dead);
 #ifdef CONFIG_NUMA
diff --git a/kernel/fork.c b/kernel/fork.c
index d9b44f20b6b0..25e429152ddc 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -66,6 +66,7 @@
 #include <linux/posix-timers.h>
 #include <linux/user-return-notifier.h>
 #include <linux/oom.h>
+#include <linux/khugepaged.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -330,6 +331,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 retval = ksm_fork(mm, oldmm);
 if (retval)
 goto out;
+retval = khugepaged_fork(mm, oldmm);
+if (retval)
+goto out;
 
 prev = NULL;
 for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
@@ -529,6 +533,9 @@ void __mmdrop(struct mm_struct *mm)
 mm_free_pgd(mm);
 destroy_context(mm);
 mmu_notifier_mm_destroy(mm);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+VM_BUG_ON(mm->pmd_huge_pte);
+#endif
 free_mm(mm);
 }
 EXPORT_SYMBOL_GPL(__mmdrop);
@@ -543,6 +550,7 @@ void mmput(struct mm_struct *mm)
 if (atomic_dec_and_test(&mm->mm_users)) {
 exit_aio(mm);
 ksm_exit(mm);
+khugepaged_exit(mm); /* must run before exit_mmap */
 exit_mmap(mm);
 set_mm_exe_file(mm, NULL);
 if (!list_empty(&mm->mmlist)) {
@@ -669,6 +677,10 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
 mm->token_priority = 0;
 mm->last_interval = 0;
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+mm->pmd_huge_pte = NULL;
+#endif
+
 if (!mm_init(mm, tsk))
 goto fail_nomem;
 
@@ -910,6 +922,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 
 sig->oom_adj = current->signal->oom_adj;
 sig->oom_score_adj = current->signal->oom_score_adj;
+sig->oom_score_adj_min = current->signal->oom_score_adj_min;
 
 mutex_init(&sig->cred_guard_mutex);
 
@@ -1410,23 +1423,6 @@ long do_fork(unsigned long clone_flags,
 }
 
 /*
- * We hope to recycle these flags after 2.6.26
- */
-if (unlikely(clone_flags & CLONE_STOPPED)) {
-static int __read_mostly count = 100;
-
-if (count > 0 && printk_ratelimit()) {
-char comm[TASK_COMM_LEN];
-
-count--;
-printk(KERN_INFO "fork(): process `%s' used deprecated "
-"clone flags 0x%lx\n",
-get_task_comm(comm, current),
-clone_flags & CLONE_STOPPED);
-}
-}
-
-/*
  * When called from kernel_thread, don't do user tracing stuff.
  */
 if (likely(user_mode(regs)))
@@ -1464,16 +1460,7 @@ long do_fork(unsigned long clone_flags,
  */
 p->flags &= ~PF_STARTING;
 
-if (unlikely(clone_flags & CLONE_STOPPED)) {
-/*
- * We'll start up with an immediate SIGSTOP.
- */
-sigaddset(&p->pending.signal, SIGSTOP);
-set_tsk_thread_flag(p, TIF_SIGPENDING);
-__set_task_state(p, TASK_STOPPED);
-} else {
-wake_up_new_task(p, clone_flags);
-}
+wake_up_new_task(p, clone_flags);
 
 tracehook_report_clone_complete(trace, regs,
 clone_flags, nr, p);
diff --git a/kernel/freezer.c b/kernel/freezer.c
index bd1d42b17cb2..66ecd2ead215 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -104,8 +104,13 @@ bool freeze_task(struct task_struct *p, bool sig_only)
 }
 
 if (should_send_signal(p)) {
-if (!signal_pending(p))
-fake_signal_wake_up(p);
+fake_signal_wake_up(p);
+/*
+ * fake_signal_wake_up() goes through p's scheduler
+ * lock and guarantees that TASK_STOPPED/TRACED ->
+ * TASK_RUNNING transition can't race with task state
+ * testing in try_to_freeze_tasks().
+ */
 } else if (sig_only) {
 return false;
 } else {
diff --git a/kernel/futex.c b/kernel/futex.c
index 3019b92e6917..b766d28accd6 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -233,7 +233,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
 {
 unsigned long address = (unsigned long)uaddr;
 struct mm_struct *mm = current->mm;
-struct page *page;
+struct page *page, *page_head;
 int err;
 
 /*
@@ -265,11 +265,46 @@ again:
 if (err < 0)
 return err;
 
-page = compound_head(page);
-lock_page(page);
-if (!page->mapping) {
-unlock_page(page);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+page_head = page;
+if (unlikely(PageTail(page))) {
 put_page(page);
+/* serialize against __split_huge_page_splitting() */
+local_irq_disable();
+if (likely(__get_user_pages_fast(address, 1, 1, &page) == 1)) {
+page_head = compound_head(page);
+/*
+ * page_head is valid pointer but we must pin
+ * it before taking the PG_lock and/or
+ * PG_compound_lock. The moment we re-enable
+ * irqs __split_huge_page_splitting() can
+ * return and the head page can be freed from
+ * under us. We can't take the PG_lock and/or
+ * PG_compound_lock on a page that could be
+ * freed from under us.
+ */
+if (page != page_head) {
+get_page(page_head);
+put_page(page);
+}
+local_irq_enable();
+} else {
+local_irq_enable();
+goto again;
+}
+}
+#else
+page_head = compound_head(page);
+if (page != page_head) {
+get_page(page_head);
+put_page(page);
+}
+#endif
+
+lock_page(page_head);
+if (!page_head->mapping) {
+unlock_page(page_head);
+put_page(page_head);
 goto again;
 }
 
@@ -280,20 +315,20 @@ again:
  * it's a read-only handle, it's expected that futexes attach to
  * the object not the particular process.
  */
-if (PageAnon(page)) {
+if (PageAnon(page_head)) {
 key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
 key->private.mm = mm;
 key->private.address = address;
 } else {
 key->both.offset |= FUT_OFF_INODE; /* inode-based key */
-key->shared.inode = page->mapping->host;
-key->shared.pgoff = page->index;
+key->shared.inode = page_head->mapping->host;
+key->shared.pgoff = page_head->index;
 }
 
 get_futex_key_refs(key);
 
-unlock_page(page);
-put_page(page);
+unlock_page(page_head);
+put_page(page_head);
 return 0;
 }
 
@@ -791,10 +826,9 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
 new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
 
 /*
- * This happens when we have stolen the lock and the original
- * pending owner did not enqueue itself back on the rt_mutex.
- * Thats not a tragedy. We know that way, that a lock waiter
- * is on the fly. We make the futex_q waiter the pending owner.
+ * It is possible that the next waiter (the one that brought
+ * this owner to the kernel) timed out and is no longer
+ * waiting on the lock.
  */
 if (!new_owner)
 new_owner = this->task;
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 45da2b6920ab..0c8d7c048615 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1745,7 +1745,7 @@ schedule_hrtimeout_range_clock(ktime_t *expires, unsigned long delta,
 }
 
 /*
- * A NULL parameter means "inifinte"
+ * A NULL parameter means "infinite"
  */
 if (!expires) {
 schedule();
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index 31d766bf5d2e..8e42fec7686d 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -9,9 +9,6 @@ menu "IRQ subsystem"
 config GENERIC_HARDIRQS
 def_bool y
 
-config GENERIC_HARDIRQS_NO__DO_IRQ
-def_bool y
-
 # Select this to disable the deprecated stuff
 config GENERIC_HARDIRQS_NO_DEPRECATED
 def_bool n
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index e2347eb63306..3540a7190122 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -118,114 +118,3 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
 
 return retval;
 }
-
-#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
-
-#ifdef CONFIG_ENABLE_WARN_DEPRECATED
-# warning __do_IRQ is deprecated. Please convert to proper flow handlers
-#endif
-
-/**
- * __do_IRQ - original all in one highlevel IRQ handler
- * @irq: the interrupt number
- *
- * __do_IRQ handles all normal device IRQ's (the special
- * SMP cross-CPU interrupts have their own specific
- * handlers).
- *
- * This is the original x86 implementation which is used for every
- * interrupt type.
- */
-unsigned int __do_IRQ(unsigned int irq)
-{
-struct irq_desc *desc = irq_to_desc(irq);
-struct irqaction *action;
-unsigned int status;
-
-kstat_incr_irqs_this_cpu(irq, desc);
-
-if (CHECK_IRQ_PER_CPU(desc->status)) {
-irqreturn_t action_ret;
-
-/*
- * No locking required for CPU-local interrupts:
- */
-if (desc->irq_data.chip->ack)
-desc->irq_data.chip->ack(irq);
-if (likely(!(desc->status & IRQ_DISABLED))) {
-action_ret = handle_IRQ_event(irq, desc->action);
-if (!noirqdebug)
-note_interrupt(irq, desc, action_ret);
-}
-desc->irq_data.chip->end(irq);
-return 1;
-}
-
-raw_spin_lock(&desc->lock);
-if (desc->irq_data.chip->ack)
-desc->irq_data.chip->ack(irq);
-/*
- * REPLAY is when Linux resends an IRQ that was dropped earlier
- * WAITING is used by probe to mark irqs that are being tested
- */
-status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING);
-status |= IRQ_PENDING; /* we _want_ to handle it */
-
-/*
- * If the IRQ is disabled for whatever reason, we cannot
- * use the action we have.
- */
-action = NULL;
-if (likely(!(status & (IRQ_DISABLED | IRQ_INPROGRESS)))) {
-action = desc->action;
-status &= ~IRQ_PENDING; /* we commit to handling */
-status |= IRQ_INPROGRESS; /* we are handling it */
-}
-desc->status = status;
-
-/*
- * If there is no IRQ handler or it was disabled, exit early.
- * Since we set PENDING, if another processor is handling
- * a different instance of this same irq, the other processor
- * will take care of it.
- */
-if (unlikely(!action))
-goto out;
-
-/*
- * Edge triggered interrupts need to remember
- * pending events.
- * This applies to any hw interrupts that allow a second
- * instance of the same irq to arrive while we are in do_IRQ
- * or in the handler. But the code here only handles the _second_
- * instance of the irq, not the third or fourth. So it is mostly
- * useful for irq hardware that does not mask cleanly in an
- * SMP environment.
- */
-for (;;) {
-irqreturn_t action_ret;
-
-raw_spin_unlock(&desc->lock);
-
-action_ret = handle_IRQ_event(irq, action);
-if (!noirqdebug)
-note_interrupt(irq, desc, action_ret);
-
-raw_spin_lock(&desc->lock);
-if (likely(!(desc->status & IRQ_PENDING)))
-break;
-desc->status &= ~IRQ_PENDING;
-}
-desc->status &= ~IRQ_INPROGRESS;
-
-out:
-/*
- * The ->end() handler has to deal with interrupts which got
- * disabled while the handler was running.
- */
-desc->irq_data.chip->end(irq);
-raw_spin_unlock(&desc->lock);
-
-return 1;
-}
-#endif
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 4571ae7e085a..99c3bc8a6fb4 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -3,6 +3,12 @@
  */
 #include <linux/irqdesc.h>
 
+#ifdef CONFIG_SPARSE_IRQ
+# define IRQ_BITMAP_BITS (NR_IRQS + 8196)
+#else
+# define IRQ_BITMAP_BITS NR_IRQS
+#endif
+
 extern int noirqdebug;
 
 #define irq_data_to_desc(data) container_of(data, struct irq_desc, irq_data)
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 9988d03797f5..2039bea31bdf 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -72,6 +72,8 @@ static inline int desc_node(struct irq_desc *desc) { return 0; }
 
 static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node)
 {
+int cpu;
+
 desc->irq_data.irq = irq;
 desc->irq_data.chip = &no_irq_chip;
 desc->irq_data.chip_data = NULL;
@@ -83,7 +85,8 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node)
 desc->irq_count = 0;
 desc->irqs_unhandled = 0;
 desc->name = NULL;
-memset(desc->kstat_irqs, 0, nr_cpu_ids * sizeof(*(desc->kstat_irqs)));
+for_each_possible_cpu(cpu)
+*per_cpu_ptr(desc->kstat_irqs, cpu) = 0;
 desc_smp_init(desc, node);
 }
 
@@ -91,7 +94,7 @@ int nr_irqs = NR_IRQS;
 EXPORT_SYMBOL_GPL(nr_irqs);
 
 static DEFINE_MUTEX(sparse_irq_lock);
-static DECLARE_BITMAP(allocated_irqs, NR_IRQS);
+static DECLARE_BITMAP(allocated_irqs, IRQ_BITMAP_BITS);
 
 #ifdef CONFIG_SPARSE_IRQ
 
@@ -133,8 +136,7 @@ static struct irq_desc *alloc_desc(int irq, int node)
 if (!desc)
 return NULL;
 /* allocate based on nr_cpu_ids */
-desc->kstat_irqs = kzalloc_node(nr_cpu_ids * sizeof(*desc->kstat_irqs),
-gfp, node);
+desc->kstat_irqs = alloc_percpu(unsigned int);
 if (!desc->kstat_irqs)
 goto err_desc;
 
@@ -149,7 +151,7 @@ static struct irq_desc *alloc_desc(int irq, int node)
 return desc;
 
 err_kstat:
-kfree(desc->kstat_irqs);
+free_percpu(desc->kstat_irqs);
 err_desc:
 kfree(desc);
 return NULL;
@@ -166,7 +168,7 @@ static void free_desc(unsigned int irq)
 mutex_unlock(&sparse_irq_lock);
 
 free_masks(desc);
-kfree(desc->kstat_irqs);
+free_percpu(desc->kstat_irqs);
 kfree(desc);
 }
 
@@ -215,6 +217,15 @@ int __init early_irq_init(void)
 initcnt = arch_probe_nr_irqs();
 printk(KERN_INFO "NR_IRQS:%d nr_irqs:%d %d\n", NR_IRQS, nr_irqs, initcnt);
 
+if (WARN_ON(nr_irqs > IRQ_BITMAP_BITS))
+nr_irqs = IRQ_BITMAP_BITS;
+
+if (WARN_ON(initcnt > IRQ_BITMAP_BITS))
+initcnt = IRQ_BITMAP_BITS;
+
+if (initcnt > nr_irqs)
+nr_irqs = initcnt;
+
 for (i = 0; i < initcnt; i++) {
 desc = alloc_desc(i, node);
 set_bit(i, allocated_irqs);
@@ -234,7 +245,6 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
 }
 };
 
-static unsigned int kstat_irqs_all[NR_IRQS][NR_CPUS];
 int __init early_irq_init(void)
 {
 int count, i, node = first_online_node;
@@ -250,7 +260,8 @@ int __init early_irq_init(void)
 for (i = 0; i < count; i++) {
 desc[i].irq_data.irq = i;
 desc[i].irq_data.chip = &no_irq_chip;
-desc[i].kstat_irqs = kstat_irqs_all[i];
+/* TODO : do this allocation on-demand ... */
+desc[i].kstat_irqs = alloc_percpu(unsigned int);
 alloc_masks(desc + i, GFP_KERNEL, node);
 desc_smp_init(desc + i, node);
 lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
@@ -275,6 +286,22 @@ static void free_desc(unsigned int irq)
 
 static inline int alloc_descs(unsigned int start, unsigned int cnt, int node)
 {
+#if defined(CONFIG_KSTAT_IRQS_ONDEMAND)
+struct irq_desc *desc;
+unsigned int i;
+
+for (i = 0; i < cnt; i++) {
+desc = irq_to_desc(start + i);
+if (desc && !desc->kstat_irqs) {
+unsigned int __percpu *stats = alloc_percpu(unsigned int);
+
+if (!stats)
+return -1;
+if (cmpxchg(&desc->kstat_irqs, NULL, stats) != NULL)
+free_percpu(stats);
+}
+}
+#endif
 return start;
 }
 #endif /* !CONFIG_SPARSE_IRQ */
@@ -391,7 +418,9 @@ void dynamic_irq_cleanup(unsigned int irq)
 unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
 {
 struct irq_desc *desc = irq_to_desc(irq);
-return desc ? desc->kstat_irqs[cpu] : 0;
+
+return desc && desc->kstat_irqs ?
+*per_cpu_ptr(desc->kstat_irqs, cpu) : 0;
 }
 
 #ifdef CONFIG_GENERIC_HARDIRQS
@@ -401,10 +430,10 @@ unsigned int kstat_irqs(unsigned int irq)
 int cpu;
 int sum = 0;
 
-if (!desc)
+if (!desc || !desc->kstat_irqs)
 return 0;
 for_each_possible_cpu(cpu)
-sum += desc->kstat_irqs[cpu];
+sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
 return sum;
 }
 #endif /* CONFIG_GENERIC_HARDIRQS */
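Note: the irqdesc.c hunks above replace a static [NR_IRQS][NR_CPUS] array with alloc_percpu() counters that each CPU bumps locally and that kstat_irqs() sums only when read. The following standalone C sketch is a rough userspace analog of that layout, using a plain per-"CPU" array instead of the kernel's per-CPU API; NCPU and the helper names are illustrative, not kernel symbols.

#include <stdio.h>
#include <stdlib.h>

#define NCPU 4                          /* stand-in for nr_cpu_ids */

struct irq_stat {
	unsigned int *counts;           /* one slot per CPU, like alloc_percpu() */
};

static int stat_init(struct irq_stat *s)
{
	s->counts = calloc(NCPU, sizeof(*s->counts));
	return s->counts ? 0 : -1;
}

/* Increment only the slot of the CPU that handled the interrupt. */
static void stat_inc(struct irq_stat *s, int cpu)
{
	s->counts[cpu]++;
}

/* Sum lazily, only when a /proc/interrupts-style reader asks for it. */
static unsigned int stat_sum(const struct irq_stat *s)
{
	unsigned int sum = 0;
	for (int cpu = 0; cpu < NCPU; cpu++)
		sum += s->counts[cpu];
	return sum;
}

int main(void)
{
	struct irq_stat s;
	if (stat_init(&s))
		return 1;
	stat_inc(&s, 0);
	stat_inc(&s, 2);
	printf("total: %u\n", stat_sum(&s));
	free(s.counts);
	return 0;
}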
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 0caa59f747dd..9033c1c70828 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1100,7 +1100,7 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler,
 if (retval)
 kfree(action);
 
-#ifdef CONFIG_DEBUG_SHIRQ
+#ifdef CONFIG_DEBUG_SHIRQ_FIXME
 if (!retval && (irqflags & IRQF_SHARED)) {
 /*
  * It's a shared IRQ -- the driver ought to be prepared for it
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index 1d2541940480..441fd629ff04 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -56,6 +56,7 @@ void move_masked_irq(int irq)
 void move_native_irq(int irq)
 {
 struct irq_desc *desc = irq_to_desc(irq);
+bool masked;
 
 if (likely(!(desc->status & IRQ_MOVE_PENDING)))
 return;
@@ -63,8 +64,15 @@ void move_native_irq(int irq)
 if (unlikely(desc->status & IRQ_DISABLED))
 return;
 
-desc->irq_data.chip->irq_mask(&desc->irq_data);
+/*
+ * Be careful vs. already masked interrupts. If this is a
+ * threaded interrupt with ONESHOT set, we can end up with an
+ * interrupt storm.
+ */
+masked = desc->status & IRQ_MASKED;
+if (!masked)
+desc->irq_data.chip->irq_mask(&desc->irq_data);
 move_masked_irq(irq);
-desc->irq_data.chip->irq_unmask(&desc->irq_data);
+if (!masked)
+desc->irq_data.chip->irq_unmask(&desc->irq_data);
 }
-
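Note: the migration.c fix above records whether the interrupt was already masked and only unmasks it again if move_native_irq() did the masking itself, so a still-masked ONESHOT threaded IRQ stays masked. The same "remember whether you changed the state before undoing it" idiom, as a standalone userspace C sketch; the flag bit and helper names are made up for illustration.

#include <stdbool.h>
#include <stdio.h>

#define FLAG_MASKED 0x1u

struct line {
	unsigned int status;
};

static void line_mask(struct line *l)   { l->status |= FLAG_MASKED; }
static void line_unmask(struct line *l) { l->status &= ~FLAG_MASKED; }

/* Migrate while keeping an already-masked line masked afterwards:
 * only undo the masking we did ourselves. */
static void migrate(struct line *l)
{
	bool masked = l->status & FLAG_MASKED;

	if (!masked)
		line_mask(l);

	/* ... move the line while it cannot fire ... */

	if (!masked)
		line_unmask(l);
}

int main(void)
{
	struct line oneshot = { .status = FLAG_MASKED };
	migrate(&oneshot);
	printf("still masked: %d\n", !!(oneshot.status & FLAG_MASKED));
	return 0;
}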
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c
index 891115a929aa..dc49358b73fa 100644
--- a/kernel/irq/resend.c
+++ b/kernel/irq/resend.c
@@ -23,7 +23,7 @@
 #ifdef CONFIG_HARDIRQS_SW_RESEND
 
 /* Bitmap to handle software resend of interrupts: */
-static DECLARE_BITMAP(irqs_resend, NR_IRQS);
+static DECLARE_BITMAP(irqs_resend, IRQ_BITMAP_BITS);
 
 /*
  * Run software resends of IRQ's
diff --git a/kernel/kexec.c b/kernel/kexec.c
index b55045bc7563..ec19b92c7ebd 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -163,7 +163,7 @@ static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
  * just verifies it is an address we can use.
  *
  * Since the kernel does everything in page size chunks ensure
- * the destination addreses are page aligned. Too many
+ * the destination addresses are page aligned. Too many
  * special cases crop of when we don't do this. The most
  * insidious is getting overlapping destination addresses
  * simply because addresses are changed to page size
diff --git a/kernel/latencytop.c b/kernel/latencytop.c
index 17110a4a4fc2..ee74b35e528d 100644
--- a/kernel/latencytop.c
+++ b/kernel/latencytop.c
@@ -241,24 +241,19 @@ static int lstats_show(struct seq_file *m, void *v)
 seq_puts(m, "Latency Top version : v0.1\n");
 
 for (i = 0; i < MAXLR; i++) {
-if (latency_record[i].backtrace[0]) {
+struct latency_record *lr = &latency_record[i];
+
+if (lr->backtrace[0]) {
 int q;
-seq_printf(m, "%i %lu %lu ",
-latency_record[i].count,
-latency_record[i].time,
-latency_record[i].max);
+seq_printf(m, "%i %lu %lu",
+lr->count, lr->time, lr->max);
 for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
-char sym[KSYM_SYMBOL_LEN];
-char *c;
-if (!latency_record[i].backtrace[q])
+unsigned long bt = lr->backtrace[q];
+if (!bt)
 break;
-if (latency_record[i].backtrace[q] == ULONG_MAX)
+if (bt == ULONG_MAX)
 break;
-sprint_symbol(sym, latency_record[i].backtrace[q]);
-c = strchr(sym, '+');
-if (c)
-*c = 0;
-seq_printf(m, "%s ", sym);
+seq_printf(m, " %ps", (void *)bt);
 }
 seq_printf(m, "\n");
 }
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 42ba65dff7d9..0d2058da80f5 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -2292,22 +2292,6 @@ mark_held_locks(struct task_struct *curr, enum mark_type mark)
 }
 
 /*
- * Debugging helper: via this flag we know that we are in
- * 'early bootup code', and will warn about any invalid irqs-on event:
- */
-static int early_boot_irqs_enabled;
-
-void early_boot_irqs_off(void)
-{
-early_boot_irqs_enabled = 0;
-}
-
-void early_boot_irqs_on(void)
-{
-early_boot_irqs_enabled = 1;
-}
-
-/*
  * Hardirqs will be enabled:
  */
 void trace_hardirqs_on_caller(unsigned long ip)
@@ -2319,7 +2303,7 @@ void trace_hardirqs_on_caller(unsigned long ip)
 if (unlikely(!debug_locks || current->lockdep_recursion))
 return;
 
-if (DEBUG_LOCKS_WARN_ON(unlikely(!early_boot_irqs_enabled)))
+if (DEBUG_LOCKS_WARN_ON(unlikely(early_boot_irqs_disabled)))
 return;
 
 if (unlikely(curr->hardirqs_enabled)) {
diff --git a/kernel/module.c b/kernel/module.c
index 34e00b708fad..efa290ea94bf 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2460,9 +2460,9 @@ static void find_module_sections(struct module *mod, struct load_info *info)
 #endif
 
 #ifdef CONFIG_TRACEPOINTS
-mod->tracepoints = section_objs(info, "__tracepoints",
-sizeof(*mod->tracepoints),
+mod->tracepoints_ptrs = section_objs(info, "__tracepoints_ptrs",
+sizeof(*mod->tracepoints_ptrs),
 &mod->num_tracepoints);
 #endif
 #ifdef HAVE_JUMP_LABEL
 mod->jump_entries = section_objs(info, "__jump_table",
@@ -3393,7 +3393,7 @@ void module_layout(struct module *mod,
 struct modversion_info *ver,
 struct kernel_param *kp,
 struct kernel_symbol *ks,
-struct tracepoint *tp)
+struct tracepoint * const *tp)
 {
 }
 EXPORT_SYMBOL(module_layout);
@@ -3407,8 +3407,8 @@ void module_update_tracepoints(void)
 mutex_lock(&module_mutex);
 list_for_each_entry(mod, &modules, list)
 if (!mod->taints)
-tracepoint_update_probe_range(mod->tracepoints,
-mod->tracepoints + mod->num_tracepoints);
+tracepoint_update_probe_range(mod->tracepoints_ptrs,
+mod->tracepoints_ptrs + mod->num_tracepoints);
 mutex_unlock(&module_mutex);
 }
 
@@ -3432,8 +3432,8 @@ int module_get_iter_tracepoints(struct tracepoint_iter *iter)
 else if (iter_mod > iter->module)
 iter->tracepoint = NULL;
 found = tracepoint_get_iter_range(&iter->tracepoint,
-iter_mod->tracepoints,
-iter_mod->tracepoints
+iter_mod->tracepoints_ptrs,
+iter_mod->tracepoints_ptrs
 + iter_mod->num_tracepoints);
 if (found) {
 iter->module = iter_mod;
diff --git a/kernel/panic.c b/kernel/panic.c
index 4c13b1a88ebb..991bb87a1704 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -34,6 +34,7 @@ static int pause_on_oops_flag;
 static DEFINE_SPINLOCK(pause_on_oops_lock);
 
 int panic_timeout;
+EXPORT_SYMBOL_GPL(panic_timeout);
 
 ATOMIC_NOTIFIER_HEAD(panic_notifier_list);
 
diff --git a/kernel/params.c b/kernel/params.c
index 08107d181758..0da1411222b9 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -719,9 +719,7 @@ void destroy_params(const struct kernel_param *params, unsigned num)
 params[i].ops->free(params[i].arg);
 }
 
-static void __init kernel_add_sysfs_param(const char *name,
-struct kernel_param *kparam,
-unsigned int name_skip)
+static struct module_kobject * __init locate_module_kobject(const char *name)
 {
 struct module_kobject *mk;
 struct kobject *kobj;
@@ -729,10 +727,7 @@ static void __init kernel_add_sysfs_param(const char *name,
 
 kobj = kset_find_obj(module_kset, name);
 if (kobj) {
-/* We already have one. Remove params so we can add more. */
 mk = to_module_kobject(kobj);
-/* We need to remove it before adding parameters. */
-sysfs_remove_group(&mk->kobj, &mk->mp->grp);
 } else {
 mk = kzalloc(sizeof(struct module_kobject), GFP_KERNEL);
 BUG_ON(!mk);
@@ -743,15 +738,36 @@ static void __init kernel_add_sysfs_param(const char *name,
 "%s", name);
 if (err) {
 kobject_put(&mk->kobj);
-printk(KERN_ERR "Module '%s' failed add to sysfs, "
-"error number %d\n", name, err);
-printk(KERN_ERR "The system will be unstable now.\n");
-return;
+printk(KERN_ERR
+"Module '%s' failed add to sysfs, error number %d\n",
+name, err);
+printk(KERN_ERR
+"The system will be unstable now.\n");
+return NULL;
 }
-/* So that exit path is even. */
+
+/* So that we hold reference in both cases. */
 kobject_get(&mk->kobj);
 }
 
+return mk;
+}
+
+static void __init kernel_add_sysfs_param(const char *name,
+struct kernel_param *kparam,
+unsigned int name_skip)
+{
+struct module_kobject *mk;
+int err;
+
+mk = locate_module_kobject(name);
+if (!mk)
+return;
+
+/* We need to remove old parameters before adding more. */
+if (mk->mp)
+sysfs_remove_group(&mk->kobj, &mk->mp->grp);
+
 /* These should not fail at boot. */
 err = add_sysfs_param(mk, kparam, kparam->name + name_skip);
 BUG_ON(err);
@@ -796,6 +812,32 @@ static void __init param_sysfs_builtin(void)
 }
 }
 
+ssize_t __modver_version_show(struct module_attribute *mattr,
+struct module *mod, char *buf)
+{
+struct module_version_attribute *vattr =
+container_of(mattr, struct module_version_attribute, mattr);
+
+return sprintf(buf, "%s\n", vattr->version);
+}
+
+extern struct module_version_attribute __start___modver[], __stop___modver[];
+
+static void __init version_sysfs_builtin(void)
+{
+const struct module_version_attribute *vattr;
+struct module_kobject *mk;
+int err;
+
+for (vattr = __start___modver; vattr < __stop___modver; vattr++) {
+mk = locate_module_kobject(vattr->module_name);
+if (mk) {
+err = sysfs_create_file(&mk->kobj, &vattr->mattr.attr);
+kobject_uevent(&mk->kobj, KOBJ_ADD);
+kobject_put(&mk->kobj);
+}
+}
+}
 
 /* module-related sysfs stuff */
 
@@ -875,6 +917,7 @@ static int __init param_sysfs_init(void)
 }
 module_sysfs_initialized = 1;
 
+version_sysfs_builtin();
 param_sysfs_builtin();
 
 return 0;
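Note: __modver_version_show() above recovers the enclosing struct module_version_attribute from the embedded mattr member with container_of(). The following standalone C sketch shows how that macro works using offsetof; the struct names and the simplified macro here are illustrative, not the kernel's definitions.

#include <stddef.h>
#include <stdio.h>

/* Simplified container_of(): subtract the member's offset from the
 * member's address to get back to the enclosing structure. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct attribute {
	const char *name;
};

struct version_attribute {
	struct attribute attr;          /* embedded member, like mattr */
	const char *version;
};

static void show(struct attribute *a)
{
	struct version_attribute *v =
		container_of(a, struct version_attribute, attr);
	printf("%s = %s\n", v->attr.name, v->version);
}

int main(void)
{
	struct version_attribute va = {
		.attr = { .name = "version" },
		.version = "2.6.38-rc7",
	};
	show(&va.attr);                 /* only the embedded member is passed */
	return 0;
}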
diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 11847bf1e8cc..656222fcf767 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c | |||
@@ -38,6 +38,12 @@ | |||
38 | 38 | ||
39 | #include <asm/irq_regs.h> | 39 | #include <asm/irq_regs.h> |
40 | 40 | ||
41 | enum event_type_t { | ||
42 | EVENT_FLEXIBLE = 0x1, | ||
43 | EVENT_PINNED = 0x2, | ||
44 | EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED, | ||
45 | }; | ||
46 | |||
41 | atomic_t perf_task_events __read_mostly; | 47 | atomic_t perf_task_events __read_mostly; |
42 | static atomic_t nr_mmap_events __read_mostly; | 48 | static atomic_t nr_mmap_events __read_mostly; |
43 | static atomic_t nr_comm_events __read_mostly; | 49 | static atomic_t nr_comm_events __read_mostly; |
@@ -65,6 +71,12 @@ int sysctl_perf_event_sample_rate __read_mostly = 100000; | |||
65 | 71 | ||
66 | static atomic64_t perf_event_id; | 72 | static atomic64_t perf_event_id; |
67 | 73 | ||
74 | static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx, | ||
75 | enum event_type_t event_type); | ||
76 | |||
77 | static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, | ||
78 | enum event_type_t event_type); | ||
79 | |||
68 | void __weak perf_event_print_debug(void) { } | 80 | void __weak perf_event_print_debug(void) { } |
69 | 81 | ||
70 | extern __weak const char *perf_pmu_name(void) | 82 | extern __weak const char *perf_pmu_name(void) |
@@ -72,6 +84,11 @@ extern __weak const char *perf_pmu_name(void) | |||
72 | return "pmu"; | 84 | return "pmu"; |
73 | } | 85 | } |
74 | 86 | ||
87 | static inline u64 perf_clock(void) | ||
88 | { | ||
89 | return local_clock(); | ||
90 | } | ||
91 | |||
75 | void perf_pmu_disable(struct pmu *pmu) | 92 | void perf_pmu_disable(struct pmu *pmu) |
76 | { | 93 | { |
77 | int *count = this_cpu_ptr(pmu->pmu_disable_count); | 94 | int *count = this_cpu_ptr(pmu->pmu_disable_count); |
@@ -240,11 +257,6 @@ static void perf_unpin_context(struct perf_event_context *ctx) | |||
240 | put_ctx(ctx); | 257 | put_ctx(ctx); |
241 | } | 258 | } |
242 | 259 | ||
243 | static inline u64 perf_clock(void) | ||
244 | { | ||
245 | return local_clock(); | ||
246 | } | ||
247 | |||
248 | /* | 260 | /* |
249 | * Update the record of the current time in a context. | 261 | * Update the record of the current time in a context. |
250 | */ | 262 | */ |
@@ -256,6 +268,12 @@ static void update_context_time(struct perf_event_context *ctx) | |||
256 | ctx->timestamp = now; | 268 | ctx->timestamp = now; |
257 | } | 269 | } |
258 | 270 | ||
271 | static u64 perf_event_time(struct perf_event *event) | ||
272 | { | ||
273 | struct perf_event_context *ctx = event->ctx; | ||
274 | return ctx ? ctx->time : 0; | ||
275 | } | ||
276 | |||
259 | /* | 277 | /* |
260 | * Update the total_time_enabled and total_time_running fields for a event. | 278 | * Update the total_time_enabled and total_time_running fields for a event. |
261 | */ | 279 | */ |
@@ -269,7 +287,7 @@ static void update_event_times(struct perf_event *event) | |||
269 | return; | 287 | return; |
270 | 288 | ||
271 | if (ctx->is_active) | 289 | if (ctx->is_active) |
272 | run_end = ctx->time; | 290 | run_end = perf_event_time(event); |
273 | else | 291 | else |
274 | run_end = event->tstamp_stopped; | 292 | run_end = event->tstamp_stopped; |
275 | 293 | ||
@@ -278,7 +296,7 @@ static void update_event_times(struct perf_event *event) | |||
278 | if (event->state == PERF_EVENT_STATE_INACTIVE) | 296 | if (event->state == PERF_EVENT_STATE_INACTIVE) |
279 | run_end = event->tstamp_stopped; | 297 | run_end = event->tstamp_stopped; |
280 | else | 298 | else |
281 | run_end = ctx->time; | 299 | run_end = perf_event_time(event); |
282 | 300 | ||
283 | event->total_time_running = run_end - event->tstamp_running; | 301 | event->total_time_running = run_end - event->tstamp_running; |
284 | } | 302 | } |
@@ -534,6 +552,7 @@ event_sched_out(struct perf_event *event, | |||
534 | struct perf_cpu_context *cpuctx, | 552 | struct perf_cpu_context *cpuctx, |
535 | struct perf_event_context *ctx) | 553 | struct perf_event_context *ctx) |
536 | { | 554 | { |
555 | u64 tstamp = perf_event_time(event); | ||
537 | u64 delta; | 556 | u64 delta; |
538 | /* | 557 | /* |
539 | * An event which could not be activated because of | 558 | * An event which could not be activated because of |
@@ -545,7 +564,7 @@ event_sched_out(struct perf_event *event, | |||
545 | && !event_filter_match(event)) { | 564 | && !event_filter_match(event)) { |
546 | delta = ctx->time - event->tstamp_stopped; | 565 | delta = ctx->time - event->tstamp_stopped; |
547 | event->tstamp_running += delta; | 566 | event->tstamp_running += delta; |
548 | event->tstamp_stopped = ctx->time; | 567 | event->tstamp_stopped = tstamp; |
549 | } | 568 | } |
550 | 569 | ||
551 | if (event->state != PERF_EVENT_STATE_ACTIVE) | 570 | if (event->state != PERF_EVENT_STATE_ACTIVE) |
@@ -556,7 +575,7 @@ event_sched_out(struct perf_event *event, | |||
556 | event->pending_disable = 0; | 575 | event->pending_disable = 0; |
557 | event->state = PERF_EVENT_STATE_OFF; | 576 | event->state = PERF_EVENT_STATE_OFF; |
558 | } | 577 | } |
559 | event->tstamp_stopped = ctx->time; | 578 | event->tstamp_stopped = tstamp; |
560 | event->pmu->del(event, 0); | 579 | event->pmu->del(event, 0); |
561 | event->oncpu = -1; | 580 | event->oncpu = -1; |
562 | 581 | ||
@@ -763,16 +782,33 @@ retry: | |||
763 | raw_spin_unlock_irq(&ctx->lock); | 782 | raw_spin_unlock_irq(&ctx->lock); |
764 | } | 783 | } |
765 | 784 | ||
785 | #define MAX_INTERRUPTS (~0ULL) | ||
786 | |||
787 | static void perf_log_throttle(struct perf_event *event, int enable); | ||
788 | |||
766 | static int | 789 | static int |
767 | event_sched_in(struct perf_event *event, | 790 | event_sched_in(struct perf_event *event, |
768 | struct perf_cpu_context *cpuctx, | 791 | struct perf_cpu_context *cpuctx, |
769 | struct perf_event_context *ctx) | 792 | struct perf_event_context *ctx) |
770 | { | 793 | { |
794 | u64 tstamp = perf_event_time(event); | ||
795 | |||
771 | if (event->state <= PERF_EVENT_STATE_OFF) | 796 | if (event->state <= PERF_EVENT_STATE_OFF) |
772 | return 0; | 797 | return 0; |
773 | 798 | ||
774 | event->state = PERF_EVENT_STATE_ACTIVE; | 799 | event->state = PERF_EVENT_STATE_ACTIVE; |
775 | event->oncpu = smp_processor_id(); | 800 | event->oncpu = smp_processor_id(); |
801 | |||
802 | /* | ||
803 | * Unthrottle events: while scheduled out we might have missed several | ||
804 | * ticks already; also, for a heavily scheduling task there is little | ||
805 | * guarantee it'll get a tick in a timely manner. | ||
806 | */ | ||
807 | if (unlikely(event->hw.interrupts == MAX_INTERRUPTS)) { | ||
808 | perf_log_throttle(event, 1); | ||
809 | event->hw.interrupts = 0; | ||
810 | } | ||
811 | |||
776 | /* | 812 | /* |
777 | * The new state must be visible before we turn it on in the hardware: | 813 | * The new state must be visible before we turn it on in the hardware: |
778 | */ | 814 | */ |
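event_sched_in() now clears a leftover throttle when the event is put back on the PMU, since a task that seldom runs may never receive the timer tick that would normally do the unthrottling; MAX_INTERRUPTS and the perf_log_throttle() declaration are hoisted earlier in the file to make that possible. A minimal model of just that guard, with a printf standing in for perf_log_throttle():

    #include <stdint.h>
    #include <stdio.h>

    #define MAX_INTERRUPTS (~0ULL)   /* sentinel meaning "throttled", as in the patch */

    struct hw { uint64_t interrupts; };

    /* Model of the guard added to event_sched_in(): if the event was left in
     * the throttled state, log the unthrottle and reset the count before
     * counting resumes. */
    static void maybe_unthrottle(struct hw *hw)
    {
        if (hw->interrupts == MAX_INTERRUPTS) {
            printf("unthrottle logged\n");   /* stands in for perf_log_throttle(event, 1) */
            hw->interrupts = 0;
        }
    }

    int main(void)
    {
        struct hw hw = { .interrupts = MAX_INTERRUPTS };
        maybe_unthrottle(&hw);               /* prints once */
        maybe_unthrottle(&hw);               /* already clear, no output */
        return 0;
    }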
@@ -784,9 +820,9 @@ event_sched_in(struct perf_event *event, | |||
784 | return -EAGAIN; | 820 | return -EAGAIN; |
785 | } | 821 | } |
786 | 822 | ||
787 | event->tstamp_running += ctx->time - event->tstamp_stopped; | 823 | event->tstamp_running += tstamp - event->tstamp_stopped; |
788 | 824 | ||
789 | event->shadow_ctx_time = ctx->time - ctx->timestamp; | 825 | event->shadow_ctx_time = tstamp - ctx->timestamp; |
790 | 826 | ||
791 | if (!is_software_event(event)) | 827 | if (!is_software_event(event)) |
792 | cpuctx->active_oncpu++; | 828 | cpuctx->active_oncpu++; |
@@ -898,11 +934,13 @@ static int group_can_go_on(struct perf_event *event, | |||
898 | static void add_event_to_ctx(struct perf_event *event, | 934 | static void add_event_to_ctx(struct perf_event *event, |
899 | struct perf_event_context *ctx) | 935 | struct perf_event_context *ctx) |
900 | { | 936 | { |
937 | u64 tstamp = perf_event_time(event); | ||
938 | |||
901 | list_add_event(event, ctx); | 939 | list_add_event(event, ctx); |
902 | perf_group_attach(event); | 940 | perf_group_attach(event); |
903 | event->tstamp_enabled = ctx->time; | 941 | event->tstamp_enabled = tstamp; |
904 | event->tstamp_running = ctx->time; | 942 | event->tstamp_running = tstamp; |
905 | event->tstamp_stopped = ctx->time; | 943 | event->tstamp_stopped = tstamp; |
906 | } | 944 | } |
907 | 945 | ||
908 | /* | 946 | /* |
@@ -937,7 +975,7 @@ static void __perf_install_in_context(void *info) | |||
937 | 975 | ||
938 | add_event_to_ctx(event, ctx); | 976 | add_event_to_ctx(event, ctx); |
939 | 977 | ||
940 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | 978 | if (!event_filter_match(event)) |
941 | goto unlock; | 979 | goto unlock; |
942 | 980 | ||
943 | /* | 981 | /* |
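This hunk, and several more below, replace the open-coded `event->cpu != -1 && event->cpu != smp_processor_id()` test with event_filter_match(). The helper's body is not part of this diff; judging from the sites it replaces, it must encapsulate exactly that CPU filter, roughly:

    #include <stdbool.h>
    #include <stdio.h>

    /* Illustrative model only: the real event_filter_match() lives elsewhere
     * in kernel/perf_event.c and takes a struct perf_event. */
    static bool filter_match(int event_cpu, int this_cpu)
    {
        /* -1 means "any CPU"; otherwise the event is bound to one CPU. */
        return event_cpu == -1 || event_cpu == this_cpu;
    }

    int main(void)
    {
        printf("%d %d %d\n",
               filter_match(-1, 3),   /* 1: unbound event matches anywhere */
               filter_match(3, 3),    /* 1: bound to the current CPU */
               filter_match(2, 3));   /* 0: bound elsewhere, skip it */
        return 0;
    }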
@@ -1042,14 +1080,13 @@ static void __perf_event_mark_enabled(struct perf_event *event, | |||
1042 | struct perf_event_context *ctx) | 1080 | struct perf_event_context *ctx) |
1043 | { | 1081 | { |
1044 | struct perf_event *sub; | 1082 | struct perf_event *sub; |
1083 | u64 tstamp = perf_event_time(event); | ||
1045 | 1084 | ||
1046 | event->state = PERF_EVENT_STATE_INACTIVE; | 1085 | event->state = PERF_EVENT_STATE_INACTIVE; |
1047 | event->tstamp_enabled = ctx->time - event->total_time_enabled; | 1086 | event->tstamp_enabled = tstamp - event->total_time_enabled; |
1048 | list_for_each_entry(sub, &event->sibling_list, group_entry) { | 1087 | list_for_each_entry(sub, &event->sibling_list, group_entry) { |
1049 | if (sub->state >= PERF_EVENT_STATE_INACTIVE) { | 1088 | if (sub->state >= PERF_EVENT_STATE_INACTIVE) |
1050 | sub->tstamp_enabled = | 1089 | sub->tstamp_enabled = tstamp - sub->total_time_enabled; |
1051 | ctx->time - sub->total_time_enabled; | ||
1052 | } | ||
1053 | } | 1090 | } |
1054 | } | 1091 | } |
1055 | 1092 | ||
@@ -1082,7 +1119,7 @@ static void __perf_event_enable(void *info) | |||
1082 | goto unlock; | 1119 | goto unlock; |
1083 | __perf_event_mark_enabled(event, ctx); | 1120 | __perf_event_mark_enabled(event, ctx); |
1084 | 1121 | ||
1085 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | 1122 | if (!event_filter_match(event)) |
1086 | goto unlock; | 1123 | goto unlock; |
1087 | 1124 | ||
1088 | /* | 1125 | /* |
@@ -1193,12 +1230,6 @@ static int perf_event_refresh(struct perf_event *event, int refresh) | |||
1193 | return 0; | 1230 | return 0; |
1194 | } | 1231 | } |
1195 | 1232 | ||
1196 | enum event_type_t { | ||
1197 | EVENT_FLEXIBLE = 0x1, | ||
1198 | EVENT_PINNED = 0x2, | ||
1199 | EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED, | ||
1200 | }; | ||
1201 | |||
1202 | static void ctx_sched_out(struct perf_event_context *ctx, | 1233 | static void ctx_sched_out(struct perf_event_context *ctx, |
1203 | struct perf_cpu_context *cpuctx, | 1234 | struct perf_cpu_context *cpuctx, |
1204 | enum event_type_t event_type) | 1235 | enum event_type_t event_type) |
@@ -1435,7 +1466,7 @@ ctx_pinned_sched_in(struct perf_event_context *ctx, | |||
1435 | list_for_each_entry(event, &ctx->pinned_groups, group_entry) { | 1466 | list_for_each_entry(event, &ctx->pinned_groups, group_entry) { |
1436 | if (event->state <= PERF_EVENT_STATE_OFF) | 1467 | if (event->state <= PERF_EVENT_STATE_OFF) |
1437 | continue; | 1468 | continue; |
1438 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | 1469 | if (!event_filter_match(event)) |
1439 | continue; | 1470 | continue; |
1440 | 1471 | ||
1441 | if (group_can_go_on(event, cpuctx, 1)) | 1472 | if (group_can_go_on(event, cpuctx, 1)) |
@@ -1467,7 +1498,7 @@ ctx_flexible_sched_in(struct perf_event_context *ctx, | |||
1467 | * Listen to the 'cpu' scheduling filter constraint | 1498 | * Listen to the 'cpu' scheduling filter constraint |
1468 | * of events: | 1499 | * of events: |
1469 | */ | 1500 | */ |
1470 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | 1501 | if (!event_filter_match(event)) |
1471 | continue; | 1502 | continue; |
1472 | 1503 | ||
1473 | if (group_can_go_on(event, cpuctx, can_add_hw)) { | 1504 | if (group_can_go_on(event, cpuctx, can_add_hw)) { |
@@ -1580,10 +1611,6 @@ void __perf_event_task_sched_in(struct task_struct *task) | |||
1580 | } | 1611 | } |
1581 | } | 1612 | } |
1582 | 1613 | ||
1583 | #define MAX_INTERRUPTS (~0ULL) | ||
1584 | |||
1585 | static void perf_log_throttle(struct perf_event *event, int enable); | ||
1586 | |||
1587 | static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) | 1614 | static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) |
1588 | { | 1615 | { |
1589 | u64 frequency = event->attr.sample_freq; | 1616 | u64 frequency = event->attr.sample_freq; |
@@ -1694,7 +1721,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period) | |||
1694 | if (event->state != PERF_EVENT_STATE_ACTIVE) | 1721 | if (event->state != PERF_EVENT_STATE_ACTIVE) |
1695 | continue; | 1722 | continue; |
1696 | 1723 | ||
1697 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | 1724 | if (!event_filter_match(event)) |
1698 | continue; | 1725 | continue; |
1699 | 1726 | ||
1700 | hwc = &event->hw; | 1727 | hwc = &event->hw; |
@@ -1885,11 +1912,12 @@ static void __perf_event_read(void *info) | |||
1885 | return; | 1912 | return; |
1886 | 1913 | ||
1887 | raw_spin_lock(&ctx->lock); | 1914 | raw_spin_lock(&ctx->lock); |
1888 | update_context_time(ctx); | 1915 | if (ctx->is_active) |
1916 | update_context_time(ctx); | ||
1889 | update_event_times(event); | 1917 | update_event_times(event); |
1918 | if (event->state == PERF_EVENT_STATE_ACTIVE) | ||
1919 | event->pmu->read(event); | ||
1890 | raw_spin_unlock(&ctx->lock); | 1920 | raw_spin_unlock(&ctx->lock); |
1891 | |||
1892 | event->pmu->read(event); | ||
1893 | } | 1921 | } |
1894 | 1922 | ||
1895 | static inline u64 perf_event_count(struct perf_event *event) | 1923 | static inline u64 perf_event_count(struct perf_event *event) |
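__perf_event_read() now advances the context time only while the context is active and, more importantly, calls pmu->read() under ctx->lock and only if the event is still ACTIVE, rather than after the unlock where the event might already have been scheduled out. A toy illustration of that check-under-the-lock shape (a pthread mutex stands in for the raw spinlock; all names are invented):

    #include <pthread.h>
    #include <stdio.h>

    enum state { ACTIVE, INACTIVE };

    struct evt {
        pthread_mutex_t lock;
        enum state state;
        unsigned long long count;
    };

    static void pmu_read(struct evt *e) { e->count++; }   /* stands in for pmu->read() */

    static void event_read(struct evt *e)
    {
        pthread_mutex_lock(&e->lock);
        if (e->state == ACTIVE)      /* re-checked under the lock, as in the patch */
            pmu_read(e);
        pthread_mutex_unlock(&e->lock);
    }

    int main(void)
    {
        struct evt e = { PTHREAD_MUTEX_INITIALIZER, ACTIVE, 0 };
        event_read(&e);
        printf("count=%llu\n", e.count);
        return 0;
    }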
@@ -1983,8 +2011,7 @@ static int alloc_callchain_buffers(void) | |||
1983 | * accessed from NMI. Use a temporary manual per cpu allocation | 2011 | * accessed from NMI. Use a temporary manual per cpu allocation |
1984 | * until that gets sorted out. | 2012 | * until that gets sorted out. |
1985 | */ | 2013 | */ |
1986 | size = sizeof(*entries) + sizeof(struct perf_callchain_entry *) * | 2014 | size = offsetof(struct callchain_cpus_entries, cpu_entries[nr_cpu_ids]); |
1987 | num_possible_cpus(); | ||
1988 | 2015 | ||
1989 | entries = kzalloc(size, GFP_KERNEL); | 2016 | entries = kzalloc(size, GFP_KERNEL); |
1990 | if (!entries) | 2017 | if (!entries) |
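The callchain buffer allocation switches from hand-adding `sizeof(*entries)` plus N pointer slots to `offsetof(struct callchain_cpus_entries, cpu_entries[nr_cpu_ids])`, which yields exactly the bytes needed for the header plus a trailing array without double-counting padding. A standalone example of the idiom (structure name and slot count are made up):

    #include <stddef.h>
    #include <stdio.h>
    #include <stdlib.h>

    enum { NR_SLOTS = 8 };   /* stand-in for nr_cpu_ids */

    /* Illustrative structure with a trailing array, like callchain_cpus_entries. */
    struct entries {
        int refcount;
        void *cpu_entries[];   /* one slot per possible CPU */
    };

    int main(void)
    {
        /* offsetof of the one-past-the-end element gives exactly the bytes
         * needed for the header plus NR_SLOTS slots, as in the patched code. */
        size_t size = offsetof(struct entries, cpu_entries[NR_SLOTS]);

        struct entries *e = calloc(1, size);
        if (!e)
            return 1;
        printf("allocated %zu bytes for %d slots\n", size, NR_SLOTS);
        free(e);
        return 0;
    }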
@@ -2185,13 +2212,6 @@ find_lively_task_by_vpid(pid_t vpid) | |||
2185 | if (!task) | 2212 | if (!task) |
2186 | return ERR_PTR(-ESRCH); | 2213 | return ERR_PTR(-ESRCH); |
2187 | 2214 | ||
2188 | /* | ||
2189 | * Can't attach events to a dying task. | ||
2190 | */ | ||
2191 | err = -ESRCH; | ||
2192 | if (task->flags & PF_EXITING) | ||
2193 | goto errout; | ||
2194 | |||
2195 | /* Reuse ptrace permission checks for now. */ | 2215 | /* Reuse ptrace permission checks for now. */ |
2196 | err = -EACCES; | 2216 | err = -EACCES; |
2197 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) | 2217 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) |
@@ -2212,14 +2232,11 @@ find_get_context(struct pmu *pmu, struct task_struct *task, int cpu) | |||
2212 | unsigned long flags; | 2232 | unsigned long flags; |
2213 | int ctxn, err; | 2233 | int ctxn, err; |
2214 | 2234 | ||
2215 | if (!task && cpu != -1) { | 2235 | if (!task) { |
2216 | /* Must be root to operate on a CPU event: */ | 2236 | /* Must be root to operate on a CPU event: */ |
2217 | if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) | 2237 | if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) |
2218 | return ERR_PTR(-EACCES); | 2238 | return ERR_PTR(-EACCES); |
2219 | 2239 | ||
2220 | if (cpu < 0 || cpu >= nr_cpumask_bits) | ||
2221 | return ERR_PTR(-EINVAL); | ||
2222 | |||
2223 | /* | 2240 | /* |
2224 | * We could be clever and allow to attach a event to an | 2241 | * We could be clever and allow to attach a event to an |
2225 | * offline CPU and activate it when the CPU comes up, but | 2242 | * offline CPU and activate it when the CPU comes up, but |
@@ -2255,14 +2272,27 @@ retry: | |||
2255 | 2272 | ||
2256 | get_ctx(ctx); | 2273 | get_ctx(ctx); |
2257 | 2274 | ||
2258 | if (cmpxchg(&task->perf_event_ctxp[ctxn], NULL, ctx)) { | 2275 | err = 0; |
2259 | /* | 2276 | mutex_lock(&task->perf_event_mutex); |
2260 | * We raced with some other task; use | 2277 | /* |
2261 | * the context they set. | 2278 | * If it has already passed perf_event_exit_task(). |
2262 | */ | 2279 | * we must see PF_EXITING, it takes this mutex too. |
2280 | */ | ||
2281 | if (task->flags & PF_EXITING) | ||
2282 | err = -ESRCH; | ||
2283 | else if (task->perf_event_ctxp[ctxn]) | ||
2284 | err = -EAGAIN; | ||
2285 | else | ||
2286 | rcu_assign_pointer(task->perf_event_ctxp[ctxn], ctx); | ||
2287 | mutex_unlock(&task->perf_event_mutex); | ||
2288 | |||
2289 | if (unlikely(err)) { | ||
2263 | put_task_struct(task); | 2290 | put_task_struct(task); |
2264 | kfree(ctx); | 2291 | kfree(ctx); |
2265 | goto retry; | 2292 | |
2293 | if (err == -EAGAIN) | ||
2294 | goto retry; | ||
2295 | goto errout; | ||
2266 | } | 2296 | } |
2267 | } | 2297 | } |
2268 | 2298 | ||
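The retry path stops publishing the new context with cmpxchg() and instead installs it under task->perf_event_mutex, the same mutex perf_event_exit_task() takes; a task that has already run its exit path is therefore seen via PF_EXITING and rejected with -ESRCH, while losing the race to another installer still yields -EAGAIN and a retry. A compact userspace model of the three outcomes (toy types, a pthread mutex in place of perf_event_mutex):

    #include <errno.h>
    #include <pthread.h>
    #include <stdio.h>

    /* Toy model of the install step added to find_get_context(): the decision
     * is made entirely under a per-task mutex instead of with cmpxchg(). */
    struct task {
        pthread_mutex_t perf_mutex;
        int exiting;        /* stands in for PF_EXITING */
        void *ctx;          /* stands in for perf_event_ctxp[ctxn] */
    };

    static int install_ctx(struct task *t, void *new_ctx)
    {
        int err = 0;

        pthread_mutex_lock(&t->perf_mutex);
        if (t->exiting)
            err = -ESRCH;        /* task already ran its perf exit path */
        else if (t->ctx)
            err = -EAGAIN;       /* lost the race: someone installed a context */
        else
            t->ctx = new_ctx;    /* we win: publish our context */
        pthread_mutex_unlock(&t->perf_mutex);

        return err;
    }

    int main(void)
    {
        struct task t = { PTHREAD_MUTEX_INITIALIZER, 0, NULL };
        int a = 1, b = 2;

        printf("%d\n", install_ctx(&t, &a));   /* 0: installed */
        printf("%d\n", install_ctx(&t, &b));   /* -EAGAIN: caller would retry */
        return 0;
    }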
@@ -3893,7 +3923,7 @@ static int perf_event_task_match(struct perf_event *event) | |||
3893 | if (event->state < PERF_EVENT_STATE_INACTIVE) | 3923 | if (event->state < PERF_EVENT_STATE_INACTIVE) |
3894 | return 0; | 3924 | return 0; |
3895 | 3925 | ||
3896 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | 3926 | if (!event_filter_match(event)) |
3897 | return 0; | 3927 | return 0; |
3898 | 3928 | ||
3899 | if (event->attr.comm || event->attr.mmap || | 3929 | if (event->attr.comm || event->attr.mmap || |
@@ -4030,7 +4060,7 @@ static int perf_event_comm_match(struct perf_event *event) | |||
4030 | if (event->state < PERF_EVENT_STATE_INACTIVE) | 4060 | if (event->state < PERF_EVENT_STATE_INACTIVE) |
4031 | return 0; | 4061 | return 0; |
4032 | 4062 | ||
4033 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | 4063 | if (!event_filter_match(event)) |
4034 | return 0; | 4064 | return 0; |
4035 | 4065 | ||
4036 | if (event->attr.comm) | 4066 | if (event->attr.comm) |
@@ -4178,7 +4208,7 @@ static int perf_event_mmap_match(struct perf_event *event, | |||
4178 | if (event->state < PERF_EVENT_STATE_INACTIVE) | 4208 | if (event->state < PERF_EVENT_STATE_INACTIVE) |
4179 | return 0; | 4209 | return 0; |
4180 | 4210 | ||
4181 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | 4211 | if (!event_filter_match(event)) |
4182 | return 0; | 4212 | return 0; |
4183 | 4213 | ||
4184 | if ((!executable && event->attr.mmap_data) || | 4214 | if ((!executable && event->attr.mmap_data) || |
@@ -4648,7 +4678,7 @@ int perf_swevent_get_recursion_context(void) | |||
4648 | } | 4678 | } |
4649 | EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); | 4679 | EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); |
4650 | 4680 | ||
4651 | void inline perf_swevent_put_recursion_context(int rctx) | 4681 | inline void perf_swevent_put_recursion_context(int rctx) |
4652 | { | 4682 | { |
4653 | struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); | 4683 | struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); |
4654 | 4684 | ||
@@ -5361,6 +5391,8 @@ free_dev: | |||
5361 | goto out; | 5391 | goto out; |
5362 | } | 5392 | } |
5363 | 5393 | ||
5394 | static struct lock_class_key cpuctx_mutex; | ||
5395 | |||
5364 | int perf_pmu_register(struct pmu *pmu, char *name, int type) | 5396 | int perf_pmu_register(struct pmu *pmu, char *name, int type) |
5365 | { | 5397 | { |
5366 | int cpu, ret; | 5398 | int cpu, ret; |
@@ -5409,6 +5441,7 @@ skip_type: | |||
5409 | 5441 | ||
5410 | cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); | 5442 | cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); |
5411 | __perf_event_init_context(&cpuctx->ctx); | 5443 | __perf_event_init_context(&cpuctx->ctx); |
5444 | lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex); | ||
5412 | cpuctx->ctx.type = cpu_context; | 5445 | cpuctx->ctx.type = cpu_context; |
5413 | cpuctx->ctx.pmu = pmu; | 5446 | cpuctx->ctx.pmu = pmu; |
5414 | cpuctx->jiffies_interval = 1; | 5447 | cpuctx->jiffies_interval = 1; |
@@ -5525,6 +5558,11 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, | |||
5525 | struct hw_perf_event *hwc; | 5558 | struct hw_perf_event *hwc; |
5526 | long err; | 5559 | long err; |
5527 | 5560 | ||
5561 | if ((unsigned)cpu >= nr_cpu_ids) { | ||
5562 | if (!task || cpu != -1) | ||
5563 | return ERR_PTR(-EINVAL); | ||
5564 | } | ||
5565 | |||
5528 | event = kzalloc(sizeof(*event), GFP_KERNEL); | 5566 | event = kzalloc(sizeof(*event), GFP_KERNEL); |
5529 | if (!event) | 5567 | if (!event) |
5530 | return ERR_PTR(-ENOMEM); | 5568 | return ERR_PTR(-ENOMEM); |
@@ -5573,7 +5611,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, | |||
5573 | 5611 | ||
5574 | if (!overflow_handler && parent_event) | 5612 | if (!overflow_handler && parent_event) |
5575 | overflow_handler = parent_event->overflow_handler; | 5613 | overflow_handler = parent_event->overflow_handler; |
5576 | 5614 | ||
5577 | event->overflow_handler = overflow_handler; | 5615 | event->overflow_handler = overflow_handler; |
5578 | 5616 | ||
5579 | if (attr->disabled) | 5617 | if (attr->disabled) |
@@ -6109,7 +6147,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn) | |||
6109 | * scheduled, so we are now safe from rescheduling changing | 6147 | * scheduled, so we are now safe from rescheduling changing |
6110 | * our context. | 6148 | * our context. |
6111 | */ | 6149 | */ |
6112 | child_ctx = child->perf_event_ctxp[ctxn]; | 6150 | child_ctx = rcu_dereference_raw(child->perf_event_ctxp[ctxn]); |
6113 | task_ctx_sched_out(child_ctx, EVENT_ALL); | 6151 | task_ctx_sched_out(child_ctx, EVENT_ALL); |
6114 | 6152 | ||
6115 | /* | 6153 | /* |
@@ -6422,11 +6460,6 @@ int perf_event_init_context(struct task_struct *child, int ctxn) | |||
6422 | unsigned long flags; | 6460 | unsigned long flags; |
6423 | int ret = 0; | 6461 | int ret = 0; |
6424 | 6462 | ||
6425 | child->perf_event_ctxp[ctxn] = NULL; | ||
6426 | |||
6427 | mutex_init(&child->perf_event_mutex); | ||
6428 | INIT_LIST_HEAD(&child->perf_event_list); | ||
6429 | |||
6430 | if (likely(!parent->perf_event_ctxp[ctxn])) | 6463 | if (likely(!parent->perf_event_ctxp[ctxn])) |
6431 | return 0; | 6464 | return 0; |
6432 | 6465 | ||
@@ -6478,7 +6511,6 @@ int perf_event_init_context(struct task_struct *child, int ctxn) | |||
6478 | 6511 | ||
6479 | raw_spin_lock_irqsave(&parent_ctx->lock, flags); | 6512 | raw_spin_lock_irqsave(&parent_ctx->lock, flags); |
6480 | parent_ctx->rotate_disable = 0; | 6513 | parent_ctx->rotate_disable = 0; |
6481 | raw_spin_unlock_irqrestore(&parent_ctx->lock, flags); | ||
6482 | 6514 | ||
6483 | child_ctx = child->perf_event_ctxp[ctxn]; | 6515 | child_ctx = child->perf_event_ctxp[ctxn]; |
6484 | 6516 | ||
@@ -6486,12 +6518,11 @@ int perf_event_init_context(struct task_struct *child, int ctxn) | |||
6486 | /* | 6518 | /* |
6487 | * Mark the child context as a clone of the parent | 6519 | * Mark the child context as a clone of the parent |
6488 | * context, or of whatever the parent is a clone of. | 6520 | * context, or of whatever the parent is a clone of. |
6489 | * Note that if the parent is a clone, it could get | 6521 | * |
6490 | * uncloned at any point, but that doesn't matter | 6522 | * Note that if the parent is a clone, the holding of |
6491 | * because the list of events and the generation | 6523 | * parent_ctx->lock prevents it from being uncloned. |
6492 | * count can't have changed since we took the mutex. | ||
6493 | */ | 6524 | */ |
6494 | cloned_ctx = rcu_dereference(parent_ctx->parent_ctx); | 6525 | cloned_ctx = parent_ctx->parent_ctx; |
6495 | if (cloned_ctx) { | 6526 | if (cloned_ctx) { |
6496 | child_ctx->parent_ctx = cloned_ctx; | 6527 | child_ctx->parent_ctx = cloned_ctx; |
6497 | child_ctx->parent_gen = parent_ctx->parent_gen; | 6528 | child_ctx->parent_gen = parent_ctx->parent_gen; |
@@ -6502,6 +6533,7 @@ int perf_event_init_context(struct task_struct *child, int ctxn) | |||
6502 | get_ctx(child_ctx->parent_ctx); | 6533 | get_ctx(child_ctx->parent_ctx); |
6503 | } | 6534 | } |
6504 | 6535 | ||
6536 | raw_spin_unlock_irqrestore(&parent_ctx->lock, flags); | ||
6505 | mutex_unlock(&parent_ctx->mutex); | 6537 | mutex_unlock(&parent_ctx->mutex); |
6506 | 6538 | ||
6507 | perf_unpin_context(parent_ctx); | 6539 | perf_unpin_context(parent_ctx); |
@@ -6516,6 +6548,10 @@ int perf_event_init_task(struct task_struct *child) | |||
6516 | { | 6548 | { |
6517 | int ctxn, ret; | 6549 | int ctxn, ret; |
6518 | 6550 | ||
6551 | memset(child->perf_event_ctxp, 0, sizeof(child->perf_event_ctxp)); | ||
6552 | mutex_init(&child->perf_event_mutex); | ||
6553 | INIT_LIST_HEAD(&child->perf_event_list); | ||
6554 | |||
6519 | for_each_task_context_nr(ctxn) { | 6555 | for_each_task_context_nr(ctxn) { |
6520 | ret = perf_event_init_context(child, ctxn); | 6556 | ret = perf_event_init_context(child, ctxn); |
6521 | if (ret) | 6557 | if (ret) |
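Initialisation of the per-task perf fields (the perf_event_ctxp[] array, perf_event_mutex and perf_event_list) moves out of perf_event_init_context() and into perf_event_init_task(), so they are set up exactly once per task instead of once per context index. A small sketch of the resulting split (stand-in types, not kernel code):

    #include <pthread.h>
    #include <stdio.h>
    #include <string.h>

    #define NR_CTXS 2   /* stands in for the number of task context types */

    struct task {
        void *ctxp[NR_CTXS];        /* perf_event_ctxp[] stand-in */
        pthread_mutex_t mutex;      /* perf_event_mutex stand-in */
    };

    /* One-time setup, as now done in perf_event_init_task(). */
    static void task_perf_init(struct task *t)
    {
        memset(t->ctxp, 0, sizeof(t->ctxp));
        pthread_mutex_init(&t->mutex, NULL);
    }

    /* Per-context setup no longer touches the shared fields. */
    static int ctx_init(struct task *t, int n)
    {
        (void)t; (void)n;           /* clone the parent context here if it has one */
        return 0;
    }

    int main(void)
    {
        struct task t;
        task_perf_init(&t);
        for (int n = 0; n < NR_CTXS; n++)
            if (ctx_init(&t, n))
                return 1;
        printf("initialized %d contexts\n", NR_CTXS);
        return 0;
    }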
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index a5aff3ebad38..265729966ece 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig | |||
@@ -100,13 +100,9 @@ config PM_SLEEP_ADVANCED_DEBUG | |||
100 | depends on PM_ADVANCED_DEBUG | 100 | depends on PM_ADVANCED_DEBUG |
101 | default n | 101 | default n |
102 | 102 | ||
103 | config SUSPEND_NVS | ||
104 | bool | ||
105 | |||
106 | config SUSPEND | 103 | config SUSPEND |
107 | bool "Suspend to RAM and standby" | 104 | bool "Suspend to RAM and standby" |
108 | depends on PM && ARCH_SUSPEND_POSSIBLE | 105 | depends on PM && ARCH_SUSPEND_POSSIBLE |
109 | select SUSPEND_NVS if HAS_IOMEM | ||
110 | default y | 106 | default y |
111 | ---help--- | 107 | ---help--- |
112 | Allow the system to enter sleep states in which main memory is | 108 | Allow the system to enter sleep states in which main memory is |
@@ -140,7 +136,6 @@ config HIBERNATION | |||
140 | depends on PM && SWAP && ARCH_HIBERNATION_POSSIBLE | 136 | depends on PM && SWAP && ARCH_HIBERNATION_POSSIBLE |
141 | select LZO_COMPRESS | 137 | select LZO_COMPRESS |
142 | select LZO_DECOMPRESS | 138 | select LZO_DECOMPRESS |
143 | select SUSPEND_NVS if HAS_IOMEM | ||
144 | ---help--- | 139 | ---help--- |
145 | Enable the suspend to disk (STD) functionality, which is usually | 140 | Enable the suspend to disk (STD) functionality, which is usually |
146 | called "hibernation" in user interfaces. STD checkpoints the | 141 | called "hibernation" in user interfaces. STD checkpoints the |
diff --git a/kernel/power/Makefile b/kernel/power/Makefile index f9063c6b185d..c350e18b53e3 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile | |||
@@ -1,7 +1,4 @@ | |||
1 | 1 | ccflags-$(CONFIG_PM_DEBUG) := -DDEBUG | |
2 | ifeq ($(CONFIG_PM_DEBUG),y) | ||
3 | EXTRA_CFLAGS += -DDEBUG | ||
4 | endif | ||
5 | 2 | ||
6 | obj-$(CONFIG_PM) += main.o | 3 | obj-$(CONFIG_PM) += main.o |
7 | obj-$(CONFIG_PM_SLEEP) += console.o | 4 | obj-$(CONFIG_PM_SLEEP) += console.o |
@@ -10,6 +7,5 @@ obj-$(CONFIG_SUSPEND) += suspend.o | |||
10 | obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o | 7 | obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o |
11 | obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \ | 8 | obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \ |
12 | block_io.o | 9 | block_io.o |
13 | obj-$(CONFIG_SUSPEND_NVS) += nvs.o | ||
14 | 10 | ||
15 | obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o | 11 | obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o |
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 048d0b514831..1832bd264219 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c | |||
@@ -51,18 +51,18 @@ enum { | |||
51 | 51 | ||
52 | static int hibernation_mode = HIBERNATION_SHUTDOWN; | 52 | static int hibernation_mode = HIBERNATION_SHUTDOWN; |
53 | 53 | ||
54 | static struct platform_hibernation_ops *hibernation_ops; | 54 | static const struct platform_hibernation_ops *hibernation_ops; |
55 | 55 | ||
56 | /** | 56 | /** |
57 | * hibernation_set_ops - set the global hibernate operations | 57 | * hibernation_set_ops - set the global hibernate operations |
58 | * @ops: the hibernation operations to use in subsequent hibernation transitions | 58 | * @ops: the hibernation operations to use in subsequent hibernation transitions |
59 | */ | 59 | */ |
60 | 60 | ||
61 | void hibernation_set_ops(struct platform_hibernation_ops *ops) | 61 | void hibernation_set_ops(const struct platform_hibernation_ops *ops) |
62 | { | 62 | { |
63 | if (ops && !(ops->begin && ops->end && ops->pre_snapshot | 63 | if (ops && !(ops->begin && ops->end && ops->pre_snapshot |
64 | && ops->prepare && ops->finish && ops->enter && ops->pre_restore | 64 | && ops->prepare && ops->finish && ops->enter && ops->pre_restore |
65 | && ops->restore_cleanup)) { | 65 | && ops->restore_cleanup && ops->leave)) { |
66 | WARN_ON(1); | 66 | WARN_ON(1); |
67 | return; | 67 | return; |
68 | } | 68 | } |
@@ -278,7 +278,7 @@ static int create_image(int platform_mode) | |||
278 | goto Enable_irqs; | 278 | goto Enable_irqs; |
279 | } | 279 | } |
280 | 280 | ||
281 | if (hibernation_test(TEST_CORE) || !pm_check_wakeup_events()) | 281 | if (hibernation_test(TEST_CORE) || pm_wakeup_pending()) |
282 | goto Power_up; | 282 | goto Power_up; |
283 | 283 | ||
284 | in_suspend = 1; | 284 | in_suspend = 1; |
@@ -516,7 +516,7 @@ int hibernation_platform_enter(void) | |||
516 | 516 | ||
517 | local_irq_disable(); | 517 | local_irq_disable(); |
518 | sysdev_suspend(PMSG_HIBERNATE); | 518 | sysdev_suspend(PMSG_HIBERNATE); |
519 | if (!pm_check_wakeup_events()) { | 519 | if (pm_wakeup_pending()) { |
520 | error = -EAGAIN; | 520 | error = -EAGAIN; |
521 | goto Power_up; | 521 | goto Power_up; |
522 | } | 522 | } |
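pm_check_wakeup_events() ("true means it is still safe to proceed") is replaced by pm_wakeup_pending() ("true means a wakeup event has arrived"), so every caller here, and in suspend.c and process.c below, drops the negation. A tiny model of that polarity flip (both functions are simplified stand-ins, not the kernel implementations):

    #include <stdbool.h>
    #include <stdio.h>

    static bool wakeup_event_arrived;   /* toy state for the illustration */

    /* Old-style predicate: "true" means no wakeup, safe to continue. */
    static bool check_wakeup_events(void) { return !wakeup_event_arrived; }

    /* New-style predicate, like pm_wakeup_pending(): "true" means abort. */
    static bool wakeup_pending(void)     { return wakeup_event_arrived; }

    int main(void)
    {
        wakeup_event_arrived = true;

        /* Both guards abort in the same situations; only the polarity differs. */
        if (!check_wakeup_events())
            printf("old test: abort\n");
        if (wakeup_pending())
            printf("new test: abort\n");
        return 0;
    }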
@@ -647,6 +647,7 @@ int hibernate(void) | |||
647 | swsusp_free(); | 647 | swsusp_free(); |
648 | if (!error) | 648 | if (!error) |
649 | power_down(); | 649 | power_down(); |
650 | in_suspend = 0; | ||
650 | pm_restore_gfp_mask(); | 651 | pm_restore_gfp_mask(); |
651 | } else { | 652 | } else { |
652 | pr_debug("PM: Image restored successfully.\n"); | 653 | pr_debug("PM: Image restored successfully.\n"); |
diff --git a/kernel/power/main.c b/kernel/power/main.c index 7b5db6a8561e..701853042c28 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c | |||
@@ -326,7 +326,7 @@ EXPORT_SYMBOL_GPL(pm_wq); | |||
326 | 326 | ||
327 | static int __init pm_start_workqueue(void) | 327 | static int __init pm_start_workqueue(void) |
328 | { | 328 | { |
329 | pm_wq = alloc_workqueue("pm", WQ_FREEZEABLE, 0); | 329 | pm_wq = alloc_workqueue("pm", WQ_FREEZABLE, 0); |
330 | 330 | ||
331 | return pm_wq ? 0 : -ENOMEM; | 331 | return pm_wq ? 0 : -ENOMEM; |
332 | } | 332 | } |
diff --git a/kernel/power/nvs.c b/kernel/power/nvs.c deleted file mode 100644 index 1836db60bbb6..000000000000 --- a/kernel/power/nvs.c +++ /dev/null | |||
@@ -1,136 +0,0 @@ | |||
1 | /* | ||
2 | * linux/kernel/power/hibernate_nvs.c - Routines for handling NVS memory | ||
3 | * | ||
4 | * Copyright (C) 2008,2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc. | ||
5 | * | ||
6 | * This file is released under the GPLv2. | ||
7 | */ | ||
8 | |||
9 | #include <linux/io.h> | ||
10 | #include <linux/kernel.h> | ||
11 | #include <linux/list.h> | ||
12 | #include <linux/mm.h> | ||
13 | #include <linux/slab.h> | ||
14 | #include <linux/suspend.h> | ||
15 | |||
16 | /* | ||
17 | * Platforms, like ACPI, may want us to save some memory used by them during | ||
18 | * suspend and to restore the contents of this memory during the subsequent | ||
19 | * resume. The code below implements a mechanism allowing us to do that. | ||
20 | */ | ||
21 | |||
22 | struct nvs_page { | ||
23 | unsigned long phys_start; | ||
24 | unsigned int size; | ||
25 | void *kaddr; | ||
26 | void *data; | ||
27 | struct list_head node; | ||
28 | }; | ||
29 | |||
30 | static LIST_HEAD(nvs_list); | ||
31 | |||
32 | /** | ||
33 | * suspend_nvs_register - register platform NVS memory region to save | ||
34 | * @start - physical address of the region | ||
35 | * @size - size of the region | ||
36 | * | ||
37 | * The NVS region need not be page-aligned (both ends) and we arrange | ||
38 | * things so that the data from page-aligned addresses in this region will | ||
39 | * be copied into separate RAM pages. | ||
40 | */ | ||
41 | int suspend_nvs_register(unsigned long start, unsigned long size) | ||
42 | { | ||
43 | struct nvs_page *entry, *next; | ||
44 | |||
45 | while (size > 0) { | ||
46 | unsigned int nr_bytes; | ||
47 | |||
48 | entry = kzalloc(sizeof(struct nvs_page), GFP_KERNEL); | ||
49 | if (!entry) | ||
50 | goto Error; | ||
51 | |||
52 | list_add_tail(&entry->node, &nvs_list); | ||
53 | entry->phys_start = start; | ||
54 | nr_bytes = PAGE_SIZE - (start & ~PAGE_MASK); | ||
55 | entry->size = (size < nr_bytes) ? size : nr_bytes; | ||
56 | |||
57 | start += entry->size; | ||
58 | size -= entry->size; | ||
59 | } | ||
60 | return 0; | ||
61 | |||
62 | Error: | ||
63 | list_for_each_entry_safe(entry, next, &nvs_list, node) { | ||
64 | list_del(&entry->node); | ||
65 | kfree(entry); | ||
66 | } | ||
67 | return -ENOMEM; | ||
68 | } | ||
69 | |||
70 | /** | ||
71 | * suspend_nvs_free - free data pages allocated for saving NVS regions | ||
72 | */ | ||
73 | void suspend_nvs_free(void) | ||
74 | { | ||
75 | struct nvs_page *entry; | ||
76 | |||
77 | list_for_each_entry(entry, &nvs_list, node) | ||
78 | if (entry->data) { | ||
79 | free_page((unsigned long)entry->data); | ||
80 | entry->data = NULL; | ||
81 | if (entry->kaddr) { | ||
82 | iounmap(entry->kaddr); | ||
83 | entry->kaddr = NULL; | ||
84 | } | ||
85 | } | ||
86 | } | ||
87 | |||
88 | /** | ||
89 | * suspend_nvs_alloc - allocate memory necessary for saving NVS regions | ||
90 | */ | ||
91 | int suspend_nvs_alloc(void) | ||
92 | { | ||
93 | struct nvs_page *entry; | ||
94 | |||
95 | list_for_each_entry(entry, &nvs_list, node) { | ||
96 | entry->data = (void *)__get_free_page(GFP_KERNEL); | ||
97 | if (!entry->data) { | ||
98 | suspend_nvs_free(); | ||
99 | return -ENOMEM; | ||
100 | } | ||
101 | } | ||
102 | return 0; | ||
103 | } | ||
104 | |||
105 | /** | ||
106 | * suspend_nvs_save - save NVS memory regions | ||
107 | */ | ||
108 | void suspend_nvs_save(void) | ||
109 | { | ||
110 | struct nvs_page *entry; | ||
111 | |||
112 | printk(KERN_INFO "PM: Saving platform NVS memory\n"); | ||
113 | |||
114 | list_for_each_entry(entry, &nvs_list, node) | ||
115 | if (entry->data) { | ||
116 | entry->kaddr = ioremap(entry->phys_start, entry->size); | ||
117 | memcpy(entry->data, entry->kaddr, entry->size); | ||
118 | } | ||
119 | } | ||
120 | |||
121 | /** | ||
122 | * suspend_nvs_restore - restore NVS memory regions | ||
123 | * | ||
124 | * This function is going to be called with interrupts disabled, so it | ||
125 | * cannot iounmap the virtual addresses used to access the NVS region. | ||
126 | */ | ||
127 | void suspend_nvs_restore(void) | ||
128 | { | ||
129 | struct nvs_page *entry; | ||
130 | |||
131 | printk(KERN_INFO "PM: Restoring platform NVS memory\n"); | ||
132 | |||
133 | list_for_each_entry(entry, &nvs_list, node) | ||
134 | if (entry->data) | ||
135 | memcpy(entry->kaddr, entry->data, entry->size); | ||
136 | } | ||
diff --git a/kernel/power/process.c b/kernel/power/process.c index e50b4c1b2a0f..0cf3a27a6c9d 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c | |||
@@ -22,7 +22,7 @@ | |||
22 | */ | 22 | */ |
23 | #define TIMEOUT (20 * HZ) | 23 | #define TIMEOUT (20 * HZ) |
24 | 24 | ||
25 | static inline int freezeable(struct task_struct * p) | 25 | static inline int freezable(struct task_struct * p) |
26 | { | 26 | { |
27 | if ((p == current) || | 27 | if ((p == current) || |
28 | (p->flags & PF_NOFREEZE) || | 28 | (p->flags & PF_NOFREEZE) || |
@@ -53,7 +53,7 @@ static int try_to_freeze_tasks(bool sig_only) | |||
53 | todo = 0; | 53 | todo = 0; |
54 | read_lock(&tasklist_lock); | 54 | read_lock(&tasklist_lock); |
55 | do_each_thread(g, p) { | 55 | do_each_thread(g, p) { |
56 | if (frozen(p) || !freezeable(p)) | 56 | if (frozen(p) || !freezable(p)) |
57 | continue; | 57 | continue; |
58 | 58 | ||
59 | if (!freeze_task(p, sig_only)) | 59 | if (!freeze_task(p, sig_only)) |
@@ -64,6 +64,12 @@ static int try_to_freeze_tasks(bool sig_only) | |||
64 | * perturb a task in TASK_STOPPED or TASK_TRACED. | 64 | * perturb a task in TASK_STOPPED or TASK_TRACED. |
65 | * It is "frozen enough". If the task does wake | 65 | * It is "frozen enough". If the task does wake |
66 | * up, it will immediately call try_to_freeze. | 66 | * up, it will immediately call try_to_freeze. |
67 | * | ||
68 | * Because freeze_task() goes through p's | ||
69 | * scheduler lock after setting TIF_FREEZE, it's | ||
70 | * guaranteed that either we see TASK_RUNNING or | ||
71 | * try_to_stop() after schedule() in ptrace/signal | ||
72 | * stop sees TIF_FREEZE. | ||
67 | */ | 73 | */ |
68 | if (!task_is_stopped_or_traced(p) && | 74 | if (!task_is_stopped_or_traced(p) && |
69 | !freezer_should_skip(p)) | 75 | !freezer_should_skip(p)) |
@@ -79,7 +85,7 @@ static int try_to_freeze_tasks(bool sig_only) | |||
79 | if (!todo || time_after(jiffies, end_time)) | 85 | if (!todo || time_after(jiffies, end_time)) |
80 | break; | 86 | break; |
81 | 87 | ||
82 | if (!pm_check_wakeup_events()) { | 88 | if (pm_wakeup_pending()) { |
83 | wakeup = true; | 89 | wakeup = true; |
84 | break; | 90 | break; |
85 | } | 91 | } |
@@ -161,7 +167,7 @@ static void thaw_tasks(bool nosig_only) | |||
161 | 167 | ||
162 | read_lock(&tasklist_lock); | 168 | read_lock(&tasklist_lock); |
163 | do_each_thread(g, p) { | 169 | do_each_thread(g, p) { |
164 | if (!freezeable(p)) | 170 | if (!freezable(p)) |
165 | continue; | 171 | continue; |
166 | 172 | ||
167 | if (nosig_only && should_send_signal(p)) | 173 | if (nosig_only && should_send_signal(p)) |
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 0dac75ea4456..64db648ff911 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c | |||
@@ -1519,11 +1519,8 @@ static int | |||
1519 | swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm, | 1519 | swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm, |
1520 | unsigned int nr_pages, unsigned int nr_highmem) | 1520 | unsigned int nr_pages, unsigned int nr_highmem) |
1521 | { | 1521 | { |
1522 | int error = 0; | ||
1523 | |||
1524 | if (nr_highmem > 0) { | 1522 | if (nr_highmem > 0) { |
1525 | error = get_highmem_buffer(PG_ANY); | 1523 | if (get_highmem_buffer(PG_ANY)) |
1526 | if (error) | ||
1527 | goto err_out; | 1524 | goto err_out; |
1528 | if (nr_highmem > alloc_highmem) { | 1525 | if (nr_highmem > alloc_highmem) { |
1529 | nr_highmem -= alloc_highmem; | 1526 | nr_highmem -= alloc_highmem; |
@@ -1546,7 +1543,7 @@ swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm, | |||
1546 | 1543 | ||
1547 | err_out: | 1544 | err_out: |
1548 | swsusp_free(); | 1545 | swsusp_free(); |
1549 | return error; | 1546 | return -ENOMEM; |
1550 | } | 1547 | } |
1551 | 1548 | ||
1552 | asmlinkage int swsusp_save(void) | 1549 | asmlinkage int swsusp_save(void) |
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 031d5e3a6197..de6f86bfa303 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c | |||
@@ -31,13 +31,13 @@ const char *const pm_states[PM_SUSPEND_MAX] = { | |||
31 | [PM_SUSPEND_MEM] = "mem", | 31 | [PM_SUSPEND_MEM] = "mem", |
32 | }; | 32 | }; |
33 | 33 | ||
34 | static struct platform_suspend_ops *suspend_ops; | 34 | static const struct platform_suspend_ops *suspend_ops; |
35 | 35 | ||
36 | /** | 36 | /** |
37 | * suspend_set_ops - Set the global suspend method table. | 37 | * suspend_set_ops - Set the global suspend method table. |
38 | * @ops: Pointer to ops structure. | 38 | * @ops: Pointer to ops structure. |
39 | */ | 39 | */ |
40 | void suspend_set_ops(struct platform_suspend_ops *ops) | 40 | void suspend_set_ops(const struct platform_suspend_ops *ops) |
41 | { | 41 | { |
42 | mutex_lock(&pm_mutex); | 42 | mutex_lock(&pm_mutex); |
43 | suspend_ops = ops; | 43 | suspend_ops = ops; |
@@ -164,7 +164,7 @@ static int suspend_enter(suspend_state_t state) | |||
164 | 164 | ||
165 | error = sysdev_suspend(PMSG_SUSPEND); | 165 | error = sysdev_suspend(PMSG_SUSPEND); |
166 | if (!error) { | 166 | if (!error) { |
167 | if (!suspend_test(TEST_CORE) && pm_check_wakeup_events()) { | 167 | if (!(suspend_test(TEST_CORE) || pm_wakeup_pending())) { |
168 | error = suspend_ops->enter(state); | 168 | error = suspend_ops->enter(state); |
169 | events_check_enabled = false; | 169 | events_check_enabled = false; |
170 | } | 170 | } |
diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 8c7e4832b9be..7c97c3a0eee3 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c | |||
@@ -224,7 +224,7 @@ static int swsusp_swap_check(void) | |||
224 | return res; | 224 | return res; |
225 | 225 | ||
226 | root_swap = res; | 226 | root_swap = res; |
227 | res = blkdev_get(hib_resume_bdev, FMODE_WRITE); | 227 | res = blkdev_get(hib_resume_bdev, FMODE_WRITE, NULL); |
228 | if (res) | 228 | if (res) |
229 | return res; | 229 | return res; |
230 | 230 | ||
@@ -888,7 +888,7 @@ out_finish: | |||
888 | /** | 888 | /** |
889 | * swsusp_read - read the hibernation image. | 889 | * swsusp_read - read the hibernation image. |
890 | * @flags_p: flags passed by the "frozen" kernel in the image header should | 890 | * @flags_p: flags passed by the "frozen" kernel in the image header should |
891 | * be written into this memeory location | 891 | * be written into this memory location |
892 | */ | 892 | */ |
893 | 893 | ||
894 | int swsusp_read(unsigned int *flags_p) | 894 | int swsusp_read(unsigned int *flags_p) |
@@ -930,7 +930,8 @@ int swsusp_check(void) | |||
930 | { | 930 | { |
931 | int error; | 931 | int error; |
932 | 932 | ||
933 | hib_resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ); | 933 | hib_resume_bdev = blkdev_get_by_dev(swsusp_resume_device, |
934 | FMODE_READ, NULL); | ||
934 | if (!IS_ERR(hib_resume_bdev)) { | 935 | if (!IS_ERR(hib_resume_bdev)) { |
935 | set_blocksize(hib_resume_bdev, PAGE_SIZE); | 936 | set_blocksize(hib_resume_bdev, PAGE_SIZE); |
936 | clear_page(swsusp_header); | 937 | clear_page(swsusp_header); |
diff --git a/kernel/printk.c b/kernel/printk.c index f64b8997fc76..36231525e22f 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #include <linux/syslog.h> | 39 | #include <linux/syslog.h> |
40 | #include <linux/cpu.h> | 40 | #include <linux/cpu.h> |
41 | #include <linux/notifier.h> | 41 | #include <linux/notifier.h> |
42 | #include <linux/rculist.h> | ||
42 | 43 | ||
43 | #include <asm/uaccess.h> | 44 | #include <asm/uaccess.h> |
44 | 45 | ||
@@ -96,7 +97,7 @@ static int console_locked, console_suspended; | |||
96 | /* | 97 | /* |
97 | * logbuf_lock protects log_buf, log_start, log_end, con_start and logged_chars | 98 | * logbuf_lock protects log_buf, log_start, log_end, con_start and logged_chars |
98 | * It is also used in interesting ways to provide interlocking in | 99 | * It is also used in interesting ways to provide interlocking in |
99 | * release_console_sem(). | 100 | * console_unlock(). |
100 | */ | 101 | */ |
101 | static DEFINE_SPINLOCK(logbuf_lock); | 102 | static DEFINE_SPINLOCK(logbuf_lock); |
102 | 103 | ||
@@ -261,25 +262,47 @@ int dmesg_restrict = 1; | |||
261 | int dmesg_restrict; | 262 | int dmesg_restrict; |
262 | #endif | 263 | #endif |
263 | 264 | ||
265 | static int syslog_action_restricted(int type) | ||
266 | { | ||
267 | if (dmesg_restrict) | ||
268 | return 1; | ||
269 | /* Unless restricted, we allow "read all" and "get buffer size" for everybody */ | ||
270 | return type != SYSLOG_ACTION_READ_ALL && type != SYSLOG_ACTION_SIZE_BUFFER; | ||
271 | } | ||
272 | |||
273 | static int check_syslog_permissions(int type, bool from_file) | ||
274 | { | ||
275 | /* | ||
276 | * If this is from /proc/kmsg and we've already opened it, then we've | ||
277 | * already done the capabilities checks at open time. | ||
278 | */ | ||
279 | if (from_file && type != SYSLOG_ACTION_OPEN) | ||
280 | return 0; | ||
281 | |||
282 | if (syslog_action_restricted(type)) { | ||
283 | if (capable(CAP_SYSLOG)) | ||
284 | return 0; | ||
285 | /* For historical reasons, accept CAP_SYS_ADMIN too, with a warning */ | ||
286 | if (capable(CAP_SYS_ADMIN)) { | ||
287 | WARN_ONCE(1, "Attempt to access syslog with CAP_SYS_ADMIN " | ||
288 | "but no CAP_SYSLOG (deprecated).\n"); | ||
289 | return 0; | ||
290 | } | ||
291 | return -EPERM; | ||
292 | } | ||
293 | return 0; | ||
294 | } | ||
295 | |||
264 | int do_syslog(int type, char __user *buf, int len, bool from_file) | 296 | int do_syslog(int type, char __user *buf, int len, bool from_file) |
265 | { | 297 | { |
266 | unsigned i, j, limit, count; | 298 | unsigned i, j, limit, count; |
267 | int do_clear = 0; | 299 | int do_clear = 0; |
268 | char c; | 300 | char c; |
269 | int error = 0; | 301 | int error; |
270 | 302 | ||
271 | /* | 303 | error = check_syslog_permissions(type, from_file); |
272 | * If this is from /proc/kmsg we only do the capabilities checks | 304 | if (error) |
273 | * at open time. | 305 | goto out; |
274 | */ | ||
275 | if (type == SYSLOG_ACTION_OPEN || !from_file) { | ||
276 | if (dmesg_restrict && !capable(CAP_SYSLOG)) | ||
277 | goto warn; /* switch to return -EPERM after 2.6.39 */ | ||
278 | if ((type != SYSLOG_ACTION_READ_ALL && | ||
279 | type != SYSLOG_ACTION_SIZE_BUFFER) && | ||
280 | !capable(CAP_SYSLOG)) | ||
281 | goto warn; /* switch to return -EPERM after 2.6.39 */ | ||
282 | } | ||
283 | 306 | ||
284 | error = security_syslog(type); | 307 | error = security_syslog(type); |
285 | if (error) | 308 | if (error) |
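The capability checks formerly open-coded at the top of do_syslog() are split into syslog_action_restricted(), which says whether an action is gated at all given dmesg_restrict, and check_syslog_permissions(), which also waves through /proc/kmsg readers that were already checked at open time and accepts CAP_SYS_ADMIN with a deprecation warning. A standalone model of the decision (action numbers follow the SYSLOG_ACTION_* values; capability handling is reduced to one flag):

    #include <stdbool.h>
    #include <stdio.h>

    enum { ACT_OPEN = 1, ACT_READ_ALL = 3, ACT_CLEAR = 5, ACT_SIZE_BUFFER = 10 };

    static bool dmesg_restrict;          /* sysctl stand-in */

    static bool action_restricted(int type)
    {
        if (dmesg_restrict)
            return true;                 /* everything is privileged */
        /* Otherwise only reading the whole buffer / querying its size is open. */
        return type != ACT_READ_ALL && type != ACT_SIZE_BUFFER;
    }

    /* 0 = allowed, -1 = EPERM; cap_syslog stands in for capable(CAP_SYSLOG). */
    static int check_permission(int type, bool from_file, bool cap_syslog)
    {
        if (from_file && type != ACT_OPEN)
            return 0;                    /* /proc/kmsg was checked at open time */
        if (action_restricted(type) && !cap_syslog)
            return -1;
        return 0;
    }

    int main(void)
    {
        printf("%d\n", check_permission(ACT_READ_ALL, false, false)); /* 0: open to all */
        printf("%d\n", check_permission(ACT_CLEAR, false, false));    /* -1: needs CAP_SYSLOG */
        printf("%d\n", check_permission(ACT_CLEAR, true, false));     /* 0: via /proc/kmsg */
        return 0;
    }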
@@ -422,12 +445,6 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) | |||
422 | } | 445 | } |
423 | out: | 446 | out: |
424 | return error; | 447 | return error; |
425 | warn: | ||
426 | /* remove after 2.6.39 */ | ||
427 | if (capable(CAP_SYS_ADMIN)) | ||
428 | WARN_ONCE(1, "Attempt to access syslog with CAP_SYS_ADMIN " | ||
429 | "but no CAP_SYSLOG (deprecated and denied).\n"); | ||
430 | return -EPERM; | ||
431 | } | 448 | } |
432 | 449 | ||
433 | SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) | 450 | SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) |
@@ -500,7 +517,7 @@ static void _call_console_drivers(unsigned start, | |||
500 | /* | 517 | /* |
501 | * Call the console drivers, asking them to write out | 518 | * Call the console drivers, asking them to write out |
502 | * log_buf[start] to log_buf[end - 1]. | 519 | * log_buf[start] to log_buf[end - 1]. |
503 | * The console_sem must be held. | 520 | * The console_lock must be held. |
504 | */ | 521 | */ |
505 | static void call_console_drivers(unsigned start, unsigned end) | 522 | static void call_console_drivers(unsigned start, unsigned end) |
506 | { | 523 | { |
@@ -603,11 +620,11 @@ static int have_callable_console(void) | |||
603 | * | 620 | * |
604 | * This is printk(). It can be called from any context. We want it to work. | 621 | * This is printk(). It can be called from any context. We want it to work. |
605 | * | 622 | * |
606 | * We try to grab the console_sem. If we succeed, it's easy - we log the output and | 623 | * We try to grab the console_lock. If we succeed, it's easy - we log the output and |
607 | * call the console drivers. If we fail to get the semaphore we place the output | 624 | * call the console drivers. If we fail to get the semaphore we place the output |
608 | * into the log buffer and return. The current holder of the console_sem will | 625 | * into the log buffer and return. The current holder of the console_sem will |
609 | * notice the new output in release_console_sem() and will send it to the | 626 | * notice the new output in console_unlock() and will send it to the |
610 | * consoles before releasing the semaphore. | 627 | * consoles before releasing the lock. |
611 | * | 628 | * |
612 | * One effect of this deferred printing is that code which calls printk() and | 629 | * One effect of this deferred printing is that code which calls printk() and |
613 | * then changes console_loglevel may break. This is because console_loglevel | 630 | * then changes console_loglevel may break. This is because console_loglevel |
@@ -658,19 +675,19 @@ static inline int can_use_console(unsigned int cpu) | |||
658 | /* | 675 | /* |
659 | * Try to get console ownership to actually show the kernel | 676 | * Try to get console ownership to actually show the kernel |
660 | * messages from a 'printk'. Return true (and with the | 677 | * messages from a 'printk'. Return true (and with the |
661 | * console_semaphore held, and 'console_locked' set) if it | 678 | * console_lock held, and 'console_locked' set) if it |
662 | * is successful, false otherwise. | 679 | * is successful, false otherwise. |
663 | * | 680 | * |
664 | * This gets called with the 'logbuf_lock' spinlock held and | 681 | * This gets called with the 'logbuf_lock' spinlock held and |
665 | * interrupts disabled. It should return with 'lockbuf_lock' | 682 | * interrupts disabled. It should return with 'lockbuf_lock' |
666 | * released but interrupts still disabled. | 683 | * released but interrupts still disabled. |
667 | */ | 684 | */ |
668 | static int acquire_console_semaphore_for_printk(unsigned int cpu) | 685 | static int console_trylock_for_printk(unsigned int cpu) |
669 | __releases(&logbuf_lock) | 686 | __releases(&logbuf_lock) |
670 | { | 687 | { |
671 | int retval = 0; | 688 | int retval = 0; |
672 | 689 | ||
673 | if (!try_acquire_console_sem()) { | 690 | if (console_trylock()) { |
674 | retval = 1; | 691 | retval = 1; |
675 | 692 | ||
676 | /* | 693 | /* |
@@ -826,12 +843,12 @@ asmlinkage int vprintk(const char *fmt, va_list args) | |||
826 | * actual magic (print out buffers, wake up klogd, | 843 | * actual magic (print out buffers, wake up klogd, |
827 | * etc). | 844 | * etc). |
828 | * | 845 | * |
829 | * The acquire_console_semaphore_for_printk() function | 846 | * The console_trylock_for_printk() function |
830 | * will release 'logbuf_lock' regardless of whether it | 847 | * will release 'logbuf_lock' regardless of whether it |
831 | * actually gets the semaphore or not. | 848 | * actually gets the semaphore or not. |
832 | */ | 849 | */ |
833 | if (acquire_console_semaphore_for_printk(this_cpu)) | 850 | if (console_trylock_for_printk(this_cpu)) |
834 | release_console_sem(); | 851 | console_unlock(); |
835 | 852 | ||
836 | lockdep_on(); | 853 | lockdep_on(); |
837 | out_restore_irqs: | 854 | out_restore_irqs: |
@@ -992,7 +1009,7 @@ void suspend_console(void) | |||
992 | if (!console_suspend_enabled) | 1009 | if (!console_suspend_enabled) |
993 | return; | 1010 | return; |
994 | printk("Suspending console(s) (use no_console_suspend to debug)\n"); | 1011 | printk("Suspending console(s) (use no_console_suspend to debug)\n"); |
995 | acquire_console_sem(); | 1012 | console_lock(); |
996 | console_suspended = 1; | 1013 | console_suspended = 1; |
997 | up(&console_sem); | 1014 | up(&console_sem); |
998 | } | 1015 | } |
@@ -1003,7 +1020,7 @@ void resume_console(void) | |||
1003 | return; | 1020 | return; |
1004 | down(&console_sem); | 1021 | down(&console_sem); |
1005 | console_suspended = 0; | 1022 | console_suspended = 0; |
1006 | release_console_sem(); | 1023 | console_unlock(); |
1007 | } | 1024 | } |
1008 | 1025 | ||
1009 | /** | 1026 | /** |
@@ -1026,21 +1043,21 @@ static int __cpuinit console_cpu_notify(struct notifier_block *self, | |||
1026 | case CPU_DYING: | 1043 | case CPU_DYING: |
1027 | case CPU_DOWN_FAILED: | 1044 | case CPU_DOWN_FAILED: |
1028 | case CPU_UP_CANCELED: | 1045 | case CPU_UP_CANCELED: |
1029 | acquire_console_sem(); | 1046 | console_lock(); |
1030 | release_console_sem(); | 1047 | console_unlock(); |
1031 | } | 1048 | } |
1032 | return NOTIFY_OK; | 1049 | return NOTIFY_OK; |
1033 | } | 1050 | } |
1034 | 1051 | ||
1035 | /** | 1052 | /** |
1036 | * acquire_console_sem - lock the console system for exclusive use. | 1053 | * console_lock - lock the console system for exclusive use. |
1037 | * | 1054 | * |
1038 | * Acquires a semaphore which guarantees that the caller has | 1055 | * Acquires a lock which guarantees that the caller has |
1039 | * exclusive access to the console system and the console_drivers list. | 1056 | * exclusive access to the console system and the console_drivers list. |
1040 | * | 1057 | * |
1041 | * Can sleep, returns nothing. | 1058 | * Can sleep, returns nothing. |
1042 | */ | 1059 | */ |
1043 | void acquire_console_sem(void) | 1060 | void console_lock(void) |
1044 | { | 1061 | { |
1045 | BUG_ON(in_interrupt()); | 1062 | BUG_ON(in_interrupt()); |
1046 | down(&console_sem); | 1063 | down(&console_sem); |
@@ -1049,21 +1066,29 @@ void acquire_console_sem(void) | |||
1049 | console_locked = 1; | 1066 | console_locked = 1; |
1050 | console_may_schedule = 1; | 1067 | console_may_schedule = 1; |
1051 | } | 1068 | } |
1052 | EXPORT_SYMBOL(acquire_console_sem); | 1069 | EXPORT_SYMBOL(console_lock); |
1053 | 1070 | ||
1054 | int try_acquire_console_sem(void) | 1071 | /** |
1072 | * console_trylock - try to lock the console system for exclusive use. | ||
1073 | * | ||
1074 | * Tries to acquire a lock which guarantees that the caller has | ||
1075 | * exclusive access to the console system and the console_drivers list. | ||
1076 | * | ||
1077 | * returns 1 on success, and 0 on failure to acquire the lock. | ||
1078 | */ | ||
1079 | int console_trylock(void) | ||
1055 | { | 1080 | { |
1056 | if (down_trylock(&console_sem)) | 1081 | if (down_trylock(&console_sem)) |
1057 | return -1; | 1082 | return 0; |
1058 | if (console_suspended) { | 1083 | if (console_suspended) { |
1059 | up(&console_sem); | 1084 | up(&console_sem); |
1060 | return -1; | 1085 | return 0; |
1061 | } | 1086 | } |
1062 | console_locked = 1; | 1087 | console_locked = 1; |
1063 | console_may_schedule = 0; | 1088 | console_may_schedule = 0; |
1064 | return 0; | 1089 | return 1; |
1065 | } | 1090 | } |
1066 | EXPORT_SYMBOL(try_acquire_console_sem); | 1091 | EXPORT_SYMBOL(console_trylock); |
1067 | 1092 | ||
1068 | int is_console_locked(void) | 1093 | int is_console_locked(void) |
1069 | { | 1094 | { |
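Besides the rename, console_trylock() adopts the usual trylock convention of 1 on success and 0 on failure, where try_acquire_console_sem() returned 0 and -1. That is why console_trylock_for_printk() above now reads `if (console_trylock())` rather than `if (!try_acquire_console_sem())`. A small illustration of the two conventions (a pthread mutex stands in for console_sem):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t console_mutex = PTHREAD_MUTEX_INITIALIZER;

    /* Old convention: 0 on success, -1 on failure. */
    static int old_try_acquire(void)
    {
        return pthread_mutex_trylock(&console_mutex) == 0 ? 0 : -1;
    }

    /* New convention, as in console_trylock(): 1 on success, 0 on failure. */
    static int new_trylock(void)
    {
        return pthread_mutex_trylock(&console_mutex) == 0;
    }

    int main(void)
    {
        if (new_trylock()) {                 /* callers now test the natural way */
            printf("got the console lock\n");
            pthread_mutex_unlock(&console_mutex);
        }
        if (old_try_acquire() == 0) {        /* old callers compared against 0 */
            printf("got it the old way too\n");
            pthread_mutex_unlock(&console_mutex);
        }
        return 0;
    }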
@@ -1094,20 +1119,20 @@ void wake_up_klogd(void) | |||
1094 | } | 1119 | } |
1095 | 1120 | ||
1096 | /** | 1121 | /** |
1097 | * release_console_sem - unlock the console system | 1122 | * console_unlock - unlock the console system |
1098 | * | 1123 | * |
1099 | * Releases the semaphore which the caller holds on the console system | 1124 | * Releases the console_lock which the caller holds on the console system |
1100 | * and the console driver list. | 1125 | * and the console driver list. |
1101 | * | 1126 | * |
1102 | * While the semaphore was held, console output may have been buffered | 1127 | * While the console_lock was held, console output may have been buffered |
1103 | * by printk(). If this is the case, release_console_sem() emits | 1128 | * by printk(). If this is the case, console_unlock(); emits |
1104 | * the output prior to releasing the semaphore. | 1129 | * the output prior to releasing the lock. |
1105 | * | 1130 | * |
1106 | * If there is output waiting for klogd, we wake it up. | 1131 | * If there is output waiting for klogd, we wake it up. |
1107 | * | 1132 | * |
1108 | * release_console_sem() may be called from any context. | 1133 | * console_unlock() may be called from any context. |
1109 | */ | 1134 | */ |
1110 | void release_console_sem(void) | 1135 | void console_unlock(void) |
1111 | { | 1136 | { |
1112 | unsigned long flags; | 1137 | unsigned long flags; |
1113 | unsigned _con_start, _log_end; | 1138 | unsigned _con_start, _log_end; |
@@ -1140,7 +1165,7 @@ void release_console_sem(void) | |||
1140 | if (wake_klogd) | 1165 | if (wake_klogd) |
1141 | wake_up_klogd(); | 1166 | wake_up_klogd(); |
1142 | } | 1167 | } |
1143 | EXPORT_SYMBOL(release_console_sem); | 1168 | EXPORT_SYMBOL(console_unlock); |
1144 | 1169 | ||
1145 | /** | 1170 | /** |
1146 | * console_conditional_schedule - yield the CPU if required | 1171 | * console_conditional_schedule - yield the CPU if required |
@@ -1149,7 +1174,7 @@ EXPORT_SYMBOL(release_console_sem); | |||
1149 | * if this CPU should yield the CPU to another task, do | 1174 | * if this CPU should yield the CPU to another task, do |
1150 | * so here. | 1175 | * so here. |
1151 | * | 1176 | * |
1152 | * Must be called within acquire_console_sem(). | 1177 | * Must be called within console_lock(). |
1153 | */ | 1178 | */ |
1154 | void __sched console_conditional_schedule(void) | 1179 | void __sched console_conditional_schedule(void) |
1155 | { | 1180 | { |
@@ -1170,14 +1195,14 @@ void console_unblank(void) | |||
1170 | if (down_trylock(&console_sem) != 0) | 1195 | if (down_trylock(&console_sem) != 0) |
1171 | return; | 1196 | return; |
1172 | } else | 1197 | } else |
1173 | acquire_console_sem(); | 1198 | console_lock(); |
1174 | 1199 | ||
1175 | console_locked = 1; | 1200 | console_locked = 1; |
1176 | console_may_schedule = 0; | 1201 | console_may_schedule = 0; |
1177 | for_each_console(c) | 1202 | for_each_console(c) |
1178 | if ((c->flags & CON_ENABLED) && c->unblank) | 1203 | if ((c->flags & CON_ENABLED) && c->unblank) |
1179 | c->unblank(); | 1204 | c->unblank(); |
1180 | release_console_sem(); | 1205 | console_unlock(); |
1181 | } | 1206 | } |
1182 | 1207 | ||
1183 | /* | 1208 | /* |
@@ -1188,7 +1213,7 @@ struct tty_driver *console_device(int *index) | |||
1188 | struct console *c; | 1213 | struct console *c; |
1189 | struct tty_driver *driver = NULL; | 1214 | struct tty_driver *driver = NULL; |
1190 | 1215 | ||
1191 | acquire_console_sem(); | 1216 | console_lock(); |
1192 | for_each_console(c) { | 1217 | for_each_console(c) { |
1193 | if (!c->device) | 1218 | if (!c->device) |
1194 | continue; | 1219 | continue; |
@@ -1196,7 +1221,7 @@ struct tty_driver *console_device(int *index) | |||
1196 | if (driver) | 1221 | if (driver) |
1197 | break; | 1222 | break; |
1198 | } | 1223 | } |
1199 | release_console_sem(); | 1224 | console_unlock(); |
1200 | return driver; | 1225 | return driver; |
1201 | } | 1226 | } |
1202 | 1227 | ||
@@ -1207,17 +1232,17 @@ struct tty_driver *console_device(int *index) | |||
1207 | */ | 1232 | */ |
1208 | void console_stop(struct console *console) | 1233 | void console_stop(struct console *console) |
1209 | { | 1234 | { |
1210 | acquire_console_sem(); | 1235 | console_lock(); |
1211 | console->flags &= ~CON_ENABLED; | 1236 | console->flags &= ~CON_ENABLED; |
1212 | release_console_sem(); | 1237 | console_unlock(); |
1213 | } | 1238 | } |
1214 | EXPORT_SYMBOL(console_stop); | 1239 | EXPORT_SYMBOL(console_stop); |
1215 | 1240 | ||
1216 | void console_start(struct console *console) | 1241 | void console_start(struct console *console) |
1217 | { | 1242 | { |
1218 | acquire_console_sem(); | 1243 | console_lock(); |
1219 | console->flags |= CON_ENABLED; | 1244 | console->flags |= CON_ENABLED; |
1220 | release_console_sem(); | 1245 | console_unlock(); |
1221 | } | 1246 | } |
1222 | EXPORT_SYMBOL(console_start); | 1247 | EXPORT_SYMBOL(console_start); |
1223 | 1248 | ||
@@ -1339,7 +1364,7 @@ void register_console(struct console *newcon) | |||
1339 | * Put this console in the list - keep the | 1364 | * Put this console in the list - keep the |
1340 | * preferred driver at the head of the list. | 1365 | * preferred driver at the head of the list. |
1341 | */ | 1366 | */ |
1342 | acquire_console_sem(); | 1367 | console_lock(); |
1343 | if ((newcon->flags & CON_CONSDEV) || console_drivers == NULL) { | 1368 | if ((newcon->flags & CON_CONSDEV) || console_drivers == NULL) { |
1344 | newcon->next = console_drivers; | 1369 | newcon->next = console_drivers; |
1345 | console_drivers = newcon; | 1370 | console_drivers = newcon; |
@@ -1351,14 +1376,14 @@ void register_console(struct console *newcon) | |||
1351 | } | 1376 | } |
1352 | if (newcon->flags & CON_PRINTBUFFER) { | 1377 | if (newcon->flags & CON_PRINTBUFFER) { |
1353 | /* | 1378 | /* |
1354 | * release_console_sem() will print out the buffered messages | 1379 | * console_unlock() will print out the buffered messages |
1355 | * for us. | 1380 | * for us. |
1356 | */ | 1381 | */ |
1357 | spin_lock_irqsave(&logbuf_lock, flags); | 1382 | spin_lock_irqsave(&logbuf_lock, flags); |
1358 | con_start = log_start; | 1383 | con_start = log_start; |
1359 | spin_unlock_irqrestore(&logbuf_lock, flags); | 1384 | spin_unlock_irqrestore(&logbuf_lock, flags); |
1360 | } | 1385 | } |
1361 | release_console_sem(); | 1386 | console_unlock(); |
1362 | console_sysfs_notify(); | 1387 | console_sysfs_notify(); |
1363 | 1388 | ||
1364 | /* | 1389 | /* |
@@ -1395,7 +1420,7 @@ int unregister_console(struct console *console) | |||
1395 | return braille_unregister_console(console); | 1420 | return braille_unregister_console(console); |
1396 | #endif | 1421 | #endif |
1397 | 1422 | ||
1398 | acquire_console_sem(); | 1423 | console_lock(); |
1399 | if (console_drivers == console) { | 1424 | if (console_drivers == console) { |
1400 | console_drivers=console->next; | 1425 | console_drivers=console->next; |
1401 | res = 0; | 1426 | res = 0; |
@@ -1417,7 +1442,7 @@ int unregister_console(struct console *console) | |||
1417 | if (console_drivers != NULL && console->flags & CON_CONSDEV) | 1442 | if (console_drivers != NULL && console->flags & CON_CONSDEV) |
1418 | console_drivers->flags |= CON_CONSDEV; | 1443 | console_drivers->flags |= CON_CONSDEV; |
1419 | 1444 | ||
1420 | release_console_sem(); | 1445 | console_unlock(); |
1421 | console_sysfs_notify(); | 1446 | console_sysfs_notify(); |
1422 | return res; | 1447 | return res; |
1423 | } | 1448 | } |
@@ -1502,7 +1527,7 @@ int kmsg_dump_register(struct kmsg_dumper *dumper) | |||
1502 | /* Don't allow registering multiple times */ | 1527 | /* Don't allow registering multiple times */ |
1503 | if (!dumper->registered) { | 1528 | if (!dumper->registered) { |
1504 | dumper->registered = 1; | 1529 | dumper->registered = 1; |
1505 | list_add_tail(&dumper->list, &dump_list); | 1530 | list_add_tail_rcu(&dumper->list, &dump_list); |
1506 | err = 0; | 1531 | err = 0; |
1507 | } | 1532 | } |
1508 | spin_unlock_irqrestore(&dump_list_lock, flags); | 1533 | spin_unlock_irqrestore(&dump_list_lock, flags); |
@@ -1526,29 +1551,16 @@ int kmsg_dump_unregister(struct kmsg_dumper *dumper) | |||
1526 | spin_lock_irqsave(&dump_list_lock, flags); | 1551 | spin_lock_irqsave(&dump_list_lock, flags); |
1527 | if (dumper->registered) { | 1552 | if (dumper->registered) { |
1528 | dumper->registered = 0; | 1553 | dumper->registered = 0; |
1529 | list_del(&dumper->list); | 1554 | list_del_rcu(&dumper->list); |
1530 | err = 0; | 1555 | err = 0; |
1531 | } | 1556 | } |
1532 | spin_unlock_irqrestore(&dump_list_lock, flags); | 1557 | spin_unlock_irqrestore(&dump_list_lock, flags); |
1558 | synchronize_rcu(); | ||
1533 | 1559 | ||
1534 | return err; | 1560 | return err; |
1535 | } | 1561 | } |
1536 | EXPORT_SYMBOL_GPL(kmsg_dump_unregister); | 1562 | EXPORT_SYMBOL_GPL(kmsg_dump_unregister); |
1537 | 1563 | ||
1538 | static const char * const kmsg_reasons[] = { | ||
1539 | [KMSG_DUMP_OOPS] = "oops", | ||
1540 | [KMSG_DUMP_PANIC] = "panic", | ||
1541 | [KMSG_DUMP_KEXEC] = "kexec", | ||
1542 | }; | ||
1543 | |||
1544 | static const char *kmsg_to_str(enum kmsg_dump_reason reason) | ||
1545 | { | ||
1546 | if (reason >= ARRAY_SIZE(kmsg_reasons) || reason < 0) | ||
1547 | return "unknown"; | ||
1548 | |||
1549 | return kmsg_reasons[reason]; | ||
1550 | } | ||
1551 | |||
1552 | /** | 1564 | /** |
1553 | * kmsg_dump - dump kernel log to kernel message dumpers. | 1565 | * kmsg_dump - dump kernel log to kernel message dumpers. |
1554 | * @reason: the reason (oops, panic etc) for dumping | 1566 | * @reason: the reason (oops, panic etc) for dumping |
@@ -1587,13 +1599,9 @@ void kmsg_dump(enum kmsg_dump_reason reason) | |||
1587 | l2 = chars; | 1599 | l2 = chars; |
1588 | } | 1600 | } |
1589 | 1601 | ||
1590 | if (!spin_trylock_irqsave(&dump_list_lock, flags)) { | 1602 | rcu_read_lock(); |
1591 | printk(KERN_ERR "dump_kmsg: dump list lock is held during %s, skipping dump\n", | 1603 | list_for_each_entry_rcu(dumper, &dump_list, list) |
1592 | kmsg_to_str(reason)); | ||
1593 | return; | ||
1594 | } | ||
1595 | list_for_each_entry(dumper, &dump_list, list) | ||
1596 | dumper->dump(dumper, reason, s1, l1, s2, l2); | 1604 | dumper->dump(dumper, reason, s1, l1, s2, l2); |
1597 | spin_unlock_irqrestore(&dump_list_lock, flags); | 1605 | rcu_read_unlock(); |
1598 | } | 1606 | } |
1599 | #endif | 1607 | #endif |
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 99bbaa3e5b0d..e2302e40b360 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
@@ -163,7 +163,7 @@ bool ptrace_may_access(struct task_struct *task, unsigned int mode) | |||
163 | return !err; | 163 | return !err; |
164 | } | 164 | } |
165 | 165 | ||
166 | int ptrace_attach(struct task_struct *task) | 166 | static int ptrace_attach(struct task_struct *task) |
167 | { | 167 | { |
168 | int retval; | 168 | int retval; |
169 | 169 | ||
@@ -219,7 +219,7 @@ out: | |||
219 | * Performs checks and sets PT_PTRACED. | 219 | * Performs checks and sets PT_PTRACED. |
220 | * Should be used by all ptrace implementations for PTRACE_TRACEME. | 220 | * Should be used by all ptrace implementations for PTRACE_TRACEME. |
221 | */ | 221 | */ |
222 | int ptrace_traceme(void) | 222 | static int ptrace_traceme(void) |
223 | { | 223 | { |
224 | int ret = -EPERM; | 224 | int ret = -EPERM; |
225 | 225 | ||
@@ -293,7 +293,7 @@ static bool __ptrace_detach(struct task_struct *tracer, struct task_struct *p) | |||
293 | return false; | 293 | return false; |
294 | } | 294 | } |
295 | 295 | ||
296 | int ptrace_detach(struct task_struct *child, unsigned int data) | 296 | static int ptrace_detach(struct task_struct *child, unsigned int data) |
297 | { | 297 | { |
298 | bool dead = false; | 298 | bool dead = false; |
299 | 299 | ||
@@ -313,7 +313,7 @@ int ptrace_detach(struct task_struct *child, unsigned int data) | |||
313 | child->exit_code = data; | 313 | child->exit_code = data; |
314 | dead = __ptrace_detach(current, child); | 314 | dead = __ptrace_detach(current, child); |
315 | if (!child->exit_state) | 315 | if (!child->exit_state) |
316 | wake_up_process(child); | 316 | wake_up_state(child, TASK_TRACED | TASK_STOPPED); |
317 | } | 317 | } |
318 | write_unlock_irq(&tasklist_lock); | 318 | write_unlock_irq(&tasklist_lock); |
319 | 319 | ||
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 034493724749..0c343b9a46d5 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c | |||
@@ -189,7 +189,8 @@ static int rcu_kthread(void *arg) | |||
189 | unsigned long flags; | 189 | unsigned long flags; |
190 | 190 | ||
191 | for (;;) { | 191 | for (;;) { |
192 | wait_event(rcu_kthread_wq, have_rcu_kthread_work != 0); | 192 | wait_event_interruptible(rcu_kthread_wq, |
193 | have_rcu_kthread_work != 0); | ||
193 | morework = rcu_boost(); | 194 | morework = rcu_boost(); |
194 | local_irq_save(flags); | 195 | local_irq_save(flags); |
195 | work = have_rcu_kthread_work; | 196 | work = have_rcu_kthread_work; |
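The rcutiny.c hunk switches the kthread from wait_event() to wait_event_interruptible(); the usual motivation for this idiom is that a kthread parked in uninterruptible sleep is counted in the load average even though it is idle. A generic sketch of the pattern, using only the standard wait/kthread APIs (my_wq, my_work_pending and my_kthread are illustrative):

#include <linux/kthread.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(my_wq);
static int my_work_pending;     /* set by a producer, which then calls wake_up(&my_wq) */

static int my_kthread(void *arg)
{
        for (;;) {
                /* Interruptible: the idle thread does not sit in D state. */
                wait_event_interruptible(my_wq,
                                         my_work_pending || kthread_should_stop());
                if (kthread_should_stop())
                        break;
                my_work_pending = 0;
                /* ... process the pending work ... */
        }
        return 0;
}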
diff --git a/kernel/sched.c b/kernel/sched.c index a0eb0941fa84..18d38e4ec7ba 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -553,9 +553,6 @@ struct rq { | |||
553 | /* try_to_wake_up() stats */ | 553 | /* try_to_wake_up() stats */ |
554 | unsigned int ttwu_count; | 554 | unsigned int ttwu_count; |
555 | unsigned int ttwu_local; | 555 | unsigned int ttwu_local; |
556 | |||
557 | /* BKL stats */ | ||
558 | unsigned int bkl_count; | ||
559 | #endif | 556 | #endif |
560 | }; | 557 | }; |
561 | 558 | ||
@@ -609,6 +606,9 @@ static inline struct task_group *task_group(struct task_struct *p) | |||
609 | struct task_group *tg; | 606 | struct task_group *tg; |
610 | struct cgroup_subsys_state *css; | 607 | struct cgroup_subsys_state *css; |
611 | 608 | ||
609 | if (p->flags & PF_EXITING) | ||
610 | return &root_task_group; | ||
611 | |||
612 | css = task_subsys_state_check(p, cpu_cgroup_subsys_id, | 612 | css = task_subsys_state_check(p, cpu_cgroup_subsys_id, |
613 | lockdep_is_held(&task_rq(p)->lock)); | 613 | lockdep_is_held(&task_rq(p)->lock)); |
614 | tg = container_of(css, struct task_group, css); | 614 | tg = container_of(css, struct task_group, css); |
@@ -2505,7 +2505,7 @@ out: | |||
2505 | * try_to_wake_up_local - try to wake up a local task with rq lock held | 2505 | * try_to_wake_up_local - try to wake up a local task with rq lock held |
2506 | * @p: the thread to be awakened | 2506 | * @p: the thread to be awakened |
2507 | * | 2507 | * |
2508 | * Put @p on the run-queue if it's not alredy there. The caller must | 2508 | * Put @p on the run-queue if it's not already there. The caller must |
2509 | * ensure that this_rq() is locked, @p is bound to this_rq() and not | 2509 | * ensure that this_rq() is locked, @p is bound to this_rq() and not |
2510 | * the current task. this_rq() stays locked over invocation. | 2510 | * the current task. this_rq() stays locked over invocation. |
2511 | */ | 2511 | */ |
@@ -3887,7 +3887,7 @@ static inline void schedule_debug(struct task_struct *prev) | |||
3887 | schedstat_inc(this_rq(), sched_count); | 3887 | schedstat_inc(this_rq(), sched_count); |
3888 | #ifdef CONFIG_SCHEDSTATS | 3888 | #ifdef CONFIG_SCHEDSTATS |
3889 | if (unlikely(prev->lock_depth >= 0)) { | 3889 | if (unlikely(prev->lock_depth >= 0)) { |
3890 | schedstat_inc(this_rq(), bkl_count); | 3890 | schedstat_inc(this_rq(), rq_sched_info.bkl_count); |
3891 | schedstat_inc(prev, sched_info.bkl_count); | 3891 | schedstat_inc(prev, sched_info.bkl_count); |
3892 | } | 3892 | } |
3893 | #endif | 3893 | #endif |
@@ -4871,7 +4871,8 @@ recheck: | |||
4871 | * assigned. | 4871 | * assigned. |
4872 | */ | 4872 | */ |
4873 | if (rt_bandwidth_enabled() && rt_policy(policy) && | 4873 | if (rt_bandwidth_enabled() && rt_policy(policy) && |
4874 | task_group(p)->rt_bandwidth.rt_runtime == 0) { | 4874 | task_group(p)->rt_bandwidth.rt_runtime == 0 && |
4875 | !task_group_is_autogroup(task_group(p))) { | ||
4875 | __task_rq_unlock(rq); | 4876 | __task_rq_unlock(rq); |
4876 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); | 4877 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
4877 | return -EPERM; | 4878 | return -EPERM; |
@@ -8882,6 +8883,20 @@ cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | |||
8882 | } | 8883 | } |
8883 | } | 8884 | } |
8884 | 8885 | ||
8886 | static void | ||
8887 | cpu_cgroup_exit(struct cgroup_subsys *ss, struct task_struct *task) | ||
8888 | { | ||
8889 | /* | ||
8890 | * cgroup_exit() is called in the copy_process() failure path. | ||
8891 | * Ignore this case since the task hasn't run yet, this avoids | ||
8892 | * trying to poke a half-freed task state from generic code. | ||
8893 | */ | ||
8894 | if (!(task->flags & PF_EXITING)) | ||
8895 | return; | ||
8896 | |||
8897 | sched_move_task(task); | ||
8898 | } | ||
8899 | |||
8885 | #ifdef CONFIG_FAIR_GROUP_SCHED | 8900 | #ifdef CONFIG_FAIR_GROUP_SCHED |
8886 | static int cpu_shares_write_u64(struct cgroup *cgrp, struct cftype *cftype, | 8901 | static int cpu_shares_write_u64(struct cgroup *cgrp, struct cftype *cftype, |
8887 | u64 shareval) | 8902 | u64 shareval) |
@@ -8954,6 +8969,7 @@ struct cgroup_subsys cpu_cgroup_subsys = { | |||
8954 | .destroy = cpu_cgroup_destroy, | 8969 | .destroy = cpu_cgroup_destroy, |
8955 | .can_attach = cpu_cgroup_can_attach, | 8970 | .can_attach = cpu_cgroup_can_attach, |
8956 | .attach = cpu_cgroup_attach, | 8971 | .attach = cpu_cgroup_attach, |
8972 | .exit = cpu_cgroup_exit, | ||
8957 | .populate = cpu_cgroup_populate, | 8973 | .populate = cpu_cgroup_populate, |
8958 | .subsys_id = cpu_cgroup_subsys_id, | 8974 | .subsys_id = cpu_cgroup_subsys_id, |
8959 | .early_init = 1, | 8975 | .early_init = 1, |
diff --git a/kernel/sched_autogroup.c b/kernel/sched_autogroup.c index 32a723b8f84c..9fb656283157 100644 --- a/kernel/sched_autogroup.c +++ b/kernel/sched_autogroup.c | |||
@@ -27,6 +27,11 @@ static inline void autogroup_destroy(struct kref *kref) | |||
27 | { | 27 | { |
28 | struct autogroup *ag = container_of(kref, struct autogroup, kref); | 28 | struct autogroup *ag = container_of(kref, struct autogroup, kref); |
29 | 29 | ||
30 | #ifdef CONFIG_RT_GROUP_SCHED | ||
31 | /* We've redirected RT tasks to the root task group... */ | ||
32 | ag->tg->rt_se = NULL; | ||
33 | ag->tg->rt_rq = NULL; | ||
34 | #endif | ||
30 | sched_destroy_group(ag->tg); | 35 | sched_destroy_group(ag->tg); |
31 | } | 36 | } |
32 | 37 | ||
@@ -55,6 +60,10 @@ static inline struct autogroup *autogroup_task_get(struct task_struct *p) | |||
55 | return ag; | 60 | return ag; |
56 | } | 61 | } |
57 | 62 | ||
63 | #ifdef CONFIG_RT_GROUP_SCHED | ||
64 | static void free_rt_sched_group(struct task_group *tg); | ||
65 | #endif | ||
66 | |||
58 | static inline struct autogroup *autogroup_create(void) | 67 | static inline struct autogroup *autogroup_create(void) |
59 | { | 68 | { |
60 | struct autogroup *ag = kzalloc(sizeof(*ag), GFP_KERNEL); | 69 | struct autogroup *ag = kzalloc(sizeof(*ag), GFP_KERNEL); |
@@ -72,6 +81,19 @@ static inline struct autogroup *autogroup_create(void) | |||
72 | init_rwsem(&ag->lock); | 81 | init_rwsem(&ag->lock); |
73 | ag->id = atomic_inc_return(&autogroup_seq_nr); | 82 | ag->id = atomic_inc_return(&autogroup_seq_nr); |
74 | ag->tg = tg; | 83 | ag->tg = tg; |
84 | #ifdef CONFIG_RT_GROUP_SCHED | ||
85 | /* | ||
86 | * Autogroup RT tasks are redirected to the root task group | ||
87 | * so we don't have to move tasks around upon policy change, | ||
88 | * or flail around trying to allocate bandwidth on the fly. | ||
89 | * A bandwidth exception in __sched_setscheduler() allows | ||
90 | * the policy change to proceed. Thereafter, task_group() | ||
91 | * returns &root_task_group, so zero bandwidth is required. | ||
92 | */ | ||
93 | free_rt_sched_group(tg); | ||
94 | tg->rt_se = root_task_group.rt_se; | ||
95 | tg->rt_rq = root_task_group.rt_rq; | ||
96 | #endif | ||
75 | tg->autogroup = ag; | 97 | tg->autogroup = ag; |
76 | 98 | ||
77 | return ag; | 99 | return ag; |
@@ -106,6 +128,11 @@ task_wants_autogroup(struct task_struct *p, struct task_group *tg) | |||
106 | return true; | 128 | return true; |
107 | } | 129 | } |
108 | 130 | ||
131 | static inline bool task_group_is_autogroup(struct task_group *tg) | ||
132 | { | ||
133 | return tg != &root_task_group && tg->autogroup; | ||
134 | } | ||
135 | |||
109 | static inline struct task_group * | 136 | static inline struct task_group * |
110 | autogroup_task_group(struct task_struct *p, struct task_group *tg) | 137 | autogroup_task_group(struct task_struct *p, struct task_group *tg) |
111 | { | 138 | { |
@@ -231,6 +258,11 @@ void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m) | |||
231 | #ifdef CONFIG_SCHED_DEBUG | 258 | #ifdef CONFIG_SCHED_DEBUG |
232 | static inline int autogroup_path(struct task_group *tg, char *buf, int buflen) | 259 | static inline int autogroup_path(struct task_group *tg, char *buf, int buflen) |
233 | { | 260 | { |
261 | int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled); | ||
262 | |||
263 | if (!enabled || !tg->autogroup) | ||
264 | return 0; | ||
265 | |||
234 | return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id); | 266 | return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id); |
235 | } | 267 | } |
236 | #endif /* CONFIG_SCHED_DEBUG */ | 268 | #endif /* CONFIG_SCHED_DEBUG */ |
diff --git a/kernel/sched_autogroup.h b/kernel/sched_autogroup.h index 5358e241cb20..7b859ffe5dad 100644 --- a/kernel/sched_autogroup.h +++ b/kernel/sched_autogroup.h | |||
@@ -15,6 +15,10 @@ autogroup_task_group(struct task_struct *p, struct task_group *tg); | |||
15 | 15 | ||
16 | static inline void autogroup_init(struct task_struct *init_task) { } | 16 | static inline void autogroup_init(struct task_struct *init_task) { } |
17 | static inline void autogroup_free(struct task_group *tg) { } | 17 | static inline void autogroup_free(struct task_group *tg) { } |
18 | static inline bool task_group_is_autogroup(struct task_group *tg) | ||
19 | { | ||
20 | return 0; | ||
21 | } | ||
18 | 22 | ||
19 | static inline struct task_group * | 23 | static inline struct task_group * |
20 | autogroup_task_group(struct task_struct *p, struct task_group *tg) | 24 | autogroup_task_group(struct task_struct *p, struct task_group *tg) |
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 1dfae3d014b5..eb6cb8edd075 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c | |||
@@ -16,6 +16,8 @@ | |||
16 | #include <linux/kallsyms.h> | 16 | #include <linux/kallsyms.h> |
17 | #include <linux/utsname.h> | 17 | #include <linux/utsname.h> |
18 | 18 | ||
19 | static DEFINE_SPINLOCK(sched_debug_lock); | ||
20 | |||
19 | /* | 21 | /* |
20 | * This allows printing both to /proc/sched_debug and | 22 | * This allows printing both to /proc/sched_debug and |
21 | * to the console | 23 | * to the console |
@@ -86,6 +88,26 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group | |||
86 | } | 88 | } |
87 | #endif | 89 | #endif |
88 | 90 | ||
91 | #ifdef CONFIG_CGROUP_SCHED | ||
92 | static char group_path[PATH_MAX]; | ||
93 | |||
94 | static char *task_group_path(struct task_group *tg) | ||
95 | { | ||
96 | if (autogroup_path(tg, group_path, PATH_MAX)) | ||
97 | return group_path; | ||
98 | |||
99 | /* | ||
100 | * May be NULL if the underlying cgroup isn't fully-created yet | ||
101 | */ | ||
102 | if (!tg->css.cgroup) { | ||
103 | group_path[0] = '\0'; | ||
104 | return group_path; | ||
105 | } | ||
106 | cgroup_path(tg->css.cgroup, group_path, PATH_MAX); | ||
107 | return group_path; | ||
108 | } | ||
109 | #endif | ||
110 | |||
89 | static void | 111 | static void |
90 | print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) | 112 | print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) |
91 | { | 113 | { |
@@ -108,6 +130,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) | |||
108 | SEQ_printf(m, "%15Ld %15Ld %15Ld.%06ld %15Ld.%06ld %15Ld.%06ld", | 130 | SEQ_printf(m, "%15Ld %15Ld %15Ld.%06ld %15Ld.%06ld %15Ld.%06ld", |
109 | 0LL, 0LL, 0LL, 0L, 0LL, 0L, 0LL, 0L); | 131 | 0LL, 0LL, 0LL, 0L, 0LL, 0L, 0LL, 0L); |
110 | #endif | 132 | #endif |
133 | #ifdef CONFIG_CGROUP_SCHED | ||
134 | SEQ_printf(m, " %s", task_group_path(task_group(p))); | ||
135 | #endif | ||
111 | 136 | ||
112 | SEQ_printf(m, "\n"); | 137 | SEQ_printf(m, "\n"); |
113 | } | 138 | } |
@@ -144,7 +169,11 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) | |||
144 | struct sched_entity *last; | 169 | struct sched_entity *last; |
145 | unsigned long flags; | 170 | unsigned long flags; |
146 | 171 | ||
172 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
173 | SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, task_group_path(cfs_rq->tg)); | ||
174 | #else | ||
147 | SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu); | 175 | SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu); |
176 | #endif | ||
148 | SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock", | 177 | SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock", |
149 | SPLIT_NS(cfs_rq->exec_clock)); | 178 | SPLIT_NS(cfs_rq->exec_clock)); |
150 | 179 | ||
@@ -191,7 +220,11 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) | |||
191 | 220 | ||
192 | void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) | 221 | void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) |
193 | { | 222 | { |
223 | #ifdef CONFIG_RT_GROUP_SCHED | ||
224 | SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, task_group_path(rt_rq->tg)); | ||
225 | #else | ||
194 | SEQ_printf(m, "\nrt_rq[%d]:\n", cpu); | 226 | SEQ_printf(m, "\nrt_rq[%d]:\n", cpu); |
227 | #endif | ||
195 | 228 | ||
196 | #define P(x) \ | 229 | #define P(x) \ |
197 | SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rt_rq->x)) | 230 | SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rt_rq->x)) |
@@ -212,6 +245,7 @@ extern __read_mostly int sched_clock_running; | |||
212 | static void print_cpu(struct seq_file *m, int cpu) | 245 | static void print_cpu(struct seq_file *m, int cpu) |
213 | { | 246 | { |
214 | struct rq *rq = cpu_rq(cpu); | 247 | struct rq *rq = cpu_rq(cpu); |
248 | unsigned long flags; | ||
215 | 249 | ||
216 | #ifdef CONFIG_X86 | 250 | #ifdef CONFIG_X86 |
217 | { | 251 | { |
@@ -262,14 +296,20 @@ static void print_cpu(struct seq_file *m, int cpu) | |||
262 | P(ttwu_count); | 296 | P(ttwu_count); |
263 | P(ttwu_local); | 297 | P(ttwu_local); |
264 | 298 | ||
265 | P(bkl_count); | 299 | SEQ_printf(m, " .%-30s: %d\n", "bkl_count", |
300 | rq->rq_sched_info.bkl_count); | ||
266 | 301 | ||
267 | #undef P | 302 | #undef P |
303 | #undef P64 | ||
268 | #endif | 304 | #endif |
305 | spin_lock_irqsave(&sched_debug_lock, flags); | ||
269 | print_cfs_stats(m, cpu); | 306 | print_cfs_stats(m, cpu); |
270 | print_rt_stats(m, cpu); | 307 | print_rt_stats(m, cpu); |
271 | 308 | ||
309 | rcu_read_lock(); | ||
272 | print_rq(m, rq, cpu); | 310 | print_rq(m, rq, cpu); |
311 | rcu_read_unlock(); | ||
312 | spin_unlock_irqrestore(&sched_debug_lock, flags); | ||
273 | } | 313 | } |
274 | 314 | ||
275 | static const char *sched_tunable_scaling_names[] = { | 315 | static const char *sched_tunable_scaling_names[] = { |
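The sched_debug.c hunks introduce a shared static group_path[PATH_MAX] scratch buffer for task_group_path() and add sched_debug_lock so concurrent readers of /proc/sched_debug cannot scribble over each other's path. A small userspace analogue of that pattern, assuming pthreads; all names are illustrative:

#include <pthread.h>
#include <stdio.h>
#include <limits.h>

static pthread_mutex_t debug_lock = PTHREAD_MUTEX_INITIALIZER;
static char group_path[PATH_MAX];       /* one buffer shared by all callers */

static const char *format_path(long id)
{
        /* Caller must hold debug_lock for as long as it uses the result. */
        snprintf(group_path, sizeof(group_path), "/autogroup-%ld", id);
        return group_path;
}

static void print_group(long id)
{
        pthread_mutex_lock(&debug_lock);
        printf("group: %s\n", format_path(id));
        pthread_mutex_unlock(&debug_lock);
}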
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index c62ebae65cf0..0c26e2df450e 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -699,7 +699,8 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
699 | cfs_rq->nr_running--; | 699 | cfs_rq->nr_running--; |
700 | } | 700 | } |
701 | 701 | ||
702 | #if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED | 702 | #ifdef CONFIG_FAIR_GROUP_SCHED |
703 | # ifdef CONFIG_SMP | ||
703 | static void update_cfs_rq_load_contribution(struct cfs_rq *cfs_rq, | 704 | static void update_cfs_rq_load_contribution(struct cfs_rq *cfs_rq, |
704 | int global_update) | 705 | int global_update) |
705 | { | 706 | { |
@@ -721,10 +722,10 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) | |||
721 | u64 now, delta; | 722 | u64 now, delta; |
722 | unsigned long load = cfs_rq->load.weight; | 723 | unsigned long load = cfs_rq->load.weight; |
723 | 724 | ||
724 | if (!cfs_rq) | 725 | if (cfs_rq->tg == &root_task_group) |
725 | return; | 726 | return; |
726 | 727 | ||
727 | now = rq_of(cfs_rq)->clock; | 728 | now = rq_of(cfs_rq)->clock_task; |
728 | delta = now - cfs_rq->load_stamp; | 729 | delta = now - cfs_rq->load_stamp; |
729 | 730 | ||
730 | /* truncate load history at 4 idle periods */ | 731 | /* truncate load history at 4 idle periods */ |
@@ -762,6 +763,51 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) | |||
762 | list_del_leaf_cfs_rq(cfs_rq); | 763 | list_del_leaf_cfs_rq(cfs_rq); |
763 | } | 764 | } |
764 | 765 | ||
766 | static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg, | ||
767 | long weight_delta) | ||
768 | { | ||
769 | long load_weight, load, shares; | ||
770 | |||
771 | load = cfs_rq->load.weight + weight_delta; | ||
772 | |||
773 | load_weight = atomic_read(&tg->load_weight); | ||
774 | load_weight -= cfs_rq->load_contribution; | ||
775 | load_weight += load; | ||
776 | |||
777 | shares = (tg->shares * load); | ||
778 | if (load_weight) | ||
779 | shares /= load_weight; | ||
780 | |||
781 | if (shares < MIN_SHARES) | ||
782 | shares = MIN_SHARES; | ||
783 | if (shares > tg->shares) | ||
784 | shares = tg->shares; | ||
785 | |||
786 | return shares; | ||
787 | } | ||
788 | |||
789 | static void update_entity_shares_tick(struct cfs_rq *cfs_rq) | ||
790 | { | ||
791 | if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) { | ||
792 | update_cfs_load(cfs_rq, 0); | ||
793 | update_cfs_shares(cfs_rq, 0); | ||
794 | } | ||
795 | } | ||
796 | # else /* CONFIG_SMP */ | ||
797 | static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) | ||
798 | { | ||
799 | } | ||
800 | |||
801 | static inline long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg, | ||
802 | long weight_delta) | ||
803 | { | ||
804 | return tg->shares; | ||
805 | } | ||
806 | |||
807 | static inline void update_entity_shares_tick(struct cfs_rq *cfs_rq) | ||
808 | { | ||
809 | } | ||
810 | # endif /* CONFIG_SMP */ | ||
765 | static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, | 811 | static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, |
766 | unsigned long weight) | 812 | unsigned long weight) |
767 | { | 813 | { |
@@ -782,41 +828,20 @@ static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta) | |||
782 | { | 828 | { |
783 | struct task_group *tg; | 829 | struct task_group *tg; |
784 | struct sched_entity *se; | 830 | struct sched_entity *se; |
785 | long load_weight, load, shares; | 831 | long shares; |
786 | |||
787 | if (!cfs_rq) | ||
788 | return; | ||
789 | 832 | ||
790 | tg = cfs_rq->tg; | 833 | tg = cfs_rq->tg; |
791 | se = tg->se[cpu_of(rq_of(cfs_rq))]; | 834 | se = tg->se[cpu_of(rq_of(cfs_rq))]; |
792 | if (!se) | 835 | if (!se) |
793 | return; | 836 | return; |
794 | 837 | #ifndef CONFIG_SMP | |
795 | load = cfs_rq->load.weight + weight_delta; | 838 | if (likely(se->load.weight == tg->shares)) |
796 | 839 | return; | |
797 | load_weight = atomic_read(&tg->load_weight); | 840 | #endif |
798 | load_weight -= cfs_rq->load_contribution; | 841 | shares = calc_cfs_shares(cfs_rq, tg, weight_delta); |
799 | load_weight += load; | ||
800 | |||
801 | shares = (tg->shares * load); | ||
802 | if (load_weight) | ||
803 | shares /= load_weight; | ||
804 | |||
805 | if (shares < MIN_SHARES) | ||
806 | shares = MIN_SHARES; | ||
807 | if (shares > tg->shares) | ||
808 | shares = tg->shares; | ||
809 | 842 | ||
810 | reweight_entity(cfs_rq_of(se), se, shares); | 843 | reweight_entity(cfs_rq_of(se), se, shares); |
811 | } | 844 | } |
812 | |||
813 | static void update_entity_shares_tick(struct cfs_rq *cfs_rq) | ||
814 | { | ||
815 | if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) { | ||
816 | update_cfs_load(cfs_rq, 0); | ||
817 | update_cfs_shares(cfs_rq, 0); | ||
818 | } | ||
819 | } | ||
820 | #else /* CONFIG_FAIR_GROUP_SCHED */ | 845 | #else /* CONFIG_FAIR_GROUP_SCHED */ |
821 | static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) | 846 | static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) |
822 | { | 847 | { |
@@ -1062,6 +1087,9 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) | |||
1062 | struct sched_entity *se = __pick_next_entity(cfs_rq); | 1087 | struct sched_entity *se = __pick_next_entity(cfs_rq); |
1063 | s64 delta = curr->vruntime - se->vruntime; | 1088 | s64 delta = curr->vruntime - se->vruntime; |
1064 | 1089 | ||
1090 | if (delta < 0) | ||
1091 | return; | ||
1092 | |||
1065 | if (delta > ideal_runtime) | 1093 | if (delta > ideal_runtime) |
1066 | resched_task(rq_of(cfs_rq)->curr); | 1094 | resched_task(rq_of(cfs_rq)->curr); |
1067 | } | 1095 | } |
@@ -1362,27 +1390,27 @@ static long effective_load(struct task_group *tg, int cpu, long wl, long wg) | |||
1362 | return wl; | 1390 | return wl; |
1363 | 1391 | ||
1364 | for_each_sched_entity(se) { | 1392 | for_each_sched_entity(se) { |
1365 | long S, rw, s, a, b; | 1393 | long lw, w; |
1366 | 1394 | ||
1367 | S = se->my_q->tg->shares; | 1395 | tg = se->my_q->tg; |
1368 | s = se->load.weight; | 1396 | w = se->my_q->load.weight; |
1369 | rw = se->my_q->load.weight; | ||
1370 | 1397 | ||
1371 | a = S*(rw + wl); | 1398 | /* use this cpu's instantaneous contribution */ |
1372 | b = S*rw + s*wg; | 1399 | lw = atomic_read(&tg->load_weight); |
1400 | lw -= se->my_q->load_contribution; | ||
1401 | lw += w + wg; | ||
1373 | 1402 | ||
1374 | wl = s*(a-b); | 1403 | wl += w; |
1375 | 1404 | ||
1376 | if (likely(b)) | 1405 | if (lw > 0 && wl < lw) |
1377 | wl /= b; | 1406 | wl = (wl * tg->shares) / lw; |
1407 | else | ||
1408 | wl = tg->shares; | ||
1378 | 1409 | ||
1379 | /* | 1410 | /* zero point is MIN_SHARES */ |
1380 | * Assume the group is already running and will | 1411 | if (wl < MIN_SHARES) |
1381 | * thus already be accounted for in the weight. | 1412 | wl = MIN_SHARES; |
1382 | * | 1413 | wl -= se->load.weight; |
1383 | * That is, moving shares between CPUs, does not | ||
1384 | * alter the group weight. | ||
1385 | */ | ||
1386 | wg = 0; | 1414 | wg = 0; |
1387 | } | 1415 | } |
1388 | 1416 | ||
@@ -1401,7 +1429,7 @@ static inline unsigned long effective_load(struct task_group *tg, int cpu, | |||
1401 | 1429 | ||
1402 | static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync) | 1430 | static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync) |
1403 | { | 1431 | { |
1404 | unsigned long this_load, load; | 1432 | s64 this_load, load; |
1405 | int idx, this_cpu, prev_cpu; | 1433 | int idx, this_cpu, prev_cpu; |
1406 | unsigned long tl_per_task; | 1434 | unsigned long tl_per_task; |
1407 | struct task_group *tg; | 1435 | struct task_group *tg; |
@@ -1440,8 +1468,8 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync) | |||
1440 | * Otherwise check if either cpus are near enough in load to allow this | 1468 | * Otherwise check if either cpus are near enough in load to allow this |
1441 | * task to be woken on this_cpu. | 1469 | * task to be woken on this_cpu. |
1442 | */ | 1470 | */ |
1443 | if (this_load) { | 1471 | if (this_load > 0) { |
1444 | unsigned long this_eff_load, prev_eff_load; | 1472 | s64 this_eff_load, prev_eff_load; |
1445 | 1473 | ||
1446 | this_eff_load = 100; | 1474 | this_eff_load = 100; |
1447 | this_eff_load *= power_of(prev_cpu); | 1475 | this_eff_load *= power_of(prev_cpu); |
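The sched_fair.c hunks factor the share computation into calc_cfs_shares(): a group's per-cpu share is tg->shares scaled by this runqueue's weight relative to the group-wide load, clamped to [MIN_SHARES, tg->shares]. A standalone restatement of that arithmetic in plain C; the MIN_SHARES value here is an assumed placeholder, not the kernel's constant:

#include <stdio.h>

#define MIN_SHARES 2L   /* assumption for illustration only */

static long calc_shares(long tg_shares, long group_load_weight,
                        long local_contribution, long local_load)
{
        long load_weight = group_load_weight - local_contribution + local_load;
        long shares = tg_shares * local_load;

        if (load_weight)
                shares /= load_weight;
        if (shares < MIN_SHARES)
                shares = MIN_SHARES;
        if (shares > tg_shares)
                shares = tg_shares;
        return shares;
}

int main(void)
{
        /* A runqueue holding half of a 1024-share group's load gets ~512. */
        printf("%ld\n", calc_shares(1024, 2048, 1024, 1024));   /* -> 512 */
        return 0;
}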
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index c914ec747ca6..ad6267714c84 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
@@ -625,7 +625,7 @@ static void update_curr_rt(struct rq *rq) | |||
625 | struct rt_rq *rt_rq = rt_rq_of_se(rt_se); | 625 | struct rt_rq *rt_rq = rt_rq_of_se(rt_se); |
626 | u64 delta_exec; | 626 | u64 delta_exec; |
627 | 627 | ||
628 | if (!task_has_rt_policy(curr)) | 628 | if (curr->sched_class != &rt_sched_class) |
629 | return; | 629 | return; |
630 | 630 | ||
631 | delta_exec = rq->clock_task - curr->se.exec_start; | 631 | delta_exec = rq->clock_task - curr->se.exec_start; |
diff --git a/kernel/smp.c b/kernel/smp.c index 12ed8b013e2d..9910744f0856 100644 --- a/kernel/smp.c +++ b/kernel/smp.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/smp.h> | 13 | #include <linux/smp.h> |
14 | #include <linux/cpu.h> | 14 | #include <linux/cpu.h> |
15 | 15 | ||
16 | #ifdef CONFIG_USE_GENERIC_SMP_HELPERS | ||
16 | static struct { | 17 | static struct { |
17 | struct list_head queue; | 18 | struct list_head queue; |
18 | raw_spinlock_t lock; | 19 | raw_spinlock_t lock; |
@@ -193,23 +194,52 @@ void generic_smp_call_function_interrupt(void) | |||
193 | */ | 194 | */ |
194 | list_for_each_entry_rcu(data, &call_function.queue, csd.list) { | 195 | list_for_each_entry_rcu(data, &call_function.queue, csd.list) { |
195 | int refs; | 196 | int refs; |
197 | void (*func) (void *info); | ||
196 | 198 | ||
197 | if (!cpumask_test_and_clear_cpu(cpu, data->cpumask)) | 199 | /* |
200 | * Since we walk the list without any locks, we might | ||
201 | * see an entry that was completed, removed from the | ||
202 | * list and is in the process of being reused. | ||
203 | * | ||
204 | * We must check that the cpu is in the cpumask before | ||
205 | * checking the refs, and both must be set before | ||
206 | * executing the callback on this cpu. | ||
207 | */ | ||
208 | |||
209 | if (!cpumask_test_cpu(cpu, data->cpumask)) | ||
210 | continue; | ||
211 | |||
212 | smp_rmb(); | ||
213 | |||
214 | if (atomic_read(&data->refs) == 0) | ||
198 | continue; | 215 | continue; |
199 | 216 | ||
217 | func = data->csd.func; /* for later warn */ | ||
200 | data->csd.func(data->csd.info); | 218 | data->csd.func(data->csd.info); |
201 | 219 | ||
220 | /* | ||
221 | * If the cpu mask is not still set then it enabled interrupts, | ||
222 | * we took another smp interrupt, and executed the function | ||
223 | * twice on this cpu. In theory that copy decremented refs. | ||
224 | */ | ||
225 | if (!cpumask_test_and_clear_cpu(cpu, data->cpumask)) { | ||
226 | WARN(1, "%pS enabled interrupts and double executed\n", | ||
227 | func); | ||
228 | continue; | ||
229 | } | ||
230 | |||
202 | refs = atomic_dec_return(&data->refs); | 231 | refs = atomic_dec_return(&data->refs); |
203 | WARN_ON(refs < 0); | 232 | WARN_ON(refs < 0); |
204 | if (!refs) { | ||
205 | raw_spin_lock(&call_function.lock); | ||
206 | list_del_rcu(&data->csd.list); | ||
207 | raw_spin_unlock(&call_function.lock); | ||
208 | } | ||
209 | 233 | ||
210 | if (refs) | 234 | if (refs) |
211 | continue; | 235 | continue; |
212 | 236 | ||
237 | WARN_ON(!cpumask_empty(data->cpumask)); | ||
238 | |||
239 | raw_spin_lock(&call_function.lock); | ||
240 | list_del_rcu(&data->csd.list); | ||
241 | raw_spin_unlock(&call_function.lock); | ||
242 | |||
213 | csd_unlock(&data->csd); | 243 | csd_unlock(&data->csd); |
214 | } | 244 | } |
215 | 245 | ||
@@ -429,7 +459,7 @@ void smp_call_function_many(const struct cpumask *mask, | |||
429 | * can't happen. | 459 | * can't happen. |
430 | */ | 460 | */ |
431 | WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled() | 461 | WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled() |
432 | && !oops_in_progress); | 462 | && !oops_in_progress && !early_boot_irqs_disabled); |
433 | 463 | ||
434 | /* So, what's a CPU they want? Ignoring this one. */ | 464 | /* So, what's a CPU they want? Ignoring this one. */ |
435 | cpu = cpumask_first_and(mask, cpu_online_mask); | 465 | cpu = cpumask_first_and(mask, cpu_online_mask); |
@@ -453,11 +483,21 @@ void smp_call_function_many(const struct cpumask *mask, | |||
453 | 483 | ||
454 | data = &__get_cpu_var(cfd_data); | 484 | data = &__get_cpu_var(cfd_data); |
455 | csd_lock(&data->csd); | 485 | csd_lock(&data->csd); |
486 | BUG_ON(atomic_read(&data->refs) || !cpumask_empty(data->cpumask)); | ||
456 | 487 | ||
457 | data->csd.func = func; | 488 | data->csd.func = func; |
458 | data->csd.info = info; | 489 | data->csd.info = info; |
459 | cpumask_and(data->cpumask, mask, cpu_online_mask); | 490 | cpumask_and(data->cpumask, mask, cpu_online_mask); |
460 | cpumask_clear_cpu(this_cpu, data->cpumask); | 491 | cpumask_clear_cpu(this_cpu, data->cpumask); |
492 | |||
493 | /* | ||
494 | * To ensure the interrupt handler gets a complete view | ||
495 | * we order the cpumask and refs writes and order the read | ||
496 | * of them in the interrupt handler. In addition we may | ||
497 | * only clear our own cpu bit from the mask. | ||
498 | */ | ||
499 | smp_wmb(); | ||
500 | |||
461 | atomic_set(&data->refs, cpumask_weight(data->cpumask)); | 501 | atomic_set(&data->refs, cpumask_weight(data->cpumask)); |
462 | 502 | ||
463 | raw_spin_lock_irqsave(&call_function.lock, flags); | 503 | raw_spin_lock_irqsave(&call_function.lock, flags); |
@@ -529,3 +569,24 @@ void ipi_call_unlock_irq(void) | |||
529 | { | 569 | { |
530 | raw_spin_unlock_irq(&call_function.lock); | 570 | raw_spin_unlock_irq(&call_function.lock); |
531 | } | 571 | } |
572 | #endif /* USE_GENERIC_SMP_HELPERS */ | ||
573 | |||
574 | /* | ||
575 | * Call a function on all processors. May be used during early boot while | ||
576 | * early_boot_irqs_disabled is set. Use local_irq_save/restore() instead | ||
577 | * of local_irq_disable/enable(). | ||
578 | */ | ||
579 | int on_each_cpu(void (*func) (void *info), void *info, int wait) | ||
580 | { | ||
581 | unsigned long flags; | ||
582 | int ret = 0; | ||
583 | |||
584 | preempt_disable(); | ||
585 | ret = smp_call_function(func, info, wait); | ||
586 | local_irq_save(flags); | ||
587 | func(info); | ||
588 | local_irq_restore(flags); | ||
589 | preempt_enable(); | ||
590 | return ret; | ||
591 | } | ||
592 | EXPORT_SYMBOL(on_each_cpu); | ||
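The smp.c hunks pair an smp_wmb() in smp_call_function_many() with an smp_rmb() in generic_smp_call_function_interrupt(): the cpumask and func/info must be globally visible before refs becomes non-zero, and the handler must only dereference them after re-checking refs past a read barrier. A simplified userspace C11 analogue of just that publish/consume ordering (it does not model the cpumask/reuse race handled by the full protocol); names are illustrative:

#include <stdatomic.h>

struct call_single {
        void (*func)(void *info);
        void *info;
        atomic_int refs;        /* stands in for data->refs above */
};

/* Sender: payload first, write fence, then make refs non-zero. */
static void publish(struct call_single *d, void (*func)(void *), void *info)
{
        d->func = func;
        d->info = info;
        atomic_thread_fence(memory_order_release);      /* kernel: smp_wmb() */
        atomic_store_explicit(&d->refs, 1, memory_order_relaxed);
}

/* Receiver: only touch func/info after seeing refs != 0 past a read fence. */
static void consume(struct call_single *d)
{
        if (atomic_load_explicit(&d->refs, memory_order_relaxed) == 0)
                return;                         /* nothing published yet */
        atomic_thread_fence(memory_order_acquire);      /* kernel: smp_rmb() */
        d->func(d->info);                       /* payload guaranteed visible */
        atomic_fetch_sub_explicit(&d->refs, 1, memory_order_relaxed);
}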
diff --git a/kernel/softirq.c b/kernel/softirq.c index 0823778f87fc..68eb5efec388 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -885,25 +885,6 @@ static __init int spawn_ksoftirqd(void) | |||
885 | } | 885 | } |
886 | early_initcall(spawn_ksoftirqd); | 886 | early_initcall(spawn_ksoftirqd); |
887 | 887 | ||
888 | #ifdef CONFIG_SMP | ||
889 | /* | ||
890 | * Call a function on all processors | ||
891 | */ | ||
892 | int on_each_cpu(void (*func) (void *info), void *info, int wait) | ||
893 | { | ||
894 | int ret = 0; | ||
895 | |||
896 | preempt_disable(); | ||
897 | ret = smp_call_function(func, info, wait); | ||
898 | local_irq_disable(); | ||
899 | func(info); | ||
900 | local_irq_enable(); | ||
901 | preempt_enable(); | ||
902 | return ret; | ||
903 | } | ||
904 | EXPORT_SYMBOL(on_each_cpu); | ||
905 | #endif | ||
906 | |||
907 | /* | 888 | /* |
908 | * [ These __weak aliases are kept in a separate compilation unit, so that | 889 | * [ These __weak aliases are kept in a separate compilation unit, so that |
909 | * GCC does not inline them incorrectly. ] | 890 | * GCC does not inline them incorrectly. ] |
diff --git a/kernel/srcu.c b/kernel/srcu.c index 98d8c1e80edb..73ce23feaea9 100644 --- a/kernel/srcu.c +++ b/kernel/srcu.c | |||
@@ -156,6 +156,16 @@ void __srcu_read_unlock(struct srcu_struct *sp, int idx) | |||
156 | EXPORT_SYMBOL_GPL(__srcu_read_unlock); | 156 | EXPORT_SYMBOL_GPL(__srcu_read_unlock); |
157 | 157 | ||
158 | /* | 158 | /* |
159 | * We use an adaptive strategy for synchronize_srcu() and especially for | ||
160 | * synchronize_srcu_expedited(). We spin for a fixed time period | ||
161 | * (defined below) to allow SRCU readers to exit their read-side critical | ||
162 | * sections. If there are still some readers after 10 microseconds, | ||
163 | * we repeatedly block for 1-millisecond time periods. This approach | ||
164 | * has done well in testing, so there is no need for a config parameter. | ||
165 | */ | ||
166 | #define SYNCHRONIZE_SRCU_READER_DELAY 10 | ||
167 | |||
168 | /* | ||
159 | * Helper function for synchronize_srcu() and synchronize_srcu_expedited(). | 169 | * Helper function for synchronize_srcu() and synchronize_srcu_expedited(). |
160 | */ | 170 | */ |
161 | static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void)) | 171 | static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void)) |
@@ -207,11 +217,12 @@ static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void)) | |||
207 | * will have finished executing. We initially give readers | 217 | * will have finished executing. We initially give readers |
208 | * an arbitrarily chosen 10 microseconds to get out of their | 218 | * an arbitrarily chosen 10 microseconds to get out of their |
209 | * SRCU read-side critical sections, then loop waiting 1/HZ | 219 | * SRCU read-side critical sections, then loop waiting 1/HZ |
210 | * seconds per iteration. | 220 | * seconds per iteration. The 10-microsecond value has done |
221 | * very well in testing. | ||
211 | */ | 222 | */ |
212 | 223 | ||
213 | if (srcu_readers_active_idx(sp, idx)) | 224 | if (srcu_readers_active_idx(sp, idx)) |
214 | udelay(CONFIG_SRCU_SYNCHRONIZE_DELAY); | 225 | udelay(SYNCHRONIZE_SRCU_READER_DELAY); |
215 | while (srcu_readers_active_idx(sp, idx)) | 226 | while (srcu_readers_active_idx(sp, idx)) |
216 | schedule_timeout_interruptible(1); | 227 | schedule_timeout_interruptible(1); |
217 | 228 | ||
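The srcu.c comment documents the adaptive wait used by synchronize_srcu(): spin roughly 10 microseconds for fast readers, then fall back to sleeping one jiffy per iteration. For context, the read/update pattern whose grace period this waits out looks roughly as follows; struct foo and the variable names are illustrative, and init_srcu_struct(&my_srcu) is assumed to have run at setup:

#include <linux/srcu.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo { int val; };

static struct srcu_struct my_srcu;
static struct foo *shared;              /* assumed already published */

static int reader(void)
{
        int idx, val;

        idx = srcu_read_lock(&my_srcu); /* unlike plain RCU, may sleep inside */
        val = srcu_dereference(shared, &my_srcu)->val;
        srcu_read_unlock(&my_srcu, idx);
        return val;
}

static void updater(struct foo *newp)
{
        struct foo *old = shared;

        rcu_assign_pointer(shared, newp);
        synchronize_srcu(&my_srcu);     /* ~10us spin, then 1-jiffy sleeps */
        kfree(old);
}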
diff --git a/kernel/sys.c b/kernel/sys.c index 2745dcdb6c6c..18da702ec813 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -43,6 +43,8 @@ | |||
43 | #include <linux/kprobes.h> | 43 | #include <linux/kprobes.h> |
44 | #include <linux/user_namespace.h> | 44 | #include <linux/user_namespace.h> |
45 | 45 | ||
46 | #include <linux/kmsg_dump.h> | ||
47 | |||
46 | #include <asm/uaccess.h> | 48 | #include <asm/uaccess.h> |
47 | #include <asm/io.h> | 49 | #include <asm/io.h> |
48 | #include <asm/unistd.h> | 50 | #include <asm/unistd.h> |
@@ -285,6 +287,7 @@ out_unlock: | |||
285 | */ | 287 | */ |
286 | void emergency_restart(void) | 288 | void emergency_restart(void) |
287 | { | 289 | { |
290 | kmsg_dump(KMSG_DUMP_EMERG); | ||
288 | machine_emergency_restart(); | 291 | machine_emergency_restart(); |
289 | } | 292 | } |
290 | EXPORT_SYMBOL_GPL(emergency_restart); | 293 | EXPORT_SYMBOL_GPL(emergency_restart); |
@@ -312,6 +315,7 @@ void kernel_restart(char *cmd) | |||
312 | printk(KERN_EMERG "Restarting system.\n"); | 315 | printk(KERN_EMERG "Restarting system.\n"); |
313 | else | 316 | else |
314 | printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd); | 317 | printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd); |
318 | kmsg_dump(KMSG_DUMP_RESTART); | ||
315 | machine_restart(cmd); | 319 | machine_restart(cmd); |
316 | } | 320 | } |
317 | EXPORT_SYMBOL_GPL(kernel_restart); | 321 | EXPORT_SYMBOL_GPL(kernel_restart); |
@@ -333,6 +337,7 @@ void kernel_halt(void) | |||
333 | kernel_shutdown_prepare(SYSTEM_HALT); | 337 | kernel_shutdown_prepare(SYSTEM_HALT); |
334 | sysdev_shutdown(); | 338 | sysdev_shutdown(); |
335 | printk(KERN_EMERG "System halted.\n"); | 339 | printk(KERN_EMERG "System halted.\n"); |
340 | kmsg_dump(KMSG_DUMP_HALT); | ||
336 | machine_halt(); | 341 | machine_halt(); |
337 | } | 342 | } |
338 | 343 | ||
@@ -351,6 +356,7 @@ void kernel_power_off(void) | |||
351 | disable_nonboot_cpus(); | 356 | disable_nonboot_cpus(); |
352 | sysdev_shutdown(); | 357 | sysdev_shutdown(); |
353 | printk(KERN_EMERG "Power down.\n"); | 358 | printk(KERN_EMERG "Power down.\n"); |
359 | kmsg_dump(KMSG_DUMP_POWEROFF); | ||
354 | machine_power_off(); | 360 | machine_power_off(); |
355 | } | 361 | } |
356 | EXPORT_SYMBOL_GPL(kernel_power_off); | 362 | EXPORT_SYMBOL_GPL(kernel_power_off); |
@@ -1379,7 +1385,8 @@ static int check_prlimit_permission(struct task_struct *task) | |||
1379 | const struct cred *cred = current_cred(), *tcred; | 1385 | const struct cred *cred = current_cred(), *tcred; |
1380 | 1386 | ||
1381 | tcred = __task_cred(task); | 1387 | tcred = __task_cred(task); |
1382 | if ((cred->uid != tcred->euid || | 1388 | if (current != task && |
1389 | (cred->uid != tcred->euid || | ||
1383 | cred->uid != tcred->suid || | 1390 | cred->uid != tcred->suid || |
1384 | cred->uid != tcred->uid || | 1391 | cred->uid != tcred->uid || |
1385 | cred->gid != tcred->egid || | 1392 | cred->gid != tcred->egid || |
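The sys.c hunks add kmsg_dump() calls on the restart/halt/poweroff paths and relax check_prlimit_permission() so a task may always operate on itself, even after its credentials have changed (for example across setuid). From userspace that self case is reachable through the prlimit() wrapper; a small sketch, assuming a glibc recent enough (>= 2.13) to provide it:

#define _GNU_SOURCE
#include <sys/resource.h>
#include <stdio.h>

int main(void)
{
        struct rlimit old_lim;

        /* pid 0 means the calling process: always permitted after the
         * current != task short-circuit above. Passing a non-NULL new
         * limit works the same way, subject to the usual hard-limit rules. */
        if (prlimit(0, RLIMIT_NOFILE, NULL, &old_lim) != 0) {
                perror("prlimit");
                return 1;
        }
        printf("RLIMIT_NOFILE: %llu soft, %llu hard\n",
               (unsigned long long)old_lim.rlim_cur,
               (unsigned long long)old_lim.rlim_max);
        return 0;
}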
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ae5cbb1e3ced..0f1bd83db985 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
25 | #include <linux/sysctl.h> | 25 | #include <linux/sysctl.h> |
26 | #include <linux/signal.h> | 26 | #include <linux/signal.h> |
27 | #include <linux/printk.h> | ||
27 | #include <linux/proc_fs.h> | 28 | #include <linux/proc_fs.h> |
28 | #include <linux/security.h> | 29 | #include <linux/security.h> |
29 | #include <linux/ctype.h> | 30 | #include <linux/ctype.h> |
@@ -169,7 +170,8 @@ static int proc_taint(struct ctl_table *table, int write, | |||
169 | #endif | 170 | #endif |
170 | 171 | ||
171 | #ifdef CONFIG_MAGIC_SYSRQ | 172 | #ifdef CONFIG_MAGIC_SYSRQ |
172 | static int __sysrq_enabled; /* Note: sysrq code ises it's own private copy */ | 173 | /* Note: sysrq code uses its own private copy */ |
174 | static int __sysrq_enabled = SYSRQ_DEFAULT_ENABLE; | ||
173 | 175 | ||
174 | static int sysrq_sysctl_handler(ctl_table *table, int write, | 176 | static int sysrq_sysctl_handler(ctl_table *table, int write, |
175 | void __user *buffer, size_t *lenp, | 177 | void __user *buffer, size_t *lenp, |
@@ -245,10 +247,6 @@ static struct ctl_table root_table[] = { | |||
245 | .mode = 0555, | 247 | .mode = 0555, |
246 | .child = dev_table, | 248 | .child = dev_table, |
247 | }, | 249 | }, |
248 | /* | ||
249 | * NOTE: do not add new entries to this table unless you have read | ||
250 | * Documentation/sysctl/ctl_unnumbered.txt | ||
251 | */ | ||
252 | { } | 250 | { } |
253 | }; | 251 | }; |
254 | 252 | ||
@@ -710,6 +708,15 @@ static struct ctl_table kern_table[] = { | |||
710 | .extra1 = &zero, | 708 | .extra1 = &zero, |
711 | .extra2 = &one, | 709 | .extra2 = &one, |
712 | }, | 710 | }, |
711 | { | ||
712 | .procname = "kptr_restrict", | ||
713 | .data = &kptr_restrict, | ||
714 | .maxlen = sizeof(int), | ||
715 | .mode = 0644, | ||
716 | .proc_handler = proc_dointvec_minmax, | ||
717 | .extra1 = &zero, | ||
718 | .extra2 = &two, | ||
719 | }, | ||
713 | #endif | 720 | #endif |
714 | { | 721 | { |
715 | .procname = "ngroups_max", | 722 | .procname = "ngroups_max", |
@@ -962,10 +969,6 @@ static struct ctl_table kern_table[] = { | |||
962 | .proc_handler = proc_dointvec, | 969 | .proc_handler = proc_dointvec, |
963 | }, | 970 | }, |
964 | #endif | 971 | #endif |
965 | /* | ||
966 | * NOTE: do not add new entries to this table unless you have read | ||
967 | * Documentation/sysctl/ctl_unnumbered.txt | ||
968 | */ | ||
969 | { } | 972 | { } |
970 | }; | 973 | }; |
971 | 974 | ||
@@ -1326,11 +1329,6 @@ static struct ctl_table vm_table[] = { | |||
1326 | .extra2 = &one, | 1329 | .extra2 = &one, |
1327 | }, | 1330 | }, |
1328 | #endif | 1331 | #endif |
1329 | |||
1330 | /* | ||
1331 | * NOTE: do not add new entries to this table unless you have read | ||
1332 | * Documentation/sysctl/ctl_unnumbered.txt | ||
1333 | */ | ||
1334 | { } | 1332 | { } |
1335 | }; | 1333 | }; |
1336 | 1334 | ||
@@ -1486,10 +1484,6 @@ static struct ctl_table fs_table[] = { | |||
1486 | .proc_handler = &pipe_proc_fn, | 1484 | .proc_handler = &pipe_proc_fn, |
1487 | .extra1 = &pipe_min_size, | 1485 | .extra1 = &pipe_min_size, |
1488 | }, | 1486 | }, |
1489 | /* | ||
1490 | * NOTE: do not add new entries to this table unless you have read | ||
1491 | * Documentation/sysctl/ctl_unnumbered.txt | ||
1492 | */ | ||
1493 | { } | 1487 | { } |
1494 | }; | 1488 | }; |
1495 | 1489 | ||
@@ -2899,7 +2893,7 @@ int proc_do_large_bitmap(struct ctl_table *table, int write, | |||
2899 | } | 2893 | } |
2900 | } | 2894 | } |
2901 | 2895 | ||
2902 | #else /* CONFIG_PROC_FS */ | 2896 | #else /* CONFIG_PROC_SYSCTL */ |
2903 | 2897 | ||
2904 | int proc_dostring(struct ctl_table *table, int write, | 2898 | int proc_dostring(struct ctl_table *table, int write, |
2905 | void __user *buffer, size_t *lenp, loff_t *ppos) | 2899 | void __user *buffer, size_t *lenp, loff_t *ppos) |
@@ -2951,7 +2945,7 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write, | |||
2951 | } | 2945 | } |
2952 | 2946 | ||
2953 | 2947 | ||
2954 | #endif /* CONFIG_PROC_FS */ | 2948 | #endif /* CONFIG_PROC_SYSCTL */ |
2955 | 2949 | ||
2956 | /* | 2950 | /* |
2957 | * No sense putting this after each symbol definition, twice, | 2951 | * No sense putting this after each symbol definition, twice, |
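The sysctl.c hunk adds the kptr_restrict knob to kern_table as a 0..2 integer handled by proc_dointvec_minmax, so it shows up under /proc/sys/kernel/. A trivial userspace check of the new file (equivalent to `sysctl kernel.kptr_restrict`):

#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/proc/sys/kernel/kptr_restrict", "r");
        int val;

        if (!f) {
                perror("fopen");
                return 1;
        }
        if (fscanf(f, "%d", &val) != 1) {
                fclose(f);
                return 1;
        }
        printf("kptr_restrict = %d (0..2, per the ctl_table entry above)\n", val);
        fclose(f);
        return 0;
}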
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index 4b2545a136ff..b875bedf7c9a 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c | |||
@@ -1192,7 +1192,7 @@ static ssize_t bin_dn_node_address(struct file *file, | |||
1192 | 1192 | ||
1193 | buf[result] = '\0'; | 1193 | buf[result] = '\0'; |
1194 | 1194 | ||
1195 | /* Convert the decnet addresss to binary */ | 1195 | /* Convert the decnet address to binary */ |
1196 | result = -EIO; | 1196 | result = -EIO; |
1197 | nodep = strchr(buf, '.') + 1; | 1197 | nodep = strchr(buf, '.') + 1; |
1198 | if (!nodep) | 1198 | if (!nodep) |
diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 69691eb4b715..3971c6b9d58d 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c | |||
@@ -348,7 +348,7 @@ static int parse(struct nlattr *na, struct cpumask *mask) | |||
348 | return ret; | 348 | return ret; |
349 | } | 349 | } |
350 | 350 | ||
351 | #ifdef CONFIG_IA64 | 351 | #if defined(CONFIG_64BIT) && !defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) |
352 | #define TASKSTATS_NEEDS_PADDING 1 | 352 | #define TASKSTATS_NEEDS_PADDING 1 |
353 | #endif | 353 | #endif |
354 | 354 | ||
diff --git a/kernel/time.c b/kernel/time.c index ba9b338d1835..32174359576f 100644 --- a/kernel/time.c +++ b/kernel/time.c | |||
@@ -238,7 +238,7 @@ EXPORT_SYMBOL(current_fs_time); | |||
238 | * Avoid unnecessary multiplications/divisions in the | 238 | * Avoid unnecessary multiplications/divisions in the |
239 | * two most common HZ cases: | 239 | * two most common HZ cases: |
240 | */ | 240 | */ |
241 | unsigned int inline jiffies_to_msecs(const unsigned long j) | 241 | inline unsigned int jiffies_to_msecs(const unsigned long j) |
242 | { | 242 | { |
243 | #if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) | 243 | #if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) |
244 | return (MSEC_PER_SEC / HZ) * j; | 244 | return (MSEC_PER_SEC / HZ) * j; |
@@ -254,7 +254,7 @@ unsigned int inline jiffies_to_msecs(const unsigned long j) | |||
254 | } | 254 | } |
255 | EXPORT_SYMBOL(jiffies_to_msecs); | 255 | EXPORT_SYMBOL(jiffies_to_msecs); |
256 | 256 | ||
257 | unsigned int inline jiffies_to_usecs(const unsigned long j) | 257 | inline unsigned int jiffies_to_usecs(const unsigned long j) |
258 | { | 258 | { |
259 | #if HZ <= USEC_PER_SEC && !(USEC_PER_SEC % HZ) | 259 | #if HZ <= USEC_PER_SEC && !(USEC_PER_SEC % HZ) |
260 | return (USEC_PER_SEC / HZ) * j; | 260 | return (USEC_PER_SEC / HZ) * j; |
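The time.c hunks only move the inline keyword to its canonical position before the return type; the fast path itself is unchanged. As a worked instance of that fast path, for a kernel built with HZ=250 (so MSEC_PER_SEC % HZ == 0) each jiffy is 1000/250 = 4 ms; a standalone copy in plain C:

#include <stdio.h>

#define HZ              250     /* assumed build-time value for this example */
#define MSEC_PER_SEC    1000U

static inline unsigned int jiffies_to_msecs(unsigned long j)
{
        return (MSEC_PER_SEC / HZ) * j;         /* the HZ <= 1000 fast path */
}

int main(void)
{
        printf("%u ms\n", jiffies_to_msecs(25));        /* 25 jiffies -> 100 ms */
        return 0;
}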
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index df140cd3ea47..6519cf62d9cd 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c | |||
@@ -113,7 +113,7 @@ EXPORT_SYMBOL_GPL(timecounter_cyc2time); | |||
113 | * @shift: pointer to shift variable | 113 | * @shift: pointer to shift variable |
114 | * @from: frequency to convert from | 114 | * @from: frequency to convert from |
115 | * @to: frequency to convert to | 115 | * @to: frequency to convert to |
116 | * @minsec: guaranteed runtime conversion range in seconds | 116 | * @maxsec: guaranteed runtime conversion range in seconds |
117 | * | 117 | * |
118 | * The function evaluates the shift/mult pair for the scaled math | 118 | * The function evaluates the shift/mult pair for the scaled math |
119 | * operations of clocksources and clockevents. | 119 | * operations of clocksources and clockevents. |
@@ -122,7 +122,7 @@ EXPORT_SYMBOL_GPL(timecounter_cyc2time); | |||
122 | * NSEC_PER_SEC == 1GHz and @from is the counter frequency. For clock | 122 | * NSEC_PER_SEC == 1GHz and @from is the counter frequency. For clock |
123 | * event @to is the counter frequency and @from is NSEC_PER_SEC. | 123 | * event @to is the counter frequency and @from is NSEC_PER_SEC. |
124 | * | 124 | * |
125 | * The @minsec conversion range argument controls the time frame in | 125 | * The @maxsec conversion range argument controls the time frame in |
126 | * seconds which must be covered by the runtime conversion with the | 126 | * seconds which must be covered by the runtime conversion with the |
127 | * calculated mult and shift factors. This guarantees that no 64bit | 127 | * calculated mult and shift factors. This guarantees that no 64bit |
128 | * overflow happens when the input value of the conversion is | 128 | * overflow happens when the input value of the conversion is |
@@ -131,7 +131,7 @@ EXPORT_SYMBOL_GPL(timecounter_cyc2time); | |||
131 | * factors. | 131 | * factors. |
132 | */ | 132 | */ |
133 | void | 133 | void |
134 | clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec) | 134 | clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 maxsec) |
135 | { | 135 | { |
136 | u64 tmp; | 136 | u64 tmp; |
137 | u32 sft, sftacc= 32; | 137 | u32 sft, sftacc= 32; |
@@ -140,7 +140,7 @@ clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec) | |||
140 | * Calculate the shift factor which is limiting the conversion | 140 | * Calculate the shift factor which is limiting the conversion |
141 | * range: | 141 | * range: |
142 | */ | 142 | */ |
143 | tmp = ((u64)minsec * from) >> 32; | 143 | tmp = ((u64)maxsec * from) >> 32; |
144 | while (tmp) { | 144 | while (tmp) { |
145 | tmp >>=1; | 145 | tmp >>=1; |
146 | sftacc--; | 146 | sftacc--; |
@@ -679,7 +679,7 @@ EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale); | |||
679 | int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq) | 679 | int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq) |
680 | { | 680 | { |
681 | 681 | ||
682 | /* Intialize mult/shift and max_idle_ns */ | 682 | /* Initialize mult/shift and max_idle_ns */ |
683 | __clocksource_updatefreq_scale(cs, scale, freq); | 683 | __clocksource_updatefreq_scale(cs, scale, freq); |
684 | 684 | ||
685 | /* Add clocksource to the clcoksource list */ | 685 | /* Add clocksource to the clcoksource list */ |
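The clocksource.c hunks rename the last clocks_calc_mult_shift() parameter from minsec to maxsec, which better matches what it does: it bounds the cycle delta (in seconds) for which the resulting mult/shift pair converts without 64-bit overflow. Consumers apply the pair as a multiply and a shift, as in clocksource_cyc2ns(); a standalone numeric sketch with an illustrative pair for a 1 MHz counter:

#include <stdint.h>
#include <stdio.h>

static uint64_t cyc2ns(uint64_t cycles, uint32_t mult, uint32_t shift)
{
        return (cycles * mult) >> shift;
}

int main(void)
{
        /* 1 MHz counter: 1 cycle = 1000 ns. mult = 1000 << 20, shift = 20
         * gives ns = cycles * 1000 exactly for small deltas; maxsec is what
         * bounds how large a delta may get before cycles * mult overflows. */
        uint32_t mult = 1000u << 20, shift = 20;

        printf("%llu ns\n", (unsigned long long)cyc2ns(123, mult, shift));  /* 123000 */
        return 0;
}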
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index d2321891538f..5c00242fa921 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/timex.h> | 14 | #include <linux/timex.h> |
15 | #include <linux/time.h> | 15 | #include <linux/time.h> |
16 | #include <linux/mm.h> | 16 | #include <linux/mm.h> |
17 | #include <linux/module.h> | ||
17 | 18 | ||
18 | /* | 19 | /* |
19 | * NTP timekeeping variables: | 20 | * NTP timekeeping variables: |
@@ -74,6 +75,162 @@ static long time_adjust; | |||
74 | /* constant (boot-param configurable) NTP tick adjustment (upscaled) */ | 75 | /* constant (boot-param configurable) NTP tick adjustment (upscaled) */ |
75 | static s64 ntp_tick_adj; | 76 | static s64 ntp_tick_adj; |
76 | 77 | ||
78 | #ifdef CONFIG_NTP_PPS | ||
79 | |||
80 | /* | ||
81 | * The following variables are used when a pulse-per-second (PPS) signal | ||
82 | * is available. They establish the engineering parameters of the clock | ||
83 | * discipline loop when controlled by the PPS signal. | ||
84 | */ | ||
85 | #define PPS_VALID 10 /* PPS signal watchdog max (s) */ | ||
86 | #define PPS_POPCORN 4 /* popcorn spike threshold (shift) */ | ||
87 | #define PPS_INTMIN 2 /* min freq interval (s) (shift) */ | ||
88 | #define PPS_INTMAX 8 /* max freq interval (s) (shift) */ | ||
89 | #define PPS_INTCOUNT 4 /* number of consecutive good intervals to | ||
90 | increase pps_shift or consecutive bad | ||
91 | intervals to decrease it */ | ||
92 | #define PPS_MAXWANDER 100000 /* max PPS freq wander (ns/s) */ | ||
93 | |||
94 | static int pps_valid; /* signal watchdog counter */ | ||
95 | static long pps_tf[3]; /* phase median filter */ | ||
96 | static long pps_jitter; /* current jitter (ns) */ | ||
97 | static struct timespec pps_fbase; /* beginning of the last freq interval */ | ||
98 | static int pps_shift; /* current interval duration (s) (shift) */ | ||
99 | static int pps_intcnt; /* interval counter */ | ||
100 | static s64 pps_freq; /* frequency offset (scaled ns/s) */ | ||
101 | static long pps_stabil; /* current stability (scaled ns/s) */ | ||
102 | |||
103 | /* | ||
104 | * PPS signal quality monitors | ||
105 | */ | ||
106 | static long pps_calcnt; /* calibration intervals */ | ||
107 | static long pps_jitcnt; /* jitter limit exceeded */ | ||
108 | static long pps_stbcnt; /* stability limit exceeded */ | ||
109 | static long pps_errcnt; /* calibration errors */ | ||
110 | |||
111 | |||
112 | /* PPS kernel consumer compensates the whole phase error immediately. | ||
113 | * Otherwise, reduce the offset by a fixed factor times the time constant. | ||
114 | */ | ||
115 | static inline s64 ntp_offset_chunk(s64 offset) | ||
116 | { | ||
117 | if (time_status & STA_PPSTIME && time_status & STA_PPSSIGNAL) | ||
118 | return offset; | ||
119 | else | ||
120 | return shift_right(offset, SHIFT_PLL + time_constant); | ||
121 | } | ||
122 | |||
123 | static inline void pps_reset_freq_interval(void) | ||
124 | { | ||
125 | /* the PPS calibration interval may end | ||
126 | surprisingly early */ | ||
127 | pps_shift = PPS_INTMIN; | ||
128 | pps_intcnt = 0; | ||
129 | } | ||
130 | |||
131 | /** | ||
132 | * pps_clear - Clears the PPS state variables | ||
133 | * | ||
134 | * Must be called while holding a write on the xtime_lock | ||
135 | */ | ||
136 | static inline void pps_clear(void) | ||
137 | { | ||
138 | pps_reset_freq_interval(); | ||
139 | pps_tf[0] = 0; | ||
140 | pps_tf[1] = 0; | ||
141 | pps_tf[2] = 0; | ||
142 | pps_fbase.tv_sec = pps_fbase.tv_nsec = 0; | ||
143 | pps_freq = 0; | ||
144 | } | ||
145 | |||
146 | /* Decrease pps_valid to indicate that another second has passed since | ||
147 | * the last PPS signal. When it reaches 0, indicate that PPS signal is | ||
148 | * missing. | ||
149 | * | ||
150 | * Must be called while holding a write on the xtime_lock | ||
151 | */ | ||
152 | static inline void pps_dec_valid(void) | ||
153 | { | ||
154 | if (pps_valid > 0) | ||
155 | pps_valid--; | ||
156 | else { | ||
157 | time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER | | ||
158 | STA_PPSWANDER | STA_PPSERROR); | ||
159 | pps_clear(); | ||
160 | } | ||
161 | } | ||
162 | |||
163 | static inline void pps_set_freq(s64 freq) | ||
164 | { | ||
165 | pps_freq = freq; | ||
166 | } | ||
167 | |||
168 | static inline int is_error_status(int status) | ||
169 | { | ||
170 | return (time_status & (STA_UNSYNC|STA_CLOCKERR)) | ||
171 | /* PPS signal lost when either PPS time or | ||
172 | * PPS frequency synchronization requested | ||
173 | */ | ||
174 | || ((time_status & (STA_PPSFREQ|STA_PPSTIME)) | ||
175 | && !(time_status & STA_PPSSIGNAL)) | ||
176 | /* PPS jitter exceeded when | ||
177 | * PPS time synchronization requested */ | ||
178 | || ((time_status & (STA_PPSTIME|STA_PPSJITTER)) | ||
179 | == (STA_PPSTIME|STA_PPSJITTER)) | ||
180 | /* PPS wander exceeded or calibration error when | ||
181 | * PPS frequency synchronization requested | ||
182 | */ | ||
183 | || ((time_status & STA_PPSFREQ) | ||
184 | && (time_status & (STA_PPSWANDER|STA_PPSERROR))); | ||
185 | } | ||
186 | |||
187 | static inline void pps_fill_timex(struct timex *txc) | ||
188 | { | ||
189 | txc->ppsfreq = shift_right((pps_freq >> PPM_SCALE_INV_SHIFT) * | ||
190 | PPM_SCALE_INV, NTP_SCALE_SHIFT); | ||
191 | txc->jitter = pps_jitter; | ||
192 | if (!(time_status & STA_NANO)) | ||
193 | txc->jitter /= NSEC_PER_USEC; | ||
194 | txc->shift = pps_shift; | ||
195 | txc->stabil = pps_stabil; | ||
196 | txc->jitcnt = pps_jitcnt; | ||
197 | txc->calcnt = pps_calcnt; | ||
198 | txc->errcnt = pps_errcnt; | ||
199 | txc->stbcnt = pps_stbcnt; | ||
200 | } | ||
201 | |||
202 | #else /* !CONFIG_NTP_PPS */ | ||
203 | |||
204 | static inline s64 ntp_offset_chunk(s64 offset) | ||
205 | { | ||
206 | return shift_right(offset, SHIFT_PLL + time_constant); | ||
207 | } | ||
208 | |||
209 | static inline void pps_reset_freq_interval(void) {} | ||
210 | static inline void pps_clear(void) {} | ||
211 | static inline void pps_dec_valid(void) {} | ||
212 | static inline void pps_set_freq(s64 freq) {} | ||
213 | |||
214 | static inline int is_error_status(int status) | ||
215 | { | ||
216 | return status & (STA_UNSYNC|STA_CLOCKERR); | ||
217 | } | ||
218 | |||
219 | static inline void pps_fill_timex(struct timex *txc) | ||
220 | { | ||
221 | /* PPS is not implemented, so these are zero */ | ||
222 | txc->ppsfreq = 0; | ||
223 | txc->jitter = 0; | ||
224 | txc->shift = 0; | ||
225 | txc->stabil = 0; | ||
226 | txc->jitcnt = 0; | ||
227 | txc->calcnt = 0; | ||
228 | txc->errcnt = 0; | ||
229 | txc->stbcnt = 0; | ||
230 | } | ||
231 | |||
232 | #endif /* CONFIG_NTP_PPS */ | ||
233 | |||
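The split above decides how much of the accumulated phase offset gets folded into tick_length each second: with a locked PPS time reference (STA_PPSTIME and STA_PPSSIGNAL both set) the whole offset is applied at once, otherwise only an exponentially decaying chunk is. A minimal user-space sketch of the non-PPS path follows; SHIFT_PLL = 4 and a time constant of 2 are illustrative values, not taken from this hunk, and the kernel's NTP_SCALE_SHIFT fixed-point scaling is dropped so the numbers read directly as nanoseconds.

    #include <stdio.h>
    #include <stdint.h>

    /* shift_right() reduced to a plain right shift; the demo only feeds it
     * positive offsets, so no negative-shift subtleties arise. */
    static int64_t offset_chunk(int64_t offset, int shift_pll, int time_constant)
    {
        return offset >> (shift_pll + time_constant);
    }

    int main(void)
    {
        int64_t offset = 1000000;           /* 1 ms of phase error, in ns */
        const int shift_pll = 4, tc = 2;    /* illustrative values */

        for (int sec = 1; sec <= 5; sec++) {
            int64_t delta = offset_chunk(offset, shift_pll, tc);
            offset -= delta;                /* what second_overflow() does below */
            printf("second %d: applied %lld ns, %lld ns left\n",
                   sec, (long long)delta, (long long)offset);
        }
        return 0;
    }

The geometric decay is why a larger time constant makes the clock converge more slowly but also react less to noisy offset samples.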
77 | /* | 234 | /* |
78 | * NTP methods: | 235 | * NTP methods: |
79 | */ | 236 | */ |
@@ -185,6 +342,9 @@ void ntp_clear(void) | |||
185 | 342 | ||
186 | tick_length = tick_length_base; | 343 | tick_length = tick_length_base; |
187 | time_offset = 0; | 344 | time_offset = 0; |
345 | |||
346 | /* Clear PPS state variables */ | ||
347 | pps_clear(); | ||
188 | } | 348 | } |
189 | 349 | ||
190 | /* | 350 | /* |
@@ -250,16 +410,16 @@ void second_overflow(void) | |||
250 | time_status |= STA_UNSYNC; | 410 | time_status |= STA_UNSYNC; |
251 | } | 411 | } |
252 | 412 | ||
253 | /* | 413 | /* Compute the phase adjustment for the next second */ |
254 | * Compute the phase adjustment for the next second. The offset is | ||
255 | * reduced by a fixed factor times the time constant. | ||
256 | */ | ||
257 | tick_length = tick_length_base; | 414 | tick_length = tick_length_base; |
258 | 415 | ||
259 | delta = shift_right(time_offset, SHIFT_PLL + time_constant); | 416 | delta = ntp_offset_chunk(time_offset); |
260 | time_offset -= delta; | 417 | time_offset -= delta; |
261 | tick_length += delta; | 418 | tick_length += delta; |
262 | 419 | ||
420 | /* Check PPS signal */ | ||
421 | pps_dec_valid(); | ||
422 | |||
263 | if (!time_adjust) | 423 | if (!time_adjust) |
264 | return; | 424 | return; |
265 | 425 | ||
@@ -369,6 +529,8 @@ static inline void process_adj_status(struct timex *txc, struct timespec *ts) | |||
369 | if ((time_status & STA_PLL) && !(txc->status & STA_PLL)) { | 529 | if ((time_status & STA_PLL) && !(txc->status & STA_PLL)) { |
370 | time_state = TIME_OK; | 530 | time_state = TIME_OK; |
371 | time_status = STA_UNSYNC; | 531 | time_status = STA_UNSYNC; |
532 | /* restart PPS frequency calibration */ | ||
533 | pps_reset_freq_interval(); | ||
372 | } | 534 | } |
373 | 535 | ||
374 | /* | 536 | /* |
@@ -418,6 +580,8 @@ static inline void process_adjtimex_modes(struct timex *txc, struct timespec *ts | |||
418 | time_freq = txc->freq * PPM_SCALE; | 580 | time_freq = txc->freq * PPM_SCALE; |
419 | time_freq = min(time_freq, MAXFREQ_SCALED); | 581 | time_freq = min(time_freq, MAXFREQ_SCALED); |
420 | time_freq = max(time_freq, -MAXFREQ_SCALED); | 582 | time_freq = max(time_freq, -MAXFREQ_SCALED); |
583 | /* update pps_freq */ | ||
584 | pps_set_freq(time_freq); | ||
421 | } | 585 | } |
422 | 586 | ||
423 | if (txc->modes & ADJ_MAXERROR) | 587 | if (txc->modes & ADJ_MAXERROR) |
@@ -508,7 +672,8 @@ int do_adjtimex(struct timex *txc) | |||
508 | } | 672 | } |
509 | 673 | ||
510 | result = time_state; /* mostly `TIME_OK' */ | 674 | result = time_state; /* mostly `TIME_OK' */ |
511 | if (time_status & (STA_UNSYNC|STA_CLOCKERR)) | 675 | /* check for errors */ |
676 | if (is_error_status(time_status)) | ||
512 | result = TIME_ERROR; | 677 | result = TIME_ERROR; |
513 | 678 | ||
514 | txc->freq = shift_right((time_freq >> PPM_SCALE_INV_SHIFT) * | 679 | txc->freq = shift_right((time_freq >> PPM_SCALE_INV_SHIFT) * |
@@ -522,15 +687,8 @@ int do_adjtimex(struct timex *txc) | |||
522 | txc->tick = tick_usec; | 687 | txc->tick = tick_usec; |
523 | txc->tai = time_tai; | 688 | txc->tai = time_tai; |
524 | 689 | ||
525 | /* PPS is not implemented, so these are zero */ | 690 | /* fill PPS status fields */ |
526 | txc->ppsfreq = 0; | 691 | pps_fill_timex(txc); |
527 | txc->jitter = 0; | ||
528 | txc->shift = 0; | ||
529 | txc->stabil = 0; | ||
530 | txc->jitcnt = 0; | ||
531 | txc->calcnt = 0; | ||
532 | txc->errcnt = 0; | ||
533 | txc->stbcnt = 0; | ||
534 | 692 | ||
535 | write_sequnlock_irq(&xtime_lock); | 693 | write_sequnlock_irq(&xtime_lock); |
536 | 694 | ||
@@ -544,6 +702,243 @@ int do_adjtimex(struct timex *txc) | |||
544 | return result; | 702 | return result; |
545 | } | 703 | } |
546 | 704 | ||
705 | #ifdef CONFIG_NTP_PPS | ||
706 | |||
707 | /* actually struct pps_normtime is good old struct timespec, but it is | ||
708 | * semantically different (and it is the reason why it was invented): | ||
709 | * pps_normtime.nsec has a range of ( -NSEC_PER_SEC / 2, NSEC_PER_SEC / 2 ] | ||
710 | * while timespec.tv_nsec has a range of [0, NSEC_PER_SEC) */ | ||
711 | struct pps_normtime { | ||
712 | __kernel_time_t sec; /* seconds */ | ||
713 | long nsec; /* nanoseconds */ | ||
714 | }; | ||
715 | |||
716 | /* normalize the timestamp so that nsec is in the | ||
717 | ( -NSEC_PER_SEC / 2, NSEC_PER_SEC / 2 ] interval */ | ||
718 | static inline struct pps_normtime pps_normalize_ts(struct timespec ts) | ||
719 | { | ||
720 | struct pps_normtime norm = { | ||
721 | .sec = ts.tv_sec, | ||
722 | .nsec = ts.tv_nsec | ||
723 | }; | ||
724 | |||
725 | if (norm.nsec > (NSEC_PER_SEC >> 1)) { | ||
726 | norm.nsec -= NSEC_PER_SEC; | ||
727 | norm.sec++; | ||
728 | } | ||
729 | |||
730 | return norm; | ||
731 | } | ||
732 | |||
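pps_normalize_ts() recentres the nanosecond part around zero, so a pulse arriving just before a second boundary becomes a small negative phase error rather than a value close to +1 s. A standalone sketch of the same folding (NSEC_PER_SEC taken as 10^9):

    #include <stdio.h>
    #include <time.h>

    #define NSEC_PER_SEC 1000000000L

    struct pps_normtime { time_t sec; long nsec; };

    static struct pps_normtime normalize_ts(struct timespec ts)
    {
        struct pps_normtime norm = { .sec = ts.tv_sec, .nsec = ts.tv_nsec };

        /* fold (NSEC_PER_SEC/2, NSEC_PER_SEC) into (-NSEC_PER_SEC/2, 0) */
        if (norm.nsec > (NSEC_PER_SEC >> 1)) {
            norm.nsec -= NSEC_PER_SEC;
            norm.sec++;
        }
        return norm;
    }

    int main(void)
    {
        struct timespec before_edge = { .tv_sec = 10, .tv_nsec = 999999000 };
        struct timespec after_edge  = { .tv_sec = 11, .tv_nsec = 1000 };

        struct pps_normtime a = normalize_ts(before_edge);
        struct pps_normtime b = normalize_ts(after_edge);
        printf("%ld.%09ld -> %ld s %+ld ns\n",
               (long)before_edge.tv_sec, before_edge.tv_nsec, (long)a.sec, a.nsec);
        printf("%ld.%09ld -> %ld s %+ld ns\n",
               (long)after_edge.tv_sec, after_edge.tv_nsec, (long)b.sec, b.nsec);
        return 0;
    }

Both timestamps end up attributed to the same second edge, one as -1000 ns and the other as +1000 ns of phase error.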
733 | /* get current phase correction and jitter */ | ||
734 | static inline long pps_phase_filter_get(long *jitter) | ||
735 | { | ||
736 | *jitter = pps_tf[0] - pps_tf[1]; | ||
737 | if (*jitter < 0) | ||
738 | *jitter = -*jitter; | ||
739 | |||
740 | /* TODO: test various filters */ | ||
741 | return pps_tf[0]; | ||
742 | } | ||
743 | |||
744 | /* add the sample to the phase filter */ | ||
745 | static inline void pps_phase_filter_add(long err) | ||
746 | { | ||
747 | pps_tf[2] = pps_tf[1]; | ||
748 | pps_tf[1] = pps_tf[0]; | ||
749 | pps_tf[0] = err; | ||
750 | } | ||
751 | |||
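The three-sample filter above just keeps the newest phase corrections and reports the magnitude of the step between the two most recent ones as jitter; as the TODO notes, it is a placeholder rather than a true median filter. A self-contained sketch of the same shift-register behaviour:

    #include <stdio.h>
    #include <stdlib.h>

    static long tf[3];   /* newest sample in tf[0], like pps_tf[] */

    static void phase_filter_add(long err)
    {
        tf[2] = tf[1];
        tf[1] = tf[0];
        tf[0] = err;
    }

    static long phase_filter_get(long *jitter)
    {
        *jitter = labs(tf[0] - tf[1]);  /* step between the two newest samples */
        return tf[0];                   /* current correction: the newest one */
    }

    int main(void)
    {
        long samples[] = { 120, 95, 480, 110 };  /* made-up corrections, in ns */

        for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
            long jitter;
            phase_filter_add(samples[i]);
            long corr = phase_filter_get(&jitter);
            printf("sample %ld -> correction %ld, jitter %ld\n",
                   samples[i], corr, jitter);
        }
        return 0;
    }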
752 | /* decrease frequency calibration interval length. | ||
753 | * It is halved after four consecutive unstable intervals. | ||
754 | */ | ||
755 | static inline void pps_dec_freq_interval(void) | ||
756 | { | ||
757 | if (--pps_intcnt <= -PPS_INTCOUNT) { | ||
758 | pps_intcnt = -PPS_INTCOUNT; | ||
759 | if (pps_shift > PPS_INTMIN) { | ||
760 | pps_shift--; | ||
761 | pps_intcnt = 0; | ||
762 | } | ||
763 | } | ||
764 | } | ||
765 | |||
766 | /* increase frequency calibration interval length. | ||
767 | * It is doubled after four consecutive stable intervals. | ||
768 | */ | ||
769 | static inline void pps_inc_freq_interval(void) | ||
770 | { | ||
771 | if (++pps_intcnt >= PPS_INTCOUNT) { | ||
772 | pps_intcnt = PPS_INTCOUNT; | ||
773 | if (pps_shift < PPS_INTMAX) { | ||
774 | pps_shift++; | ||
775 | pps_intcnt = 0; | ||
776 | } | ||
777 | } | ||
778 | } | ||
779 | |||
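Together these two helpers implement a hysteresis on the calibration interval length: the interval (2^pps_shift seconds) only doubles or halves after several consecutive stable or unstable intervals, and the counter saturates so a single outlier cannot flip the trend. A sketch of the feedback loop; the bounds below (2, 8, 4) are assumptions standing in for PPS_INTMIN, PPS_INTMAX and PPS_INTCOUNT, whose real values are defined elsewhere.

    #include <stdio.h>
    #include <stdbool.h>

    #define INTMIN   2   /* assumed lower bound on the shift */
    #define INTMAX   8   /* assumed upper bound on the shift */
    #define INTCOUNT 4   /* assumed number of consecutive intervals needed */

    static int shift = INTMIN;   /* interval length is (1 << shift) seconds */
    static int intcnt;

    static void interval_feedback(bool stable)
    {
        if (stable) {
            if (++intcnt >= INTCOUNT) {
                intcnt = INTCOUNT;          /* saturate */
                if (shift < INTMAX) {
                    shift++;                /* double the interval */
                    intcnt = 0;
                }
            }
        } else {
            if (--intcnt <= -INTCOUNT) {
                intcnt = -INTCOUNT;
                if (shift > INTMIN) {
                    shift--;                /* halve the interval */
                    intcnt = 0;
                }
            }
        }
    }

    int main(void)
    {
        bool history[] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0 };

        for (unsigned i = 0; i < sizeof(history) / sizeof(history[0]); i++) {
            interval_feedback(history[i]);
            printf("after %s interval: %d s calibration window\n",
                   history[i] ? "stable" : "unstable", 1 << shift);
        }
        return 0;
    }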
780 | /* update clock frequency based on MONOTONIC_RAW clock PPS signal | ||
781 | * timestamps | ||
782 | * | ||
783 | * At the end of the calibration interval the difference between the | ||
784 | * first and last MONOTONIC_RAW clock timestamps divided by the length | ||
785 | * of the interval becomes the frequency update. If the interval was | ||
786 | * too long, the data are discarded. | ||
787 | * Returns the difference between old and new frequency values. | ||
788 | */ | ||
789 | static long hardpps_update_freq(struct pps_normtime freq_norm) | ||
790 | { | ||
791 | long delta, delta_mod; | ||
792 | s64 ftemp; | ||
793 | |||
794 | /* check if the frequency interval was too long */ | ||
795 | if (freq_norm.sec > (2 << pps_shift)) { | ||
796 | time_status |= STA_PPSERROR; | ||
797 | pps_errcnt++; | ||
798 | pps_dec_freq_interval(); | ||
799 | pr_err("hardpps: PPSERROR: interval too long - %ld s\n", | ||
800 | freq_norm.sec); | ||
801 | return 0; | ||
802 | } | ||
803 | |||
804 | /* here the raw frequency offset and wander (stability) is | ||
805 | * calculated. If the wander is less than the wander threshold | ||
806 | * the interval is increased; otherwise it is decreased. | ||
807 | */ | ||
808 | ftemp = div_s64(((s64)(-freq_norm.nsec)) << NTP_SCALE_SHIFT, | ||
809 | freq_norm.sec); | ||
810 | delta = shift_right(ftemp - pps_freq, NTP_SCALE_SHIFT); | ||
811 | pps_freq = ftemp; | ||
812 | if (delta > PPS_MAXWANDER || delta < -PPS_MAXWANDER) { | ||
813 | pr_warning("hardpps: PPSWANDER: change=%ld\n", delta); | ||
814 | time_status |= STA_PPSWANDER; | ||
815 | pps_stbcnt++; | ||
816 | pps_dec_freq_interval(); | ||
817 | } else { /* good sample */ | ||
818 | pps_inc_freq_interval(); | ||
819 | } | ||
820 | |||
821 | /* the stability metric is calculated as the average of recent | ||
822 | * frequency changes, but is used only for performance | ||
823 | * monitoring | ||
824 | */ | ||
825 | delta_mod = delta; | ||
826 | if (delta_mod < 0) | ||
827 | delta_mod = -delta_mod; | ||
828 | pps_stabil += (div_s64(((s64)delta_mod) << | ||
829 | (NTP_SCALE_SHIFT - SHIFT_USEC), | ||
830 | NSEC_PER_USEC) - pps_stabil) >> PPS_INTMIN; | ||
831 | |||
832 | /* if enabled, the system clock frequency is updated */ | ||
833 | if ((time_status & STA_PPSFREQ) != 0 && | ||
834 | (time_status & STA_FREQHOLD) == 0) { | ||
835 | time_freq = pps_freq; | ||
836 | ntp_update_frequency(); | ||
837 | } | ||
838 | |||
839 | return delta; | ||
840 | } | ||
841 | |||
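hardpps_update_freq() turns the drift accumulated over one calibration interval into a scaled frequency: the negated nanosecond residual divided by the interval length, promoted to the kernel's fixed-point ns/s format, with the interval-to-interval change compared against the wander limit. A plain-integer sketch of that arithmetic; the scale shift of 32 and wander limit of 100000 are assumptions standing in for NTP_SCALE_SHIFT and PPS_MAXWANDER, and multiplication/division replace the kernel's shift macros to keep the C well defined for negative values.

    #include <stdio.h>
    #include <stdint.h>
    #include <inttypes.h>

    #define SCALE_SHIFT 32              /* assumed fixed-point shift */
    #define MAXWANDER   100000L         /* assumed wander limit, ns/s */

    static int64_t freq;                /* scaled frequency offset, like pps_freq */

    /* nsec: residual drift over the interval, sec: interval length in seconds */
    static long update_freq(long nsec, long sec)
    {
        int64_t ftemp = -((int64_t)nsec * (1LL << SCALE_SHIFT)) / sec;
        long delta = (long)((ftemp - freq) / (1LL << SCALE_SHIFT));

        freq = ftemp;
        if (delta > MAXWANDER || delta < -MAXWANDER)
            printf("  wander %ld ns/s exceeds limit, interval would shrink\n", delta);
        return delta;
    }

    int main(void)
    {
        /* the raw clock gained 320 us over a 16 s interval: roughly -20 ppm */
        update_freq(320000, 16);
        printf("frequency offset: %" PRId64 " ns/s (scaled %" PRId64 ")\n",
               freq / (1LL << SCALE_SHIFT), freq);

        /* the next 16 s interval only gains 40 us: modest wander, sample kept */
        update_freq(40000, 16);
        printf("frequency offset: %" PRId64 " ns/s\n", freq / (1LL << SCALE_SHIFT));
        return 0;
    }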
842 | /* correct REALTIME clock phase error against PPS signal */ | ||
843 | static void hardpps_update_phase(long error) | ||
844 | { | ||
845 | long correction = -error; | ||
846 | long jitter; | ||
847 | |||
848 | /* add the sample to the median filter */ | ||
849 | pps_phase_filter_add(correction); | ||
850 | correction = pps_phase_filter_get(&jitter); | ||
851 | |||
852 | /* Nominal jitter is due to PPS signal noise. If it exceeds the | ||
853 | * threshold, the sample is discarded; otherwise, if so enabled, | ||
854 | * the time offset is updated. | ||
855 | */ | ||
856 | if (jitter > (pps_jitter << PPS_POPCORN)) { | ||
857 | pr_warning("hardpps: PPSJITTER: jitter=%ld, limit=%ld\n", | ||
858 | jitter, (pps_jitter << PPS_POPCORN)); | ||
859 | time_status |= STA_PPSJITTER; | ||
860 | pps_jitcnt++; | ||
861 | } else if (time_status & STA_PPSTIME) { | ||
862 | /* correct the time using the phase offset */ | ||
863 | time_offset = div_s64(((s64)correction) << NTP_SCALE_SHIFT, | ||
864 | NTP_INTERVAL_FREQ); | ||
865 | /* cancel running adjtime() */ | ||
866 | time_adjust = 0; | ||
867 | } | ||
868 | /* update jitter */ | ||
869 | pps_jitter += (jitter - pps_jitter) >> PPS_INTMIN; | ||
870 | } | ||
871 | |||
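hardpps_update_phase() applies two guards before trusting a pulse: a "popcorn" limit that discards samples whose jitter exceeds the running estimate by a large factor, and an exponential average that lets the estimate follow the signal slowly. A sketch of just the jitter bookkeeping; the factor of 16 (a shift of 4) and the averaging weight of 1/4 are assumptions standing in for PPS_POPCORN and PPS_INTMIN.

    #include <stdio.h>

    #define POPCORN   4   /* assumed: spike threshold is the estimate << 4 */
    #define AVG_DIV   4   /* assumed: averaging weight of 1/4 */

    static long jitter_est = 1000;   /* running jitter estimate, ns */

    static void feed_sample(long sample_jitter)
    {
        if (sample_jitter > (jitter_est << POPCORN))
            printf("jitter %ld > limit %ld: popcorn spike, sample discarded\n",
                   sample_jitter, jitter_est << POPCORN);
        else
            printf("jitter %ld accepted, phase would be corrected\n", sample_jitter);

        /* the estimate is updated either way, so sustained noise raises the limit */
        jitter_est += (sample_jitter - jitter_est) / AVG_DIV;
    }

    int main(void)
    {
        long samples[] = { 800, 1200, 40000, 900, 1100 };  /* ns, made up */

        for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
            feed_sample(samples[i]);
            printf("  running jitter estimate: %ld ns\n", jitter_est);
        }
        return 0;
    }

A single 40 us spike is rejected, but because the estimate still absorbs a fraction of it, genuinely noisy signals eventually raise the threshold instead of being dropped forever.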
872 | /* | ||
873 | * hardpps() - discipline CPU clock oscillator to external PPS signal | ||
874 | * | ||
875 | * This routine is called at each PPS signal arrival in order to | ||
876 | * discipline the CPU clock oscillator to the PPS signal. It takes two | ||
877 | * parameters: REALTIME and MONOTONIC_RAW clock timestamps. The former | ||
878 | * is used to correct clock phase error and the latter is used to | ||
879 | * correct the frequency. | ||
880 | * | ||
881 | * This code is based on David Mills's reference nanokernel | ||
882 | * implementation. It was mostly rewritten but keeps the same idea. | ||
883 | */ | ||
884 | void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts) | ||
885 | { | ||
886 | struct pps_normtime pts_norm, freq_norm; | ||
887 | unsigned long flags; | ||
888 | |||
889 | pts_norm = pps_normalize_ts(*phase_ts); | ||
890 | |||
891 | write_seqlock_irqsave(&xtime_lock, flags); | ||
892 | |||
893 | /* clear the error bits, they will be set again if needed */ | ||
894 | time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR); | ||
895 | |||
896 | /* indicate signal presence */ | ||
897 | time_status |= STA_PPSSIGNAL; | ||
898 | pps_valid = PPS_VALID; | ||
899 | |||
900 | /* when called for the first time, | ||
901 | * just start the frequency interval */ | ||
902 | if (unlikely(pps_fbase.tv_sec == 0)) { | ||
903 | pps_fbase = *raw_ts; | ||
904 | write_sequnlock_irqrestore(&xtime_lock, flags); | ||
905 | return; | ||
906 | } | ||
907 | |||
908 | /* ok, now we have a base for frequency calculation */ | ||
909 | freq_norm = pps_normalize_ts(timespec_sub(*raw_ts, pps_fbase)); | ||
910 | |||
911 | /* check that the signal is in the range | ||
912 | * [1s - MAXFREQ us, 1s + MAXFREQ us], otherwise reject it */ | ||
913 | if ((freq_norm.sec == 0) || | ||
914 | (freq_norm.nsec > MAXFREQ * freq_norm.sec) || | ||
915 | (freq_norm.nsec < -MAXFREQ * freq_norm.sec)) { | ||
916 | time_status |= STA_PPSJITTER; | ||
917 | /* restart the frequency calibration interval */ | ||
918 | pps_fbase = *raw_ts; | ||
919 | write_sequnlock_irqrestore(&xtime_lock, flags); | ||
920 | pr_err("hardpps: PPSJITTER: bad pulse\n"); | ||
921 | return; | ||
922 | } | ||
923 | |||
924 | /* signal is ok */ | ||
925 | |||
926 | /* check if the current frequency interval is finished */ | ||
927 | if (freq_norm.sec >= (1 << pps_shift)) { | ||
928 | pps_calcnt++; | ||
929 | /* restart the frequency calibration interval */ | ||
930 | pps_fbase = *raw_ts; | ||
931 | hardpps_update_freq(freq_norm); | ||
932 | } | ||
933 | |||
934 | hardpps_update_phase(pts_norm.nsec); | ||
935 | |||
936 | write_sequnlock_irqrestore(&xtime_lock, flags); | ||
937 | } | ||
938 | EXPORT_SYMBOL(hardpps); | ||
939 | |||
940 | #endif /* CONFIG_NTP_PPS */ | ||
941 | |||
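One detail worth calling out in hardpps() above: before a pulse contributes to calibration, the elapsed MONOTONIC_RAW interval since pps_fbase must sit within MAXFREQ nanoseconds per elapsed second of a whole number of seconds, otherwise the pulse is flagged as jitter and the interval restarts. A sketch of that acceptance test, with MAXFREQ assumed to be 500000 ns/s (its real value lives in the timex headers):

    #include <stdio.h>
    #include <stdbool.h>

    #define MAXFREQ 500000L   /* assumed: max tolerated drift, ns per second */

    /* sec/nsec: interval since the start of the calibration window, already
     * normalized so nsec is the signed deviation from whole seconds */
    static bool pulse_acceptable(long sec, long nsec)
    {
        if (sec == 0)
            return false;                 /* pulse came far too soon */
        if (nsec > MAXFREQ * sec || nsec < -MAXFREQ * sec)
            return false;                 /* drifting faster than MAXFREQ */
        return true;
    }

    int main(void)
    {
        printf("4 s + 1.2 ms: %s\n", pulse_acceptable(4, 1200000) ? "ok" : "rejected");
        printf("4 s - 3.0 ms: %s\n", pulse_acceptable(4, -3000000) ? "ok" : "rejected");
        printf("0 s + 0.4 s : %s\n", pulse_acceptable(0, 400000000) ? "ok" : "rejected");
        return 0;
    }

The per-second scaling matters: the longer the calibration interval, the more absolute drift is tolerated before the pulse train is declared bad.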
547 | static int __init ntp_tick_adj_setup(char *str) | 942 | static int __init ntp_tick_adj_setup(char *str) |
548 | { | 943 | { |
549 | ntp_tick_adj = simple_strtol(str, NULL, 0); | 944 | ntp_tick_adj = simple_strtol(str, NULL, 0); |
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 48b2761b5668..a3b5aff62606 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c | |||
@@ -600,4 +600,14 @@ int tick_broadcast_oneshot_active(void) | |||
600 | return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT; | 600 | return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT; |
601 | } | 601 | } |
602 | 602 | ||
603 | /* | ||
604 | * Check whether the broadcast device supports oneshot. | ||
605 | */ | ||
606 | bool tick_broadcast_oneshot_available(void) | ||
607 | { | ||
608 | struct clock_event_device *bc = tick_broadcast_device.evtdev; | ||
609 | |||
610 | return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false; | ||
611 | } | ||
612 | |||
603 | #endif | 613 | #endif |
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 051bc80a0c43..ed228ef6f6b8 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c | |||
@@ -51,7 +51,11 @@ int tick_is_oneshot_available(void) | |||
51 | { | 51 | { |
52 | struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); | 52 | struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); |
53 | 53 | ||
54 | return dev && (dev->features & CLOCK_EVT_FEAT_ONESHOT); | 54 | if (!dev || !(dev->features & CLOCK_EVT_FEAT_ONESHOT)) |
55 | return 0; | ||
56 | if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) | ||
57 | return 1; | ||
58 | return tick_broadcast_oneshot_available(); | ||
55 | } | 59 | } |
56 | 60 | ||
57 | /* | 61 | /* |
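The reworked tick_is_oneshot_available() adds one consideration: a per-CPU clock event device that stops in deep C-states (CLOCK_EVT_FEAT_C3STOP) is only usable for oneshot mode if a broadcast device can take over, and that broadcast device must itself support oneshot. A small standalone sketch of the decision; the flag values and device names below are purely illustrative, not the kernel's.

    #include <stdio.h>
    #include <stdbool.h>
    #include <stddef.h>

    #define FEAT_ONESHOT 0x01   /* hypothetical flag values for the sketch */
    #define FEAT_C3STOP  0x02

    struct clock_event { unsigned features; };

    static bool broadcast_oneshot_available(const struct clock_event *bc)
    {
        return bc && (bc->features & FEAT_ONESHOT);
    }

    static bool oneshot_available(const struct clock_event *dev,
                                  const struct clock_event *broadcast)
    {
        if (!dev || !(dev->features & FEAT_ONESHOT))
            return false;             /* no usable per-CPU device */
        if (!(dev->features & FEAT_C3STOP))
            return true;              /* device keeps running in deep idle */
        /* device stops in deep idle: need an oneshot-capable broadcast device */
        return broadcast_oneshot_available(broadcast);
    }

    int main(void)
    {
        struct clock_event percpu_c3 = { FEAT_ONESHOT | FEAT_C3STOP };
        struct clock_event bc_good   = { FEAT_ONESHOT };
        struct clock_event bc_bad    = { 0 };

        printf("C3STOP device + oneshot broadcast: %d\n",
               oneshot_available(&percpu_c3, &bc_good));
        printf("C3STOP device + periodic broadcast: %d\n",
               oneshot_available(&percpu_c3, &bc_bad));
        printf("always-on device, no broadcast: %d\n",
               oneshot_available(&bc_good, NULL));
        return 0;
    }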
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 290eefbc1f60..f65d3a723a64 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h | |||
@@ -36,6 +36,7 @@ extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup); | |||
36 | extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc); | 36 | extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc); |
37 | extern int tick_broadcast_oneshot_active(void); | 37 | extern int tick_broadcast_oneshot_active(void); |
38 | extern void tick_check_oneshot_broadcast(int cpu); | 38 | extern void tick_check_oneshot_broadcast(int cpu); |
39 | bool tick_broadcast_oneshot_available(void); | ||
39 | # else /* BROADCAST */ | 40 | # else /* BROADCAST */ |
40 | static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) | 41 | static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) |
41 | { | 42 | { |
@@ -46,6 +47,7 @@ static inline void tick_broadcast_switch_to_oneshot(void) { } | |||
46 | static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } | 47 | static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } |
47 | static inline int tick_broadcast_oneshot_active(void) { return 0; } | 48 | static inline int tick_broadcast_oneshot_active(void) { return 0; } |
48 | static inline void tick_check_oneshot_broadcast(int cpu) { } | 49 | static inline void tick_check_oneshot_broadcast(int cpu) { } |
50 | static inline bool tick_broadcast_oneshot_available(void) { return true; } | ||
49 | # endif /* !BROADCAST */ | 51 | # endif /* !BROADCAST */ |
50 | 52 | ||
51 | #else /* !ONESHOT */ | 53 | #else /* !ONESHOT */ |
@@ -76,6 +78,7 @@ static inline int tick_resume_broadcast_oneshot(struct clock_event_device *bc) | |||
76 | return 0; | 78 | return 0; |
77 | } | 79 | } |
78 | static inline int tick_broadcast_oneshot_active(void) { return 0; } | 80 | static inline int tick_broadcast_oneshot_active(void) { return 0; } |
81 | static inline bool tick_broadcast_oneshot_available(void) { return false; } | ||
79 | #endif /* !TICK_ONESHOT */ | 82 | #endif /* !TICK_ONESHOT */ |
80 | 83 | ||
81 | /* | 84 | /* |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 3e216e01bbd1..c55ea2433471 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
@@ -642,8 +642,7 @@ static void tick_nohz_switch_to_nohz(void) | |||
642 | } | 642 | } |
643 | local_irq_enable(); | 643 | local_irq_enable(); |
644 | 644 | ||
645 | printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", | 645 | printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", smp_processor_id()); |
646 | smp_processor_id()); | ||
647 | } | 646 | } |
648 | 647 | ||
649 | /* | 648 | /* |
@@ -795,8 +794,10 @@ void tick_setup_sched_timer(void) | |||
795 | } | 794 | } |
796 | 795 | ||
797 | #ifdef CONFIG_NO_HZ | 796 | #ifdef CONFIG_NO_HZ |
798 | if (tick_nohz_enabled) | 797 | if (tick_nohz_enabled) { |
799 | ts->nohz_mode = NOHZ_MODE_HIGHRES; | 798 | ts->nohz_mode = NOHZ_MODE_HIGHRES; |
799 | printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", smp_processor_id()); | ||
800 | } | ||
800 | #endif | 801 | #endif |
801 | } | 802 | } |
802 | #endif /* HIGH_RES_TIMERS */ | 803 | #endif /* HIGH_RES_TIMERS */ |
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 5bb86da82003..d27c7562902c 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
@@ -49,7 +49,7 @@ struct timekeeper { | |||
49 | u32 mult; | 49 | u32 mult; |
50 | }; | 50 | }; |
51 | 51 | ||
52 | struct timekeeper timekeeper; | 52 | static struct timekeeper timekeeper; |
53 | 53 | ||
54 | /** | 54 | /** |
55 | * timekeeper_setup_internals - Set up internals to use clocksource clock. | 55 | * timekeeper_setup_internals - Set up internals to use clocksource clock. |
@@ -164,7 +164,7 @@ static struct timespec total_sleep_time; | |||
164 | /* | 164 | /* |
165 | * The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. | 165 | * The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. |
166 | */ | 166 | */ |
167 | struct timespec raw_time; | 167 | static struct timespec raw_time; |
168 | 168 | ||
169 | /* flag for if timekeeping is suspended */ | 169 | /* flag for if timekeeping is suspended */ |
170 | int __read_mostly timekeeping_suspended; | 170 | int __read_mostly timekeeping_suspended; |
@@ -288,6 +288,49 @@ void ktime_get_ts(struct timespec *ts) | |||
288 | } | 288 | } |
289 | EXPORT_SYMBOL_GPL(ktime_get_ts); | 289 | EXPORT_SYMBOL_GPL(ktime_get_ts); |
290 | 290 | ||
291 | #ifdef CONFIG_NTP_PPS | ||
292 | |||
293 | /** | ||
294 | * getnstime_raw_and_real - get day and raw monotonic time in timespec format | ||
295 | * @ts_raw: pointer to the timespec to be set to raw monotonic time | ||
296 | * @ts_real: pointer to the timespec to be set to the time of day | ||
297 | * | ||
298 | * This function reads both the time of day and raw monotonic time at the | ||
299 | * same time atomically and stores the resulting timestamps in timespec | ||
300 | * format. | ||
301 | */ | ||
302 | void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real) | ||
303 | { | ||
304 | unsigned long seq; | ||
305 | s64 nsecs_raw, nsecs_real; | ||
306 | |||
307 | WARN_ON_ONCE(timekeeping_suspended); | ||
308 | |||
309 | do { | ||
310 | u32 arch_offset; | ||
311 | |||
312 | seq = read_seqbegin(&xtime_lock); | ||
313 | |||
314 | *ts_raw = raw_time; | ||
315 | *ts_real = xtime; | ||
316 | |||
317 | nsecs_raw = timekeeping_get_ns_raw(); | ||
318 | nsecs_real = timekeeping_get_ns(); | ||
319 | |||
320 | /* If arch requires, add in gettimeoffset() */ | ||
321 | arch_offset = arch_gettimeoffset(); | ||
322 | nsecs_raw += arch_offset; | ||
323 | nsecs_real += arch_offset; | ||
324 | |||
325 | } while (read_seqretry(&xtime_lock, seq)); | ||
326 | |||
327 | timespec_add_ns(ts_raw, nsecs_raw); | ||
328 | timespec_add_ns(ts_real, nsecs_real); | ||
329 | } | ||
330 | EXPORT_SYMBOL(getnstime_raw_and_real); | ||
331 | |||
332 | #endif /* CONFIG_NTP_PPS */ | ||
333 | |||
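getnstime_raw_and_real() leans on the xtime seqlock so that the raw and real timestamps come from the same update generation: the reader snapshots both values and retries if the sequence count changed underneath it. A user-space sketch of that retry pattern with C11 atomics and a single writer thread follows; it is a simplified seqcount, not the kernel API, and the "clocks" are just counters kept in lockstep so consistency is easy to check.

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_uint  seq;                 /* even: stable, odd: write in progress */
    static atomic_llong raw_ns, real_ns;     /* the two "clocks" */

    static void write_pair(long long raw, long long real)
    {
        atomic_fetch_add_explicit(&seq, 1, memory_order_relaxed);   /* -> odd */
        atomic_thread_fence(memory_order_release);
        atomic_store_explicit(&raw_ns, raw, memory_order_relaxed);
        atomic_store_explicit(&real_ns, real, memory_order_relaxed);
        atomic_fetch_add_explicit(&seq, 1, memory_order_release);   /* -> even */
    }

    static void read_pair(long long *raw, long long *real)
    {
        unsigned s1, s2;
        do {
            s1    = atomic_load_explicit(&seq, memory_order_acquire);
            *raw  = atomic_load_explicit(&raw_ns, memory_order_relaxed);
            *real = atomic_load_explicit(&real_ns, memory_order_relaxed);
            atomic_thread_fence(memory_order_acquire);
            s2    = atomic_load_explicit(&seq, memory_order_relaxed);
        } while ((s1 & 1) || s1 != s2);      /* retry on odd or changed sequence */
    }

    static void *writer(void *arg)
    {
        for (long long i = 1; i <= 100000; i++)
            write_pair(i, i + 1000);         /* keep the pair in lockstep */
        return arg;
    }

    int main(void)
    {
        pthread_t t;
        pthread_create(&t, NULL, writer, NULL);

        for (int i = 0; i < 5; i++) {
            long long raw, real;
            read_pair(&raw, &real);
            /* real == raw + 1000 holds because both values always come
             * from the same write generation */
            printf("raw=%lld real=%lld consistent=%d\n",
                   raw, real, real == raw + 1000);
        }
        pthread_join(t, NULL);
        return 0;
    }

Build with -pthread; the point of the demo is that the invariant between the two values never breaks, which is exactly what hardpps() needs from its pair of timestamps.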
291 | /** | 334 | /** |
292 | * do_gettimeofday - Returns the time of day in a timeval | 335 | * do_gettimeofday - Returns the time of day in a timeval |
293 | * @tv: pointer to the timeval to be set | 336 | * @tv: pointer to the timeval to be set |
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 32a19f9397fc..3258455549f4 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c | |||
@@ -41,7 +41,7 @@ static void print_name_offset(struct seq_file *m, void *sym) | |||
41 | char symname[KSYM_NAME_LEN]; | 41 | char symname[KSYM_NAME_LEN]; |
42 | 42 | ||
43 | if (lookup_symbol_name((unsigned long)sym, symname) < 0) | 43 | if (lookup_symbol_name((unsigned long)sym, symname) < 0) |
44 | SEQ_printf(m, "<%p>", sym); | 44 | SEQ_printf(m, "<%pK>", sym); |
45 | else | 45 | else |
46 | SEQ_printf(m, "%s", symname); | 46 | SEQ_printf(m, "%s", symname); |
47 | } | 47 | } |
@@ -112,7 +112,7 @@ next_one: | |||
112 | static void | 112 | static void |
113 | print_base(struct seq_file *m, struct hrtimer_clock_base *base, u64 now) | 113 | print_base(struct seq_file *m, struct hrtimer_clock_base *base, u64 now) |
114 | { | 114 | { |
115 | SEQ_printf(m, " .base: %p\n", base); | 115 | SEQ_printf(m, " .base: %pK\n", base); |
116 | SEQ_printf(m, " .index: %d\n", | 116 | SEQ_printf(m, " .index: %d\n", |
117 | base->index); | 117 | base->index); |
118 | SEQ_printf(m, " .resolution: %Lu nsecs\n", | 118 | SEQ_printf(m, " .resolution: %Lu nsecs\n", |
diff --git a/kernel/timer.c b/kernel/timer.c index 43ca9936f2d0..d6459923d245 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -959,7 +959,7 @@ EXPORT_SYMBOL(try_to_del_timer_sync); | |||
959 | * | 959 | * |
960 | * Synchronization rules: Callers must prevent restarting of the timer, | 960 | * Synchronization rules: Callers must prevent restarting of the timer, |
961 | * otherwise this function is meaningless. It must not be called from | 961 | * otherwise this function is meaningless. It must not be called from |
962 | * hardirq contexts. The caller must not hold locks which would prevent | 962 | * interrupt contexts. The caller must not hold locks which would prevent |
963 | * completion of the timer's handler. The timer's handler must not call | 963 | * completion of the timer's handler. The timer's handler must not call |
964 | * add_timer_on(). Upon exit the timer is not queued and the handler is | 964 | * add_timer_on(). Upon exit the timer is not queued and the handler is |
965 | * not running on any CPU. | 965 | * not running on any CPU. |
@@ -969,10 +969,12 @@ EXPORT_SYMBOL(try_to_del_timer_sync); | |||
969 | int del_timer_sync(struct timer_list *timer) | 969 | int del_timer_sync(struct timer_list *timer) |
970 | { | 970 | { |
971 | #ifdef CONFIG_LOCKDEP | 971 | #ifdef CONFIG_LOCKDEP |
972 | local_bh_disable(); | 972 | unsigned long flags; |
973 | |||
974 | local_irq_save(flags); | ||
973 | lock_map_acquire(&timer->lockdep_map); | 975 | lock_map_acquire(&timer->lockdep_map); |
974 | lock_map_release(&timer->lockdep_map); | 976 | lock_map_release(&timer->lockdep_map); |
975 | local_bh_enable(); | 977 | local_irq_restore(flags); |
976 | #endif | 978 | #endif |
977 | /* | 979 | /* |
978 | * don't use it in hardirq context, because it | 980 | * don't use it in hardirq context, because it |
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 53f338190b26..761c510a06c5 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile | |||
@@ -52,7 +52,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o | |||
52 | endif | 52 | endif |
53 | obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o | 53 | obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o |
54 | obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o | 54 | obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o |
55 | obj-$(CONFIG_EVENT_TRACING) += power-traces.o | 55 | obj-$(CONFIG_TRACEPOINTS) += power-traces.o |
56 | ifeq ($(CONFIG_TRACING),y) | 56 | ifeq ($(CONFIG_TRACING),y) |
57 | obj-$(CONFIG_KGDB_KDB) += trace_kdb.o | 57 | obj-$(CONFIG_KGDB_KDB) += trace_kdb.o |
58 | endif | 58 | endif |
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 7b8ec0281548..cbafed7d4f38 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c | |||
@@ -138,6 +138,13 @@ void __trace_note_message(struct blk_trace *bt, const char *fmt, ...) | |||
138 | !blk_tracer_enabled)) | 138 | !blk_tracer_enabled)) |
139 | return; | 139 | return; |
140 | 140 | ||
141 | /* | ||
142 | * If the BLK_TC_NOTIFY action mask isn't set, don't send any note | ||
143 | * message to the trace. | ||
144 | */ | ||
145 | if (!(bt->act_mask & BLK_TC_NOTIFY)) | ||
146 | return; | ||
147 | |||
141 | local_irq_save(flags); | 148 | local_irq_save(flags); |
142 | buf = per_cpu_ptr(bt->msg_data, smp_processor_id()); | 149 | buf = per_cpu_ptr(bt->msg_data, smp_processor_id()); |
143 | va_start(args, fmt); | 150 | va_start(args, fmt); |
@@ -758,53 +765,58 @@ static void blk_add_trace_rq_complete(void *ignore, | |||
758 | * @q: queue the io is for | 765 | * @q: queue the io is for |
759 | * @bio: the source bio | 766 | * @bio: the source bio |
760 | * @what: the action | 767 | * @what: the action |
768 | * @error: error, if any | ||
761 | * | 769 | * |
762 | * Description: | 770 | * Description: |
763 | * Records an action against a bio. Will log the bio offset + size. | 771 | * Records an action against a bio. Will log the bio offset + size. |
764 | * | 772 | * |
765 | **/ | 773 | **/ |
766 | static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, | 774 | static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, |
767 | u32 what) | 775 | u32 what, int error) |
768 | { | 776 | { |
769 | struct blk_trace *bt = q->blk_trace; | 777 | struct blk_trace *bt = q->blk_trace; |
770 | 778 | ||
771 | if (likely(!bt)) | 779 | if (likely(!bt)) |
772 | return; | 780 | return; |
773 | 781 | ||
782 | if (!error && !bio_flagged(bio, BIO_UPTODATE)) | ||
783 | error = EIO; | ||
784 | |||
774 | __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, | 785 | __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, |
775 | !bio_flagged(bio, BIO_UPTODATE), 0, NULL); | 786 | error, 0, NULL); |
776 | } | 787 | } |
777 | 788 | ||
778 | static void blk_add_trace_bio_bounce(void *ignore, | 789 | static void blk_add_trace_bio_bounce(void *ignore, |
779 | struct request_queue *q, struct bio *bio) | 790 | struct request_queue *q, struct bio *bio) |
780 | { | 791 | { |
781 | blk_add_trace_bio(q, bio, BLK_TA_BOUNCE); | 792 | blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0); |
782 | } | 793 | } |
783 | 794 | ||
784 | static void blk_add_trace_bio_complete(void *ignore, | 795 | static void blk_add_trace_bio_complete(void *ignore, |
785 | struct request_queue *q, struct bio *bio) | 796 | struct request_queue *q, struct bio *bio, |
797 | int error) | ||
786 | { | 798 | { |
787 | blk_add_trace_bio(q, bio, BLK_TA_COMPLETE); | 799 | blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error); |
788 | } | 800 | } |
789 | 801 | ||
790 | static void blk_add_trace_bio_backmerge(void *ignore, | 802 | static void blk_add_trace_bio_backmerge(void *ignore, |
791 | struct request_queue *q, | 803 | struct request_queue *q, |
792 | struct bio *bio) | 804 | struct bio *bio) |
793 | { | 805 | { |
794 | blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); | 806 | blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0); |
795 | } | 807 | } |
796 | 808 | ||
797 | static void blk_add_trace_bio_frontmerge(void *ignore, | 809 | static void blk_add_trace_bio_frontmerge(void *ignore, |
798 | struct request_queue *q, | 810 | struct request_queue *q, |
799 | struct bio *bio) | 811 | struct bio *bio) |
800 | { | 812 | { |
801 | blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); | 813 | blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0); |
802 | } | 814 | } |
803 | 815 | ||
804 | static void blk_add_trace_bio_queue(void *ignore, | 816 | static void blk_add_trace_bio_queue(void *ignore, |
805 | struct request_queue *q, struct bio *bio) | 817 | struct request_queue *q, struct bio *bio) |
806 | { | 818 | { |
807 | blk_add_trace_bio(q, bio, BLK_TA_QUEUE); | 819 | blk_add_trace_bio(q, bio, BLK_TA_QUEUE, 0); |
808 | } | 820 | } |
809 | 821 | ||
810 | static void blk_add_trace_getrq(void *ignore, | 822 | static void blk_add_trace_getrq(void *ignore, |
@@ -812,7 +824,7 @@ static void blk_add_trace_getrq(void *ignore, | |||
812 | struct bio *bio, int rw) | 824 | struct bio *bio, int rw) |
813 | { | 825 | { |
814 | if (bio) | 826 | if (bio) |
815 | blk_add_trace_bio(q, bio, BLK_TA_GETRQ); | 827 | blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0); |
816 | else { | 828 | else { |
817 | struct blk_trace *bt = q->blk_trace; | 829 | struct blk_trace *bt = q->blk_trace; |
818 | 830 | ||
@@ -827,7 +839,7 @@ static void blk_add_trace_sleeprq(void *ignore, | |||
827 | struct bio *bio, int rw) | 839 | struct bio *bio, int rw) |
828 | { | 840 | { |
829 | if (bio) | 841 | if (bio) |
830 | blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ); | 842 | blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0); |
831 | else { | 843 | else { |
832 | struct blk_trace *bt = q->blk_trace; | 844 | struct blk_trace *bt = q->blk_trace; |
833 | 845 | ||
@@ -887,7 +899,7 @@ static void blk_add_trace_split(void *ignore, | |||
887 | } | 899 | } |
888 | 900 | ||
889 | /** | 901 | /** |
890 | * blk_add_trace_remap - Add a trace for a remap operation | 902 | * blk_add_trace_bio_remap - Add a trace for a bio-remap operation |
891 | * @ignore: trace callback data parameter (not used) | 903 | * @ignore: trace callback data parameter (not used) |
892 | * @q: queue the io is for | 904 | * @q: queue the io is for |
893 | * @bio: the source bio | 905 | * @bio: the source bio |
@@ -899,9 +911,9 @@ static void blk_add_trace_split(void *ignore, | |||
899 | * it spans a stripe (or similar). Add a trace for that action. | 911 | * it spans a stripe (or similar). Add a trace for that action. |
900 | * | 912 | * |
901 | **/ | 913 | **/ |
902 | static void blk_add_trace_remap(void *ignore, | 914 | static void blk_add_trace_bio_remap(void *ignore, |
903 | struct request_queue *q, struct bio *bio, | 915 | struct request_queue *q, struct bio *bio, |
904 | dev_t dev, sector_t from) | 916 | dev_t dev, sector_t from) |
905 | { | 917 | { |
906 | struct blk_trace *bt = q->blk_trace; | 918 | struct blk_trace *bt = q->blk_trace; |
907 | struct blk_io_trace_remap r; | 919 | struct blk_io_trace_remap r; |
@@ -1016,7 +1028,7 @@ static void blk_register_tracepoints(void) | |||
1016 | WARN_ON(ret); | 1028 | WARN_ON(ret); |
1017 | ret = register_trace_block_split(blk_add_trace_split, NULL); | 1029 | ret = register_trace_block_split(blk_add_trace_split, NULL); |
1018 | WARN_ON(ret); | 1030 | WARN_ON(ret); |
1019 | ret = register_trace_block_remap(blk_add_trace_remap, NULL); | 1031 | ret = register_trace_block_bio_remap(blk_add_trace_bio_remap, NULL); |
1020 | WARN_ON(ret); | 1032 | WARN_ON(ret); |
1021 | ret = register_trace_block_rq_remap(blk_add_trace_rq_remap, NULL); | 1033 | ret = register_trace_block_rq_remap(blk_add_trace_rq_remap, NULL); |
1022 | WARN_ON(ret); | 1034 | WARN_ON(ret); |
@@ -1025,7 +1037,7 @@ static void blk_register_tracepoints(void) | |||
1025 | static void blk_unregister_tracepoints(void) | 1037 | static void blk_unregister_tracepoints(void) |
1026 | { | 1038 | { |
1027 | unregister_trace_block_rq_remap(blk_add_trace_rq_remap, NULL); | 1039 | unregister_trace_block_rq_remap(blk_add_trace_rq_remap, NULL); |
1028 | unregister_trace_block_remap(blk_add_trace_remap, NULL); | 1040 | unregister_trace_block_bio_remap(blk_add_trace_bio_remap, NULL); |
1029 | unregister_trace_block_split(blk_add_trace_split, NULL); | 1041 | unregister_trace_block_split(blk_add_trace_split, NULL); |
1030 | unregister_trace_block_unplug_io(blk_add_trace_unplug_io, NULL); | 1042 | unregister_trace_block_unplug_io(blk_add_trace_unplug_io, NULL); |
1031 | unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL); | 1043 | unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL); |
@@ -1815,21 +1827,5 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes) | |||
1815 | rwbs[i] = '\0'; | 1827 | rwbs[i] = '\0'; |
1816 | } | 1828 | } |
1817 | 1829 | ||
1818 | void blk_fill_rwbs_rq(char *rwbs, struct request *rq) | ||
1819 | { | ||
1820 | int rw = rq->cmd_flags & 0x03; | ||
1821 | int bytes; | ||
1822 | |||
1823 | if (rq->cmd_flags & REQ_DISCARD) | ||
1824 | rw |= REQ_DISCARD; | ||
1825 | |||
1826 | if (rq->cmd_flags & REQ_SECURE) | ||
1827 | rw |= REQ_SECURE; | ||
1828 | |||
1829 | bytes = blk_rq_bytes(rq); | ||
1830 | |||
1831 | blk_fill_rwbs(rwbs, rw, bytes); | ||
1832 | } | ||
1833 | |||
1834 | #endif /* CONFIG_EVENT_TRACING */ | 1830 | #endif /* CONFIG_EVENT_TRACING */ |
1835 | 1831 | ||
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f8cf959bad45..dc53ecb80589 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
@@ -1313,12 +1313,10 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) | |||
1313 | 1313 | ||
1314 | __this_cpu_inc(user_stack_count); | 1314 | __this_cpu_inc(user_stack_count); |
1315 | 1315 | ||
1316 | |||
1317 | |||
1318 | event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, | 1316 | event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, |
1319 | sizeof(*entry), flags, pc); | 1317 | sizeof(*entry), flags, pc); |
1320 | if (!event) | 1318 | if (!event) |
1321 | return; | 1319 | goto out_drop_count; |
1322 | entry = ring_buffer_event_data(event); | 1320 | entry = ring_buffer_event_data(event); |
1323 | 1321 | ||
1324 | entry->tgid = current->tgid; | 1322 | entry->tgid = current->tgid; |
@@ -1333,8 +1331,8 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) | |||
1333 | if (!filter_check_discard(call, entry, buffer, event)) | 1331 | if (!filter_check_discard(call, entry, buffer, event)) |
1334 | ring_buffer_unlock_commit(buffer, event); | 1332 | ring_buffer_unlock_commit(buffer, event); |
1335 | 1333 | ||
1334 | out_drop_count: | ||
1336 | __this_cpu_dec(user_stack_count); | 1335 | __this_cpu_dec(user_stack_count); |
1337 | |||
1338 | out: | 1336 | out: |
1339 | preempt_enable(); | 1337 | preempt_enable(); |
1340 | } | 1338 | } |
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index e3dfecaf13e6..6cf223764be8 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h | |||
@@ -53,7 +53,7 @@ | |||
53 | */ | 53 | */ |
54 | 54 | ||
55 | /* | 55 | /* |
56 | * Function trace entry - function address and parent function addres: | 56 | * Function trace entry - function address and parent function address: |
57 | */ | 57 | */ |
58 | FTRACE_ENTRY(function, ftrace_entry, | 58 | FTRACE_ENTRY(function, ftrace_entry, |
59 | 59 | ||
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 35fde09b81de..5f499e0438a4 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c | |||
@@ -1284,7 +1284,7 @@ trace_create_file_ops(struct module *mod) | |||
1284 | static void trace_module_add_events(struct module *mod) | 1284 | static void trace_module_add_events(struct module *mod) |
1285 | { | 1285 | { |
1286 | struct ftrace_module_file_ops *file_ops = NULL; | 1286 | struct ftrace_module_file_ops *file_ops = NULL; |
1287 | struct ftrace_event_call *call, *start, *end; | 1287 | struct ftrace_event_call **call, **start, **end; |
1288 | 1288 | ||
1289 | start = mod->trace_events; | 1289 | start = mod->trace_events; |
1290 | end = mod->trace_events + mod->num_trace_events; | 1290 | end = mod->trace_events + mod->num_trace_events; |
@@ -1297,7 +1297,7 @@ static void trace_module_add_events(struct module *mod) | |||
1297 | return; | 1297 | return; |
1298 | 1298 | ||
1299 | for_each_event(call, start, end) { | 1299 | for_each_event(call, start, end) { |
1300 | __trace_add_event_call(call, mod, | 1300 | __trace_add_event_call(*call, mod, |
1301 | &file_ops->id, &file_ops->enable, | 1301 | &file_ops->id, &file_ops->enable, |
1302 | &file_ops->filter, &file_ops->format); | 1302 | &file_ops->filter, &file_ops->format); |
1303 | } | 1303 | } |
@@ -1367,8 +1367,8 @@ static struct notifier_block trace_module_nb = { | |||
1367 | .priority = 0, | 1367 | .priority = 0, |
1368 | }; | 1368 | }; |
1369 | 1369 | ||
1370 | extern struct ftrace_event_call __start_ftrace_events[]; | 1370 | extern struct ftrace_event_call *__start_ftrace_events[]; |
1371 | extern struct ftrace_event_call __stop_ftrace_events[]; | 1371 | extern struct ftrace_event_call *__stop_ftrace_events[]; |
1372 | 1372 | ||
1373 | static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata; | 1373 | static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata; |
1374 | 1374 | ||
@@ -1384,7 +1384,7 @@ __setup("trace_event=", setup_trace_event); | |||
1384 | 1384 | ||
1385 | static __init int event_trace_init(void) | 1385 | static __init int event_trace_init(void) |
1386 | { | 1386 | { |
1387 | struct ftrace_event_call *call; | 1387 | struct ftrace_event_call **call; |
1388 | struct dentry *d_tracer; | 1388 | struct dentry *d_tracer; |
1389 | struct dentry *entry; | 1389 | struct dentry *entry; |
1390 | struct dentry *d_events; | 1390 | struct dentry *d_events; |
@@ -1430,7 +1430,7 @@ static __init int event_trace_init(void) | |||
1430 | pr_warning("tracing: Failed to allocate common fields"); | 1430 | pr_warning("tracing: Failed to allocate common fields"); |
1431 | 1431 | ||
1432 | for_each_event(call, __start_ftrace_events, __stop_ftrace_events) { | 1432 | for_each_event(call, __start_ftrace_events, __stop_ftrace_events) { |
1433 | __trace_add_event_call(call, NULL, &ftrace_event_id_fops, | 1433 | __trace_add_event_call(*call, NULL, &ftrace_event_id_fops, |
1434 | &ftrace_enable_fops, | 1434 | &ftrace_enable_fops, |
1435 | &ftrace_event_filter_fops, | 1435 | &ftrace_event_filter_fops, |
1436 | &ftrace_event_format_fops); | 1436 | &ftrace_event_format_fops); |
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 4b74d71705c0..bbeec31e0ae3 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c | |||
@@ -161,13 +161,13 @@ struct ftrace_event_class event_class_ftrace_##call = { \ | |||
161 | .fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\ | 161 | .fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\ |
162 | }; \ | 162 | }; \ |
163 | \ | 163 | \ |
164 | struct ftrace_event_call __used \ | 164 | struct ftrace_event_call __used event_##call = { \ |
165 | __attribute__((__aligned__(4))) \ | ||
166 | __attribute__((section("_ftrace_events"))) event_##call = { \ | ||
167 | .name = #call, \ | 165 | .name = #call, \ |
168 | .event.type = etype, \ | 166 | .event.type = etype, \ |
169 | .class = &event_class_ftrace_##call, \ | 167 | .class = &event_class_ftrace_##call, \ |
170 | .print_fmt = print, \ | 168 | .print_fmt = print, \ |
171 | }; \ | 169 | }; \ |
170 | struct ftrace_event_call __used \ | ||
171 | __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call; | ||
172 | 172 | ||
173 | #include "trace_entries.h" | 173 | #include "trace_entries.h" |
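The trace_export.c change above, like the trace_events.c hunk before it and the syscall-metadata and tracepoint hunks further down, switches from placing whole structs in a linker section to placing pointers to them there, which relaxes the alignment/padding requirements on the section contents. A user-space sketch of the same pattern; it assumes a GCC- or Clang-compatible toolchain on an ELF target, where the linker synthesizes __start_/__stop_ symbols for any section whose name is a valid C identifier, and the names my_events, DEFINE_EVENT and __ptr_* are invented for the example.

    #include <stdio.h>

    struct event {
        const char *name;
        int type;
    };

    /* The event object itself lives in normal data; only a pointer to it is
     * emitted into the "my_events" section. */
    #define DEFINE_EVENT(ident, t)                                          \
        static struct event ident = { .name = #ident, .type = (t) };        \
        static struct event *__ptr_##ident                                  \
            __attribute__((used, section("my_events"))) = &ident

    DEFINE_EVENT(sched_switch, 1);
    DEFINE_EVENT(irq_handler_entry, 2);

    /* Provided by the linker for sections with C-identifier names. */
    extern struct event *__start_my_events[];
    extern struct event *__stop_my_events[];

    int main(void)
    {
        for (struct event **p = __start_my_events; p < __stop_my_events; p++)
            printf("%s (type %d)\n", (*p)->name, (*p)->type);
        return 0;
    }

Iterating over an array of pointers is why the patch's loops dereference *call, (*start)->name and (*iter)->name instead of touching the section entries directly.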
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 5cf8c602b880..92b6e1e12d98 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c | |||
@@ -453,14 +453,6 @@ void time_hardirqs_off(unsigned long a0, unsigned long a1) | |||
453 | * Stubs: | 453 | * Stubs: |
454 | */ | 454 | */ |
455 | 455 | ||
456 | void early_boot_irqs_off(void) | ||
457 | { | ||
458 | } | ||
459 | |||
460 | void early_boot_irqs_on(void) | ||
461 | { | ||
462 | } | ||
463 | |||
464 | void trace_softirqs_on(unsigned long ip) | 456 | void trace_softirqs_on(unsigned long ip) |
465 | { | 457 | { |
466 | } | 458 | } |
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index bac752f0cfb5..5c9fe08d2093 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c | |||
@@ -23,9 +23,6 @@ static int syscall_exit_register(struct ftrace_event_call *event, | |||
23 | static int syscall_enter_define_fields(struct ftrace_event_call *call); | 23 | static int syscall_enter_define_fields(struct ftrace_event_call *call); |
24 | static int syscall_exit_define_fields(struct ftrace_event_call *call); | 24 | static int syscall_exit_define_fields(struct ftrace_event_call *call); |
25 | 25 | ||
26 | /* All syscall exit events have the same fields */ | ||
27 | static LIST_HEAD(syscall_exit_fields); | ||
28 | |||
29 | static struct list_head * | 26 | static struct list_head * |
30 | syscall_get_enter_fields(struct ftrace_event_call *call) | 27 | syscall_get_enter_fields(struct ftrace_event_call *call) |
31 | { | 28 | { |
@@ -34,50 +31,45 @@ syscall_get_enter_fields(struct ftrace_event_call *call) | |||
34 | return &entry->enter_fields; | 31 | return &entry->enter_fields; |
35 | } | 32 | } |
36 | 33 | ||
37 | static struct list_head * | ||
38 | syscall_get_exit_fields(struct ftrace_event_call *call) | ||
39 | { | ||
40 | return &syscall_exit_fields; | ||
41 | } | ||
42 | |||
43 | struct trace_event_functions enter_syscall_print_funcs = { | 34 | struct trace_event_functions enter_syscall_print_funcs = { |
44 | .trace = print_syscall_enter, | 35 | .trace = print_syscall_enter, |
45 | }; | 36 | }; |
46 | 37 | ||
47 | struct trace_event_functions exit_syscall_print_funcs = { | 38 | struct trace_event_functions exit_syscall_print_funcs = { |
48 | .trace = print_syscall_exit, | 39 | .trace = print_syscall_exit, |
49 | }; | 40 | }; |
50 | 41 | ||
51 | struct ftrace_event_class event_class_syscall_enter = { | 42 | struct ftrace_event_class event_class_syscall_enter = { |
52 | .system = "syscalls", | 43 | .system = "syscalls", |
53 | .reg = syscall_enter_register, | 44 | .reg = syscall_enter_register, |
54 | .define_fields = syscall_enter_define_fields, | 45 | .define_fields = syscall_enter_define_fields, |
55 | .get_fields = syscall_get_enter_fields, | 46 | .get_fields = syscall_get_enter_fields, |
56 | .raw_init = init_syscall_trace, | 47 | .raw_init = init_syscall_trace, |
57 | }; | 48 | }; |
58 | 49 | ||
59 | struct ftrace_event_class event_class_syscall_exit = { | 50 | struct ftrace_event_class event_class_syscall_exit = { |
60 | .system = "syscalls", | 51 | .system = "syscalls", |
61 | .reg = syscall_exit_register, | 52 | .reg = syscall_exit_register, |
62 | .define_fields = syscall_exit_define_fields, | 53 | .define_fields = syscall_exit_define_fields, |
63 | .get_fields = syscall_get_exit_fields, | 54 | .fields = LIST_HEAD_INIT(event_class_syscall_exit.fields), |
64 | .raw_init = init_syscall_trace, | 55 | .raw_init = init_syscall_trace, |
65 | }; | 56 | }; |
66 | 57 | ||
67 | extern unsigned long __start_syscalls_metadata[]; | 58 | extern struct syscall_metadata *__start_syscalls_metadata[]; |
68 | extern unsigned long __stop_syscalls_metadata[]; | 59 | extern struct syscall_metadata *__stop_syscalls_metadata[]; |
69 | 60 | ||
70 | static struct syscall_metadata **syscalls_metadata; | 61 | static struct syscall_metadata **syscalls_metadata; |
71 | 62 | ||
72 | static struct syscall_metadata *find_syscall_meta(unsigned long syscall) | 63 | static __init struct syscall_metadata * |
64 | find_syscall_meta(unsigned long syscall) | ||
73 | { | 65 | { |
74 | struct syscall_metadata *start; | 66 | struct syscall_metadata **start; |
75 | struct syscall_metadata *stop; | 67 | struct syscall_metadata **stop; |
76 | char str[KSYM_SYMBOL_LEN]; | 68 | char str[KSYM_SYMBOL_LEN]; |
77 | 69 | ||
78 | 70 | ||
79 | start = (struct syscall_metadata *)__start_syscalls_metadata; | 71 | start = __start_syscalls_metadata; |
80 | stop = (struct syscall_metadata *)__stop_syscalls_metadata; | 72 | stop = __stop_syscalls_metadata; |
81 | kallsyms_lookup(syscall, NULL, NULL, NULL, str); | 73 | kallsyms_lookup(syscall, NULL, NULL, NULL, str); |
82 | 74 | ||
83 | for ( ; start < stop; start++) { | 75 | for ( ; start < stop; start++) { |
@@ -87,8 +79,8 @@ static struct syscall_metadata *find_syscall_meta(unsigned long syscall) | |||
87 | * with "SyS" instead of "sys", leading to an unwanted | 79 | * with "SyS" instead of "sys", leading to an unwanted |
88 | * mismatch. | 80 | * mismatch. |
89 | */ | 81 | */ |
90 | if (start->name && !strcmp(start->name + 3, str + 3)) | 82 | if ((*start)->name && !strcmp((*start)->name + 3, str + 3)) |
91 | return start; | 83 | return *start; |
92 | } | 84 | } |
93 | return NULL; | 85 | return NULL; |
94 | } | 86 | } |
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index e95ee7f31d43..68187af4889e 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c | |||
@@ -27,8 +27,8 @@ | |||
27 | #include <linux/sched.h> | 27 | #include <linux/sched.h> |
28 | #include <linux/jump_label.h> | 28 | #include <linux/jump_label.h> |
29 | 29 | ||
30 | extern struct tracepoint __start___tracepoints[]; | 30 | extern struct tracepoint * const __start___tracepoints_ptrs[]; |
31 | extern struct tracepoint __stop___tracepoints[]; | 31 | extern struct tracepoint * const __stop___tracepoints_ptrs[]; |
32 | 32 | ||
33 | /* Set to 1 to enable tracepoint debug output */ | 33 | /* Set to 1 to enable tracepoint debug output */ |
34 | static const int tracepoint_debug; | 34 | static const int tracepoint_debug; |
@@ -298,10 +298,10 @@ static void disable_tracepoint(struct tracepoint *elem) | |||
298 | * | 298 | * |
299 | * Updates the probe callback corresponding to a range of tracepoints. | 299 | * Updates the probe callback corresponding to a range of tracepoints. |
300 | */ | 300 | */ |
301 | void | 301 | void tracepoint_update_probe_range(struct tracepoint * const *begin, |
302 | tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end) | 302 | struct tracepoint * const *end) |
303 | { | 303 | { |
304 | struct tracepoint *iter; | 304 | struct tracepoint * const *iter; |
305 | struct tracepoint_entry *mark_entry; | 305 | struct tracepoint_entry *mark_entry; |
306 | 306 | ||
307 | if (!begin) | 307 | if (!begin) |
@@ -309,12 +309,12 @@ tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end) | |||
309 | 309 | ||
310 | mutex_lock(&tracepoints_mutex); | 310 | mutex_lock(&tracepoints_mutex); |
311 | for (iter = begin; iter < end; iter++) { | 311 | for (iter = begin; iter < end; iter++) { |
312 | mark_entry = get_tracepoint(iter->name); | 312 | mark_entry = get_tracepoint((*iter)->name); |
313 | if (mark_entry) { | 313 | if (mark_entry) { |
314 | set_tracepoint(&mark_entry, iter, | 314 | set_tracepoint(&mark_entry, *iter, |
315 | !!mark_entry->refcount); | 315 | !!mark_entry->refcount); |
316 | } else { | 316 | } else { |
317 | disable_tracepoint(iter); | 317 | disable_tracepoint(*iter); |
318 | } | 318 | } |
319 | } | 319 | } |
320 | mutex_unlock(&tracepoints_mutex); | 320 | mutex_unlock(&tracepoints_mutex); |
@@ -326,8 +326,8 @@ tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end) | |||
326 | static void tracepoint_update_probes(void) | 326 | static void tracepoint_update_probes(void) |
327 | { | 327 | { |
328 | /* Core kernel tracepoints */ | 328 | /* Core kernel tracepoints */ |
329 | tracepoint_update_probe_range(__start___tracepoints, | 329 | tracepoint_update_probe_range(__start___tracepoints_ptrs, |
330 | __stop___tracepoints); | 330 | __stop___tracepoints_ptrs); |
331 | /* tracepoints in modules. */ | 331 | /* tracepoints in modules. */ |
332 | module_update_tracepoints(); | 332 | module_update_tracepoints(); |
333 | } | 333 | } |
@@ -514,8 +514,8 @@ EXPORT_SYMBOL_GPL(tracepoint_probe_update_all); | |||
514 | * Will return the first tracepoint in the range if the input tracepoint is | 514 | * Will return the first tracepoint in the range if the input tracepoint is |
515 | * NULL. | 515 | * NULL. |
516 | */ | 516 | */ |
517 | int tracepoint_get_iter_range(struct tracepoint **tracepoint, | 517 | int tracepoint_get_iter_range(struct tracepoint * const **tracepoint, |
518 | struct tracepoint *begin, struct tracepoint *end) | 518 | struct tracepoint * const *begin, struct tracepoint * const *end) |
519 | { | 519 | { |
520 | if (!*tracepoint && begin != end) { | 520 | if (!*tracepoint && begin != end) { |
521 | *tracepoint = begin; | 521 | *tracepoint = begin; |
@@ -534,7 +534,8 @@ static void tracepoint_get_iter(struct tracepoint_iter *iter) | |||
534 | /* Core kernel tracepoints */ | 534 | /* Core kernel tracepoints */ |
535 | if (!iter->module) { | 535 | if (!iter->module) { |
536 | found = tracepoint_get_iter_range(&iter->tracepoint, | 536 | found = tracepoint_get_iter_range(&iter->tracepoint, |
537 | __start___tracepoints, __stop___tracepoints); | 537 | __start___tracepoints_ptrs, |
538 | __stop___tracepoints_ptrs); | ||
538 | if (found) | 539 | if (found) |
539 | goto end; | 540 | goto end; |
540 | } | 541 | } |
@@ -585,8 +586,8 @@ int tracepoint_module_notify(struct notifier_block *self, | |||
585 | switch (val) { | 586 | switch (val) { |
586 | case MODULE_STATE_COMING: | 587 | case MODULE_STATE_COMING: |
587 | case MODULE_STATE_GOING: | 588 | case MODULE_STATE_GOING: |
588 | tracepoint_update_probe_range(mod->tracepoints, | 589 | tracepoint_update_probe_range(mod->tracepoints_ptrs, |
589 | mod->tracepoints + mod->num_tracepoints); | 590 | mod->tracepoints_ptrs + mod->num_tracepoints); |
590 | break; | 591 | break; |
591 | } | 592 | } |
592 | return 0; | 593 | return 0; |
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 25915832291a..9da289c34f22 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c | |||
@@ -12,6 +12,8 @@ | |||
12 | #include <linux/highuid.h> | 12 | #include <linux/highuid.h> |
13 | #include <linux/cred.h> | 13 | #include <linux/cred.h> |
14 | 14 | ||
15 | static struct kmem_cache *user_ns_cachep __read_mostly; | ||
16 | |||
15 | /* | 17 | /* |
16 | * Create a new user namespace, deriving the creator from the user in the | 18 | * Create a new user namespace, deriving the creator from the user in the |
17 | * passed credentials, and replacing that user with the new root user for the | 19 | * passed credentials, and replacing that user with the new root user for the |
@@ -26,7 +28,7 @@ int create_user_ns(struct cred *new) | |||
26 | struct user_struct *root_user; | 28 | struct user_struct *root_user; |
27 | int n; | 29 | int n; |
28 | 30 | ||
29 | ns = kmalloc(sizeof(struct user_namespace), GFP_KERNEL); | 31 | ns = kmem_cache_alloc(user_ns_cachep, GFP_KERNEL); |
30 | if (!ns) | 32 | if (!ns) |
31 | return -ENOMEM; | 33 | return -ENOMEM; |
32 | 34 | ||
@@ -38,7 +40,7 @@ int create_user_ns(struct cred *new) | |||
38 | /* Alloc new root user. */ | 40 | /* Alloc new root user. */ |
39 | root_user = alloc_uid(ns, 0); | 41 | root_user = alloc_uid(ns, 0); |
40 | if (!root_user) { | 42 | if (!root_user) { |
41 | kfree(ns); | 43 | kmem_cache_free(user_ns_cachep, ns); |
42 | return -ENOMEM; | 44 | return -ENOMEM; |
43 | } | 45 | } |
44 | 46 | ||
@@ -71,7 +73,7 @@ static void free_user_ns_work(struct work_struct *work) | |||
71 | struct user_namespace *ns = | 73 | struct user_namespace *ns = |
72 | container_of(work, struct user_namespace, destroyer); | 74 | container_of(work, struct user_namespace, destroyer); |
73 | free_uid(ns->creator); | 75 | free_uid(ns->creator); |
74 | kfree(ns); | 76 | kmem_cache_free(user_ns_cachep, ns); |
75 | } | 77 | } |
76 | 78 | ||
77 | void free_user_ns(struct kref *kref) | 79 | void free_user_ns(struct kref *kref) |
@@ -126,3 +128,10 @@ gid_t user_ns_map_gid(struct user_namespace *to, const struct cred *cred, gid_t | |||
126 | /* No useful relationship so no mapping */ | 128 | /* No useful relationship so no mapping */ |
127 | return overflowgid; | 129 | return overflowgid; |
128 | } | 130 | } |
131 | |||
132 | static __init int user_namespaces_init(void) | ||
133 | { | ||
134 | user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC); | ||
135 | return 0; | ||
136 | } | ||
137 | module_init(user_namespaces_init); | ||
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index d7ebdf4cea98..18bb15776c57 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
@@ -27,7 +27,7 @@ | |||
27 | #include <asm/irq_regs.h> | 27 | #include <asm/irq_regs.h> |
28 | #include <linux/perf_event.h> | 28 | #include <linux/perf_event.h> |
29 | 29 | ||
30 | int watchdog_enabled; | 30 | int watchdog_enabled = 1; |
31 | int __read_mostly softlockup_thresh = 60; | 31 | int __read_mostly softlockup_thresh = 60; |
32 | 32 | ||
33 | static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); | 33 | static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); |
@@ -43,9 +43,6 @@ static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); | |||
43 | static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); | 43 | static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); |
44 | #endif | 44 | #endif |
45 | 45 | ||
46 | static int no_watchdog; | ||
47 | |||
48 | |||
49 | /* boot commands */ | 46 | /* boot commands */ |
50 | /* | 47 | /* |
51 | * Should we panic when a soft-lockup or hard-lockup occurs: | 48 | * Should we panic when a soft-lockup or hard-lockup occurs: |
@@ -58,7 +55,7 @@ static int __init hardlockup_panic_setup(char *str) | |||
58 | if (!strncmp(str, "panic", 5)) | 55 | if (!strncmp(str, "panic", 5)) |
59 | hardlockup_panic = 1; | 56 | hardlockup_panic = 1; |
60 | else if (!strncmp(str, "0", 1)) | 57 | else if (!strncmp(str, "0", 1)) |
61 | no_watchdog = 1; | 58 | watchdog_enabled = 0; |
62 | return 1; | 59 | return 1; |
63 | } | 60 | } |
64 | __setup("nmi_watchdog=", hardlockup_panic_setup); | 61 | __setup("nmi_watchdog=", hardlockup_panic_setup); |
@@ -77,7 +74,7 @@ __setup("softlockup_panic=", softlockup_panic_setup); | |||
77 | 74 | ||
78 | static int __init nowatchdog_setup(char *str) | 75 | static int __init nowatchdog_setup(char *str) |
79 | { | 76 | { |
80 | no_watchdog = 1; | 77 | watchdog_enabled = 0; |
81 | return 1; | 78 | return 1; |
82 | } | 79 | } |
83 | __setup("nowatchdog", nowatchdog_setup); | 80 | __setup("nowatchdog", nowatchdog_setup); |
@@ -85,7 +82,7 @@ __setup("nowatchdog", nowatchdog_setup); | |||
85 | /* deprecated */ | 82 | /* deprecated */ |
86 | static int __init nosoftlockup_setup(char *str) | 83 | static int __init nosoftlockup_setup(char *str) |
87 | { | 84 | { |
88 | no_watchdog = 1; | 85 | watchdog_enabled = 0; |
89 | return 1; | 86 | return 1; |
90 | } | 87 | } |
91 | __setup("nosoftlockup", nosoftlockup_setup); | 88 | __setup("nosoftlockup", nosoftlockup_setup); |
@@ -366,8 +363,14 @@ static int watchdog_nmi_enable(int cpu) | |||
366 | goto out_save; | 363 | goto out_save; |
367 | } | 364 | } |
368 | 365 | ||
369 | printk(KERN_ERR "NMI watchdog disabled for cpu%i: unable to create perf event: %ld\n", | 366 | |
370 | cpu, PTR_ERR(event)); | 367 | /* vary the KERN level based on the returned errno */ |
368 | if (PTR_ERR(event) == -EOPNOTSUPP) | ||
369 | printk(KERN_INFO "NMI watchdog disabled (cpu%i): not supported (no LAPIC?)\n", cpu); | ||
370 | else if (PTR_ERR(event) == -ENOENT) | ||
371 | printk(KERN_WARNING "NMI watchdog disabled (cpu%i): hardware events not enabled\n", cpu); | ||
372 | else | ||
373 | printk(KERN_ERR "NMI watchdog disabled (cpu%i): unable to create perf event: %ld\n", cpu, PTR_ERR(event)); | ||
371 | return PTR_ERR(event); | 374 | return PTR_ERR(event); |
372 | 375 | ||
373 | /* success path */ | 376 | /* success path */ |
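The rewritten failure path keys the log severity off PTR_ERR(event): the perf layer returns an error-encoding pointer rather than NULL, and the watchdog decodes it to decide whether the failure is expected (-EOPNOTSUPP, -ENOENT) or a real problem. Below is a userspace model of that ERR_PTR/PTR_ERR idiom and the severity split; the helpers are simplified re-implementations for illustration, not the kernel headers.

#include <errno.h>
#include <stdio.h>

/* simplified stand-ins for the ERR_PTR()/PTR_ERR()/IS_ERR() idiom */
static inline void *err_ptr(long err)		{ return (void *)err; }
static inline long ptr_err(const void *p)	{ return (long)p; }
static inline int is_err(const void *p)
{
	return (unsigned long)p >= (unsigned long)-4095;
}

int main(void)
{
	int cpu = 0;
	void *event = err_ptr(-EOPNOTSUPP);	/* pretend event creation failed */

	if (is_err(event)) {
		long err = ptr_err(event);

		if (err == -EOPNOTSUPP)
			printf("info: NMI watchdog disabled (cpu%d): not supported\n", cpu);
		else if (err == -ENOENT)
			printf("warning: NMI watchdog disabled (cpu%d): hardware events not enabled\n", cpu);
		else
			printf("error: NMI watchdog disabled (cpu%d): perf event failed: %ld\n", cpu, err);
	}
	return 0;
}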
@@ -432,9 +435,6 @@ static int watchdog_enable(int cpu) | |||
432 | wake_up_process(p); | 435 | wake_up_process(p); |
433 | } | 436 | } |
434 | 437 | ||
435 | /* if any cpu succeeds, watchdog is considered enabled for the system */ | ||
436 | watchdog_enabled = 1; | ||
437 | |||
438 | return 0; | 438 | return 0; |
439 | } | 439 | } |
440 | 440 | ||
@@ -462,12 +462,16 @@ static void watchdog_disable(int cpu) | |||
462 | static void watchdog_enable_all_cpus(void) | 462 | static void watchdog_enable_all_cpus(void) |
463 | { | 463 | { |
464 | int cpu; | 464 | int cpu; |
465 | int result = 0; | 465 | |
466 | watchdog_enabled = 0; | ||
466 | 467 | ||
467 | for_each_online_cpu(cpu) | 468 | for_each_online_cpu(cpu) |
468 | result += watchdog_enable(cpu); | 469 | if (!watchdog_enable(cpu)) |
470 | /* if any cpu succeeds, watchdog is considered | ||
471 | enabled for the system */ | ||
472 | watchdog_enabled = 1; | ||
469 | 473 | ||
470 | if (result) | 474 | if (!watchdog_enabled) |
471 | printk(KERN_ERR "watchdog: failed to be enabled on some cpus\n"); | 475 | printk(KERN_ERR "watchdog: failed to be enabled on some cpus\n"); |
472 | 476 | ||
473 | } | 477 | } |
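Moving the "any cpu succeeds" logic out of watchdog_enable() and into watchdog_enable_all_cpus() means the global flag is cleared up front and set as soon as one per-CPU enable works, with the error message reserved for the case where every CPU failed. A compilable userspace model of that control flow, where enable_one() is a hypothetical stand-in for watchdog_enable():

#include <stdbool.h>
#include <stdio.h>

static bool watchdog_enabled;

static int enable_one(int cpu)
{
	return cpu == 2 ? -1 : 0;	/* pretend cpu2 fails */
}

static void enable_all_cpus(int ncpus)
{
	watchdog_enabled = false;

	for (int cpu = 0; cpu < ncpus; cpu++)
		if (!enable_one(cpu))
			watchdog_enabled = true;	/* any success => enabled */

	if (!watchdog_enabled)
		printf("watchdog: failed to be enabled on some cpus\n");
}

int main(void)
{
	enable_all_cpus(4);
	printf("watchdog_enabled = %d\n", watchdog_enabled);	/* prints 1 */
	return 0;
}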
@@ -476,9 +480,6 @@ static void watchdog_disable_all_cpus(void) | |||
476 | { | 480 | { |
477 | int cpu; | 481 | int cpu; |
478 | 482 | ||
479 | if (no_watchdog) | ||
480 | return; | ||
481 | |||
482 | for_each_online_cpu(cpu) | 483 | for_each_online_cpu(cpu) |
483 | watchdog_disable(cpu); | 484 | watchdog_disable(cpu); |
484 | 485 | ||
@@ -498,10 +499,12 @@ int proc_dowatchdog_enabled(struct ctl_table *table, int write, | |||
498 | { | 499 | { |
499 | proc_dointvec(table, write, buffer, length, ppos); | 500 | proc_dointvec(table, write, buffer, length, ppos); |
500 | 501 | ||
501 | if (watchdog_enabled) | 502 | if (write) { |
502 | watchdog_enable_all_cpus(); | 503 | if (watchdog_enabled) |
503 | else | 504 | watchdog_enable_all_cpus(); |
504 | watchdog_disable_all_cpus(); | 505 | else |
506 | watchdog_disable_all_cpus(); | ||
507 | } | ||
505 | return 0; | 508 | return 0; |
506 | } | 509 | } |
507 | 510 | ||
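Wrapping the enable/disable calls in "if (write)" makes the sysctl handler side-effect free on reads: only a write should start or stop the per-CPU watchdog threads. A simplified userspace sketch of that gating, with the proc plumbing reduced to a boolean flag and stand-in helpers:

#include <stdbool.h>
#include <stdio.h>

static bool watchdog_enabled = true;

static void enable_all_cpus(void)	{ puts("watchdog: enabling on all cpus"); }
static void disable_all_cpus(void)	{ puts("watchdog: disabling on all cpus"); }

/* simplified stand-in for proc_dowatchdog_enabled(): only writes may
 * have side effects */
static int dowatchdog_enabled(bool write, bool new_value)
{
	if (write) {
		watchdog_enabled = new_value;
		if (watchdog_enabled)
			enable_all_cpus();
		else
			disable_all_cpus();
	}
	return 0;
}

int main(void)
{
	dowatchdog_enabled(false, false);	/* read: no side effect */
	dowatchdog_enabled(true, false);	/* write 0: disables */
	dowatchdog_enabled(true, true);		/* write 1: re-enables */
	return 0;
}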
@@ -530,7 +533,8 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
530 | break; | 533 | break; |
531 | case CPU_ONLINE: | 534 | case CPU_ONLINE: |
532 | case CPU_ONLINE_FROZEN: | 535 | case CPU_ONLINE_FROZEN: |
533 | err = watchdog_enable(hotcpu); | 536 | if (watchdog_enabled) |
537 | err = watchdog_enable(hotcpu); | ||
534 | break; | 538 | break; |
535 | #ifdef CONFIG_HOTPLUG_CPU | 539 | #ifdef CONFIG_HOTPLUG_CPU |
536 | case CPU_UP_CANCELED: | 540 | case CPU_UP_CANCELED: |
@@ -555,9 +559,6 @@ void __init lockup_detector_init(void) | |||
555 | void *cpu = (void *)(long)smp_processor_id(); | 559 | void *cpu = (void *)(long)smp_processor_id(); |
556 | int err; | 560 | int err; |
557 | 561 | ||
558 | if (no_watchdog) | ||
559 | return; | ||
560 | |||
561 | err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); | 562 | err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); |
562 | WARN_ON(notifier_to_errno(err)); | 563 | WARN_ON(notifier_to_errno(err)); |
563 | 564 | ||
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 8ee6ec82f88a..ee6578b578ad 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -79,7 +79,9 @@ enum { | |||
79 | MAX_IDLE_WORKERS_RATIO = 4, /* 1/4 of busy can be idle */ | 79 | MAX_IDLE_WORKERS_RATIO = 4, /* 1/4 of busy can be idle */ |
80 | IDLE_WORKER_TIMEOUT = 300 * HZ, /* keep idle ones for 5 mins */ | 80 | IDLE_WORKER_TIMEOUT = 300 * HZ, /* keep idle ones for 5 mins */ |
81 | 81 | ||
82 | MAYDAY_INITIAL_TIMEOUT = HZ / 100, /* call for help after 10ms */ | 82 | MAYDAY_INITIAL_TIMEOUT = HZ / 100 >= 2 ? HZ / 100 : 2, |
83 | /* call for help after 10ms | ||
84 | (min two ticks) */ | ||
83 | MAYDAY_INTERVAL = HZ / 10, /* and then every 100ms */ | 85 | MAYDAY_INTERVAL = HZ / 10, /* and then every 100ms */ |
84 | CREATE_COOLDOWN = HZ, /* time to breath after fail */ | 86 | CREATE_COOLDOWN = HZ, /* time to breath after fail */ |
85 | TRUSTEE_COOLDOWN = HZ / 10, /* for trustee draining */ | 87 | TRUSTEE_COOLDOWN = HZ / 10, /* for trustee draining */ |
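The new MAYDAY_INITIAL_TIMEOUT expression clamps the 10ms value to at least two ticks: on a HZ=100 kernel, HZ/100 is a single tick and the timer could otherwise fire after less than one full tick of real delay. The small program below evaluates the same expression for a few HZ values to show the clamp (taking HZ as a parameter is an assumption for illustration; in the kernel it is a compile-time constant).

#include <stdio.h>

#define MAYDAY_INITIAL_TIMEOUT(hz)	((hz) / 100 >= 2 ? (hz) / 100 : 2)

int main(void)
{
	const int hz_values[] = { 100, 250, 300, 1000 };

	for (unsigned i = 0; i < sizeof(hz_values) / sizeof(hz_values[0]); i++)
		printf("HZ=%4d -> initial mayday timeout = %d tick(s)\n",
		       hz_values[i], MAYDAY_INITIAL_TIMEOUT(hz_values[i]));
	return 0;
}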
@@ -768,7 +770,11 @@ static inline void worker_clr_flags(struct worker *worker, unsigned int flags) | |||
768 | 770 | ||
769 | worker->flags &= ~flags; | 771 | worker->flags &= ~flags; |
770 | 772 | ||
771 | /* if transitioning out of NOT_RUNNING, increment nr_running */ | 773 | /* |
774 | * If transitioning out of NOT_RUNNING, increment nr_running. Note | ||
775 | * that the nested NOT_RUNNING is not a noop. NOT_RUNNING is a mask | ||
776 | * of multiple flags, not a single flag. | ||
777 | */ | ||
772 | if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING)) | 778 | if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING)) |
773 | if (!(worker->flags & WORKER_NOT_RUNNING)) | 779 | if (!(worker->flags & WORKER_NOT_RUNNING)) |
774 | atomic_inc(get_gcwq_nr_running(gcwq->cpu)); | 780 | atomic_inc(get_gcwq_nr_running(gcwq->cpu)); |
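The expanded comment stresses that WORKER_NOT_RUNNING is a mask covering several flags, so nr_running may only be incremented once the last of those bits is cleared, not on every clear of a member flag. A self-contained userspace model of that transition test; the flag names and values are invented, and the atomic counter is reduced to a plain int.

#include <stdio.h>

enum {
	W_PREP		= 1 << 0,
	W_REBIND	= 1 << 1,
	W_NOT_RUNNING	= W_PREP | W_REBIND,	/* a mask, not a single flag */
};

static int nr_running;

static void worker_clr_flags(unsigned int *wflags, unsigned int flags)
{
	unsigned int oflags = *wflags;

	*wflags &= ~flags;

	/* transitioned out of NOT_RUNNING only once no mask bit remains */
	if ((flags & W_NOT_RUNNING) && (oflags & W_NOT_RUNNING) &&
	    !(*wflags & W_NOT_RUNNING))
		nr_running++;
}

int main(void)
{
	unsigned int flags = W_PREP | W_REBIND;

	worker_clr_flags(&flags, W_PREP);	/* still NOT_RUNNING via W_REBIND */
	worker_clr_flags(&flags, W_REBIND);	/* last mask bit cleared */
	printf("nr_running = %d\n", nr_running);	/* prints 1 */
	return 0;
}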
@@ -1840,7 +1846,7 @@ __acquires(&gcwq->lock) | |||
1840 | spin_unlock_irq(&gcwq->lock); | 1846 | spin_unlock_irq(&gcwq->lock); |
1841 | 1847 | ||
1842 | work_clear_pending(work); | 1848 | work_clear_pending(work); |
1843 | lock_map_acquire(&cwq->wq->lockdep_map); | 1849 | lock_map_acquire_read(&cwq->wq->lockdep_map); |
1844 | lock_map_acquire(&lockdep_map); | 1850 | lock_map_acquire(&lockdep_map); |
1845 | trace_workqueue_execute_start(work); | 1851 | trace_workqueue_execute_start(work); |
1846 | f(work); | 1852 | f(work); |
@@ -2043,6 +2049,15 @@ repeat: | |||
2043 | move_linked_works(work, scheduled, &n); | 2049 | move_linked_works(work, scheduled, &n); |
2044 | 2050 | ||
2045 | process_scheduled_works(rescuer); | 2051 | process_scheduled_works(rescuer); |
2052 | |||
2053 | /* | ||
2054 | * Leave this gcwq. If keep_working() is %true, notify a | ||
2055 | * regular worker; otherwise, we end up with 0 concurrency | ||
2056 | * and stalling the execution. | ||
2057 | */ | ||
2058 | if (keep_working(gcwq)) | ||
2059 | wake_up_worker(gcwq); | ||
2060 | |||
2046 | spin_unlock_irq(&gcwq->lock); | 2061 | spin_unlock_irq(&gcwq->lock); |
2047 | } | 2062 | } |
2048 | 2063 | ||
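The added wake_up_worker() call keeps the pool making progress after a rescue pass: if work is still queued when the rescuer leaves, a regular worker must be woken, otherwise concurrency drops to zero until new work arrives. A tiny model of that hand-off, with keep_working() and the pool bookkeeping reduced to plain counters (all names are simplified stand-ins):

#include <stdbool.h>
#include <stdio.h>

struct pool {
	int pending;	/* works still queued */
	int running;	/* workers currently executing */
	int idle;	/* idle regular workers available */
};

static bool keep_working(const struct pool *p)
{
	return p->pending > 0 && p->running == 0;
}

static void rescuer_leave(struct pool *p)
{
	if (keep_working(p) && p->idle > 0) {
		p->idle--;
		p->running++;	/* model of wake_up_worker(): hand execution back */
		printf("rescuer: woke a regular worker, %d work(s) still pending\n",
		       p->pending);
	}
}

int main(void)
{
	struct pool p = { .pending = 3, .running = 0, .idle = 2 };

	rescuer_leave(&p);
	return 0;
}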
@@ -2384,8 +2399,18 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr, | |||
2384 | insert_wq_barrier(cwq, barr, work, worker); | 2399 | insert_wq_barrier(cwq, barr, work, worker); |
2385 | spin_unlock_irq(&gcwq->lock); | 2400 | spin_unlock_irq(&gcwq->lock); |
2386 | 2401 | ||
2387 | lock_map_acquire(&cwq->wq->lockdep_map); | 2402 | /* |
2403 | * If @max_active is 1 or rescuer is in use, flushing another work | ||
2404 | * item on the same workqueue may lead to deadlock. Make sure the | ||
2405 | * flusher is not running on the same workqueue by verifying write | ||
2406 | * access. | ||
2407 | */ | ||
2408 | if (cwq->wq->saved_max_active == 1 || cwq->wq->flags & WQ_RESCUER) | ||
2409 | lock_map_acquire(&cwq->wq->lockdep_map); | ||
2410 | else | ||
2411 | lock_map_acquire_read(&cwq->wq->lockdep_map); | ||
2388 | lock_map_release(&cwq->wq->lockdep_map); | 2412 | lock_map_release(&cwq->wq->lockdep_map); |
2413 | |||
2389 | return true; | 2414 | return true; |
2390 | already_gone: | 2415 | already_gone: |
2391 | spin_unlock_irq(&gcwq->lock); | 2416 | spin_unlock_irq(&gcwq->lock); |
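The comment spells out when flushing a work item from the same workqueue can actually deadlock: only if the queue runs one item at a time (saved_max_active == 1) or relies on its rescuer for forward progress. Those cases take the lockdep dependency exclusively via lock_map_acquire(); everything else records a read acquisition so legitimate cross-queue flushes are not flagged. A sketch of that decision with simplified stand-in types rather than the kernel's workqueue structures:

#include <stdbool.h>
#include <stdio.h>

#define WQ_RESCUER	0x1	/* simplified flag value */

struct wq {
	const char *name;
	unsigned int flags;
	int saved_max_active;
};

/* true => model lock_map_acquire(), false => lock_map_acquire_read() */
static bool flush_takes_exclusive_dep(const struct wq *wq)
{
	return wq->saved_max_active == 1 || (wq->flags & WQ_RESCUER);
}

int main(void)
{
	const struct wq queues[] = {
		{ "ordered",	 0,		1 },	/* single in-flight item */
		{ "mem-reclaim", WQ_RESCUER,	16 },	/* rescuer-backed */
		{ "events",	 0,		256 },	/* plain workqueue */
	};

	for (unsigned i = 0; i < sizeof(queues) / sizeof(queues[0]); i++)
		printf("%-12s -> %s lockdep acquisition\n", queues[i].name,
		       flush_takes_exclusive_dep(&queues[i]) ? "exclusive" : "read");
	return 0;
}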
@@ -2942,7 +2967,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *name, | |||
2942 | */ | 2967 | */ |
2943 | spin_lock(&workqueue_lock); | 2968 | spin_lock(&workqueue_lock); |
2944 | 2969 | ||
2945 | if (workqueue_freezing && wq->flags & WQ_FREEZEABLE) | 2970 | if (workqueue_freezing && wq->flags & WQ_FREEZABLE) |
2946 | for_each_cwq_cpu(cpu, wq) | 2971 | for_each_cwq_cpu(cpu, wq) |
2947 | get_cwq(cpu, wq)->max_active = 0; | 2972 | get_cwq(cpu, wq)->max_active = 0; |
2948 | 2973 | ||
@@ -3054,7 +3079,7 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active) | |||
3054 | 3079 | ||
3055 | spin_lock_irq(&gcwq->lock); | 3080 | spin_lock_irq(&gcwq->lock); |
3056 | 3081 | ||
3057 | if (!(wq->flags & WQ_FREEZEABLE) || | 3082 | if (!(wq->flags & WQ_FREEZABLE) || |
3058 | !(gcwq->flags & GCWQ_FREEZING)) | 3083 | !(gcwq->flags & GCWQ_FREEZING)) |
3059 | get_cwq(gcwq->cpu, wq)->max_active = max_active; | 3084 | get_cwq(gcwq->cpu, wq)->max_active = max_active; |
3060 | 3085 | ||
@@ -3304,7 +3329,7 @@ static int __cpuinit trustee_thread(void *__gcwq) | |||
3304 | * want to get it over with ASAP - spam rescuers, wake up as | 3329 | * want to get it over with ASAP - spam rescuers, wake up as |
3305 | * many idlers as necessary and create new ones till the | 3330 | * many idlers as necessary and create new ones till the |
3306 | * worklist is empty. Note that if the gcwq is frozen, there | 3331 | * worklist is empty. Note that if the gcwq is frozen, there |
3307 | * may be frozen works in freezeable cwqs. Don't declare | 3332 | * may be frozen works in freezable cwqs. Don't declare |
3308 | * completion while frozen. | 3333 | * completion while frozen. |
3309 | */ | 3334 | */ |
3310 | while (gcwq->nr_workers != gcwq->nr_idle || | 3335 | while (gcwq->nr_workers != gcwq->nr_idle || |
@@ -3562,9 +3587,9 @@ EXPORT_SYMBOL_GPL(work_on_cpu); | |||
3562 | /** | 3587 | /** |
3563 | * freeze_workqueues_begin - begin freezing workqueues | 3588 | * freeze_workqueues_begin - begin freezing workqueues |
3564 | * | 3589 | * |
3565 | * Start freezing workqueues. After this function returns, all | 3590 | * Start freezing workqueues. After this function returns, all freezable |
3566 | * freezeable workqueues will queue new works to their frozen_works | 3591 | * workqueues will queue new works to their frozen_works list instead of |
3567 | * list instead of gcwq->worklist. | 3592 | * gcwq->worklist. |
3568 | * | 3593 | * |
3569 | * CONTEXT: | 3594 | * CONTEXT: |
3570 | * Grabs and releases workqueue_lock and gcwq->lock's. | 3595 | * Grabs and releases workqueue_lock and gcwq->lock's. |
@@ -3590,7 +3615,7 @@ void freeze_workqueues_begin(void) | |||
3590 | list_for_each_entry(wq, &workqueues, list) { | 3615 | list_for_each_entry(wq, &workqueues, list) { |
3591 | struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); | 3616 | struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); |
3592 | 3617 | ||
3593 | if (cwq && wq->flags & WQ_FREEZEABLE) | 3618 | if (cwq && wq->flags & WQ_FREEZABLE) |
3594 | cwq->max_active = 0; | 3619 | cwq->max_active = 0; |
3595 | } | 3620 | } |
3596 | 3621 | ||
@@ -3601,7 +3626,7 @@ void freeze_workqueues_begin(void) | |||
3601 | } | 3626 | } |
3602 | 3627 | ||
3603 | /** | 3628 | /** |
3604 | * freeze_workqueues_busy - are freezeable workqueues still busy? | 3629 | * freeze_workqueues_busy - are freezable workqueues still busy? |
3605 | * | 3630 | * |
3606 | * Check whether freezing is complete. This function must be called | 3631 | * Check whether freezing is complete. This function must be called |
3607 | * between freeze_workqueues_begin() and thaw_workqueues(). | 3632 | * between freeze_workqueues_begin() and thaw_workqueues(). |
@@ -3610,8 +3635,8 @@ void freeze_workqueues_begin(void) | |||
3610 | * Grabs and releases workqueue_lock. | 3635 | * Grabs and releases workqueue_lock. |
3611 | * | 3636 | * |
3612 | * RETURNS: | 3637 | * RETURNS: |
3613 | * %true if some freezeable workqueues are still busy. %false if | 3638 | * %true if some freezable workqueues are still busy. %false if freezing |
3614 | * freezing is complete. | 3639 | * is complete. |
3615 | */ | 3640 | */ |
3616 | bool freeze_workqueues_busy(void) | 3641 | bool freeze_workqueues_busy(void) |
3617 | { | 3642 | { |
@@ -3631,7 +3656,7 @@ bool freeze_workqueues_busy(void) | |||
3631 | list_for_each_entry(wq, &workqueues, list) { | 3656 | list_for_each_entry(wq, &workqueues, list) { |
3632 | struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); | 3657 | struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); |
3633 | 3658 | ||
3634 | if (!cwq || !(wq->flags & WQ_FREEZEABLE)) | 3659 | if (!cwq || !(wq->flags & WQ_FREEZABLE)) |
3635 | continue; | 3660 | continue; |
3636 | 3661 | ||
3637 | BUG_ON(cwq->nr_active < 0); | 3662 | BUG_ON(cwq->nr_active < 0); |
@@ -3676,7 +3701,7 @@ void thaw_workqueues(void) | |||
3676 | list_for_each_entry(wq, &workqueues, list) { | 3701 | list_for_each_entry(wq, &workqueues, list) { |
3677 | struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); | 3702 | struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); |
3678 | 3703 | ||
3679 | if (!cwq || !(wq->flags & WQ_FREEZEABLE)) | 3704 | if (!cwq || !(wq->flags & WQ_FREEZABLE)) |
3680 | continue; | 3705 | continue; |
3681 | 3706 | ||
3682 | /* restore max_active and repopulate worklist */ | 3707 | /* restore max_active and repopulate worklist */ |