Diffstat (limited to 'kernel')
102 files changed, 4288 insertions, 2577 deletions
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index bf987b95b356..24e7cb0ba26a 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
| @@ -35,6 +35,7 @@ config PREEMPT_VOLUNTARY | |||
| 35 | 35 | ||
| 36 | config PREEMPT | 36 | config PREEMPT |
| 37 | bool "Preemptible Kernel (Low-Latency Desktop)" | 37 | bool "Preemptible Kernel (Low-Latency Desktop)" |
| 38 | select PREEMPT_COUNT | ||
| 38 | help | 39 | help |
| 39 | This option reduces the latency of the kernel by making | 40 | This option reduces the latency of the kernel by making |
| 40 | all kernel code (that is not executing in a critical section) | 41 | all kernel code (that is not executing in a critical section) |
| @@ -52,3 +53,5 @@ config PREEMPT | |||
| 52 | 53 | ||
| 53 | endchoice | 54 | endchoice |
| 54 | 55 | ||
| 56 | config PREEMPT_COUNT | ||
| 57 | bool \ No newline at end of file | ||
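The new hidden PREEMPT_COUNT symbol, now selected by PREEMPT, is what lets preempt_disable()/preempt_enable() actually maintain a per-task preemption counter. A minimal sketch (illustrative only, not part of this patch) of the kind of code that relies on that counter:

```c
/*
 * Illustrative sketch, not from this commit: typical code that depends on
 * the preempt counter that CONFIG_PREEMPT_COUNT provides.
 */
#include <linux/preempt.h>

static void touch_percpu_state(void)
{
	preempt_disable();	/* bumps preempt_count() when PREEMPT_COUNT=y */
	/* ... safely touch per-CPU data here ... */
	preempt_enable();	/* drops the count and may trigger a reschedule */
}
```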
diff --git a/kernel/Makefile b/kernel/Makefile
index 2d64cfcc8b42..d06467fc8f7c 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
| @@ -125,11 +125,10 @@ targets += config_data.gz | |||
| 125 | $(obj)/config_data.gz: $(KCONFIG_CONFIG) FORCE | 125 | $(obj)/config_data.gz: $(KCONFIG_CONFIG) FORCE |
| 126 | $(call if_changed,gzip) | 126 | $(call if_changed,gzip) |
| 127 | 127 | ||
| 128 | quiet_cmd_ikconfiggz = IKCFG $@ | 128 | filechk_ikconfiggz = (echo "static const char kernel_config_data[] __used = MAGIC_START"; cat $< | scripts/bin2c; echo "MAGIC_END;") |
| 129 | cmd_ikconfiggz = (echo "static const char kernel_config_data[] __used = MAGIC_START"; cat $< | scripts/bin2c; echo "MAGIC_END;") > $@ | ||
| 130 | targets += config_data.h | 129 | targets += config_data.h |
| 131 | $(obj)/config_data.h: $(obj)/config_data.gz FORCE | 130 | $(obj)/config_data.h: $(obj)/config_data.gz FORCE |
| 132 | $(call if_changed,ikconfiggz) | 131 | $(call filechk,ikconfiggz) |
| 133 | 132 | ||
| 134 | $(obj)/time.o: $(obj)/timeconst.h | 133 | $(obj)/time.o: $(obj)/timeconst.h |
| 135 | 134 | ||
diff --git a/kernel/async.c b/kernel/async.c
index cd9dbb913c77..d5fe7af0de2e 100644
--- a/kernel/async.c
+++ b/kernel/async.c
| @@ -49,12 +49,13 @@ asynchronous and synchronous parts of the kernel. | |||
| 49 | */ | 49 | */ |
| 50 | 50 | ||
| 51 | #include <linux/async.h> | 51 | #include <linux/async.h> |
| 52 | #include <linux/atomic.h> | ||
| 53 | #include <linux/ktime.h> | ||
| 52 | #include <linux/module.h> | 54 | #include <linux/module.h> |
| 53 | #include <linux/wait.h> | 55 | #include <linux/wait.h> |
| 54 | #include <linux/sched.h> | 56 | #include <linux/sched.h> |
| 55 | #include <linux/slab.h> | 57 | #include <linux/slab.h> |
| 56 | #include <linux/workqueue.h> | 58 | #include <linux/workqueue.h> |
| 57 | #include <asm/atomic.h> | ||
| 58 | 59 | ||
| 59 | static async_cookie_t next_cookie = 1; | 60 | static async_cookie_t next_cookie = 1; |
| 60 | 61 | ||
| @@ -128,7 +129,8 @@ static void async_run_entry_fn(struct work_struct *work) | |||
| 128 | 129 | ||
| 129 | /* 2) run (and print duration) */ | 130 | /* 2) run (and print duration) */ |
| 130 | if (initcall_debug && system_state == SYSTEM_BOOTING) { | 131 | if (initcall_debug && system_state == SYSTEM_BOOTING) { |
| 131 | printk("calling %lli_%pF @ %i\n", (long long)entry->cookie, | 132 | printk(KERN_DEBUG "calling %lli_%pF @ %i\n", |
| 133 | (long long)entry->cookie, | ||
| 132 | entry->func, task_pid_nr(current)); | 134 | entry->func, task_pid_nr(current)); |
| 133 | calltime = ktime_get(); | 135 | calltime = ktime_get(); |
| 134 | } | 136 | } |
| @@ -136,7 +138,7 @@ static void async_run_entry_fn(struct work_struct *work) | |||
| 136 | if (initcall_debug && system_state == SYSTEM_BOOTING) { | 138 | if (initcall_debug && system_state == SYSTEM_BOOTING) { |
| 137 | rettime = ktime_get(); | 139 | rettime = ktime_get(); |
| 138 | delta = ktime_sub(rettime, calltime); | 140 | delta = ktime_sub(rettime, calltime); |
| 139 | printk("initcall %lli_%pF returned 0 after %lld usecs\n", | 141 | printk(KERN_DEBUG "initcall %lli_%pF returned 0 after %lld usecs\n", |
| 140 | (long long)entry->cookie, | 142 | (long long)entry->cookie, |
| 141 | entry->func, | 143 | entry->func, |
| 142 | (long long)ktime_to_ns(delta) >> 10); | 144 | (long long)ktime_to_ns(delta) >> 10); |
| @@ -270,7 +272,7 @@ void async_synchronize_cookie_domain(async_cookie_t cookie, | |||
| 270 | ktime_t starttime, delta, endtime; | 272 | ktime_t starttime, delta, endtime; |
| 271 | 273 | ||
| 272 | if (initcall_debug && system_state == SYSTEM_BOOTING) { | 274 | if (initcall_debug && system_state == SYSTEM_BOOTING) { |
| 273 | printk("async_waiting @ %i\n", task_pid_nr(current)); | 275 | printk(KERN_DEBUG "async_waiting @ %i\n", task_pid_nr(current)); |
| 274 | starttime = ktime_get(); | 276 | starttime = ktime_get(); |
| 275 | } | 277 | } |
| 276 | 278 | ||
| @@ -280,7 +282,7 @@ void async_synchronize_cookie_domain(async_cookie_t cookie, | |||
| 280 | endtime = ktime_get(); | 282 | endtime = ktime_get(); |
| 281 | delta = ktime_sub(endtime, starttime); | 283 | delta = ktime_sub(endtime, starttime); |
| 282 | 284 | ||
| 283 | printk("async_continuing @ %i after %lli usec\n", | 285 | printk(KERN_DEBUG "async_continuing @ %i after %lli usec\n", |
| 284 | task_pid_nr(current), | 286 | task_pid_nr(current), |
| 285 | (long long)ktime_to_ns(delta) >> 10); | 287 | (long long)ktime_to_ns(delta) >> 10); |
| 286 | } | 288 | } |
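These hunks only add an explicit KERN_DEBUG loglevel to the boot-time tracing printk()s; without a level prefix, a message is logged at the kernel's default loglevel. A sketch (not from the patch) of the two usual ways to pin a message to debug level:

```c
/* Sketch, not from the patch: emitting a debug-level message. */
#include <linux/printk.h>

static void debug_log_example(long long cookie)
{
	printk(KERN_DEBUG "calling cookie %lld\n", cookie);
	/* pr_debug() adds KERN_DEBUG, but is compiled out unless DEBUG
	 * or dynamic debug is enabled for this file. */
	pr_debug("calling cookie %lld\n", cookie);
}
```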
diff --git a/kernel/audit.c b/kernel/audit.c
index 939500317066..0a1355ca3d79 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
| @@ -43,7 +43,7 @@ | |||
| 43 | 43 | ||
| 44 | #include <linux/init.h> | 44 | #include <linux/init.h> |
| 45 | #include <asm/types.h> | 45 | #include <asm/types.h> |
| 46 | #include <asm/atomic.h> | 46 | #include <linux/atomic.h> |
| 47 | #include <linux/mm.h> | 47 | #include <linux/mm.h> |
| 48 | #include <linux/module.h> | 48 | #include <linux/module.h> |
| 49 | #include <linux/slab.h> | 49 | #include <linux/slab.h> |
| @@ -55,6 +55,9 @@ | |||
| 55 | #include <net/sock.h> | 55 | #include <net/sock.h> |
| 56 | #include <net/netlink.h> | 56 | #include <net/netlink.h> |
| 57 | #include <linux/skbuff.h> | 57 | #include <linux/skbuff.h> |
| 58 | #ifdef CONFIG_SECURITY | ||
| 59 | #include <linux/security.h> | ||
| 60 | #endif | ||
| 58 | #include <linux/netlink.h> | 61 | #include <linux/netlink.h> |
| 59 | #include <linux/freezer.h> | 62 | #include <linux/freezer.h> |
| 60 | #include <linux/tty.h> | 63 | #include <linux/tty.h> |
| @@ -1502,6 +1505,32 @@ void audit_log(struct audit_context *ctx, gfp_t gfp_mask, int type, | |||
| 1502 | } | 1505 | } |
| 1503 | } | 1506 | } |
| 1504 | 1507 | ||
| 1508 | #ifdef CONFIG_SECURITY | ||
| 1509 | /** | ||
| 1510 | * audit_log_secctx - Converts and logs SELinux context | ||
| 1511 | * @ab: audit_buffer | ||
| 1512 | * @secid: security number | ||
| 1513 | * | ||
| 1514 | * This is a helper function that calls security_secid_to_secctx to convert | ||
| 1515 | * secid to secctx and then adds the (converted) SELinux context to the audit | ||
| 1516 | * log by calling audit_log_format, thus also preventing leak of internal secid | ||
| 1517 | * to userspace. If secid cannot be converted audit_panic is called. | ||
| 1518 | */ | ||
| 1519 | void audit_log_secctx(struct audit_buffer *ab, u32 secid) | ||
| 1520 | { | ||
| 1521 | u32 len; | ||
| 1522 | char *secctx; | ||
| 1523 | |||
| 1524 | if (security_secid_to_secctx(secid, &secctx, &len)) { | ||
| 1525 | audit_panic("Cannot convert secid to context"); | ||
| 1526 | } else { | ||
| 1527 | audit_log_format(ab, " obj=%s", secctx); | ||
| 1528 | security_release_secctx(secctx, len); | ||
| 1529 | } | ||
| 1530 | } | ||
| 1531 | EXPORT_SYMBOL(audit_log_secctx); | ||
| 1532 | #endif | ||
| 1533 | |||
| 1505 | EXPORT_SYMBOL(audit_log_start); | 1534 | EXPORT_SYMBOL(audit_log_start); |
| 1506 | EXPORT_SYMBOL(audit_log_end); | 1535 | EXPORT_SYMBOL(audit_log_end); |
| 1507 | EXPORT_SYMBOL(audit_log_format); | 1536 | EXPORT_SYMBOL(audit_log_format); |
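A hypothetical caller of the new audit_log_secctx() helper might look like the sketch below; the AUDIT_KERNEL record type and the surrounding logic are illustrative, not taken from this patch:

```c
#include <linux/audit.h>
#include <linux/gfp.h>

/* Hypothetical example: attach the security context of @secid to a record. */
static void log_object_context(u32 secid)
{
	struct audit_buffer *ab;

	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_KERNEL);
	if (!ab)
		return;
	audit_log_format(ab, "op=example");
	audit_log_secctx(ab, secid);	/* appends " obj=<secctx>" or calls audit_panic() */
	audit_log_end(ab);
}
```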
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index e99dda04b126..5bf0790497e7 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
| @@ -93,16 +93,10 @@ static inline void get_tree(struct audit_tree *tree) | |||
| 93 | atomic_inc(&tree->count); | 93 | atomic_inc(&tree->count); |
| 94 | } | 94 | } |
| 95 | 95 | ||
| 96 | static void __put_tree(struct rcu_head *rcu) | ||
| 97 | { | ||
| 98 | struct audit_tree *tree = container_of(rcu, struct audit_tree, head); | ||
| 99 | kfree(tree); | ||
| 100 | } | ||
| 101 | |||
| 102 | static inline void put_tree(struct audit_tree *tree) | 96 | static inline void put_tree(struct audit_tree *tree) |
| 103 | { | 97 | { |
| 104 | if (atomic_dec_and_test(&tree->count)) | 98 | if (atomic_dec_and_test(&tree->count)) |
| 105 | call_rcu(&tree->head, __put_tree); | 99 | kfree_rcu(tree, head); |
| 106 | } | 100 | } |
| 107 | 101 | ||
| 108 | /* to avoid bringing the entire thing in audit.h */ | 102 | /* to avoid bringing the entire thing in audit.h */ |
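The pattern applied here, shown as a generic sketch (not from the patch): kfree_rcu() replaces an open-coded call_rcu() callback whose only job is to kfree() the enclosing object, so the helper and its container_of() boilerplate can be dropped.

```c
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct example_obj {
	int data;
	struct rcu_head head;
};

static void put_example(struct example_obj *obj)
{
	/* Frees the object after an RCU grace period; no callback needed. */
	kfree_rcu(obj, head);
}
```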
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 00d79df03e76..ce4b054acee5 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
| @@ -44,7 +44,7 @@ | |||
| 44 | 44 | ||
| 45 | #include <linux/init.h> | 45 | #include <linux/init.h> |
| 46 | #include <asm/types.h> | 46 | #include <asm/types.h> |
| 47 | #include <asm/atomic.h> | 47 | #include <linux/atomic.h> |
| 48 | #include <linux/fs.h> | 48 | #include <linux/fs.h> |
| 49 | #include <linux/namei.h> | 49 | #include <linux/namei.h> |
| 50 | #include <linux/mm.h> | 50 | #include <linux/mm.h> |
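Several files in this series switch from <asm/atomic.h> to the architecture-independent <linux/atomic.h> wrapper, which is the header callers are now expected to include. A trivial sketch of the intended usage (not from the patch):

```c
#include <linux/atomic.h>	/* wraps the arch's asm/atomic.h plus generic fallbacks */

static atomic_t example_count = ATOMIC_INIT(0);

static void bump_example(void)
{
	atomic_inc(&example_count);
}
```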
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 2731d115d725..1d2b6ceea95d 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
| @@ -27,9 +27,11 @@ | |||
| 27 | */ | 27 | */ |
| 28 | 28 | ||
| 29 | #include <linux/cgroup.h> | 29 | #include <linux/cgroup.h> |
| 30 | #include <linux/cred.h> | ||
| 30 | #include <linux/ctype.h> | 31 | #include <linux/ctype.h> |
| 31 | #include <linux/errno.h> | 32 | #include <linux/errno.h> |
| 32 | #include <linux/fs.h> | 33 | #include <linux/fs.h> |
| 34 | #include <linux/init_task.h> | ||
| 33 | #include <linux/kernel.h> | 35 | #include <linux/kernel.h> |
| 34 | #include <linux/list.h> | 36 | #include <linux/list.h> |
| 35 | #include <linux/mm.h> | 37 | #include <linux/mm.h> |
| @@ -59,7 +61,7 @@ | |||
| 59 | #include <linux/poll.h> | 61 | #include <linux/poll.h> |
| 60 | #include <linux/flex_array.h> /* used in cgroup_attach_proc */ | 62 | #include <linux/flex_array.h> /* used in cgroup_attach_proc */ |
| 61 | 63 | ||
| 62 | #include <asm/atomic.h> | 64 | #include <linux/atomic.h> |
| 63 | 65 | ||
| 64 | static DEFINE_MUTEX(cgroup_mutex); | 66 | static DEFINE_MUTEX(cgroup_mutex); |
| 65 | 67 | ||
| @@ -1514,6 +1516,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
| 1514 | struct cgroup *root_cgrp = &root->top_cgroup; | 1516 | struct cgroup *root_cgrp = &root->top_cgroup; |
| 1515 | struct inode *inode; | 1517 | struct inode *inode; |
| 1516 | struct cgroupfs_root *existing_root; | 1518 | struct cgroupfs_root *existing_root; |
| 1519 | const struct cred *cred; | ||
| 1517 | int i; | 1520 | int i; |
| 1518 | 1521 | ||
| 1519 | BUG_ON(sb->s_root != NULL); | 1522 | BUG_ON(sb->s_root != NULL); |
| @@ -1593,7 +1596,9 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
| 1593 | BUG_ON(!list_empty(&root_cgrp->children)); | 1596 | BUG_ON(!list_empty(&root_cgrp->children)); |
| 1594 | BUG_ON(root->number_of_cgroups != 1); | 1597 | BUG_ON(root->number_of_cgroups != 1); |
| 1595 | 1598 | ||
| 1599 | cred = override_creds(&init_cred); | ||
| 1596 | cgroup_populate_dir(root_cgrp); | 1600 | cgroup_populate_dir(root_cgrp); |
| 1601 | revert_creds(cred); | ||
| 1597 | mutex_unlock(&cgroup_mutex); | 1602 | mutex_unlock(&cgroup_mutex); |
| 1598 | mutex_unlock(&inode->i_mutex); | 1603 | mutex_unlock(&inode->i_mutex); |
| 1599 | } else { | 1604 | } else { |
| @@ -1697,7 +1702,6 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) | |||
| 1697 | { | 1702 | { |
| 1698 | char *start; | 1703 | char *start; |
| 1699 | struct dentry *dentry = rcu_dereference_check(cgrp->dentry, | 1704 | struct dentry *dentry = rcu_dereference_check(cgrp->dentry, |
| 1700 | rcu_read_lock_held() || | ||
| 1701 | cgroup_lock_is_held()); | 1705 | cgroup_lock_is_held()); |
| 1702 | 1706 | ||
| 1703 | if (!dentry || cgrp == dummytop) { | 1707 | if (!dentry || cgrp == dummytop) { |
| @@ -1723,7 +1727,6 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) | |||
| 1723 | break; | 1727 | break; |
| 1724 | 1728 | ||
| 1725 | dentry = rcu_dereference_check(cgrp->dentry, | 1729 | dentry = rcu_dereference_check(cgrp->dentry, |
| 1726 | rcu_read_lock_held() || | ||
| 1727 | cgroup_lock_is_held()); | 1730 | cgroup_lock_is_held()); |
| 1728 | if (!cgrp->parent) | 1731 | if (!cgrp->parent) |
| 1729 | continue; | 1732 | continue; |
| @@ -3542,7 +3545,8 @@ static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft, | |||
| 3542 | } | 3545 | } |
| 3543 | 3546 | ||
| 3544 | /* the process need read permission on control file */ | 3547 | /* the process need read permission on control file */ |
| 3545 | ret = file_permission(cfile, MAY_READ); | 3548 | /* AV: shouldn't we check that it's been opened for read instead? */ |
| 3549 | ret = inode_permission(cfile->f_path.dentry->d_inode, MAY_READ); | ||
| 3546 | if (ret < 0) | 3550 | if (ret < 0) |
| 3547 | goto fail; | 3551 | goto fail; |
| 3548 | 3552 | ||
| @@ -4813,8 +4817,7 @@ unsigned short css_id(struct cgroup_subsys_state *css) | |||
| 4813 | * on this or this is under rcu_read_lock(). Once css->id is allocated, | 4817 | * on this or this is under rcu_read_lock(). Once css->id is allocated, |
| 4814 | * it's unchanged until freed. | 4818 | * it's unchanged until freed. |
| 4815 | */ | 4819 | */ |
| 4816 | cssid = rcu_dereference_check(css->id, | 4820 | cssid = rcu_dereference_check(css->id, atomic_read(&css->refcnt)); |
| 4817 | rcu_read_lock_held() || atomic_read(&css->refcnt)); | ||
| 4818 | 4821 | ||
| 4819 | if (cssid) | 4822 | if (cssid) |
| 4820 | return cssid->id; | 4823 | return cssid->id; |
| @@ -4826,8 +4829,7 @@ unsigned short css_depth(struct cgroup_subsys_state *css) | |||
| 4826 | { | 4829 | { |
| 4827 | struct css_id *cssid; | 4830 | struct css_id *cssid; |
| 4828 | 4831 | ||
| 4829 | cssid = rcu_dereference_check(css->id, | 4832 | cssid = rcu_dereference_check(css->id, atomic_read(&css->refcnt)); |
| 4830 | rcu_read_lock_held() || atomic_read(&css->refcnt)); | ||
| 4831 | 4833 | ||
| 4832 | if (cssid) | 4834 | if (cssid) |
| 4833 | return cssid->depth; | 4835 | return cssid->depth; |
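Two independent cleanups are visible in this file: rcu_dereference_check() no longer needs an explicit rcu_read_lock_held() term (the primitive now checks that itself), and cgroup_populate_dir() is run under the init credentials. The credential bracket follows the usual pattern shown in this sketch (the wrapper function is hypothetical):

```c
#include <linux/cred.h>
#include <linux/init_task.h>

/* Hypothetical wrapper showing the override_creds()/revert_creds() bracket. */
static void populate_as_init(void (*populate)(void))
{
	const struct cred *old_cred;

	old_cred = override_creds(&init_cred);	/* temporarily run with init's creds */
	populate();				/* e.g. cgroup_populate_dir(root_cgrp) */
	revert_creds(old_cred);			/* restore the mounting task's creds */
}
```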
diff --git a/kernel/compat.c b/kernel/compat.c
index fc9eb093acd5..e2435ee9993a 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
| @@ -158,6 +158,7 @@ int put_compat_timespec(const struct timespec *ts, struct compat_timespec __user | |||
| 158 | __put_user(ts->tv_sec, &cts->tv_sec) || | 158 | __put_user(ts->tv_sec, &cts->tv_sec) || |
| 159 | __put_user(ts->tv_nsec, &cts->tv_nsec)) ? -EFAULT : 0; | 159 | __put_user(ts->tv_nsec, &cts->tv_nsec)) ? -EFAULT : 0; |
| 160 | } | 160 | } |
| 161 | EXPORT_SYMBOL_GPL(put_compat_timespec); | ||
| 161 | 162 | ||
| 162 | static long compat_nanosleep_restart(struct restart_block *restart) | 163 | static long compat_nanosleep_restart(struct restart_block *restart) |
| 163 | { | 164 | { |
| @@ -890,6 +891,7 @@ sigset_from_compat (sigset_t *set, compat_sigset_t *compat) | |||
| 890 | case 1: set->sig[0] = compat->sig[0] | (((long)compat->sig[1]) << 32 ); | 891 | case 1: set->sig[0] = compat->sig[0] | (((long)compat->sig[1]) << 32 ); |
| 891 | } | 892 | } |
| 892 | } | 893 | } |
| 894 | EXPORT_SYMBOL_GPL(sigset_from_compat); | ||
| 893 | 895 | ||
| 894 | asmlinkage long | 896 | asmlinkage long |
| 895 | compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese, | 897 | compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese, |
| @@ -991,11 +993,8 @@ asmlinkage long compat_sys_rt_sigsuspend(compat_sigset_t __user *unewset, compat | |||
| 991 | sigset_from_compat(&newset, &newset32); | 993 | sigset_from_compat(&newset, &newset32); |
| 992 | sigdelsetmask(&newset, sigmask(SIGKILL)|sigmask(SIGSTOP)); | 994 | sigdelsetmask(&newset, sigmask(SIGKILL)|sigmask(SIGSTOP)); |
| 993 | 995 | ||
| 994 | spin_lock_irq(¤t->sighand->siglock); | ||
| 995 | current->saved_sigmask = current->blocked; | 996 | current->saved_sigmask = current->blocked; |
| 996 | current->blocked = newset; | 997 | set_current_blocked(&newset); |
| 997 | recalc_sigpending(); | ||
| 998 | spin_unlock_irq(¤t->sighand->siglock); | ||
| 999 | 998 | ||
| 1000 | current->state = TASK_INTERRUPTIBLE; | 999 | current->state = TASK_INTERRUPTIBLE; |
| 1001 | schedule(); | 1000 | schedule(); |
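The compat sigsuspend path now uses set_current_blocked() instead of open-coding the siglock/recalc_sigpending() sequence. The general pattern looks like this (sketch, not from the patch):

```c
#include <linux/sched.h>
#include <linux/signal.h>

/* Sketch: block everything except the unblockable signals. */
static void block_most_signals(void)
{
	sigset_t newset;

	sigfillset(&newset);
	sigdelsetmask(&newset, sigmask(SIGKILL) | sigmask(SIGSTOP));

	current->saved_sigmask = current->blocked;
	set_current_blocked(&newset);	/* takes siglock and updates pending state internally */
}
```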
diff --git a/kernel/configs.c b/kernel/configs.c
index b4066b44a99d..42e8fa075eed 100644
--- a/kernel/configs.c
+++ b/kernel/configs.c
| @@ -92,8 +92,8 @@ static void __exit ikconfig_cleanup(void) | |||
| 92 | module_init(ikconfig_init); | 92 | module_init(ikconfig_init); |
| 93 | module_exit(ikconfig_cleanup); | 93 | module_exit(ikconfig_cleanup); |
| 94 | 94 | ||
| 95 | #endif /* CONFIG_IKCONFIG_PROC */ | ||
| 96 | |||
| 95 | MODULE_LICENSE("GPL"); | 97 | MODULE_LICENSE("GPL"); |
| 96 | MODULE_AUTHOR("Randy Dunlap"); | 98 | MODULE_AUTHOR("Randy Dunlap"); |
| 97 | MODULE_DESCRIPTION("Echo the kernel .config file used to build the kernel"); | 99 | MODULE_DESCRIPTION("Echo the kernel .config file used to build the kernel"); |
| 98 | |||
| 99 | #endif /* CONFIG_IKCONFIG_PROC */ | ||
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 9c9b7545c810..10131fdaff70 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
| @@ -55,7 +55,7 @@ | |||
| 55 | #include <linux/sort.h> | 55 | #include <linux/sort.h> |
| 56 | 56 | ||
| 57 | #include <asm/uaccess.h> | 57 | #include <asm/uaccess.h> |
| 58 | #include <asm/atomic.h> | 58 | #include <linux/atomic.h> |
| 59 | #include <linux/mutex.h> | 59 | #include <linux/mutex.h> |
| 60 | #include <linux/workqueue.h> | 60 | #include <linux/workqueue.h> |
| 61 | #include <linux/cgroup.h> | 61 | #include <linux/cgroup.h> |
| @@ -2460,11 +2460,19 @@ static int cpuset_spread_node(int *rotor) | |||
| 2460 | 2460 | ||
| 2461 | int cpuset_mem_spread_node(void) | 2461 | int cpuset_mem_spread_node(void) |
| 2462 | { | 2462 | { |
| 2463 | if (current->cpuset_mem_spread_rotor == NUMA_NO_NODE) | ||
| 2464 | current->cpuset_mem_spread_rotor = | ||
| 2465 | node_random(¤t->mems_allowed); | ||
| 2466 | |||
| 2463 | return cpuset_spread_node(¤t->cpuset_mem_spread_rotor); | 2467 | return cpuset_spread_node(¤t->cpuset_mem_spread_rotor); |
| 2464 | } | 2468 | } |
| 2465 | 2469 | ||
| 2466 | int cpuset_slab_spread_node(void) | 2470 | int cpuset_slab_spread_node(void) |
| 2467 | { | 2471 | { |
| 2472 | if (current->cpuset_slab_spread_rotor == NUMA_NO_NODE) | ||
| 2473 | current->cpuset_slab_spread_rotor = | ||
| 2474 | node_random(¤t->mems_allowed); | ||
| 2475 | |||
| 2468 | return cpuset_spread_node(¤t->cpuset_slab_spread_rotor); | 2476 | return cpuset_spread_node(¤t->cpuset_slab_spread_rotor); |
| 2469 | } | 2477 | } |
| 2470 | 2478 | ||
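This change seeds each task's mem/slab spread rotor with a random node from its allowed mask instead of always starting from the same place; node_random() does the picking. A sketch of its use (not from the patch):

```c
#include <linux/nodemask.h>

/* Sketch: choose a random starting node from an allowed-nodes mask. */
static int pick_spread_start(const nodemask_t *allowed)
{
	return node_random(allowed);	/* NUMA_NO_NODE when the mask is empty */
}
```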
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index bad6786dee88..0d7c08784efb 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
| @@ -51,7 +51,7 @@ | |||
| 51 | 51 | ||
| 52 | #include <asm/cacheflush.h> | 52 | #include <asm/cacheflush.h> |
| 53 | #include <asm/byteorder.h> | 53 | #include <asm/byteorder.h> |
| 54 | #include <asm/atomic.h> | 54 | #include <linux/atomic.h> |
| 55 | #include <asm/system.h> | 55 | #include <asm/system.h> |
| 56 | 56 | ||
| 57 | #include "debug_core.h" | 57 | #include "debug_core.h" |
diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c
index a11db956dd62..34872482315e 100644
--- a/kernel/debug/gdbstub.c
+++ b/kernel/debug/gdbstub.c
| @@ -42,6 +42,8 @@ | |||
| 42 | /* Our I/O buffers. */ | 42 | /* Our I/O buffers. */ |
| 43 | static char remcom_in_buffer[BUFMAX]; | 43 | static char remcom_in_buffer[BUFMAX]; |
| 44 | static char remcom_out_buffer[BUFMAX]; | 44 | static char remcom_out_buffer[BUFMAX]; |
| 45 | static int gdbstub_use_prev_in_buf; | ||
| 46 | static int gdbstub_prev_in_buf_pos; | ||
| 45 | 47 | ||
| 46 | /* Storage for the registers, in GDB format. */ | 48 | /* Storage for the registers, in GDB format. */ |
| 47 | static unsigned long gdb_regs[(NUMREGBYTES + | 49 | static unsigned long gdb_regs[(NUMREGBYTES + |
| @@ -58,6 +60,13 @@ static int gdbstub_read_wait(void) | |||
| 58 | int ret = -1; | 60 | int ret = -1; |
| 59 | int i; | 61 | int i; |
| 60 | 62 | ||
| 63 | if (unlikely(gdbstub_use_prev_in_buf)) { | ||
| 64 | if (gdbstub_prev_in_buf_pos < gdbstub_use_prev_in_buf) | ||
| 65 | return remcom_in_buffer[gdbstub_prev_in_buf_pos++]; | ||
| 66 | else | ||
| 67 | gdbstub_use_prev_in_buf = 0; | ||
| 68 | } | ||
| 69 | |||
| 61 | /* poll any additional I/O interfaces that are defined */ | 70 | /* poll any additional I/O interfaces that are defined */ |
| 62 | while (ret < 0) | 71 | while (ret < 0) |
| 63 | for (i = 0; kdb_poll_funcs[i] != NULL; i++) { | 72 | for (i = 0; kdb_poll_funcs[i] != NULL; i++) { |
| @@ -109,7 +118,6 @@ static void get_packet(char *buffer) | |||
| 109 | buffer[count] = ch; | 118 | buffer[count] = ch; |
| 110 | count = count + 1; | 119 | count = count + 1; |
| 111 | } | 120 | } |
| 112 | buffer[count] = 0; | ||
| 113 | 121 | ||
| 114 | if (ch == '#') { | 122 | if (ch == '#') { |
| 115 | xmitcsum = hex_to_bin(gdbstub_read_wait()) << 4; | 123 | xmitcsum = hex_to_bin(gdbstub_read_wait()) << 4; |
| @@ -124,6 +132,7 @@ static void get_packet(char *buffer) | |||
| 124 | if (dbg_io_ops->flush) | 132 | if (dbg_io_ops->flush) |
| 125 | dbg_io_ops->flush(); | 133 | dbg_io_ops->flush(); |
| 126 | } | 134 | } |
| 135 | buffer[count] = 0; | ||
| 127 | } while (checksum != xmitcsum); | 136 | } while (checksum != xmitcsum); |
| 128 | } | 137 | } |
| 129 | 138 | ||
| @@ -1082,12 +1091,11 @@ int gdbstub_state(struct kgdb_state *ks, char *cmd) | |||
| 1082 | case 'c': | 1091 | case 'c': |
| 1083 | strcpy(remcom_in_buffer, cmd); | 1092 | strcpy(remcom_in_buffer, cmd); |
| 1084 | return 0; | 1093 | return 0; |
| 1085 | case '?': | 1094 | case '$': |
| 1086 | gdb_cmd_status(ks); | 1095 | strcpy(remcom_in_buffer, cmd); |
| 1087 | break; | 1096 | gdbstub_use_prev_in_buf = strlen(remcom_in_buffer); |
| 1088 | case '\0': | 1097 | gdbstub_prev_in_buf_pos = 0; |
| 1089 | strcpy(remcom_out_buffer, ""); | 1098 | return 0; |
| 1090 | break; | ||
| 1091 | } | 1099 | } |
| 1092 | dbg_io_ops->write_char('+'); | 1100 | dbg_io_ops->write_char('+'); |
| 1093 | put_packet(remcom_out_buffer); | 1101 | put_packet(remcom_out_buffer); |
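For context on what get_packet() and the kdb transition code below are parsing (background sketch, not part of the patch): a GDB remote-serial packet has the form "$<payload>#<checksum>", where the checksum is the modulo-256 sum of the payload bytes printed as two hex digits; that is why "qSupported" ends in "#37" and "?" in "#3f".

```c
/* Userspace sketch: compute the GDB remote-protocol checksum of a payload. */
#include <stdio.h>

static unsigned char gdb_checksum(const char *payload)
{
	unsigned char sum = 0;

	while (*payload)
		sum += (unsigned char)*payload++;
	return sum;
}

int main(void)
{
	printf("$%s#%02x\n", "qSupported", gdb_checksum("qSupported")); /* $qSupported#37 */
	printf("$%s#%02x\n", "?", gdb_checksum("?"));                   /* $?#3f */
	return 0;
}
```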
diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c
index 2f62fe85f16a..7179eac7b41c 100644
--- a/kernel/debug/kdb/kdb_bt.c
+++ b/kernel/debug/kdb/kdb_bt.c
| @@ -112,9 +112,8 @@ kdb_bt(int argc, const char **argv) | |||
| 112 | unsigned long addr; | 112 | unsigned long addr; |
| 113 | long offset; | 113 | long offset; |
| 114 | 114 | ||
| 115 | kdbgetintenv("BTARGS", &argcount); /* Arguments to print */ | 115 | /* Prompt after each proc in bta */ |
| 116 | kdbgetintenv("BTAPROMPT", &btaprompt); /* Prompt after each | 116 | kdbgetintenv("BTAPROMPT", &btaprompt); |
| 117 | * proc in bta */ | ||
| 118 | 117 | ||
| 119 | if (strcmp(argv[0], "bta") == 0) { | 118 | if (strcmp(argv[0], "bta") == 0) { |
| 120 | struct task_struct *g, *p; | 119 | struct task_struct *g, *p; |
diff --git a/kernel/debug/kdb/kdb_cmds b/kernel/debug/kdb/kdb_cmds
index 56c88e4db309..9834ad303ab6 100644
--- a/kernel/debug/kdb/kdb_cmds
+++ b/kernel/debug/kdb/kdb_cmds
| @@ -18,16 +18,12 @@ defcmd dumpcommon "" "Common kdb debugging" | |||
| 18 | endefcmd | 18 | endefcmd |
| 19 | 19 | ||
| 20 | defcmd dumpall "" "First line debugging" | 20 | defcmd dumpall "" "First line debugging" |
| 21 | set BTSYMARG 1 | ||
| 22 | set BTARGS 9 | ||
| 23 | pid R | 21 | pid R |
| 24 | -dumpcommon | 22 | -dumpcommon |
| 25 | -bta | 23 | -bta |
| 26 | endefcmd | 24 | endefcmd |
| 27 | 25 | ||
| 28 | defcmd dumpcpu "" "Same as dumpall but only tasks on cpus" | 26 | defcmd dumpcpu "" "Same as dumpall but only tasks on cpus" |
| 29 | set BTSYMARG 1 | ||
| 30 | set BTARGS 9 | ||
| 31 | pid R | 27 | pid R |
| 32 | -dumpcommon | 28 | -dumpcommon |
| 33 | -btc | 29 | -btc |
diff --git a/kernel/debug/kdb/kdb_debugger.c b/kernel/debug/kdb/kdb_debugger.c
index dd0b1b7dd02c..d9ca9aa481ec 100644
--- a/kernel/debug/kdb/kdb_debugger.c
+++ b/kernel/debug/kdb/kdb_debugger.c
| @@ -30,6 +30,8 @@ EXPORT_SYMBOL_GPL(kdb_poll_funcs); | |||
| 30 | int kdb_poll_idx = 1; | 30 | int kdb_poll_idx = 1; |
| 31 | EXPORT_SYMBOL_GPL(kdb_poll_idx); | 31 | EXPORT_SYMBOL_GPL(kdb_poll_idx); |
| 32 | 32 | ||
| 33 | static struct kgdb_state *kdb_ks; | ||
| 34 | |||
| 33 | int kdb_stub(struct kgdb_state *ks) | 35 | int kdb_stub(struct kgdb_state *ks) |
| 34 | { | 36 | { |
| 35 | int error = 0; | 37 | int error = 0; |
| @@ -39,6 +41,7 @@ int kdb_stub(struct kgdb_state *ks) | |||
| 39 | kdb_dbtrap_t db_result = KDB_DB_NOBPT; | 41 | kdb_dbtrap_t db_result = KDB_DB_NOBPT; |
| 40 | int i; | 42 | int i; |
| 41 | 43 | ||
| 44 | kdb_ks = ks; | ||
| 42 | if (KDB_STATE(REENTRY)) { | 45 | if (KDB_STATE(REENTRY)) { |
| 43 | reason = KDB_REASON_SWITCH; | 46 | reason = KDB_REASON_SWITCH; |
| 44 | KDB_STATE_CLEAR(REENTRY); | 47 | KDB_STATE_CLEAR(REENTRY); |
| @@ -123,20 +126,8 @@ int kdb_stub(struct kgdb_state *ks) | |||
| 123 | KDB_STATE_CLEAR(PAGER); | 126 | KDB_STATE_CLEAR(PAGER); |
| 124 | kdbnearsym_cleanup(); | 127 | kdbnearsym_cleanup(); |
| 125 | if (error == KDB_CMD_KGDB) { | 128 | if (error == KDB_CMD_KGDB) { |
| 126 | if (KDB_STATE(DOING_KGDB) || KDB_STATE(DOING_KGDB2)) { | 129 | if (KDB_STATE(DOING_KGDB)) |
| 127 | /* | ||
| 128 | * This inteface glue which allows kdb to transition in into | ||
| 129 | * the gdb stub. In order to do this the '?' or '' gdb serial | ||
| 130 | * packet response is processed here. And then control is | ||
| 131 | * passed to the gdbstub. | ||
| 132 | */ | ||
| 133 | if (KDB_STATE(DOING_KGDB)) | ||
| 134 | gdbstub_state(ks, "?"); | ||
| 135 | else | ||
| 136 | gdbstub_state(ks, ""); | ||
| 137 | KDB_STATE_CLEAR(DOING_KGDB); | 130 | KDB_STATE_CLEAR(DOING_KGDB); |
| 138 | KDB_STATE_CLEAR(DOING_KGDB2); | ||
| 139 | } | ||
| 140 | return DBG_PASS_EVENT; | 131 | return DBG_PASS_EVENT; |
| 141 | } | 132 | } |
| 142 | kdb_bp_install(ks->linux_regs); | 133 | kdb_bp_install(ks->linux_regs); |
| @@ -166,3 +157,7 @@ int kdb_stub(struct kgdb_state *ks) | |||
| 166 | return kgdb_info[ks->cpu].ret_state; | 157 | return kgdb_info[ks->cpu].ret_state; |
| 167 | } | 158 | } |
| 168 | 159 | ||
| 160 | void kdb_gdb_state_pass(char *buf) | ||
| 161 | { | ||
| 162 | gdbstub_state(kdb_ks, buf); | ||
| 163 | } | ||
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index 96fdaac46a80..4802eb5840e1 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
| @@ -31,15 +31,21 @@ char kdb_prompt_str[CMD_BUFLEN]; | |||
| 31 | 31 | ||
| 32 | int kdb_trap_printk; | 32 | int kdb_trap_printk; |
| 33 | 33 | ||
| 34 | static void kgdb_transition_check(char *buffer) | 34 | static int kgdb_transition_check(char *buffer) |
| 35 | { | 35 | { |
| 36 | int slen = strlen(buffer); | 36 | if (buffer[0] != '+' && buffer[0] != '$') { |
| 37 | if (strncmp(buffer, "$?#3f", slen) != 0 && | ||
| 38 | strncmp(buffer, "$qSupported#37", slen) != 0 && | ||
| 39 | strncmp(buffer, "+$qSupported#37", slen) != 0) { | ||
| 40 | KDB_STATE_SET(KGDB_TRANS); | 37 | KDB_STATE_SET(KGDB_TRANS); |
| 41 | kdb_printf("%s", buffer); | 38 | kdb_printf("%s", buffer); |
| 39 | } else { | ||
| 40 | int slen = strlen(buffer); | ||
| 41 | if (slen > 3 && buffer[slen - 3] == '#') { | ||
| 42 | kdb_gdb_state_pass(buffer); | ||
| 43 | strcpy(buffer, "kgdb"); | ||
| 44 | KDB_STATE_SET(DOING_KGDB); | ||
| 45 | return 1; | ||
| 46 | } | ||
| 42 | } | 47 | } |
| 48 | return 0; | ||
| 43 | } | 49 | } |
| 44 | 50 | ||
| 45 | static int kdb_read_get_key(char *buffer, size_t bufsize) | 51 | static int kdb_read_get_key(char *buffer, size_t bufsize) |
| @@ -251,6 +257,10 @@ poll_again: | |||
| 251 | case 13: /* enter */ | 257 | case 13: /* enter */ |
| 252 | *lastchar++ = '\n'; | 258 | *lastchar++ = '\n'; |
| 253 | *lastchar++ = '\0'; | 259 | *lastchar++ = '\0'; |
| 260 | if (!KDB_STATE(KGDB_TRANS)) { | ||
| 261 | KDB_STATE_SET(KGDB_TRANS); | ||
| 262 | kdb_printf("%s", buffer); | ||
| 263 | } | ||
| 254 | kdb_printf("\n"); | 264 | kdb_printf("\n"); |
| 255 | return buffer; | 265 | return buffer; |
| 256 | case 4: /* Del */ | 266 | case 4: /* Del */ |
| @@ -382,22 +392,26 @@ poll_again: | |||
| 382 | * printed characters if we think that | 392 | * printed characters if we think that |
| 383 | * kgdb is connecting, until the check | 393 | * kgdb is connecting, until the check |
| 384 | * fails */ | 394 | * fails */ |
| 385 | if (!KDB_STATE(KGDB_TRANS)) | 395 | if (!KDB_STATE(KGDB_TRANS)) { |
| 386 | kgdb_transition_check(buffer); | 396 | if (kgdb_transition_check(buffer)) |
| 387 | else | 397 | return buffer; |
| 398 | } else { | ||
| 388 | kdb_printf("%c", key); | 399 | kdb_printf("%c", key); |
| 400 | } | ||
| 389 | } | 401 | } |
| 390 | /* Special escape to kgdb */ | 402 | /* Special escape to kgdb */ |
| 391 | if (lastchar - buffer >= 5 && | 403 | if (lastchar - buffer >= 5 && |
| 392 | strcmp(lastchar - 5, "$?#3f") == 0) { | 404 | strcmp(lastchar - 5, "$?#3f") == 0) { |
| 405 | kdb_gdb_state_pass(lastchar - 5); | ||
| 393 | strcpy(buffer, "kgdb"); | 406 | strcpy(buffer, "kgdb"); |
| 394 | KDB_STATE_SET(DOING_KGDB); | 407 | KDB_STATE_SET(DOING_KGDB); |
| 395 | return buffer; | 408 | return buffer; |
| 396 | } | 409 | } |
| 397 | if (lastchar - buffer >= 14 && | 410 | if (lastchar - buffer >= 11 && |
| 398 | strcmp(lastchar - 14, "$qSupported#37") == 0) { | 411 | strcmp(lastchar - 11, "$qSupported") == 0) { |
| 412 | kdb_gdb_state_pass(lastchar - 11); | ||
| 399 | strcpy(buffer, "kgdb"); | 413 | strcpy(buffer, "kgdb"); |
| 400 | KDB_STATE_SET(DOING_KGDB2); | 414 | KDB_STATE_SET(DOING_KGDB); |
| 401 | return buffer; | 415 | return buffer; |
| 402 | } | 416 | } |
| 403 | } | 417 | } |
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index be14779bcef6..63786e71a3cd 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
| @@ -145,7 +145,6 @@ static char *__env[] = { | |||
| 145 | #endif | 145 | #endif |
| 146 | "RADIX=16", | 146 | "RADIX=16", |
| 147 | "MDCOUNT=8", /* lines of md output */ | 147 | "MDCOUNT=8", /* lines of md output */ |
| 148 | "BTARGS=9", /* 9 possible args in bt */ | ||
| 149 | KDB_PLATFORM_ENV, | 148 | KDB_PLATFORM_ENV, |
| 150 | "DTABCOUNT=30", | 149 | "DTABCOUNT=30", |
| 151 | "NOSECT=1", | 150 | "NOSECT=1", |
| @@ -172,6 +171,7 @@ static char *__env[] = { | |||
| 172 | (char *)0, | 171 | (char *)0, |
| 173 | (char *)0, | 172 | (char *)0, |
| 174 | (char *)0, | 173 | (char *)0, |
| 174 | (char *)0, | ||
| 175 | }; | 175 | }; |
| 176 | 176 | ||
| 177 | static const int __nenv = (sizeof(__env) / sizeof(char *)); | 177 | static const int __nenv = (sizeof(__env) / sizeof(char *)); |
| @@ -1386,7 +1386,7 @@ int kdb_main_loop(kdb_reason_t reason, kdb_reason_t reason2, int error, | |||
| 1386 | } | 1386 | } |
| 1387 | 1387 | ||
| 1388 | if (result == KDB_CMD_KGDB) { | 1388 | if (result == KDB_CMD_KGDB) { |
| 1389 | if (!(KDB_STATE(DOING_KGDB) || KDB_STATE(DOING_KGDB2))) | 1389 | if (!KDB_STATE(DOING_KGDB)) |
| 1390 | kdb_printf("Entering please attach debugger " | 1390 | kdb_printf("Entering please attach debugger " |
| 1391 | "or use $D#44+ or $3#33\n"); | 1391 | "or use $D#44+ or $3#33\n"); |
| 1392 | break; | 1392 | break; |
diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h
index 35d69ed1dfb5..e381d105b40b 100644
--- a/kernel/debug/kdb/kdb_private.h
+++ b/kernel/debug/kdb/kdb_private.h
| @@ -21,7 +21,6 @@ | |||
| 21 | #define KDB_CMD_SS (-1003) | 21 | #define KDB_CMD_SS (-1003) |
| 22 | #define KDB_CMD_SSB (-1004) | 22 | #define KDB_CMD_SSB (-1004) |
| 23 | #define KDB_CMD_KGDB (-1005) | 23 | #define KDB_CMD_KGDB (-1005) |
| 24 | #define KDB_CMD_KGDB2 (-1006) | ||
| 25 | 24 | ||
| 26 | /* Internal debug flags */ | 25 | /* Internal debug flags */ |
| 27 | #define KDB_DEBUG_FLAG_BP 0x0002 /* Breakpoint subsystem debug */ | 26 | #define KDB_DEBUG_FLAG_BP 0x0002 /* Breakpoint subsystem debug */ |
| @@ -146,7 +145,6 @@ extern int kdb_state; | |||
| 146 | * keyboard on this cpu */ | 145 | * keyboard on this cpu */ |
| 147 | #define KDB_STATE_KEXEC 0x00040000 /* kexec issued */ | 146 | #define KDB_STATE_KEXEC 0x00040000 /* kexec issued */ |
| 148 | #define KDB_STATE_DOING_KGDB 0x00080000 /* kgdb enter now issued */ | 147 | #define KDB_STATE_DOING_KGDB 0x00080000 /* kgdb enter now issued */ |
| 149 | #define KDB_STATE_DOING_KGDB2 0x00100000 /* kgdb enter now issued */ | ||
| 150 | #define KDB_STATE_KGDB_TRANS 0x00200000 /* Transition to kgdb */ | 148 | #define KDB_STATE_KGDB_TRANS 0x00200000 /* Transition to kgdb */ |
| 151 | #define KDB_STATE_ARCH 0xff000000 /* Reserved for arch | 149 | #define KDB_STATE_ARCH 0xff000000 /* Reserved for arch |
| 152 | * specific use */ | 150 | * specific use */ |
| @@ -218,6 +216,7 @@ extern void kdb_print_nameval(const char *name, unsigned long val); | |||
| 218 | extern void kdb_send_sig_info(struct task_struct *p, struct siginfo *info); | 216 | extern void kdb_send_sig_info(struct task_struct *p, struct siginfo *info); |
| 219 | extern void kdb_meminfo_proc_show(void); | 217 | extern void kdb_meminfo_proc_show(void); |
| 220 | extern char *kdb_getstr(char *, size_t, char *); | 218 | extern char *kdb_getstr(char *, size_t, char *); |
| 219 | extern void kdb_gdb_state_pass(char *buf); | ||
| 221 | 220 | ||
| 222 | /* Defines for kdb_symbol_print */ | 221 | /* Defines for kdb_symbol_print */ |
| 223 | #define KDB_SP_SPACEB 0x0001 /* Space before string */ | 222 | #define KDB_SP_SPACEB 0x0001 /* Space before string */ |
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index ead9b610aa71..418b3f7053aa 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
| @@ -19,8 +19,10 @@ | |||
| 19 | #include <linux/time.h> | 19 | #include <linux/time.h> |
| 20 | #include <linux/sysctl.h> | 20 | #include <linux/sysctl.h> |
| 21 | #include <linux/delayacct.h> | 21 | #include <linux/delayacct.h> |
| 22 | #include <linux/module.h> | ||
| 22 | 23 | ||
| 23 | int delayacct_on __read_mostly = 1; /* Delay accounting turned on/off */ | 24 | int delayacct_on __read_mostly = 1; /* Delay accounting turned on/off */ |
| 25 | EXPORT_SYMBOL_GPL(delayacct_on); | ||
| 24 | struct kmem_cache *delayacct_cache; | 26 | struct kmem_cache *delayacct_cache; |
| 25 | 27 | ||
| 26 | static int __init delayacct_setup_disable(char *str) | 28 | static int __init delayacct_setup_disable(char *str) |
diff --git a/kernel/events/Makefile b/kernel/events/Makefile
index 1ce23d3d8394..89e5e8aa4c36 100644
--- a/kernel/events/Makefile
+++ b/kernel/events/Makefile
| @@ -2,5 +2,5 @@ ifdef CONFIG_FUNCTION_TRACER | |||
| 2 | CFLAGS_REMOVE_core.o = -pg | 2 | CFLAGS_REMOVE_core.o = -pg |
| 3 | endif | 3 | endif |
| 4 | 4 | ||
| 5 | obj-y := core.o | 5 | obj-y := core.o ring_buffer.o |
| 6 | obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o | 6 | obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o |
diff --git a/kernel/events/core.c b/kernel/events/core.c
index d863b3c057bb..b8785e26ee1c 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
| @@ -36,6 +36,8 @@ | |||
| 36 | #include <linux/ftrace_event.h> | 36 | #include <linux/ftrace_event.h> |
| 37 | #include <linux/hw_breakpoint.h> | 37 | #include <linux/hw_breakpoint.h> |
| 38 | 38 | ||
| 39 | #include "internal.h" | ||
| 40 | |||
| 39 | #include <asm/irq_regs.h> | 41 | #include <asm/irq_regs.h> |
| 40 | 42 | ||
| 41 | struct remote_function_call { | 43 | struct remote_function_call { |
| @@ -200,6 +202,22 @@ __get_cpu_context(struct perf_event_context *ctx) | |||
| 200 | return this_cpu_ptr(ctx->pmu->pmu_cpu_context); | 202 | return this_cpu_ptr(ctx->pmu->pmu_cpu_context); |
| 201 | } | 203 | } |
| 202 | 204 | ||
| 205 | static void perf_ctx_lock(struct perf_cpu_context *cpuctx, | ||
| 206 | struct perf_event_context *ctx) | ||
| 207 | { | ||
| 208 | raw_spin_lock(&cpuctx->ctx.lock); | ||
| 209 | if (ctx) | ||
| 210 | raw_spin_lock(&ctx->lock); | ||
| 211 | } | ||
| 212 | |||
| 213 | static void perf_ctx_unlock(struct perf_cpu_context *cpuctx, | ||
| 214 | struct perf_event_context *ctx) | ||
| 215 | { | ||
| 216 | if (ctx) | ||
| 217 | raw_spin_unlock(&ctx->lock); | ||
| 218 | raw_spin_unlock(&cpuctx->ctx.lock); | ||
| 219 | } | ||
| 220 | |||
| 203 | #ifdef CONFIG_CGROUP_PERF | 221 | #ifdef CONFIG_CGROUP_PERF |
| 204 | 222 | ||
| 205 | /* | 223 | /* |
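The two helpers added above encode the lock ordering used throughout the rest of this patch: the per-CPU context lock is always taken before the (optional) task context lock and released in the reverse order. A sketch of how a caller brackets a reschedule with them (illustrative only; it assumes it lives in core.c next to the helpers):

```c
/* Sketch: hold both context locks while moving events in or out. */
static void example_resched(struct perf_cpu_context *cpuctx,
			    struct perf_event_context *task_ctx)
{
	perf_ctx_lock(cpuctx, task_ctx);	/* cpuctx->ctx.lock, then task_ctx->lock */
	/* ... schedule events out/in while both contexts are stable ... */
	perf_ctx_unlock(cpuctx, task_ctx);	/* release in the opposite order */
}
```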
| @@ -340,11 +358,8 @@ void perf_cgroup_switch(struct task_struct *task, int mode) | |||
| 340 | rcu_read_lock(); | 358 | rcu_read_lock(); |
| 341 | 359 | ||
| 342 | list_for_each_entry_rcu(pmu, &pmus, entry) { | 360 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
| 343 | |||
| 344 | cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); | 361 | cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); |
| 345 | 362 | ||
| 346 | perf_pmu_disable(cpuctx->ctx.pmu); | ||
| 347 | |||
| 348 | /* | 363 | /* |
| 349 | * perf_cgroup_events says at least one | 364 | * perf_cgroup_events says at least one |
| 350 | * context on this CPU has cgroup events. | 365 | * context on this CPU has cgroup events. |
| @@ -353,6 +368,8 @@ void perf_cgroup_switch(struct task_struct *task, int mode) | |||
| 353 | * events for a context. | 368 | * events for a context. |
| 354 | */ | 369 | */ |
| 355 | if (cpuctx->ctx.nr_cgroups > 0) { | 370 | if (cpuctx->ctx.nr_cgroups > 0) { |
| 371 | perf_ctx_lock(cpuctx, cpuctx->task_ctx); | ||
| 372 | perf_pmu_disable(cpuctx->ctx.pmu); | ||
| 356 | 373 | ||
| 357 | if (mode & PERF_CGROUP_SWOUT) { | 374 | if (mode & PERF_CGROUP_SWOUT) { |
| 358 | cpu_ctx_sched_out(cpuctx, EVENT_ALL); | 375 | cpu_ctx_sched_out(cpuctx, EVENT_ALL); |
| @@ -372,9 +389,9 @@ void perf_cgroup_switch(struct task_struct *task, int mode) | |||
| 372 | cpuctx->cgrp = perf_cgroup_from_task(task); | 389 | cpuctx->cgrp = perf_cgroup_from_task(task); |
| 373 | cpu_ctx_sched_in(cpuctx, EVENT_ALL, task); | 390 | cpu_ctx_sched_in(cpuctx, EVENT_ALL, task); |
| 374 | } | 391 | } |
| 392 | perf_pmu_enable(cpuctx->ctx.pmu); | ||
| 393 | perf_ctx_unlock(cpuctx, cpuctx->task_ctx); | ||
| 375 | } | 394 | } |
| 376 | |||
| 377 | perf_pmu_enable(cpuctx->ctx.pmu); | ||
| 378 | } | 395 | } |
| 379 | 396 | ||
| 380 | rcu_read_unlock(); | 397 | rcu_read_unlock(); |
| @@ -731,6 +748,7 @@ static u64 perf_event_time(struct perf_event *event) | |||
| 731 | 748 | ||
| 732 | /* | 749 | /* |
| 733 | * Update the total_time_enabled and total_time_running fields for a event. | 750 | * Update the total_time_enabled and total_time_running fields for a event. |
| 751 | * The caller of this function needs to hold the ctx->lock. | ||
| 734 | */ | 752 | */ |
| 735 | static void update_event_times(struct perf_event *event) | 753 | static void update_event_times(struct perf_event *event) |
| 736 | { | 754 | { |
| @@ -1105,6 +1123,10 @@ static int __perf_remove_from_context(void *info) | |||
| 1105 | raw_spin_lock(&ctx->lock); | 1123 | raw_spin_lock(&ctx->lock); |
| 1106 | event_sched_out(event, cpuctx, ctx); | 1124 | event_sched_out(event, cpuctx, ctx); |
| 1107 | list_del_event(event, ctx); | 1125 | list_del_event(event, ctx); |
| 1126 | if (!ctx->nr_events && cpuctx->task_ctx == ctx) { | ||
| 1127 | ctx->is_active = 0; | ||
| 1128 | cpuctx->task_ctx = NULL; | ||
| 1129 | } | ||
| 1108 | raw_spin_unlock(&ctx->lock); | 1130 | raw_spin_unlock(&ctx->lock); |
| 1109 | 1131 | ||
| 1110 | return 0; | 1132 | return 0; |
| @@ -1454,8 +1476,24 @@ static void add_event_to_ctx(struct perf_event *event, | |||
| 1454 | event->tstamp_stopped = tstamp; | 1476 | event->tstamp_stopped = tstamp; |
| 1455 | } | 1477 | } |
| 1456 | 1478 | ||
| 1457 | static void perf_event_context_sched_in(struct perf_event_context *ctx, | 1479 | static void task_ctx_sched_out(struct perf_event_context *ctx); |
| 1458 | struct task_struct *tsk); | 1480 | static void |
| 1481 | ctx_sched_in(struct perf_event_context *ctx, | ||
| 1482 | struct perf_cpu_context *cpuctx, | ||
| 1483 | enum event_type_t event_type, | ||
| 1484 | struct task_struct *task); | ||
| 1485 | |||
| 1486 | static void perf_event_sched_in(struct perf_cpu_context *cpuctx, | ||
| 1487 | struct perf_event_context *ctx, | ||
| 1488 | struct task_struct *task) | ||
| 1489 | { | ||
| 1490 | cpu_ctx_sched_in(cpuctx, EVENT_PINNED, task); | ||
| 1491 | if (ctx) | ||
| 1492 | ctx_sched_in(ctx, cpuctx, EVENT_PINNED, task); | ||
| 1493 | cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, task); | ||
| 1494 | if (ctx) | ||
| 1495 | ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task); | ||
| 1496 | } | ||
| 1459 | 1497 | ||
| 1460 | /* | 1498 | /* |
| 1461 | * Cross CPU call to install and enable a performance event | 1499 | * Cross CPU call to install and enable a performance event |
| @@ -1466,20 +1504,37 @@ static int __perf_install_in_context(void *info) | |||
| 1466 | { | 1504 | { |
| 1467 | struct perf_event *event = info; | 1505 | struct perf_event *event = info; |
| 1468 | struct perf_event_context *ctx = event->ctx; | 1506 | struct perf_event_context *ctx = event->ctx; |
| 1469 | struct perf_event *leader = event->group_leader; | ||
| 1470 | struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); | 1507 | struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); |
| 1471 | int err; | 1508 | struct perf_event_context *task_ctx = cpuctx->task_ctx; |
| 1509 | struct task_struct *task = current; | ||
| 1510 | |||
| 1511 | perf_ctx_lock(cpuctx, task_ctx); | ||
| 1512 | perf_pmu_disable(cpuctx->ctx.pmu); | ||
| 1472 | 1513 | ||
| 1473 | /* | 1514 | /* |
| 1474 | * In case we're installing a new context to an already running task, | 1515 | * If there was an active task_ctx schedule it out. |
| 1475 | * could also happen before perf_event_task_sched_in() on architectures | ||
| 1476 | * which do context switches with IRQs enabled. | ||
| 1477 | */ | 1516 | */ |
| 1478 | if (ctx->task && !cpuctx->task_ctx) | 1517 | if (task_ctx) |
| 1479 | perf_event_context_sched_in(ctx, ctx->task); | 1518 | task_ctx_sched_out(task_ctx); |
| 1519 | |||
| 1520 | /* | ||
| 1521 | * If the context we're installing events in is not the | ||
| 1522 | * active task_ctx, flip them. | ||
| 1523 | */ | ||
| 1524 | if (ctx->task && task_ctx != ctx) { | ||
| 1525 | if (task_ctx) | ||
| 1526 | raw_spin_unlock(&task_ctx->lock); | ||
| 1527 | raw_spin_lock(&ctx->lock); | ||
| 1528 | task_ctx = ctx; | ||
| 1529 | } | ||
| 1530 | |||
| 1531 | if (task_ctx) { | ||
| 1532 | cpuctx->task_ctx = task_ctx; | ||
| 1533 | task = task_ctx->task; | ||
| 1534 | } | ||
| 1535 | |||
| 1536 | cpu_ctx_sched_out(cpuctx, EVENT_ALL); | ||
| 1480 | 1537 | ||
| 1481 | raw_spin_lock(&ctx->lock); | ||
| 1482 | ctx->is_active = 1; | ||
| 1483 | update_context_time(ctx); | 1538 | update_context_time(ctx); |
| 1484 | /* | 1539 | /* |
| 1485 | * update cgrp time only if current cgrp | 1540 | * update cgrp time only if current cgrp |
| @@ -1490,43 +1545,13 @@ static int __perf_install_in_context(void *info) | |||
| 1490 | 1545 | ||
| 1491 | add_event_to_ctx(event, ctx); | 1546 | add_event_to_ctx(event, ctx); |
| 1492 | 1547 | ||
| 1493 | if (!event_filter_match(event)) | ||
| 1494 | goto unlock; | ||
| 1495 | |||
| 1496 | /* | ||
| 1497 | * Don't put the event on if it is disabled or if | ||
| 1498 | * it is in a group and the group isn't on. | ||
| 1499 | */ | ||
| 1500 | if (event->state != PERF_EVENT_STATE_INACTIVE || | ||
| 1501 | (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE)) | ||
| 1502 | goto unlock; | ||
| 1503 | |||
| 1504 | /* | 1548 | /* |
| 1505 | * An exclusive event can't go on if there are already active | 1549 | * Schedule everything back in |
| 1506 | * hardware events, and no hardware event can go on if there | ||
| 1507 | * is already an exclusive event on. | ||
| 1508 | */ | 1550 | */ |
| 1509 | if (!group_can_go_on(event, cpuctx, 1)) | 1551 | perf_event_sched_in(cpuctx, task_ctx, task); |
| 1510 | err = -EEXIST; | ||
| 1511 | else | ||
| 1512 | err = event_sched_in(event, cpuctx, ctx); | ||
| 1513 | |||
| 1514 | if (err) { | ||
| 1515 | /* | ||
| 1516 | * This event couldn't go on. If it is in a group | ||
| 1517 | * then we have to pull the whole group off. | ||
| 1518 | * If the event group is pinned then put it in error state. | ||
| 1519 | */ | ||
| 1520 | if (leader != event) | ||
| 1521 | group_sched_out(leader, cpuctx, ctx); | ||
| 1522 | if (leader->attr.pinned) { | ||
| 1523 | update_group_times(leader); | ||
| 1524 | leader->state = PERF_EVENT_STATE_ERROR; | ||
| 1525 | } | ||
| 1526 | } | ||
| 1527 | 1552 | ||
| 1528 | unlock: | 1553 | perf_pmu_enable(cpuctx->ctx.pmu); |
| 1529 | raw_spin_unlock(&ctx->lock); | 1554 | perf_ctx_unlock(cpuctx, task_ctx); |
| 1530 | 1555 | ||
| 1531 | return 0; | 1556 | return 0; |
| 1532 | } | 1557 | } |
| @@ -1739,7 +1764,7 @@ out: | |||
| 1739 | raw_spin_unlock_irq(&ctx->lock); | 1764 | raw_spin_unlock_irq(&ctx->lock); |
| 1740 | } | 1765 | } |
| 1741 | 1766 | ||
| 1742 | static int perf_event_refresh(struct perf_event *event, int refresh) | 1767 | int perf_event_refresh(struct perf_event *event, int refresh) |
| 1743 | { | 1768 | { |
| 1744 | /* | 1769 | /* |
| 1745 | * not supported on inherited events | 1770 | * not supported on inherited events |
| @@ -1752,36 +1777,35 @@ static int perf_event_refresh(struct perf_event *event, int refresh) | |||
| 1752 | 1777 | ||
| 1753 | return 0; | 1778 | return 0; |
| 1754 | } | 1779 | } |
| 1780 | EXPORT_SYMBOL_GPL(perf_event_refresh); | ||
| 1755 | 1781 | ||
| 1756 | static void ctx_sched_out(struct perf_event_context *ctx, | 1782 | static void ctx_sched_out(struct perf_event_context *ctx, |
| 1757 | struct perf_cpu_context *cpuctx, | 1783 | struct perf_cpu_context *cpuctx, |
| 1758 | enum event_type_t event_type) | 1784 | enum event_type_t event_type) |
| 1759 | { | 1785 | { |
| 1760 | struct perf_event *event; | 1786 | struct perf_event *event; |
| 1787 | int is_active = ctx->is_active; | ||
| 1761 | 1788 | ||
| 1762 | raw_spin_lock(&ctx->lock); | 1789 | ctx->is_active &= ~event_type; |
| 1763 | perf_pmu_disable(ctx->pmu); | ||
| 1764 | ctx->is_active = 0; | ||
| 1765 | if (likely(!ctx->nr_events)) | 1790 | if (likely(!ctx->nr_events)) |
| 1766 | goto out; | 1791 | return; |
| 1792 | |||
| 1767 | update_context_time(ctx); | 1793 | update_context_time(ctx); |
| 1768 | update_cgrp_time_from_cpuctx(cpuctx); | 1794 | update_cgrp_time_from_cpuctx(cpuctx); |
| 1769 | |||
| 1770 | if (!ctx->nr_active) | 1795 | if (!ctx->nr_active) |
| 1771 | goto out; | 1796 | return; |
| 1772 | 1797 | ||
| 1773 | if (event_type & EVENT_PINNED) { | 1798 | perf_pmu_disable(ctx->pmu); |
| 1799 | if ((is_active & EVENT_PINNED) && (event_type & EVENT_PINNED)) { | ||
| 1774 | list_for_each_entry(event, &ctx->pinned_groups, group_entry) | 1800 | list_for_each_entry(event, &ctx->pinned_groups, group_entry) |
| 1775 | group_sched_out(event, cpuctx, ctx); | 1801 | group_sched_out(event, cpuctx, ctx); |
| 1776 | } | 1802 | } |
| 1777 | 1803 | ||
| 1778 | if (event_type & EVENT_FLEXIBLE) { | 1804 | if ((is_active & EVENT_FLEXIBLE) && (event_type & EVENT_FLEXIBLE)) { |
| 1779 | list_for_each_entry(event, &ctx->flexible_groups, group_entry) | 1805 | list_for_each_entry(event, &ctx->flexible_groups, group_entry) |
| 1780 | group_sched_out(event, cpuctx, ctx); | 1806 | group_sched_out(event, cpuctx, ctx); |
| 1781 | } | 1807 | } |
| 1782 | out: | ||
| 1783 | perf_pmu_enable(ctx->pmu); | 1808 | perf_pmu_enable(ctx->pmu); |
| 1784 | raw_spin_unlock(&ctx->lock); | ||
| 1785 | } | 1809 | } |
| 1786 | 1810 | ||
| 1787 | /* | 1811 | /* |
| @@ -1929,8 +1953,10 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn, | |||
| 1929 | rcu_read_unlock(); | 1953 | rcu_read_unlock(); |
| 1930 | 1954 | ||
| 1931 | if (do_switch) { | 1955 | if (do_switch) { |
| 1956 | raw_spin_lock(&ctx->lock); | ||
| 1932 | ctx_sched_out(ctx, cpuctx, EVENT_ALL); | 1957 | ctx_sched_out(ctx, cpuctx, EVENT_ALL); |
| 1933 | cpuctx->task_ctx = NULL; | 1958 | cpuctx->task_ctx = NULL; |
| 1959 | raw_spin_unlock(&ctx->lock); | ||
| 1934 | } | 1960 | } |
| 1935 | } | 1961 | } |
| 1936 | 1962 | ||
| @@ -1965,8 +1991,7 @@ void __perf_event_task_sched_out(struct task_struct *task, | |||
| 1965 | perf_cgroup_sched_out(task); | 1991 | perf_cgroup_sched_out(task); |
| 1966 | } | 1992 | } |
| 1967 | 1993 | ||
| 1968 | static void task_ctx_sched_out(struct perf_event_context *ctx, | 1994 | static void task_ctx_sched_out(struct perf_event_context *ctx) |
| 1969 | enum event_type_t event_type) | ||
| 1970 | { | 1995 | { |
| 1971 | struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); | 1996 | struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); |
| 1972 | 1997 | ||
| @@ -1976,7 +2001,7 @@ static void task_ctx_sched_out(struct perf_event_context *ctx, | |||
| 1976 | if (WARN_ON_ONCE(ctx != cpuctx->task_ctx)) | 2001 | if (WARN_ON_ONCE(ctx != cpuctx->task_ctx)) |
| 1977 | return; | 2002 | return; |
| 1978 | 2003 | ||
| 1979 | ctx_sched_out(ctx, cpuctx, event_type); | 2004 | ctx_sched_out(ctx, cpuctx, EVENT_ALL); |
| 1980 | cpuctx->task_ctx = NULL; | 2005 | cpuctx->task_ctx = NULL; |
| 1981 | } | 2006 | } |
| 1982 | 2007 | ||
| @@ -2055,11 +2080,11 @@ ctx_sched_in(struct perf_event_context *ctx, | |||
| 2055 | struct task_struct *task) | 2080 | struct task_struct *task) |
| 2056 | { | 2081 | { |
| 2057 | u64 now; | 2082 | u64 now; |
| 2083 | int is_active = ctx->is_active; | ||
| 2058 | 2084 | ||
| 2059 | raw_spin_lock(&ctx->lock); | 2085 | ctx->is_active |= event_type; |
| 2060 | ctx->is_active = 1; | ||
| 2061 | if (likely(!ctx->nr_events)) | 2086 | if (likely(!ctx->nr_events)) |
| 2062 | goto out; | 2087 | return; |
| 2063 | 2088 | ||
| 2064 | now = perf_clock(); | 2089 | now = perf_clock(); |
| 2065 | ctx->timestamp = now; | 2090 | ctx->timestamp = now; |
| @@ -2068,15 +2093,12 @@ ctx_sched_in(struct perf_event_context *ctx, | |||
| 2068 | * First go through the list and put on any pinned groups | 2093 | * First go through the list and put on any pinned groups |
| 2069 | * in order to give them the best chance of going on. | 2094 | * in order to give them the best chance of going on. |
| 2070 | */ | 2095 | */ |
| 2071 | if (event_type & EVENT_PINNED) | 2096 | if (!(is_active & EVENT_PINNED) && (event_type & EVENT_PINNED)) |
| 2072 | ctx_pinned_sched_in(ctx, cpuctx); | 2097 | ctx_pinned_sched_in(ctx, cpuctx); |
| 2073 | 2098 | ||
| 2074 | /* Then walk through the lower prio flexible groups */ | 2099 | /* Then walk through the lower prio flexible groups */ |
| 2075 | if (event_type & EVENT_FLEXIBLE) | 2100 | if (!(is_active & EVENT_FLEXIBLE) && (event_type & EVENT_FLEXIBLE)) |
| 2076 | ctx_flexible_sched_in(ctx, cpuctx); | 2101 | ctx_flexible_sched_in(ctx, cpuctx); |
| 2077 | |||
| 2078 | out: | ||
| 2079 | raw_spin_unlock(&ctx->lock); | ||
| 2080 | } | 2102 | } |
| 2081 | 2103 | ||
| 2082 | static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, | 2104 | static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, |
| @@ -2088,19 +2110,6 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, | |||
| 2088 | ctx_sched_in(ctx, cpuctx, event_type, task); | 2110 | ctx_sched_in(ctx, cpuctx, event_type, task); |
| 2089 | } | 2111 | } |
| 2090 | 2112 | ||
| 2091 | static void task_ctx_sched_in(struct perf_event_context *ctx, | ||
| 2092 | enum event_type_t event_type) | ||
| 2093 | { | ||
| 2094 | struct perf_cpu_context *cpuctx; | ||
| 2095 | |||
| 2096 | cpuctx = __get_cpu_context(ctx); | ||
| 2097 | if (cpuctx->task_ctx == ctx) | ||
| 2098 | return; | ||
| 2099 | |||
| 2100 | ctx_sched_in(ctx, cpuctx, event_type, NULL); | ||
| 2101 | cpuctx->task_ctx = ctx; | ||
| 2102 | } | ||
| 2103 | |||
| 2104 | static void perf_event_context_sched_in(struct perf_event_context *ctx, | 2113 | static void perf_event_context_sched_in(struct perf_event_context *ctx, |
| 2105 | struct task_struct *task) | 2114 | struct task_struct *task) |
| 2106 | { | 2115 | { |
| @@ -2110,6 +2119,7 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx, | |||
| 2110 | if (cpuctx->task_ctx == ctx) | 2119 | if (cpuctx->task_ctx == ctx) |
| 2111 | return; | 2120 | return; |
| 2112 | 2121 | ||
| 2122 | perf_ctx_lock(cpuctx, ctx); | ||
| 2113 | perf_pmu_disable(ctx->pmu); | 2123 | perf_pmu_disable(ctx->pmu); |
| 2114 | /* | 2124 | /* |
| 2115 | * We want to keep the following priority order: | 2125 | * We want to keep the following priority order: |
| @@ -2118,18 +2128,18 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx, | |||
| 2118 | */ | 2128 | */ |
| 2119 | cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); | 2129 | cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); |
| 2120 | 2130 | ||
| 2121 | ctx_sched_in(ctx, cpuctx, EVENT_PINNED, task); | 2131 | perf_event_sched_in(cpuctx, ctx, task); |
| 2122 | cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, task); | ||
| 2123 | ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task); | ||
| 2124 | 2132 | ||
| 2125 | cpuctx->task_ctx = ctx; | 2133 | cpuctx->task_ctx = ctx; |
| 2126 | 2134 | ||
| 2135 | perf_pmu_enable(ctx->pmu); | ||
| 2136 | perf_ctx_unlock(cpuctx, ctx); | ||
| 2137 | |||
| 2127 | /* | 2138 | /* |
| 2128 | * Since these rotations are per-cpu, we need to ensure the | 2139 | * Since these rotations are per-cpu, we need to ensure the |
| 2129 | * cpu-context we got scheduled on is actually rotating. | 2140 | * cpu-context we got scheduled on is actually rotating. |
| 2130 | */ | 2141 | */ |
| 2131 | perf_pmu_rotate_start(ctx->pmu); | 2142 | perf_pmu_rotate_start(ctx->pmu); |
| 2132 | perf_pmu_enable(ctx->pmu); | ||
| 2133 | } | 2143 | } |
| 2134 | 2144 | ||
| 2135 | /* | 2145 | /* |
| @@ -2269,7 +2279,6 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period) | |||
| 2269 | u64 interrupts, now; | 2279 | u64 interrupts, now; |
| 2270 | s64 delta; | 2280 | s64 delta; |
| 2271 | 2281 | ||
| 2272 | raw_spin_lock(&ctx->lock); | ||
| 2273 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { | 2282 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { |
| 2274 | if (event->state != PERF_EVENT_STATE_ACTIVE) | 2283 | if (event->state != PERF_EVENT_STATE_ACTIVE) |
| 2275 | continue; | 2284 | continue; |
| @@ -2301,7 +2310,6 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period) | |||
| 2301 | if (delta > 0) | 2310 | if (delta > 0) |
| 2302 | perf_adjust_period(event, period, delta); | 2311 | perf_adjust_period(event, period, delta); |
| 2303 | } | 2312 | } |
| 2304 | raw_spin_unlock(&ctx->lock); | ||
| 2305 | } | 2313 | } |
| 2306 | 2314 | ||
| 2307 | /* | 2315 | /* |
| @@ -2309,16 +2317,12 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period) | |||
| 2309 | */ | 2317 | */ |
| 2310 | static void rotate_ctx(struct perf_event_context *ctx) | 2318 | static void rotate_ctx(struct perf_event_context *ctx) |
| 2311 | { | 2319 | { |
| 2312 | raw_spin_lock(&ctx->lock); | ||
| 2313 | |||
| 2314 | /* | 2320 | /* |
| 2315 | * Rotate the first entry last of non-pinned groups. Rotation might be | 2321 | * Rotate the first entry last of non-pinned groups. Rotation might be |
| 2316 | * disabled by the inheritance code. | 2322 | * disabled by the inheritance code. |
| 2317 | */ | 2323 | */ |
| 2318 | if (!ctx->rotate_disable) | 2324 | if (!ctx->rotate_disable) |
| 2319 | list_rotate_left(&ctx->flexible_groups); | 2325 | list_rotate_left(&ctx->flexible_groups); |
| 2320 | |||
| 2321 | raw_spin_unlock(&ctx->lock); | ||
| 2322 | } | 2326 | } |
| 2323 | 2327 | ||
| 2324 | /* | 2328 | /* |
| @@ -2345,6 +2349,7 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx) | |||
| 2345 | rotate = 1; | 2349 | rotate = 1; |
| 2346 | } | 2350 | } |
| 2347 | 2351 | ||
| 2352 | perf_ctx_lock(cpuctx, cpuctx->task_ctx); | ||
| 2348 | perf_pmu_disable(cpuctx->ctx.pmu); | 2353 | perf_pmu_disable(cpuctx->ctx.pmu); |
| 2349 | perf_ctx_adjust_freq(&cpuctx->ctx, interval); | 2354 | perf_ctx_adjust_freq(&cpuctx->ctx, interval); |
| 2350 | if (ctx) | 2355 | if (ctx) |
| @@ -2355,21 +2360,20 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx) | |||
| 2355 | 2360 | ||
| 2356 | cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); | 2361 | cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); |
| 2357 | if (ctx) | 2362 | if (ctx) |
| 2358 | task_ctx_sched_out(ctx, EVENT_FLEXIBLE); | 2363 | ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE); |
| 2359 | 2364 | ||
| 2360 | rotate_ctx(&cpuctx->ctx); | 2365 | rotate_ctx(&cpuctx->ctx); |
| 2361 | if (ctx) | 2366 | if (ctx) |
| 2362 | rotate_ctx(ctx); | 2367 | rotate_ctx(ctx); |
| 2363 | 2368 | ||
| 2364 | cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, current); | 2369 | perf_event_sched_in(cpuctx, ctx, current); |
| 2365 | if (ctx) | ||
| 2366 | task_ctx_sched_in(ctx, EVENT_FLEXIBLE); | ||
| 2367 | 2370 | ||
| 2368 | done: | 2371 | done: |
| 2369 | if (remove) | 2372 | if (remove) |
| 2370 | list_del_init(&cpuctx->rotation_list); | 2373 | list_del_init(&cpuctx->rotation_list); |
| 2371 | 2374 | ||
| 2372 | perf_pmu_enable(cpuctx->ctx.pmu); | 2375 | perf_pmu_enable(cpuctx->ctx.pmu); |
| 2376 | perf_ctx_unlock(cpuctx, cpuctx->task_ctx); | ||
| 2373 | } | 2377 | } |
| 2374 | 2378 | ||
| 2375 | void perf_event_task_tick(void) | 2379 | void perf_event_task_tick(void) |
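With the locking hoisted into perf_rotate_context(), rotate_ctx() itself is reduced to "move the first flexible group to the tail", so groups that did not fit on the PMU last time get a turn. A small illustration of that round-robin step using a plain array (the kernel uses list_rotate_left() on a linked list; this is just a hypothetical sketch of the effect):

/* Round-robin rotation of "flexible" groups, stand-alone sketch. */
#include <stdio.h>

static void rotate_left(int *groups, int n)
{
	int first = groups[0];

	for (int i = 0; i < n - 1; i++)
		groups[i] = groups[i + 1];
	groups[n - 1] = first;		/* old head becomes the new tail */
}

int main(void)
{
	int flexible[] = { 1, 2, 3, 4 };	/* group ids, head first */

	rotate_left(flexible, 4);		/* one tick: 2 3 4 1 */
	for (int i = 0; i < 4; i++)
		printf("%d ", flexible[i]);
	printf("\n");
	return 0;
}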
| @@ -2424,9 +2428,9 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx) | |||
| 2424 | * in. | 2428 | * in. |
| 2425 | */ | 2429 | */ |
| 2426 | perf_cgroup_sched_out(current); | 2430 | perf_cgroup_sched_out(current); |
| 2427 | task_ctx_sched_out(ctx, EVENT_ALL); | ||
| 2428 | 2431 | ||
| 2429 | raw_spin_lock(&ctx->lock); | 2432 | raw_spin_lock(&ctx->lock); |
| 2433 | task_ctx_sched_out(ctx); | ||
| 2430 | 2434 | ||
| 2431 | list_for_each_entry(event, &ctx->pinned_groups, group_entry) { | 2435 | list_for_each_entry(event, &ctx->pinned_groups, group_entry) { |
| 2432 | ret = event_enable_on_exec(event, ctx); | 2436 | ret = event_enable_on_exec(event, ctx); |
| @@ -2835,16 +2839,12 @@ retry: | |||
| 2835 | unclone_ctx(ctx); | 2839 | unclone_ctx(ctx); |
| 2836 | ++ctx->pin_count; | 2840 | ++ctx->pin_count; |
| 2837 | raw_spin_unlock_irqrestore(&ctx->lock, flags); | 2841 | raw_spin_unlock_irqrestore(&ctx->lock, flags); |
| 2838 | } | 2842 | } else { |
| 2839 | |||
| 2840 | if (!ctx) { | ||
| 2841 | ctx = alloc_perf_context(pmu, task); | 2843 | ctx = alloc_perf_context(pmu, task); |
| 2842 | err = -ENOMEM; | 2844 | err = -ENOMEM; |
| 2843 | if (!ctx) | 2845 | if (!ctx) |
| 2844 | goto errout; | 2846 | goto errout; |
| 2845 | 2847 | ||
| 2846 | get_ctx(ctx); | ||
| 2847 | |||
| 2848 | err = 0; | 2848 | err = 0; |
| 2849 | mutex_lock(&task->perf_event_mutex); | 2849 | mutex_lock(&task->perf_event_mutex); |
| 2850 | /* | 2850 | /* |
| @@ -2856,14 +2856,14 @@ retry: | |||
| 2856 | else if (task->perf_event_ctxp[ctxn]) | 2856 | else if (task->perf_event_ctxp[ctxn]) |
| 2857 | err = -EAGAIN; | 2857 | err = -EAGAIN; |
| 2858 | else { | 2858 | else { |
| 2859 | get_ctx(ctx); | ||
| 2859 | ++ctx->pin_count; | 2860 | ++ctx->pin_count; |
| 2860 | rcu_assign_pointer(task->perf_event_ctxp[ctxn], ctx); | 2861 | rcu_assign_pointer(task->perf_event_ctxp[ctxn], ctx); |
| 2861 | } | 2862 | } |
| 2862 | mutex_unlock(&task->perf_event_mutex); | 2863 | mutex_unlock(&task->perf_event_mutex); |
| 2863 | 2864 | ||
| 2864 | if (unlikely(err)) { | 2865 | if (unlikely(err)) { |
| 2865 | put_task_struct(task); | 2866 | put_ctx(ctx); |
| 2866 | kfree(ctx); | ||
| 2867 | 2867 | ||
| 2868 | if (err == -EAGAIN) | 2868 | if (err == -EAGAIN) |
| 2869 | goto retry; | 2869 | goto retry; |
| @@ -2890,7 +2890,7 @@ static void free_event_rcu(struct rcu_head *head) | |||
| 2890 | kfree(event); | 2890 | kfree(event); |
| 2891 | } | 2891 | } |
| 2892 | 2892 | ||
| 2893 | static void perf_buffer_put(struct perf_buffer *buffer); | 2893 | static void ring_buffer_put(struct ring_buffer *rb); |
| 2894 | 2894 | ||
| 2895 | static void free_event(struct perf_event *event) | 2895 | static void free_event(struct perf_event *event) |
| 2896 | { | 2896 | { |
| @@ -2913,9 +2913,9 @@ static void free_event(struct perf_event *event) | |||
| 2913 | } | 2913 | } |
| 2914 | } | 2914 | } |
| 2915 | 2915 | ||
| 2916 | if (event->buffer) { | 2916 | if (event->rb) { |
| 2917 | perf_buffer_put(event->buffer); | 2917 | ring_buffer_put(event->rb); |
| 2918 | event->buffer = NULL; | 2918 | event->rb = NULL; |
| 2919 | } | 2919 | } |
| 2920 | 2920 | ||
| 2921 | if (is_cgroup_event(event)) | 2921 | if (is_cgroup_event(event)) |
| @@ -2934,12 +2934,6 @@ int perf_event_release_kernel(struct perf_event *event) | |||
| 2934 | { | 2934 | { |
| 2935 | struct perf_event_context *ctx = event->ctx; | 2935 | struct perf_event_context *ctx = event->ctx; |
| 2936 | 2936 | ||
| 2937 | /* | ||
| 2938 | * Remove from the PMU, can't get re-enabled since we got | ||
| 2939 | * here because the last ref went. | ||
| 2940 | */ | ||
| 2941 | perf_event_disable(event); | ||
| 2942 | |||
| 2943 | WARN_ON_ONCE(ctx->parent_ctx); | 2937 | WARN_ON_ONCE(ctx->parent_ctx); |
| 2944 | /* | 2938 | /* |
| 2945 | * There are two ways this annotation is useful: | 2939 | * There are two ways this annotation is useful: |
| @@ -2956,8 +2950,8 @@ int perf_event_release_kernel(struct perf_event *event) | |||
| 2956 | mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING); | 2950 | mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING); |
| 2957 | raw_spin_lock_irq(&ctx->lock); | 2951 | raw_spin_lock_irq(&ctx->lock); |
| 2958 | perf_group_detach(event); | 2952 | perf_group_detach(event); |
| 2959 | list_del_event(event, ctx); | ||
| 2960 | raw_spin_unlock_irq(&ctx->lock); | 2953 | raw_spin_unlock_irq(&ctx->lock); |
| 2954 | perf_remove_from_context(event); | ||
| 2961 | mutex_unlock(&ctx->mutex); | 2955 | mutex_unlock(&ctx->mutex); |
| 2962 | 2956 | ||
| 2963 | free_event(event); | 2957 | free_event(event); |
| @@ -3149,13 +3143,13 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) | |||
| 3149 | static unsigned int perf_poll(struct file *file, poll_table *wait) | 3143 | static unsigned int perf_poll(struct file *file, poll_table *wait) |
| 3150 | { | 3144 | { |
| 3151 | struct perf_event *event = file->private_data; | 3145 | struct perf_event *event = file->private_data; |
| 3152 | struct perf_buffer *buffer; | 3146 | struct ring_buffer *rb; |
| 3153 | unsigned int events = POLL_HUP; | 3147 | unsigned int events = POLL_HUP; |
| 3154 | 3148 | ||
| 3155 | rcu_read_lock(); | 3149 | rcu_read_lock(); |
| 3156 | buffer = rcu_dereference(event->buffer); | 3150 | rb = rcu_dereference(event->rb); |
| 3157 | if (buffer) | 3151 | if (rb) |
| 3158 | events = atomic_xchg(&buffer->poll, 0); | 3152 | events = atomic_xchg(&rb->poll, 0); |
| 3159 | rcu_read_unlock(); | 3153 | rcu_read_unlock(); |
| 3160 | 3154 | ||
| 3161 | poll_wait(file, &event->waitq, wait); | 3155 | poll_wait(file, &event->waitq, wait); |
| @@ -3358,6 +3352,18 @@ static int perf_event_index(struct perf_event *event) | |||
| 3358 | return event->hw.idx + 1 - PERF_EVENT_INDEX_OFFSET; | 3352 | return event->hw.idx + 1 - PERF_EVENT_INDEX_OFFSET; |
| 3359 | } | 3353 | } |
| 3360 | 3354 | ||
| 3355 | static void calc_timer_values(struct perf_event *event, | ||
| 3356 | u64 *running, | ||
| 3357 | u64 *enabled) | ||
| 3358 | { | ||
| 3359 | u64 now, ctx_time; | ||
| 3360 | |||
| 3361 | now = perf_clock(); | ||
| 3362 | ctx_time = event->shadow_ctx_time + now; | ||
| 3363 | *enabled = ctx_time - event->tstamp_enabled; | ||
| 3364 | *running = ctx_time - event->tstamp_running; | ||
| 3365 | } | ||
| 3366 | |||
| 3361 | /* | 3367 | /* |
| 3362 | * Callers need to ensure there can be no nesting of this function, otherwise | 3368 | * Callers need to ensure there can be no nesting of this function, otherwise |
| 3363 | * the seqlock logic goes bad. We can not serialize this because the arch | 3369 | * the seqlock logic goes bad. We can not serialize this because the arch |
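The new calc_timer_values() helper derives the enabled/running times from the shadow_ctx_time snapshot instead of calling update_context_time(), because its callers may run in NMI context and cannot take the context lock. The arithmetic is simply "reconstructed context time minus the timestamp recorded at enable / schedule-in". A stand-alone sketch with made-up numbers:

/* Stand-alone sketch of the shadow-time arithmetic used by
 * calc_timer_values() above. All values are invented for illustration. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t now             = 1000000;	/* perf_clock() at readout time */
	uint64_t shadow_ctx_time = 50;		/* ctx time minus clock, snapshotted at sched-in */
	uint64_t tstamp_enabled  = 400000;	/* ctx time when the event was enabled */
	uint64_t tstamp_running  = 700000;	/* ctx time when it last started counting */

	uint64_t ctx_time = shadow_ctx_time + now;	/* reconstructed context time */
	uint64_t enabled  = ctx_time - tstamp_enabled;
	uint64_t running  = ctx_time - tstamp_running;

	printf("enabled=%llu running=%llu\n",
	       (unsigned long long)enabled, (unsigned long long)running);
	return 0;
}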
| @@ -3366,14 +3372,25 @@ static int perf_event_index(struct perf_event *event) | |||
| 3366 | void perf_event_update_userpage(struct perf_event *event) | 3372 | void perf_event_update_userpage(struct perf_event *event) |
| 3367 | { | 3373 | { |
| 3368 | struct perf_event_mmap_page *userpg; | 3374 | struct perf_event_mmap_page *userpg; |
| 3369 | struct perf_buffer *buffer; | 3375 | struct ring_buffer *rb; |
| 3376 | u64 enabled, running; | ||
| 3370 | 3377 | ||
| 3371 | rcu_read_lock(); | 3378 | rcu_read_lock(); |
| 3372 | buffer = rcu_dereference(event->buffer); | 3379 | /* |
| 3373 | if (!buffer) | 3380 | * compute total_time_enabled, total_time_running |
| 3381 | * based on snapshot values taken when the event | ||
| 3382 | * was last scheduled in. | ||
| 3383 | * | ||
| 3384 | * we cannot simply call update_context_time() | ||
| 3385 | * because of a locking issue, as we can be called in | ||
| 3386 | * NMI context | ||
| 3387 | */ | ||
| 3388 | calc_timer_values(event, &enabled, &running); | ||
| 3389 | rb = rcu_dereference(event->rb); | ||
| 3390 | if (!rb) | ||
| 3374 | goto unlock; | 3391 | goto unlock; |
| 3375 | 3392 | ||
| 3376 | userpg = buffer->user_page; | 3393 | userpg = rb->user_page; |
| 3377 | 3394 | ||
| 3378 | /* | 3395 | /* |
| 3379 | * Disable preemption so as to not let the corresponding user-space | 3396 | * Disable preemption so as to not let the corresponding user-space |
| @@ -3387,10 +3404,10 @@ void perf_event_update_userpage(struct perf_event *event) | |||
| 3387 | if (event->state == PERF_EVENT_STATE_ACTIVE) | 3404 | if (event->state == PERF_EVENT_STATE_ACTIVE) |
| 3388 | userpg->offset -= local64_read(&event->hw.prev_count); | 3405 | userpg->offset -= local64_read(&event->hw.prev_count); |
| 3389 | 3406 | ||
| 3390 | userpg->time_enabled = event->total_time_enabled + | 3407 | userpg->time_enabled = enabled + |
| 3391 | atomic64_read(&event->child_total_time_enabled); | 3408 | atomic64_read(&event->child_total_time_enabled); |
| 3392 | 3409 | ||
| 3393 | userpg->time_running = event->total_time_running + | 3410 | userpg->time_running = running + |
| 3394 | atomic64_read(&event->child_total_time_running); | 3411 | atomic64_read(&event->child_total_time_running); |
| 3395 | 3412 | ||
| 3396 | barrier(); | 3413 | barrier(); |
| @@ -3400,220 +3417,10 @@ unlock: | |||
| 3400 | rcu_read_unlock(); | 3417 | rcu_read_unlock(); |
| 3401 | } | 3418 | } |
| 3402 | 3419 | ||
| 3403 | static unsigned long perf_data_size(struct perf_buffer *buffer); | ||
| 3404 | |||
| 3405 | static void | ||
| 3406 | perf_buffer_init(struct perf_buffer *buffer, long watermark, int flags) | ||
| 3407 | { | ||
| 3408 | long max_size = perf_data_size(buffer); | ||
| 3409 | |||
| 3410 | if (watermark) | ||
| 3411 | buffer->watermark = min(max_size, watermark); | ||
| 3412 | |||
| 3413 | if (!buffer->watermark) | ||
| 3414 | buffer->watermark = max_size / 2; | ||
| 3415 | |||
| 3416 | if (flags & PERF_BUFFER_WRITABLE) | ||
| 3417 | buffer->writable = 1; | ||
| 3418 | |||
| 3419 | atomic_set(&buffer->refcount, 1); | ||
| 3420 | } | ||
| 3421 | |||
| 3422 | #ifndef CONFIG_PERF_USE_VMALLOC | ||
| 3423 | |||
| 3424 | /* | ||
| 3425 | * Back perf_mmap() with regular GFP_KERNEL-0 pages. | ||
| 3426 | */ | ||
| 3427 | |||
| 3428 | static struct page * | ||
| 3429 | perf_mmap_to_page(struct perf_buffer *buffer, unsigned long pgoff) | ||
| 3430 | { | ||
| 3431 | if (pgoff > buffer->nr_pages) | ||
| 3432 | return NULL; | ||
| 3433 | |||
| 3434 | if (pgoff == 0) | ||
| 3435 | return virt_to_page(buffer->user_page); | ||
| 3436 | |||
| 3437 | return virt_to_page(buffer->data_pages[pgoff - 1]); | ||
| 3438 | } | ||
| 3439 | |||
| 3440 | static void *perf_mmap_alloc_page(int cpu) | ||
| 3441 | { | ||
| 3442 | struct page *page; | ||
| 3443 | int node; | ||
| 3444 | |||
| 3445 | node = (cpu == -1) ? cpu : cpu_to_node(cpu); | ||
| 3446 | page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); | ||
| 3447 | if (!page) | ||
| 3448 | return NULL; | ||
| 3449 | |||
| 3450 | return page_address(page); | ||
| 3451 | } | ||
| 3452 | |||
| 3453 | static struct perf_buffer * | ||
| 3454 | perf_buffer_alloc(int nr_pages, long watermark, int cpu, int flags) | ||
| 3455 | { | ||
| 3456 | struct perf_buffer *buffer; | ||
| 3457 | unsigned long size; | ||
| 3458 | int i; | ||
| 3459 | |||
| 3460 | size = sizeof(struct perf_buffer); | ||
| 3461 | size += nr_pages * sizeof(void *); | ||
| 3462 | |||
| 3463 | buffer = kzalloc(size, GFP_KERNEL); | ||
| 3464 | if (!buffer) | ||
| 3465 | goto fail; | ||
| 3466 | |||
| 3467 | buffer->user_page = perf_mmap_alloc_page(cpu); | ||
| 3468 | if (!buffer->user_page) | ||
| 3469 | goto fail_user_page; | ||
| 3470 | |||
| 3471 | for (i = 0; i < nr_pages; i++) { | ||
| 3472 | buffer->data_pages[i] = perf_mmap_alloc_page(cpu); | ||
| 3473 | if (!buffer->data_pages[i]) | ||
| 3474 | goto fail_data_pages; | ||
| 3475 | } | ||
| 3476 | |||
| 3477 | buffer->nr_pages = nr_pages; | ||
| 3478 | |||
| 3479 | perf_buffer_init(buffer, watermark, flags); | ||
| 3480 | |||
| 3481 | return buffer; | ||
| 3482 | |||
| 3483 | fail_data_pages: | ||
| 3484 | for (i--; i >= 0; i--) | ||
| 3485 | free_page((unsigned long)buffer->data_pages[i]); | ||
| 3486 | |||
| 3487 | free_page((unsigned long)buffer->user_page); | ||
| 3488 | |||
| 3489 | fail_user_page: | ||
| 3490 | kfree(buffer); | ||
| 3491 | |||
| 3492 | fail: | ||
| 3493 | return NULL; | ||
| 3494 | } | ||
| 3495 | |||
| 3496 | static void perf_mmap_free_page(unsigned long addr) | ||
| 3497 | { | ||
| 3498 | struct page *page = virt_to_page((void *)addr); | ||
| 3499 | |||
| 3500 | page->mapping = NULL; | ||
| 3501 | __free_page(page); | ||
| 3502 | } | ||
| 3503 | |||
| 3504 | static void perf_buffer_free(struct perf_buffer *buffer) | ||
| 3505 | { | ||
| 3506 | int i; | ||
| 3507 | |||
| 3508 | perf_mmap_free_page((unsigned long)buffer->user_page); | ||
| 3509 | for (i = 0; i < buffer->nr_pages; i++) | ||
| 3510 | perf_mmap_free_page((unsigned long)buffer->data_pages[i]); | ||
| 3511 | kfree(buffer); | ||
| 3512 | } | ||
| 3513 | |||
| 3514 | static inline int page_order(struct perf_buffer *buffer) | ||
| 3515 | { | ||
| 3516 | return 0; | ||
| 3517 | } | ||
| 3518 | |||
| 3519 | #else | ||
| 3520 | |||
| 3521 | /* | ||
| 3522 | * Back perf_mmap() with vmalloc memory. | ||
| 3523 | * | ||
| 3524 | * Required for architectures that have d-cache aliasing issues. | ||
| 3525 | */ | ||
| 3526 | |||
| 3527 | static inline int page_order(struct perf_buffer *buffer) | ||
| 3528 | { | ||
| 3529 | return buffer->page_order; | ||
| 3530 | } | ||
| 3531 | |||
| 3532 | static struct page * | ||
| 3533 | perf_mmap_to_page(struct perf_buffer *buffer, unsigned long pgoff) | ||
| 3534 | { | ||
| 3535 | if (pgoff > (1UL << page_order(buffer))) | ||
| 3536 | return NULL; | ||
| 3537 | |||
| 3538 | return vmalloc_to_page((void *)buffer->user_page + pgoff * PAGE_SIZE); | ||
| 3539 | } | ||
| 3540 | |||
| 3541 | static void perf_mmap_unmark_page(void *addr) | ||
| 3542 | { | ||
| 3543 | struct page *page = vmalloc_to_page(addr); | ||
| 3544 | |||
| 3545 | page->mapping = NULL; | ||
| 3546 | } | ||
| 3547 | |||
| 3548 | static void perf_buffer_free_work(struct work_struct *work) | ||
| 3549 | { | ||
| 3550 | struct perf_buffer *buffer; | ||
| 3551 | void *base; | ||
| 3552 | int i, nr; | ||
| 3553 | |||
| 3554 | buffer = container_of(work, struct perf_buffer, work); | ||
| 3555 | nr = 1 << page_order(buffer); | ||
| 3556 | |||
| 3557 | base = buffer->user_page; | ||
| 3558 | for (i = 0; i < nr + 1; i++) | ||
| 3559 | perf_mmap_unmark_page(base + (i * PAGE_SIZE)); | ||
| 3560 | |||
| 3561 | vfree(base); | ||
| 3562 | kfree(buffer); | ||
| 3563 | } | ||
| 3564 | |||
| 3565 | static void perf_buffer_free(struct perf_buffer *buffer) | ||
| 3566 | { | ||
| 3567 | schedule_work(&buffer->work); | ||
| 3568 | } | ||
| 3569 | |||
| 3570 | static struct perf_buffer * | ||
| 3571 | perf_buffer_alloc(int nr_pages, long watermark, int cpu, int flags) | ||
| 3572 | { | ||
| 3573 | struct perf_buffer *buffer; | ||
| 3574 | unsigned long size; | ||
| 3575 | void *all_buf; | ||
| 3576 | |||
| 3577 | size = sizeof(struct perf_buffer); | ||
| 3578 | size += sizeof(void *); | ||
| 3579 | |||
| 3580 | buffer = kzalloc(size, GFP_KERNEL); | ||
| 3581 | if (!buffer) | ||
| 3582 | goto fail; | ||
| 3583 | |||
| 3584 | INIT_WORK(&buffer->work, perf_buffer_free_work); | ||
| 3585 | |||
| 3586 | all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE); | ||
| 3587 | if (!all_buf) | ||
| 3588 | goto fail_all_buf; | ||
| 3589 | |||
| 3590 | buffer->user_page = all_buf; | ||
| 3591 | buffer->data_pages[0] = all_buf + PAGE_SIZE; | ||
| 3592 | buffer->page_order = ilog2(nr_pages); | ||
| 3593 | buffer->nr_pages = 1; | ||
| 3594 | |||
| 3595 | perf_buffer_init(buffer, watermark, flags); | ||
| 3596 | |||
| 3597 | return buffer; | ||
| 3598 | |||
| 3599 | fail_all_buf: | ||
| 3600 | kfree(buffer); | ||
| 3601 | |||
| 3602 | fail: | ||
| 3603 | return NULL; | ||
| 3604 | } | ||
| 3605 | |||
| 3606 | #endif | ||
| 3607 | |||
| 3608 | static unsigned long perf_data_size(struct perf_buffer *buffer) | ||
| 3609 | { | ||
| 3610 | return buffer->nr_pages << (PAGE_SHIFT + page_order(buffer)); | ||
| 3611 | } | ||
| 3612 | |||
| 3613 | static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | 3420 | static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) |
| 3614 | { | 3421 | { |
| 3615 | struct perf_event *event = vma->vm_file->private_data; | 3422 | struct perf_event *event = vma->vm_file->private_data; |
| 3616 | struct perf_buffer *buffer; | 3423 | struct ring_buffer *rb; |
| 3617 | int ret = VM_FAULT_SIGBUS; | 3424 | int ret = VM_FAULT_SIGBUS; |
| 3618 | 3425 | ||
| 3619 | if (vmf->flags & FAULT_FLAG_MKWRITE) { | 3426 | if (vmf->flags & FAULT_FLAG_MKWRITE) { |
| @@ -3623,14 +3430,14 @@ static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
| 3623 | } | 3430 | } |
| 3624 | 3431 | ||
| 3625 | rcu_read_lock(); | 3432 | rcu_read_lock(); |
| 3626 | buffer = rcu_dereference(event->buffer); | 3433 | rb = rcu_dereference(event->rb); |
| 3627 | if (!buffer) | 3434 | if (!rb) |
| 3628 | goto unlock; | 3435 | goto unlock; |
| 3629 | 3436 | ||
| 3630 | if (vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE)) | 3437 | if (vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE)) |
| 3631 | goto unlock; | 3438 | goto unlock; |
| 3632 | 3439 | ||
| 3633 | vmf->page = perf_mmap_to_page(buffer, vmf->pgoff); | 3440 | vmf->page = perf_mmap_to_page(rb, vmf->pgoff); |
| 3634 | if (!vmf->page) | 3441 | if (!vmf->page) |
| 3635 | goto unlock; | 3442 | goto unlock; |
| 3636 | 3443 | ||
| @@ -3645,35 +3452,35 @@ unlock: | |||
| 3645 | return ret; | 3452 | return ret; |
| 3646 | } | 3453 | } |
| 3647 | 3454 | ||
| 3648 | static void perf_buffer_free_rcu(struct rcu_head *rcu_head) | 3455 | static void rb_free_rcu(struct rcu_head *rcu_head) |
| 3649 | { | 3456 | { |
| 3650 | struct perf_buffer *buffer; | 3457 | struct ring_buffer *rb; |
| 3651 | 3458 | ||
| 3652 | buffer = container_of(rcu_head, struct perf_buffer, rcu_head); | 3459 | rb = container_of(rcu_head, struct ring_buffer, rcu_head); |
| 3653 | perf_buffer_free(buffer); | 3460 | rb_free(rb); |
| 3654 | } | 3461 | } |
| 3655 | 3462 | ||
| 3656 | static struct perf_buffer *perf_buffer_get(struct perf_event *event) | 3463 | static struct ring_buffer *ring_buffer_get(struct perf_event *event) |
| 3657 | { | 3464 | { |
| 3658 | struct perf_buffer *buffer; | 3465 | struct ring_buffer *rb; |
| 3659 | 3466 | ||
| 3660 | rcu_read_lock(); | 3467 | rcu_read_lock(); |
| 3661 | buffer = rcu_dereference(event->buffer); | 3468 | rb = rcu_dereference(event->rb); |
| 3662 | if (buffer) { | 3469 | if (rb) { |
| 3663 | if (!atomic_inc_not_zero(&buffer->refcount)) | 3470 | if (!atomic_inc_not_zero(&rb->refcount)) |
| 3664 | buffer = NULL; | 3471 | rb = NULL; |
| 3665 | } | 3472 | } |
| 3666 | rcu_read_unlock(); | 3473 | rcu_read_unlock(); |
| 3667 | 3474 | ||
| 3668 | return buffer; | 3475 | return rb; |
| 3669 | } | 3476 | } |
| 3670 | 3477 | ||
| 3671 | static void perf_buffer_put(struct perf_buffer *buffer) | 3478 | static void ring_buffer_put(struct ring_buffer *rb) |
| 3672 | { | 3479 | { |
| 3673 | if (!atomic_dec_and_test(&buffer->refcount)) | 3480 | if (!atomic_dec_and_test(&rb->refcount)) |
| 3674 | return; | 3481 | return; |
| 3675 | 3482 | ||
| 3676 | call_rcu(&buffer->rcu_head, perf_buffer_free_rcu); | 3483 | call_rcu(&rb->rcu_head, rb_free_rcu); |
| 3677 | } | 3484 | } |
| 3678 | 3485 | ||
| 3679 | static void perf_mmap_open(struct vm_area_struct *vma) | 3486 | static void perf_mmap_open(struct vm_area_struct *vma) |
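ring_buffer_get() above only takes a reference while the count is still non-zero (atomic_inc_not_zero() under rcu_read_lock()), and ring_buffer_put() schedules the free through call_rcu() once the last reference drops. A simplified userspace model of the "inc not zero" part using C11 atomics (no RCU grace period here; the free is immediate, which is exactly the part the kernel defers):

/* Simplified model of the get/put pattern above. Hypothetical
 * stand-alone code: C11 atomics stand in for the kernel's atomic_t. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct rb_model {
	atomic_int refcount;
};

static bool rb_get(struct rb_model *rb)
{
	int old = atomic_load(&rb->refcount);

	while (old != 0) {		/* "inc not zero": never resurrect a dying buffer */
		if (atomic_compare_exchange_weak(&rb->refcount, &old, old + 1))
			return true;
	}
	return false;
}

static void rb_put(struct rb_model *rb)
{
	if (atomic_fetch_sub(&rb->refcount, 1) == 1)
		free(rb);		/* last reference; the kernel defers this via call_rcu() */
}

int main(void)
{
	struct rb_model *rb = malloc(sizeof(*rb));

	atomic_init(&rb->refcount, 1);	/* initial reference held by the event */
	if (rb_get(rb))			/* e.g. a reader grabbing the buffer */
		rb_put(rb);
	rb_put(rb);			/* drop the initial reference: freed */
	printf("done\n");
	return 0;
}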
| @@ -3688,16 +3495,16 @@ static void perf_mmap_close(struct vm_area_struct *vma) | |||
| 3688 | struct perf_event *event = vma->vm_file->private_data; | 3495 | struct perf_event *event = vma->vm_file->private_data; |
| 3689 | 3496 | ||
| 3690 | if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) { | 3497 | if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) { |
| 3691 | unsigned long size = perf_data_size(event->buffer); | 3498 | unsigned long size = perf_data_size(event->rb); |
| 3692 | struct user_struct *user = event->mmap_user; | 3499 | struct user_struct *user = event->mmap_user; |
| 3693 | struct perf_buffer *buffer = event->buffer; | 3500 | struct ring_buffer *rb = event->rb; |
| 3694 | 3501 | ||
| 3695 | atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm); | 3502 | atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm); |
| 3696 | vma->vm_mm->locked_vm -= event->mmap_locked; | 3503 | vma->vm_mm->locked_vm -= event->mmap_locked; |
| 3697 | rcu_assign_pointer(event->buffer, NULL); | 3504 | rcu_assign_pointer(event->rb, NULL); |
| 3698 | mutex_unlock(&event->mmap_mutex); | 3505 | mutex_unlock(&event->mmap_mutex); |
| 3699 | 3506 | ||
| 3700 | perf_buffer_put(buffer); | 3507 | ring_buffer_put(rb); |
| 3701 | free_uid(user); | 3508 | free_uid(user); |
| 3702 | } | 3509 | } |
| 3703 | } | 3510 | } |
| @@ -3715,7 +3522,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) | |||
| 3715 | unsigned long user_locked, user_lock_limit; | 3522 | unsigned long user_locked, user_lock_limit; |
| 3716 | struct user_struct *user = current_user(); | 3523 | struct user_struct *user = current_user(); |
| 3717 | unsigned long locked, lock_limit; | 3524 | unsigned long locked, lock_limit; |
| 3718 | struct perf_buffer *buffer; | 3525 | struct ring_buffer *rb; |
| 3719 | unsigned long vma_size; | 3526 | unsigned long vma_size; |
| 3720 | unsigned long nr_pages; | 3527 | unsigned long nr_pages; |
| 3721 | long user_extra, extra; | 3528 | long user_extra, extra; |
| @@ -3724,7 +3531,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) | |||
| 3724 | /* | 3531 | /* |
| 3725 | * Don't allow mmap() of inherited per-task counters. This would | 3532 | * Don't allow mmap() of inherited per-task counters. This would |
| 3726 | * create a performance issue due to all children writing to the | 3533 | * create a performance issue due to all children writing to the |
| 3727 | * same buffer. | 3534 | * same rb. |
| 3728 | */ | 3535 | */ |
| 3729 | if (event->cpu == -1 && event->attr.inherit) | 3536 | if (event->cpu == -1 && event->attr.inherit) |
| 3730 | return -EINVAL; | 3537 | return -EINVAL; |
| @@ -3736,7 +3543,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) | |||
| 3736 | nr_pages = (vma_size / PAGE_SIZE) - 1; | 3543 | nr_pages = (vma_size / PAGE_SIZE) - 1; |
| 3737 | 3544 | ||
| 3738 | /* | 3545 | /* |
| 3739 | * If we have buffer pages ensure they're a power-of-two number, so we | 3546 | * If we have rb pages ensure they're a power-of-two number, so we |
| 3740 | * can do bitmasks instead of modulo. | 3547 | * can do bitmasks instead of modulo. |
| 3741 | */ | 3548 | */ |
| 3742 | if (nr_pages != 0 && !is_power_of_2(nr_pages)) | 3549 | if (nr_pages != 0 && !is_power_of_2(nr_pages)) |
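The mmap path still insists on a power-of-two number of data pages so the ring buffer can wrap positions with a bitmask instead of a modulo, as the comment above says. A minimal illustration:

/* Why the power-of-two check above matters: with size = 2^n, wrapping an
 * ever-growing position is a single AND. Stand-alone sketch. */
#include <stdio.h>

int main(void)
{
	unsigned long nr_pages  = 8;			/* must be a power of two */
	unsigned long page_size = 4096;
	unsigned long size = nr_pages * page_size;
	unsigned long mask = size - 1;

	unsigned long head  = 5 * size + 123;		/* logical, never-wrapping position */
	unsigned long index = head & mask;		/* equivalent to head % size, but cheaper */

	printf("index=%lu (%% gives %lu)\n", index, head % size);
	return 0;
}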
| @@ -3750,9 +3557,9 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) | |||
| 3750 | 3557 | ||
| 3751 | WARN_ON_ONCE(event->ctx->parent_ctx); | 3558 | WARN_ON_ONCE(event->ctx->parent_ctx); |
| 3752 | mutex_lock(&event->mmap_mutex); | 3559 | mutex_lock(&event->mmap_mutex); |
| 3753 | if (event->buffer) { | 3560 | if (event->rb) { |
| 3754 | if (event->buffer->nr_pages == nr_pages) | 3561 | if (event->rb->nr_pages == nr_pages) |
| 3755 | atomic_inc(&event->buffer->refcount); | 3562 | atomic_inc(&event->rb->refcount); |
| 3756 | else | 3563 | else |
| 3757 | ret = -EINVAL; | 3564 | ret = -EINVAL; |
| 3758 | goto unlock; | 3565 | goto unlock; |
| @@ -3782,18 +3589,20 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) | |||
| 3782 | goto unlock; | 3589 | goto unlock; |
| 3783 | } | 3590 | } |
| 3784 | 3591 | ||
| 3785 | WARN_ON(event->buffer); | 3592 | WARN_ON(event->rb); |
| 3786 | 3593 | ||
| 3787 | if (vma->vm_flags & VM_WRITE) | 3594 | if (vma->vm_flags & VM_WRITE) |
| 3788 | flags |= PERF_BUFFER_WRITABLE; | 3595 | flags |= RING_BUFFER_WRITABLE; |
| 3596 | |||
| 3597 | rb = rb_alloc(nr_pages, | ||
| 3598 | event->attr.watermark ? event->attr.wakeup_watermark : 0, | ||
| 3599 | event->cpu, flags); | ||
| 3789 | 3600 | ||
| 3790 | buffer = perf_buffer_alloc(nr_pages, event->attr.wakeup_watermark, | 3601 | if (!rb) { |
| 3791 | event->cpu, flags); | ||
| 3792 | if (!buffer) { | ||
| 3793 | ret = -ENOMEM; | 3602 | ret = -ENOMEM; |
| 3794 | goto unlock; | 3603 | goto unlock; |
| 3795 | } | 3604 | } |
| 3796 | rcu_assign_pointer(event->buffer, buffer); | 3605 | rcu_assign_pointer(event->rb, rb); |
| 3797 | 3606 | ||
| 3798 | atomic_long_add(user_extra, &user->locked_vm); | 3607 | atomic_long_add(user_extra, &user->locked_vm); |
| 3799 | event->mmap_locked = extra; | 3608 | event->mmap_locked = extra; |
| @@ -3892,117 +3701,6 @@ int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs) | |||
| 3892 | } | 3701 | } |
| 3893 | EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks); | 3702 | EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks); |
| 3894 | 3703 | ||
| 3895 | /* | ||
| 3896 | * Output | ||
| 3897 | */ | ||
| 3898 | static bool perf_output_space(struct perf_buffer *buffer, unsigned long tail, | ||
| 3899 | unsigned long offset, unsigned long head) | ||
| 3900 | { | ||
| 3901 | unsigned long mask; | ||
| 3902 | |||
| 3903 | if (!buffer->writable) | ||
| 3904 | return true; | ||
| 3905 | |||
| 3906 | mask = perf_data_size(buffer) - 1; | ||
| 3907 | |||
| 3908 | offset = (offset - tail) & mask; | ||
| 3909 | head = (head - tail) & mask; | ||
| 3910 | |||
| 3911 | if ((int)(head - offset) < 0) | ||
| 3912 | return false; | ||
| 3913 | |||
| 3914 | return true; | ||
| 3915 | } | ||
| 3916 | |||
| 3917 | static void perf_output_wakeup(struct perf_output_handle *handle) | ||
| 3918 | { | ||
| 3919 | atomic_set(&handle->buffer->poll, POLL_IN); | ||
| 3920 | |||
| 3921 | if (handle->nmi) { | ||
| 3922 | handle->event->pending_wakeup = 1; | ||
| 3923 | irq_work_queue(&handle->event->pending); | ||
| 3924 | } else | ||
| 3925 | perf_event_wakeup(handle->event); | ||
| 3926 | } | ||
| 3927 | |||
| 3928 | /* | ||
| 3929 | * We need to ensure a later event_id doesn't publish a head when a former | ||
| 3930 | * event isn't done writing. However since we need to deal with NMIs we | ||
| 3931 | * cannot fully serialize things. | ||
| 3932 | * | ||
| 3933 | * We only publish the head (and generate a wakeup) when the outer-most | ||
| 3934 | * event completes. | ||
| 3935 | */ | ||
| 3936 | static void perf_output_get_handle(struct perf_output_handle *handle) | ||
| 3937 | { | ||
| 3938 | struct perf_buffer *buffer = handle->buffer; | ||
| 3939 | |||
| 3940 | preempt_disable(); | ||
| 3941 | local_inc(&buffer->nest); | ||
| 3942 | handle->wakeup = local_read(&buffer->wakeup); | ||
| 3943 | } | ||
| 3944 | |||
| 3945 | static void perf_output_put_handle(struct perf_output_handle *handle) | ||
| 3946 | { | ||
| 3947 | struct perf_buffer *buffer = handle->buffer; | ||
| 3948 | unsigned long head; | ||
| 3949 | |||
| 3950 | again: | ||
| 3951 | head = local_read(&buffer->head); | ||
| 3952 | |||
| 3953 | /* | ||
| 3954 | * IRQ/NMI can happen here, which means we can miss a head update. | ||
| 3955 | */ | ||
| 3956 | |||
| 3957 | if (!local_dec_and_test(&buffer->nest)) | ||
| 3958 | goto out; | ||
| 3959 | |||
| 3960 | /* | ||
| 3961 | * Publish the known good head. Rely on the full barrier implied | ||
| 3962 | * by atomic_dec_and_test() order the buffer->head read and this | ||
| 3963 | * write. | ||
| 3964 | */ | ||
| 3965 | buffer->user_page->data_head = head; | ||
| 3966 | |||
| 3967 | /* | ||
| 3968 | * Now check if we missed an update, rely on the (compiler) | ||
| 3969 | * barrier in atomic_dec_and_test() to re-read buffer->head. | ||
| 3970 | */ | ||
| 3971 | if (unlikely(head != local_read(&buffer->head))) { | ||
| 3972 | local_inc(&buffer->nest); | ||
| 3973 | goto again; | ||
| 3974 | } | ||
| 3975 | |||
| 3976 | if (handle->wakeup != local_read(&buffer->wakeup)) | ||
| 3977 | perf_output_wakeup(handle); | ||
| 3978 | |||
| 3979 | out: | ||
| 3980 | preempt_enable(); | ||
| 3981 | } | ||
| 3982 | |||
| 3983 | __always_inline void perf_output_copy(struct perf_output_handle *handle, | ||
| 3984 | const void *buf, unsigned int len) | ||
| 3985 | { | ||
| 3986 | do { | ||
| 3987 | unsigned long size = min_t(unsigned long, handle->size, len); | ||
| 3988 | |||
| 3989 | memcpy(handle->addr, buf, size); | ||
| 3990 | |||
| 3991 | len -= size; | ||
| 3992 | handle->addr += size; | ||
| 3993 | buf += size; | ||
| 3994 | handle->size -= size; | ||
| 3995 | if (!handle->size) { | ||
| 3996 | struct perf_buffer *buffer = handle->buffer; | ||
| 3997 | |||
| 3998 | handle->page++; | ||
| 3999 | handle->page &= buffer->nr_pages - 1; | ||
| 4000 | handle->addr = buffer->data_pages[handle->page]; | ||
| 4001 | handle->size = PAGE_SIZE << page_order(buffer); | ||
| 4002 | } | ||
| 4003 | } while (len); | ||
| 4004 | } | ||
| 4005 | |||
| 4006 | static void __perf_event_header__init_id(struct perf_event_header *header, | 3704 | static void __perf_event_header__init_id(struct perf_event_header *header, |
| 4007 | struct perf_sample_data *data, | 3705 | struct perf_sample_data *data, |
| 4008 | struct perf_event *event) | 3706 | struct perf_event *event) |
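The hunk above removes the low-level output helpers from this file. The most subtle of them, perf_output_put_handle(), only publishes user_page->data_head when the outermost writer finishes, and then re-checks the head in case a nested IRQ/NMI writer advanced it in between. A simplified single-threaded model of that nest counter (the "nested NMI writer" is simulated by a direct call, so no atomics are needed to show the idea):

/* Simplified model of the nest/head-publication logic of the removed
 * perf_output_put_handle(). Hypothetical stand-alone code. */
#include <stdio.h>

static unsigned long rb_head;		/* producer position, private to the kernel side */
static unsigned long published_head;	/* what user space may read up to */
static int nest;			/* writer nesting depth */

static void output_begin(unsigned long bytes)
{
	nest++;
	rb_head += bytes;		/* reserve space for this record */
}

static void output_end(void)
{
	unsigned long head;

again:
	head = rb_head;
	if (--nest)			/* inner writer: let the outermost one publish */
		return;

	published_head = head;		/* publish everything written so far */

	if (head != rb_head) {		/* a nested writer slipped in meanwhile */
		nest++;
		goto again;
	}
}

int main(void)
{
	output_begin(64);		/* outer record */
	output_begin(32);		/* "NMI" record nested inside it */
	output_end();			/* inner end: publishes nothing */
	output_end();			/* outer end: publishes both records */
	printf("published up to %lu\n", published_head);
	return 0;
}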
| @@ -4033,9 +3731,9 @@ static void __perf_event_header__init_id(struct perf_event_header *header, | |||
| 4033 | } | 3731 | } |
| 4034 | } | 3732 | } |
| 4035 | 3733 | ||
| 4036 | static void perf_event_header__init_id(struct perf_event_header *header, | 3734 | void perf_event_header__init_id(struct perf_event_header *header, |
| 4037 | struct perf_sample_data *data, | 3735 | struct perf_sample_data *data, |
| 4038 | struct perf_event *event) | 3736 | struct perf_event *event) |
| 4039 | { | 3737 | { |
| 4040 | if (event->attr.sample_id_all) | 3738 | if (event->attr.sample_id_all) |
| 4041 | __perf_event_header__init_id(header, data, event); | 3739 | __perf_event_header__init_id(header, data, event); |
| @@ -4062,121 +3760,14 @@ static void __perf_event__output_id_sample(struct perf_output_handle *handle, | |||
| 4062 | perf_output_put(handle, data->cpu_entry); | 3760 | perf_output_put(handle, data->cpu_entry); |
| 4063 | } | 3761 | } |
| 4064 | 3762 | ||
| 4065 | static void perf_event__output_id_sample(struct perf_event *event, | 3763 | void perf_event__output_id_sample(struct perf_event *event, |
| 4066 | struct perf_output_handle *handle, | 3764 | struct perf_output_handle *handle, |
| 4067 | struct perf_sample_data *sample) | 3765 | struct perf_sample_data *sample) |
| 4068 | { | 3766 | { |
| 4069 | if (event->attr.sample_id_all) | 3767 | if (event->attr.sample_id_all) |
| 4070 | __perf_event__output_id_sample(handle, sample); | 3768 | __perf_event__output_id_sample(handle, sample); |
| 4071 | } | 3769 | } |
| 4072 | 3770 | ||
| 4073 | int perf_output_begin(struct perf_output_handle *handle, | ||
| 4074 | struct perf_event *event, unsigned int size, | ||
| 4075 | int nmi, int sample) | ||
| 4076 | { | ||
| 4077 | struct perf_buffer *buffer; | ||
| 4078 | unsigned long tail, offset, head; | ||
| 4079 | int have_lost; | ||
| 4080 | struct perf_sample_data sample_data; | ||
| 4081 | struct { | ||
| 4082 | struct perf_event_header header; | ||
| 4083 | u64 id; | ||
| 4084 | u64 lost; | ||
| 4085 | } lost_event; | ||
| 4086 | |||
| 4087 | rcu_read_lock(); | ||
| 4088 | /* | ||
| 4089 | * For inherited events we send all the output towards the parent. | ||
| 4090 | */ | ||
| 4091 | if (event->parent) | ||
| 4092 | event = event->parent; | ||
| 4093 | |||
| 4094 | buffer = rcu_dereference(event->buffer); | ||
| 4095 | if (!buffer) | ||
| 4096 | goto out; | ||
| 4097 | |||
| 4098 | handle->buffer = buffer; | ||
| 4099 | handle->event = event; | ||
| 4100 | handle->nmi = nmi; | ||
| 4101 | handle->sample = sample; | ||
| 4102 | |||
| 4103 | if (!buffer->nr_pages) | ||
| 4104 | goto out; | ||
| 4105 | |||
| 4106 | have_lost = local_read(&buffer->lost); | ||
| 4107 | if (have_lost) { | ||
| 4108 | lost_event.header.size = sizeof(lost_event); | ||
| 4109 | perf_event_header__init_id(&lost_event.header, &sample_data, | ||
| 4110 | event); | ||
| 4111 | size += lost_event.header.size; | ||
| 4112 | } | ||
| 4113 | |||
| 4114 | perf_output_get_handle(handle); | ||
| 4115 | |||
| 4116 | do { | ||
| 4117 | /* | ||
| 4118 | * Userspace could choose to issue a mb() before updating the | ||
| 4119 | * tail pointer. So that all reads will be completed before the | ||
| 4120 | * write is issued. | ||
| 4121 | */ | ||
| 4122 | tail = ACCESS_ONCE(buffer->user_page->data_tail); | ||
| 4123 | smp_rmb(); | ||
| 4124 | offset = head = local_read(&buffer->head); | ||
| 4125 | head += size; | ||
| 4126 | if (unlikely(!perf_output_space(buffer, tail, offset, head))) | ||
| 4127 | goto fail; | ||
| 4128 | } while (local_cmpxchg(&buffer->head, offset, head) != offset); | ||
| 4129 | |||
| 4130 | if (head - local_read(&buffer->wakeup) > buffer->watermark) | ||
| 4131 | local_add(buffer->watermark, &buffer->wakeup); | ||
| 4132 | |||
| 4133 | handle->page = offset >> (PAGE_SHIFT + page_order(buffer)); | ||
| 4134 | handle->page &= buffer->nr_pages - 1; | ||
| 4135 | handle->size = offset & ((PAGE_SIZE << page_order(buffer)) - 1); | ||
| 4136 | handle->addr = buffer->data_pages[handle->page]; | ||
| 4137 | handle->addr += handle->size; | ||
| 4138 | handle->size = (PAGE_SIZE << page_order(buffer)) - handle->size; | ||
| 4139 | |||
| 4140 | if (have_lost) { | ||
| 4141 | lost_event.header.type = PERF_RECORD_LOST; | ||
| 4142 | lost_event.header.misc = 0; | ||
| 4143 | lost_event.id = event->id; | ||
| 4144 | lost_event.lost = local_xchg(&buffer->lost, 0); | ||
| 4145 | |||
| 4146 | perf_output_put(handle, lost_event); | ||
| 4147 | perf_event__output_id_sample(event, handle, &sample_data); | ||
| 4148 | } | ||
| 4149 | |||
| 4150 | return 0; | ||
| 4151 | |||
| 4152 | fail: | ||
| 4153 | local_inc(&buffer->lost); | ||
| 4154 | perf_output_put_handle(handle); | ||
| 4155 | out: | ||
| 4156 | rcu_read_unlock(); | ||
| 4157 | |||
| 4158 | return -ENOSPC; | ||
| 4159 | } | ||
| 4160 | |||
| 4161 | void perf_output_end(struct perf_output_handle *handle) | ||
| 4162 | { | ||
| 4163 | struct perf_event *event = handle->event; | ||
| 4164 | struct perf_buffer *buffer = handle->buffer; | ||
| 4165 | |||
| 4166 | int wakeup_events = event->attr.wakeup_events; | ||
| 4167 | |||
| 4168 | if (handle->sample && wakeup_events) { | ||
| 4169 | int events = local_inc_return(&buffer->events); | ||
| 4170 | if (events >= wakeup_events) { | ||
| 4171 | local_sub(wakeup_events, &buffer->events); | ||
| 4172 | local_inc(&buffer->wakeup); | ||
| 4173 | } | ||
| 4174 | } | ||
| 4175 | |||
| 4176 | perf_output_put_handle(handle); | ||
| 4177 | rcu_read_unlock(); | ||
| 4178 | } | ||
| 4179 | |||
| 4180 | static void perf_output_read_one(struct perf_output_handle *handle, | 3771 | static void perf_output_read_one(struct perf_output_handle *handle, |
| 4181 | struct perf_event *event, | 3772 | struct perf_event *event, |
| 4182 | u64 enabled, u64 running) | 3773 | u64 enabled, u64 running) |
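perf_output_begin(), removed from this file in the hunk above, reserves space with a lock-free loop: read the consumer tail, compute the proposed head, check for room, and claim the range with a compare-and-swap so concurrent (even NMI) writers each get a disjoint slice. A stand-alone model of that loop, with C11 atomics standing in for local_cmpxchg() (hypothetical names, not the kernel API):

/* Stand-alone model of the space-reservation loop in the removed
 * perf_output_begin(). Sizes and names are invented for illustration. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define RB_SIZE 4096UL				/* power of two */

static _Atomic unsigned long rb_head;		/* producer position */
static unsigned long rb_tail;			/* consumer position (updated elsewhere) */

static bool have_space(unsigned long tail, unsigned long offset, unsigned long head)
{
	unsigned long mask = RB_SIZE - 1;

	offset = (offset - tail) & mask;	/* distance already written past tail */
	head   = (head   - tail) & mask;	/* distance the new record would reach */
	return (long)(head - offset) >= 0;	/* false: would overwrite unread data */
}

static bool reserve(unsigned long size, unsigned long *offset)
{
	unsigned long old, new;

	do {
		old = atomic_load(&rb_head);
		new = old + size;
		if (!have_space(rb_tail, old, new))
			return false;		/* record dropped, counted as lost */
	} while (!atomic_compare_exchange_weak(&rb_head, &old, new));

	*offset = old;				/* caller writes its record at [old, new) */
	return true;
}

int main(void)
{
	unsigned long offset;

	if (reserve(64, &offset))
		printf("record reserved at offset %lu\n", offset);
	return 0;
}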
| @@ -4197,7 +3788,7 @@ static void perf_output_read_one(struct perf_output_handle *handle, | |||
| 4197 | if (read_format & PERF_FORMAT_ID) | 3788 | if (read_format & PERF_FORMAT_ID) |
| 4198 | values[n++] = primary_event_id(event); | 3789 | values[n++] = primary_event_id(event); |
| 4199 | 3790 | ||
| 4200 | perf_output_copy(handle, values, n * sizeof(u64)); | 3791 | __output_copy(handle, values, n * sizeof(u64)); |
| 4201 | } | 3792 | } |
| 4202 | 3793 | ||
| 4203 | /* | 3794 | /* |
| @@ -4227,7 +3818,7 @@ static void perf_output_read_group(struct perf_output_handle *handle, | |||
| 4227 | if (read_format & PERF_FORMAT_ID) | 3818 | if (read_format & PERF_FORMAT_ID) |
| 4228 | values[n++] = primary_event_id(leader); | 3819 | values[n++] = primary_event_id(leader); |
| 4229 | 3820 | ||
| 4230 | perf_output_copy(handle, values, n * sizeof(u64)); | 3821 | __output_copy(handle, values, n * sizeof(u64)); |
| 4231 | 3822 | ||
| 4232 | list_for_each_entry(sub, &leader->sibling_list, group_entry) { | 3823 | list_for_each_entry(sub, &leader->sibling_list, group_entry) { |
| 4233 | n = 0; | 3824 | n = 0; |
| @@ -4239,7 +3830,7 @@ static void perf_output_read_group(struct perf_output_handle *handle, | |||
| 4239 | if (read_format & PERF_FORMAT_ID) | 3830 | if (read_format & PERF_FORMAT_ID) |
| 4240 | values[n++] = primary_event_id(sub); | 3831 | values[n++] = primary_event_id(sub); |
| 4241 | 3832 | ||
| 4242 | perf_output_copy(handle, values, n * sizeof(u64)); | 3833 | __output_copy(handle, values, n * sizeof(u64)); |
| 4243 | } | 3834 | } |
| 4244 | } | 3835 | } |
| 4245 | 3836 | ||
| @@ -4249,7 +3840,7 @@ static void perf_output_read_group(struct perf_output_handle *handle, | |||
| 4249 | static void perf_output_read(struct perf_output_handle *handle, | 3840 | static void perf_output_read(struct perf_output_handle *handle, |
| 4250 | struct perf_event *event) | 3841 | struct perf_event *event) |
| 4251 | { | 3842 | { |
| 4252 | u64 enabled = 0, running = 0, now, ctx_time; | 3843 | u64 enabled = 0, running = 0; |
| 4253 | u64 read_format = event->attr.read_format; | 3844 | u64 read_format = event->attr.read_format; |
| 4254 | 3845 | ||
| 4255 | /* | 3846 | /* |
| @@ -4261,12 +3852,8 @@ static void perf_output_read(struct perf_output_handle *handle, | |||
| 4261 | * because of locking issue as we are called in | 3852 | * because of locking issue as we are called in |
| 4262 | * NMI context | 3853 | * NMI context |
| 4263 | */ | 3854 | */ |
| 4264 | if (read_format & PERF_FORMAT_TOTAL_TIMES) { | 3855 | if (read_format & PERF_FORMAT_TOTAL_TIMES) |
| 4265 | now = perf_clock(); | 3856 | calc_timer_values(event, &enabled, &running); |
| 4266 | ctx_time = event->shadow_ctx_time + now; | ||
| 4267 | enabled = ctx_time - event->tstamp_enabled; | ||
| 4268 | running = ctx_time - event->tstamp_running; | ||
| 4269 | } | ||
| 4270 | 3857 | ||
| 4271 | if (event->attr.read_format & PERF_FORMAT_GROUP) | 3858 | if (event->attr.read_format & PERF_FORMAT_GROUP) |
| 4272 | perf_output_read_group(handle, event, enabled, running); | 3859 | perf_output_read_group(handle, event, enabled, running); |
| @@ -4319,7 +3906,7 @@ void perf_output_sample(struct perf_output_handle *handle, | |||
| 4319 | 3906 | ||
| 4320 | size *= sizeof(u64); | 3907 | size *= sizeof(u64); |
| 4321 | 3908 | ||
| 4322 | perf_output_copy(handle, data->callchain, size); | 3909 | __output_copy(handle, data->callchain, size); |
| 4323 | } else { | 3910 | } else { |
| 4324 | u64 nr = 0; | 3911 | u64 nr = 0; |
| 4325 | perf_output_put(handle, nr); | 3912 | perf_output_put(handle, nr); |
| @@ -4329,8 +3916,8 @@ void perf_output_sample(struct perf_output_handle *handle, | |||
| 4329 | if (sample_type & PERF_SAMPLE_RAW) { | 3916 | if (sample_type & PERF_SAMPLE_RAW) { |
| 4330 | if (data->raw) { | 3917 | if (data->raw) { |
| 4331 | perf_output_put(handle, data->raw->size); | 3918 | perf_output_put(handle, data->raw->size); |
| 4332 | perf_output_copy(handle, data->raw->data, | 3919 | __output_copy(handle, data->raw->data, |
| 4333 | data->raw->size); | 3920 | data->raw->size); |
| 4334 | } else { | 3921 | } else { |
| 4335 | struct { | 3922 | struct { |
| 4336 | u32 size; | 3923 | u32 size; |
| @@ -4342,6 +3929,20 @@ void perf_output_sample(struct perf_output_handle *handle, | |||
| 4342 | perf_output_put(handle, raw); | 3929 | perf_output_put(handle, raw); |
| 4343 | } | 3930 | } |
| 4344 | } | 3931 | } |
| 3932 | |||
| 3933 | if (!event->attr.watermark) { | ||
| 3934 | int wakeup_events = event->attr.wakeup_events; | ||
| 3935 | |||
| 3936 | if (wakeup_events) { | ||
| 3937 | struct ring_buffer *rb = handle->rb; | ||
| 3938 | int events = local_inc_return(&rb->events); | ||
| 3939 | |||
| 3940 | if (events >= wakeup_events) { | ||
| 3941 | local_sub(wakeup_events, &rb->events); | ||
| 3942 | local_inc(&rb->wakeup); | ||
| 3943 | } | ||
| 3944 | } | ||
| 3945 | } | ||
| 4345 | } | 3946 | } |
| 4346 | 3947 | ||
| 4347 | void perf_prepare_sample(struct perf_event_header *header, | 3948 | void perf_prepare_sample(struct perf_event_header *header, |
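The hunk above moves the wakeup_events accounting from perf_output_end() into perf_output_sample(), and only applies it when no byte watermark is configured: every wakeup_events sample records, the wakeup count the poll() side compares against is bumped. A plain-integer sketch of that counter:

/* Counter logic mirroring the wakeup_events branch added above.
 * Stand-alone sketch with plain ints instead of local_t. */
#include <stdio.h>

int main(void)
{
	int wakeup_events = 4;		/* attr.wakeup_events: wake readers every 4 samples */
	int events = 0;			/* rb->events in the kernel */
	int wakeups = 0;		/* rb->wakeup in the kernel */

	for (int sample = 0; sample < 10; sample++) {
		if (++events >= wakeup_events) {
			events -= wakeup_events;
			wakeups++;	/* reader is woken when the handle is released */
		}
	}
	printf("10 samples -> %d wakeups, %d pending\n", wakeups, events);
	return 0;
}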
| @@ -4386,7 +3987,7 @@ void perf_prepare_sample(struct perf_event_header *header, | |||
| 4386 | } | 3987 | } |
| 4387 | } | 3988 | } |
| 4388 | 3989 | ||
| 4389 | static void perf_event_output(struct perf_event *event, int nmi, | 3990 | static void perf_event_output(struct perf_event *event, |
| 4390 | struct perf_sample_data *data, | 3991 | struct perf_sample_data *data, |
| 4391 | struct pt_regs *regs) | 3992 | struct pt_regs *regs) |
| 4392 | { | 3993 | { |
| @@ -4398,7 +3999,7 @@ static void perf_event_output(struct perf_event *event, int nmi, | |||
| 4398 | 3999 | ||
| 4399 | perf_prepare_sample(&header, data, event, regs); | 4000 | perf_prepare_sample(&header, data, event, regs); |
| 4400 | 4001 | ||
| 4401 | if (perf_output_begin(&handle, event, header.size, nmi, 1)) | 4002 | if (perf_output_begin(&handle, event, header.size)) |
| 4402 | goto exit; | 4003 | goto exit; |
| 4403 | 4004 | ||
| 4404 | perf_output_sample(&handle, &header, data, event); | 4005 | perf_output_sample(&handle, &header, data, event); |
| @@ -4438,7 +4039,7 @@ perf_event_read_event(struct perf_event *event, | |||
| 4438 | int ret; | 4039 | int ret; |
| 4439 | 4040 | ||
| 4440 | perf_event_header__init_id(&read_event.header, &sample, event); | 4041 | perf_event_header__init_id(&read_event.header, &sample, event); |
| 4441 | ret = perf_output_begin(&handle, event, read_event.header.size, 0, 0); | 4042 | ret = perf_output_begin(&handle, event, read_event.header.size); |
| 4442 | if (ret) | 4043 | if (ret) |
| 4443 | return; | 4044 | return; |
| 4444 | 4045 | ||
| @@ -4481,7 +4082,7 @@ static void perf_event_task_output(struct perf_event *event, | |||
| 4481 | perf_event_header__init_id(&task_event->event_id.header, &sample, event); | 4082 | perf_event_header__init_id(&task_event->event_id.header, &sample, event); |
| 4482 | 4083 | ||
| 4483 | ret = perf_output_begin(&handle, event, | 4084 | ret = perf_output_begin(&handle, event, |
| 4484 | task_event->event_id.header.size, 0, 0); | 4085 | task_event->event_id.header.size); |
| 4485 | if (ret) | 4086 | if (ret) |
| 4486 | goto out; | 4087 | goto out; |
| 4487 | 4088 | ||
| @@ -4618,7 +4219,7 @@ static void perf_event_comm_output(struct perf_event *event, | |||
| 4618 | 4219 | ||
| 4619 | perf_event_header__init_id(&comm_event->event_id.header, &sample, event); | 4220 | perf_event_header__init_id(&comm_event->event_id.header, &sample, event); |
| 4620 | ret = perf_output_begin(&handle, event, | 4221 | ret = perf_output_begin(&handle, event, |
| 4621 | comm_event->event_id.header.size, 0, 0); | 4222 | comm_event->event_id.header.size); |
| 4622 | 4223 | ||
| 4623 | if (ret) | 4224 | if (ret) |
| 4624 | goto out; | 4225 | goto out; |
| @@ -4627,7 +4228,7 @@ static void perf_event_comm_output(struct perf_event *event, | |||
| 4627 | comm_event->event_id.tid = perf_event_tid(event, comm_event->task); | 4228 | comm_event->event_id.tid = perf_event_tid(event, comm_event->task); |
| 4628 | 4229 | ||
| 4629 | perf_output_put(&handle, comm_event->event_id); | 4230 | perf_output_put(&handle, comm_event->event_id); |
| 4630 | perf_output_copy(&handle, comm_event->comm, | 4231 | __output_copy(&handle, comm_event->comm, |
| 4631 | comm_event->comm_size); | 4232 | comm_event->comm_size); |
| 4632 | 4233 | ||
| 4633 | perf_event__output_id_sample(event, &handle, &sample); | 4234 | perf_event__output_id_sample(event, &handle, &sample); |
| @@ -4765,7 +4366,7 @@ static void perf_event_mmap_output(struct perf_event *event, | |||
| 4765 | 4366 | ||
| 4766 | perf_event_header__init_id(&mmap_event->event_id.header, &sample, event); | 4367 | perf_event_header__init_id(&mmap_event->event_id.header, &sample, event); |
| 4767 | ret = perf_output_begin(&handle, event, | 4368 | ret = perf_output_begin(&handle, event, |
| 4768 | mmap_event->event_id.header.size, 0, 0); | 4369 | mmap_event->event_id.header.size); |
| 4769 | if (ret) | 4370 | if (ret) |
| 4770 | goto out; | 4371 | goto out; |
| 4771 | 4372 | ||
| @@ -4773,7 +4374,7 @@ static void perf_event_mmap_output(struct perf_event *event, | |||
| 4773 | mmap_event->event_id.tid = perf_event_tid(event, current); | 4374 | mmap_event->event_id.tid = perf_event_tid(event, current); |
| 4774 | 4375 | ||
| 4775 | perf_output_put(&handle, mmap_event->event_id); | 4376 | perf_output_put(&handle, mmap_event->event_id); |
| 4776 | perf_output_copy(&handle, mmap_event->file_name, | 4377 | __output_copy(&handle, mmap_event->file_name, |
| 4777 | mmap_event->file_size); | 4378 | mmap_event->file_size); |
| 4778 | 4379 | ||
| 4779 | perf_event__output_id_sample(event, &handle, &sample); | 4380 | perf_event__output_id_sample(event, &handle, &sample); |
| @@ -4829,7 +4430,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) | |||
| 4829 | 4430 | ||
| 4830 | if (file) { | 4431 | if (file) { |
| 4831 | /* | 4432 | /* |
| 4832 | * d_path works from the end of the buffer backwards, so we | 4433 | * d_path works from the end of the rb backwards, so we |
| 4833 | * need to add enough zero bytes after the string to handle | 4434 | * need to add enough zero bytes after the string to handle |
| 4834 | * the 64bit alignment we do later. | 4435 | * the 64bit alignment we do later. |
| 4835 | */ | 4436 | */ |
| @@ -4960,7 +4561,7 @@ static void perf_log_throttle(struct perf_event *event, int enable) | |||
| 4960 | perf_event_header__init_id(&throttle_event.header, &sample, event); | 4561 | perf_event_header__init_id(&throttle_event.header, &sample, event); |
| 4961 | 4562 | ||
| 4962 | ret = perf_output_begin(&handle, event, | 4563 | ret = perf_output_begin(&handle, event, |
| 4963 | throttle_event.header.size, 1, 0); | 4564 | throttle_event.header.size); |
| 4964 | if (ret) | 4565 | if (ret) |
| 4965 | return; | 4566 | return; |
| 4966 | 4567 | ||
| @@ -4973,7 +4574,7 @@ static void perf_log_throttle(struct perf_event *event, int enable) | |||
| 4973 | * Generic event overflow handling, sampling. | 4574 | * Generic event overflow handling, sampling. |
| 4974 | */ | 4575 | */ |
| 4975 | 4576 | ||
| 4976 | static int __perf_event_overflow(struct perf_event *event, int nmi, | 4577 | static int __perf_event_overflow(struct perf_event *event, |
| 4977 | int throttle, struct perf_sample_data *data, | 4578 | int throttle, struct perf_sample_data *data, |
| 4978 | struct pt_regs *regs) | 4579 | struct pt_regs *regs) |
| 4979 | { | 4580 | { |
| @@ -5016,34 +4617,28 @@ static int __perf_event_overflow(struct perf_event *event, int nmi, | |||
| 5016 | if (events && atomic_dec_and_test(&event->event_limit)) { | 4617 | if (events && atomic_dec_and_test(&event->event_limit)) { |
| 5017 | ret = 1; | 4618 | ret = 1; |
| 5018 | event->pending_kill = POLL_HUP; | 4619 | event->pending_kill = POLL_HUP; |
| 5019 | if (nmi) { | 4620 | event->pending_disable = 1; |
| 5020 | event->pending_disable = 1; | 4621 | irq_work_queue(&event->pending); |
| 5021 | irq_work_queue(&event->pending); | ||
| 5022 | } else | ||
| 5023 | perf_event_disable(event); | ||
| 5024 | } | 4622 | } |
| 5025 | 4623 | ||
| 5026 | if (event->overflow_handler) | 4624 | if (event->overflow_handler) |
| 5027 | event->overflow_handler(event, nmi, data, regs); | 4625 | event->overflow_handler(event, data, regs); |
| 5028 | else | 4626 | else |
| 5029 | perf_event_output(event, nmi, data, regs); | 4627 | perf_event_output(event, data, regs); |
| 5030 | 4628 | ||
| 5031 | if (event->fasync && event->pending_kill) { | 4629 | if (event->fasync && event->pending_kill) { |
| 5032 | if (nmi) { | 4630 | event->pending_wakeup = 1; |
| 5033 | event->pending_wakeup = 1; | 4631 | irq_work_queue(&event->pending); |
| 5034 | irq_work_queue(&event->pending); | ||
| 5035 | } else | ||
| 5036 | perf_event_wakeup(event); | ||
| 5037 | } | 4632 | } |
| 5038 | 4633 | ||
| 5039 | return ret; | 4634 | return ret; |
| 5040 | } | 4635 | } |
| 5041 | 4636 | ||
| 5042 | int perf_event_overflow(struct perf_event *event, int nmi, | 4637 | int perf_event_overflow(struct perf_event *event, |
| 5043 | struct perf_sample_data *data, | 4638 | struct perf_sample_data *data, |
| 5044 | struct pt_regs *regs) | 4639 | struct pt_regs *regs) |
| 5045 | { | 4640 | { |
| 5046 | return __perf_event_overflow(event, nmi, 1, data, regs); | 4641 | return __perf_event_overflow(event, 1, data, regs); |
| 5047 | } | 4642 | } |
| 5048 | 4643 | ||
| 5049 | /* | 4644 | /* |
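With the nmi argument gone, __perf_event_overflow() above no longer decides per call site whether it is safe to disable or wake directly; it always records the intent (pending_disable / pending_wakeup) and lets irq_work do the real work from a safe context. A minimal sketch of that "set a pending flag, act later" pattern (hypothetical stand-alone code, not the irq_work API):

/* Sketch of the deferred-work pattern the hunk above standardizes on. */
#include <stdbool.h>
#include <stdio.h>

static bool pending_disable;

static void overflow_in_unsafe_context(void)
{
	/* cannot run the heavyweight disable path here (think NMI),
	 * so just record that it has to happen */
	pending_disable = true;
}

static void irq_work_runs_later(void)
{
	if (pending_disable) {
		pending_disable = false;
		printf("event disabled from a safe context\n");
	}
}

int main(void)
{
	overflow_in_unsafe_context();
	irq_work_runs_later();
	return 0;
}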
| @@ -5092,7 +4687,7 @@ again: | |||
| 5092 | } | 4687 | } |
| 5093 | 4688 | ||
| 5094 | static void perf_swevent_overflow(struct perf_event *event, u64 overflow, | 4689 | static void perf_swevent_overflow(struct perf_event *event, u64 overflow, |
| 5095 | int nmi, struct perf_sample_data *data, | 4690 | struct perf_sample_data *data, |
| 5096 | struct pt_regs *regs) | 4691 | struct pt_regs *regs) |
| 5097 | { | 4692 | { |
| 5098 | struct hw_perf_event *hwc = &event->hw; | 4693 | struct hw_perf_event *hwc = &event->hw; |
| @@ -5106,7 +4701,7 @@ static void perf_swevent_overflow(struct perf_event *event, u64 overflow, | |||
| 5106 | return; | 4701 | return; |
| 5107 | 4702 | ||
| 5108 | for (; overflow; overflow--) { | 4703 | for (; overflow; overflow--) { |
| 5109 | if (__perf_event_overflow(event, nmi, throttle, | 4704 | if (__perf_event_overflow(event, throttle, |
| 5110 | data, regs)) { | 4705 | data, regs)) { |
| 5111 | /* | 4706 | /* |
| 5112 | * We inhibit the overflow from happening when | 4707 | * We inhibit the overflow from happening when |
| @@ -5119,7 +4714,7 @@ static void perf_swevent_overflow(struct perf_event *event, u64 overflow, | |||
| 5119 | } | 4714 | } |
| 5120 | 4715 | ||
| 5121 | static void perf_swevent_event(struct perf_event *event, u64 nr, | 4716 | static void perf_swevent_event(struct perf_event *event, u64 nr, |
| 5122 | int nmi, struct perf_sample_data *data, | 4717 | struct perf_sample_data *data, |
| 5123 | struct pt_regs *regs) | 4718 | struct pt_regs *regs) |
| 5124 | { | 4719 | { |
| 5125 | struct hw_perf_event *hwc = &event->hw; | 4720 | struct hw_perf_event *hwc = &event->hw; |
| @@ -5133,12 +4728,12 @@ static void perf_swevent_event(struct perf_event *event, u64 nr, | |||
| 5133 | return; | 4728 | return; |
| 5134 | 4729 | ||
| 5135 | if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq) | 4730 | if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq) |
| 5136 | return perf_swevent_overflow(event, 1, nmi, data, regs); | 4731 | return perf_swevent_overflow(event, 1, data, regs); |
| 5137 | 4732 | ||
| 5138 | if (local64_add_negative(nr, &hwc->period_left)) | 4733 | if (local64_add_negative(nr, &hwc->period_left)) |
| 5139 | return; | 4734 | return; |
| 5140 | 4735 | ||
| 5141 | perf_swevent_overflow(event, 0, nmi, data, regs); | 4736 | perf_swevent_overflow(event, 0, data, regs); |
| 5142 | } | 4737 | } |
| 5143 | 4738 | ||
| 5144 | static int perf_exclude_event(struct perf_event *event, | 4739 | static int perf_exclude_event(struct perf_event *event, |
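The period_left bookkeeping visible around the hunk above works by keeping the counter negative while an event is still inside its sample period; adding the new count (local64_add_negative()) only triggers the overflow path once the value crosses zero. A simplified stand-alone illustration of that arithmetic (the real re-arm in perf_swevent_set_period() is more involved):

/* Simplified sketch of the period_left accounting used by the
 * software-event overflow path. Plain integers, invented numbers. */
#include <stdio.h>

int main(void)
{
	long long sample_period = 100;
	long long period_left = -sample_period;		/* armed for the next period */
	long long hits[] = { 30, 30, 30, 30 };		/* counts reported per call */

	for (int i = 0; i < 4; i++) {
		period_left += hits[i];
		if (period_left < 0)			/* local64_add_negative(): not there yet */
			continue;
		printf("overflow after call %d\n", i);
		period_left -= sample_period;		/* re-arm (simplified) */
	}
	return 0;
}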
| @@ -5226,7 +4821,7 @@ find_swevent_head(struct swevent_htable *swhash, struct perf_event *event) | |||
| 5226 | } | 4821 | } |
| 5227 | 4822 | ||
| 5228 | static void do_perf_sw_event(enum perf_type_id type, u32 event_id, | 4823 | static void do_perf_sw_event(enum perf_type_id type, u32 event_id, |
| 5229 | u64 nr, int nmi, | 4824 | u64 nr, |
| 5230 | struct perf_sample_data *data, | 4825 | struct perf_sample_data *data, |
| 5231 | struct pt_regs *regs) | 4826 | struct pt_regs *regs) |
| 5232 | { | 4827 | { |
| @@ -5242,7 +4837,7 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id, | |||
| 5242 | 4837 | ||
| 5243 | hlist_for_each_entry_rcu(event, node, head, hlist_entry) { | 4838 | hlist_for_each_entry_rcu(event, node, head, hlist_entry) { |
| 5244 | if (perf_swevent_match(event, type, event_id, data, regs)) | 4839 | if (perf_swevent_match(event, type, event_id, data, regs)) |
| 5245 | perf_swevent_event(event, nr, nmi, data, regs); | 4840 | perf_swevent_event(event, nr, data, regs); |
| 5246 | } | 4841 | } |
| 5247 | end: | 4842 | end: |
| 5248 | rcu_read_unlock(); | 4843 | rcu_read_unlock(); |
| @@ -5263,8 +4858,7 @@ inline void perf_swevent_put_recursion_context(int rctx) | |||
| 5263 | put_recursion_context(swhash->recursion, rctx); | 4858 | put_recursion_context(swhash->recursion, rctx); |
| 5264 | } | 4859 | } |
| 5265 | 4860 | ||
| 5266 | void __perf_sw_event(u32 event_id, u64 nr, int nmi, | 4861 | void __perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) |
| 5267 | struct pt_regs *regs, u64 addr) | ||
| 5268 | { | 4862 | { |
| 5269 | struct perf_sample_data data; | 4863 | struct perf_sample_data data; |
| 5270 | int rctx; | 4864 | int rctx; |
| @@ -5276,7 +4870,7 @@ void __perf_sw_event(u32 event_id, u64 nr, int nmi, | |||
| 5276 | 4870 | ||
| 5277 | perf_sample_data_init(&data, addr); | 4871 | perf_sample_data_init(&data, addr); |
| 5278 | 4872 | ||
| 5279 | do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs); | 4873 | do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, &data, regs); |
| 5280 | 4874 | ||
| 5281 | perf_swevent_put_recursion_context(rctx); | 4875 | perf_swevent_put_recursion_context(rctx); |
| 5282 | preempt_enable_notrace(); | 4876 | preempt_enable_notrace(); |
| @@ -5524,7 +5118,7 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size, | |||
| 5524 | 5118 | ||
| 5525 | hlist_for_each_entry_rcu(event, node, head, hlist_entry) { | 5119 | hlist_for_each_entry_rcu(event, node, head, hlist_entry) { |
| 5526 | if (perf_tp_event_match(event, &data, regs)) | 5120 | if (perf_tp_event_match(event, &data, regs)) |
| 5527 | perf_swevent_event(event, count, 1, &data, regs); | 5121 | perf_swevent_event(event, count, &data, regs); |
| 5528 | } | 5122 | } |
| 5529 | 5123 | ||
| 5530 | perf_swevent_put_recursion_context(rctx); | 5124 | perf_swevent_put_recursion_context(rctx); |
| @@ -5617,7 +5211,7 @@ void perf_bp_event(struct perf_event *bp, void *data) | |||
| 5617 | perf_sample_data_init(&sample, bp->attr.bp_addr); | 5211 | perf_sample_data_init(&sample, bp->attr.bp_addr); |
| 5618 | 5212 | ||
| 5619 | if (!bp->hw.state && !perf_exclude_event(bp, regs)) | 5213 | if (!bp->hw.state && !perf_exclude_event(bp, regs)) |
| 5620 | perf_swevent_event(bp, 1, 1, &sample, regs); | 5214 | perf_swevent_event(bp, 1, &sample, regs); |
| 5621 | } | 5215 | } |
| 5622 | #endif | 5216 | #endif |
| 5623 | 5217 | ||
| @@ -5646,7 +5240,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) | |||
| 5646 | 5240 | ||
| 5647 | if (regs && !perf_exclude_event(event, regs)) { | 5241 | if (regs && !perf_exclude_event(event, regs)) { |
| 5648 | if (!(event->attr.exclude_idle && current->pid == 0)) | 5242 | if (!(event->attr.exclude_idle && current->pid == 0)) |
| 5649 | if (perf_event_overflow(event, 0, &data, regs)) | 5243 | if (perf_event_overflow(event, &data, regs)) |
| 5650 | ret = HRTIMER_NORESTART; | 5244 | ret = HRTIMER_NORESTART; |
| 5651 | } | 5245 | } |
| 5652 | 5246 | ||
| @@ -5986,6 +5580,7 @@ free_dev: | |||
| 5986 | } | 5580 | } |
| 5987 | 5581 | ||
| 5988 | static struct lock_class_key cpuctx_mutex; | 5582 | static struct lock_class_key cpuctx_mutex; |
| 5583 | static struct lock_class_key cpuctx_lock; | ||
| 5989 | 5584 | ||
| 5990 | int perf_pmu_register(struct pmu *pmu, char *name, int type) | 5585 | int perf_pmu_register(struct pmu *pmu, char *name, int type) |
| 5991 | { | 5586 | { |
| @@ -6036,6 +5631,7 @@ skip_type: | |||
| 6036 | cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); | 5631 | cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); |
| 6037 | __perf_event_init_context(&cpuctx->ctx); | 5632 | __perf_event_init_context(&cpuctx->ctx); |
| 6038 | lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex); | 5633 | lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex); |
| 5634 | lockdep_set_class(&cpuctx->ctx.lock, &cpuctx_lock); | ||
| 6039 | cpuctx->ctx.type = cpu_context; | 5635 | cpuctx->ctx.type = cpu_context; |
| 6040 | cpuctx->ctx.pmu = pmu; | 5636 | cpuctx->ctx.pmu = pmu; |
| 6041 | cpuctx->jiffies_interval = 1; | 5637 | cpuctx->jiffies_interval = 1; |
| @@ -6150,7 +5746,8 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, | |||
| 6150 | struct task_struct *task, | 5746 | struct task_struct *task, |
| 6151 | struct perf_event *group_leader, | 5747 | struct perf_event *group_leader, |
| 6152 | struct perf_event *parent_event, | 5748 | struct perf_event *parent_event, |
| 6153 | perf_overflow_handler_t overflow_handler) | 5749 | perf_overflow_handler_t overflow_handler, |
| 5750 | void *context) | ||
| 6154 | { | 5751 | { |
| 6155 | struct pmu *pmu; | 5752 | struct pmu *pmu; |
| 6156 | struct perf_event *event; | 5753 | struct perf_event *event; |
| @@ -6208,10 +5805,13 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, | |||
| 6208 | #endif | 5805 | #endif |
| 6209 | } | 5806 | } |
| 6210 | 5807 | ||
| 6211 | if (!overflow_handler && parent_event) | 5808 | if (!overflow_handler && parent_event) { |
| 6212 | overflow_handler = parent_event->overflow_handler; | 5809 | overflow_handler = parent_event->overflow_handler; |
| 5810 | context = parent_event->overflow_handler_context; | ||
| 5811 | } | ||
| 6213 | 5812 | ||
| 6214 | event->overflow_handler = overflow_handler; | 5813 | event->overflow_handler = overflow_handler; |
| 5814 | event->overflow_handler_context = context; | ||
| 6215 | 5815 | ||
| 6216 | if (attr->disabled) | 5816 | if (attr->disabled) |
| 6217 | event->state = PERF_EVENT_STATE_OFF; | 5817 | event->state = PERF_EVENT_STATE_OFF; |
| @@ -6326,13 +5926,6 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, | |||
| 6326 | if (ret) | 5926 | if (ret) |
| 6327 | return -EFAULT; | 5927 | return -EFAULT; |
| 6328 | 5928 | ||
| 6329 | /* | ||
| 6330 | * If the type exists, the corresponding creation will verify | ||
| 6331 | * the attr->config. | ||
| 6332 | */ | ||
| 6333 | if (attr->type >= PERF_TYPE_MAX) | ||
| 6334 | return -EINVAL; | ||
| 6335 | |||
| 6336 | if (attr->__reserved_1) | 5929 | if (attr->__reserved_1) |
| 6337 | return -EINVAL; | 5930 | return -EINVAL; |
| 6338 | 5931 | ||
| @@ -6354,7 +5947,7 @@ err_size: | |||
| 6354 | static int | 5947 | static int |
| 6355 | perf_event_set_output(struct perf_event *event, struct perf_event *output_event) | 5948 | perf_event_set_output(struct perf_event *event, struct perf_event *output_event) |
| 6356 | { | 5949 | { |
| 6357 | struct perf_buffer *buffer = NULL, *old_buffer = NULL; | 5950 | struct ring_buffer *rb = NULL, *old_rb = NULL; |
| 6358 | int ret = -EINVAL; | 5951 | int ret = -EINVAL; |
| 6359 | 5952 | ||
| 6360 | if (!output_event) | 5953 | if (!output_event) |
| @@ -6371,7 +5964,7 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event) | |||
| 6371 | goto out; | 5964 | goto out; |
| 6372 | 5965 | ||
| 6373 | /* | 5966 | /* |
| 6374 | * If its not a per-cpu buffer, it must be the same task. | 5967 | * If its not a per-cpu rb, it must be the same task. |
| 6375 | */ | 5968 | */ |
| 6376 | if (output_event->cpu == -1 && output_event->ctx != event->ctx) | 5969 | if (output_event->cpu == -1 && output_event->ctx != event->ctx) |
| 6377 | goto out; | 5970 | goto out; |
| @@ -6383,20 +5976,20 @@ set: | |||
| 6383 | goto unlock; | 5976 | goto unlock; |
| 6384 | 5977 | ||
| 6385 | if (output_event) { | 5978 | if (output_event) { |
| 6386 | /* get the buffer we want to redirect to */ | 5979 | /* get the rb we want to redirect to */ |
| 6387 | buffer = perf_buffer_get(output_event); | 5980 | rb = ring_buffer_get(output_event); |
| 6388 | if (!buffer) | 5981 | if (!rb) |
| 6389 | goto unlock; | 5982 | goto unlock; |
| 6390 | } | 5983 | } |
| 6391 | 5984 | ||
| 6392 | old_buffer = event->buffer; | 5985 | old_rb = event->rb; |
| 6393 | rcu_assign_pointer(event->buffer, buffer); | 5986 | rcu_assign_pointer(event->rb, rb); |
| 6394 | ret = 0; | 5987 | ret = 0; |
| 6395 | unlock: | 5988 | unlock: |
| 6396 | mutex_unlock(&event->mmap_mutex); | 5989 | mutex_unlock(&event->mmap_mutex); |
| 6397 | 5990 | ||
| 6398 | if (old_buffer) | 5991 | if (old_rb) |
| 6399 | perf_buffer_put(old_buffer); | 5992 | ring_buffer_put(old_rb); |
| 6400 | out: | 5993 | out: |
| 6401 | return ret; | 5994 | return ret; |
| 6402 | } | 5995 | } |
| @@ -6478,7 +6071,8 @@ SYSCALL_DEFINE5(perf_event_open, | |||
| 6478 | } | 6071 | } |
| 6479 | } | 6072 | } |
| 6480 | 6073 | ||
| 6481 | event = perf_event_alloc(&attr, cpu, task, group_leader, NULL, NULL); | 6074 | event = perf_event_alloc(&attr, cpu, task, group_leader, NULL, |
| 6075 | NULL, NULL); | ||
| 6482 | if (IS_ERR(event)) { | 6076 | if (IS_ERR(event)) { |
| 6483 | err = PTR_ERR(event); | 6077 | err = PTR_ERR(event); |
| 6484 | goto err_task; | 6078 | goto err_task; |
| @@ -6663,7 +6257,8 @@ err_fd: | |||
| 6663 | struct perf_event * | 6257 | struct perf_event * |
| 6664 | perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, | 6258 | perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, |
| 6665 | struct task_struct *task, | 6259 | struct task_struct *task, |
| 6666 | perf_overflow_handler_t overflow_handler) | 6260 | perf_overflow_handler_t overflow_handler, |
| 6261 | void *context) | ||
| 6667 | { | 6262 | { |
| 6668 | struct perf_event_context *ctx; | 6263 | struct perf_event_context *ctx; |
| 6669 | struct perf_event *event; | 6264 | struct perf_event *event; |
| @@ -6673,7 +6268,8 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, | |||
| 6673 | * Get the target context (task or percpu): | 6268 | * Get the target context (task or percpu): |
| 6674 | */ | 6269 | */ |
| 6675 | 6270 | ||
| 6676 | event = perf_event_alloc(attr, cpu, task, NULL, NULL, overflow_handler); | 6271 | event = perf_event_alloc(attr, cpu, task, NULL, NULL, |
| 6272 | overflow_handler, context); | ||
| 6677 | if (IS_ERR(event)) { | 6273 | if (IS_ERR(event)) { |
| 6678 | err = PTR_ERR(event); | 6274 | err = PTR_ERR(event); |
| 6679 | goto err; | 6275 | goto err; |
| @@ -6780,7 +6376,6 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn) | |||
| 6780 | * our context. | 6376 | * our context. |
| 6781 | */ | 6377 | */ |
| 6782 | child_ctx = rcu_dereference_raw(child->perf_event_ctxp[ctxn]); | 6378 | child_ctx = rcu_dereference_raw(child->perf_event_ctxp[ctxn]); |
| 6783 | task_ctx_sched_out(child_ctx, EVENT_ALL); | ||
| 6784 | 6379 | ||
| 6785 | /* | 6380 | /* |
| 6786 | * Take the context lock here so that if find_get_context is | 6381 | * Take the context lock here so that if find_get_context is |
| @@ -6788,6 +6383,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn) | |||
| 6788 | * incremented the context's refcount before we do put_ctx below. | 6383 | * incremented the context's refcount before we do put_ctx below. |
| 6789 | */ | 6384 | */ |
| 6790 | raw_spin_lock(&child_ctx->lock); | 6385 | raw_spin_lock(&child_ctx->lock); |
| 6386 | task_ctx_sched_out(child_ctx); | ||
| 6791 | child->perf_event_ctxp[ctxn] = NULL; | 6387 | child->perf_event_ctxp[ctxn] = NULL; |
| 6792 | /* | 6388 | /* |
| 6793 | * If this context is a clone; unclone it so it can't get | 6389 | * If this context is a clone; unclone it so it can't get |
| @@ -6957,7 +6553,7 @@ inherit_event(struct perf_event *parent_event, | |||
| 6957 | parent_event->cpu, | 6553 | parent_event->cpu, |
| 6958 | child, | 6554 | child, |
| 6959 | group_leader, parent_event, | 6555 | group_leader, parent_event, |
| 6960 | NULL); | 6556 | NULL, NULL); |
| 6961 | if (IS_ERR(child_event)) | 6557 | if (IS_ERR(child_event)) |
| 6962 | return child_event; | 6558 | return child_event; |
| 6963 | get_ctx(child_ctx); | 6559 | get_ctx(child_ctx); |
| @@ -6984,6 +6580,8 @@ inherit_event(struct perf_event *parent_event, | |||
| 6984 | 6580 | ||
| 6985 | child_event->ctx = child_ctx; | 6581 | child_event->ctx = child_ctx; |
| 6986 | child_event->overflow_handler = parent_event->overflow_handler; | 6582 | child_event->overflow_handler = parent_event->overflow_handler; |
| 6583 | child_event->overflow_handler_context | ||
| 6584 | = parent_event->overflow_handler_context; | ||
| 6987 | 6585 | ||
| 6988 | /* | 6586 | /* |
| 6989 | * Precalculate sample_data sizes | 6587 | * Precalculate sample_data sizes |
| @@ -7402,26 +7000,12 @@ static int __perf_cgroup_move(void *info) | |||
| 7402 | return 0; | 7000 | return 0; |
| 7403 | } | 7001 | } |
| 7404 | 7002 | ||
| 7405 | static void perf_cgroup_move(struct task_struct *task) | 7003 | static void |
| 7004 | perf_cgroup_attach_task(struct cgroup *cgrp, struct task_struct *task) | ||
| 7406 | { | 7005 | { |
| 7407 | task_function_call(task, __perf_cgroup_move, task); | 7006 | task_function_call(task, __perf_cgroup_move, task); |
| 7408 | } | 7007 | } |
| 7409 | 7008 | ||
| 7410 | static void perf_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | ||
| 7411 | struct cgroup *old_cgrp, struct task_struct *task, | ||
| 7412 | bool threadgroup) | ||
| 7413 | { | ||
| 7414 | perf_cgroup_move(task); | ||
| 7415 | if (threadgroup) { | ||
| 7416 | struct task_struct *c; | ||
| 7417 | rcu_read_lock(); | ||
| 7418 | list_for_each_entry_rcu(c, &task->thread_group, thread_group) { | ||
| 7419 | perf_cgroup_move(c); | ||
| 7420 | } | ||
| 7421 | rcu_read_unlock(); | ||
| 7422 | } | ||
| 7423 | } | ||
| 7424 | |||
| 7425 | static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp, | 7009 | static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp, |
| 7426 | struct cgroup *old_cgrp, struct task_struct *task) | 7010 | struct cgroup *old_cgrp, struct task_struct *task) |
| 7427 | { | 7011 | { |
| @@ -7433,7 +7017,7 @@ static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp, | |||
| 7433 | if (!(task->flags & PF_EXITING)) | 7017 | if (!(task->flags & PF_EXITING)) |
| 7434 | return; | 7018 | return; |
| 7435 | 7019 | ||
| 7436 | perf_cgroup_move(task); | 7020 | perf_cgroup_attach_task(cgrp, task); |
| 7437 | } | 7021 | } |
| 7438 | 7022 | ||
| 7439 | struct cgroup_subsys perf_subsys = { | 7023 | struct cgroup_subsys perf_subsys = { |
| @@ -7442,6 +7026,6 @@ struct cgroup_subsys perf_subsys = { | |||
| 7442 | .create = perf_cgroup_create, | 7026 | .create = perf_cgroup_create, |
| 7443 | .destroy = perf_cgroup_destroy, | 7027 | .destroy = perf_cgroup_destroy, |
| 7444 | .exit = perf_cgroup_exit, | 7028 | .exit = perf_cgroup_exit, |
| 7445 | .attach = perf_cgroup_attach, | 7029 | .attach_task = perf_cgroup_attach_task, |
| 7446 | }; | 7030 | }; |
| 7447 | #endif /* CONFIG_CGROUP_PERF */ | 7031 | #endif /* CONFIG_CGROUP_PERF */ |
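The hunks above make two related interface changes in kernel/events/core.c: the software-event and overflow paths drop their old nmi argument, and perf_event_alloc()/perf_event_create_kernel_counter() gain a void *context that is stored in the new event->overflow_handler_context field and inherited by child events. A minimal sketch of an in-kernel caller of the updated interface follows; my_state, my_overflow_handler and the attr values are illustrative assumptions, not part of the patch.

/*
 * Hedged sketch, not from the patch: a hypothetical in-kernel user of the
 * new perf_event_create_kernel_counter() signature.
 */
#include <linux/perf_event.h>
#include <linux/atomic.h>

struct my_state {                               /* hypothetical per-counter cookie */
        atomic_t hits;
};

static void my_overflow_handler(struct perf_event *event,
                                struct perf_sample_data *data,
                                struct pt_regs *regs)
{
        struct my_state *st = event->overflow_handler_context; /* new field */

        atomic_inc(&st->hits);
}

static struct perf_event *my_create_counter(int cpu, struct my_state *st)
{
        struct perf_event_attr attr = {
                .type           = PERF_TYPE_HARDWARE,
                .config         = PERF_COUNT_HW_CPU_CYCLES,
                .size           = sizeof(attr),
                .sample_period  = 100000,
        };

        /* 'st' comes back as event->overflow_handler_context on overflow */
        return perf_event_create_kernel_counter(&attr, cpu, NULL,
                                                my_overflow_handler, st);
}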
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c index 086adf25a55e..b7971d6f38bf 100644 --- a/kernel/events/hw_breakpoint.c +++ b/kernel/events/hw_breakpoint.c | |||
| @@ -431,9 +431,11 @@ int register_perf_hw_breakpoint(struct perf_event *bp) | |||
| 431 | struct perf_event * | 431 | struct perf_event * |
| 432 | register_user_hw_breakpoint(struct perf_event_attr *attr, | 432 | register_user_hw_breakpoint(struct perf_event_attr *attr, |
| 433 | perf_overflow_handler_t triggered, | 433 | perf_overflow_handler_t triggered, |
| 434 | void *context, | ||
| 434 | struct task_struct *tsk) | 435 | struct task_struct *tsk) |
| 435 | { | 436 | { |
| 436 | return perf_event_create_kernel_counter(attr, -1, tsk, triggered); | 437 | return perf_event_create_kernel_counter(attr, -1, tsk, triggered, |
| 438 | context); | ||
| 437 | } | 439 | } |
| 438 | EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); | 440 | EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); |
| 439 | 441 | ||
| @@ -502,7 +504,8 @@ EXPORT_SYMBOL_GPL(unregister_hw_breakpoint); | |||
| 502 | */ | 504 | */ |
| 503 | struct perf_event * __percpu * | 505 | struct perf_event * __percpu * |
| 504 | register_wide_hw_breakpoint(struct perf_event_attr *attr, | 506 | register_wide_hw_breakpoint(struct perf_event_attr *attr, |
| 505 | perf_overflow_handler_t triggered) | 507 | perf_overflow_handler_t triggered, |
| 508 | void *context) | ||
| 506 | { | 509 | { |
| 507 | struct perf_event * __percpu *cpu_events, **pevent, *bp; | 510 | struct perf_event * __percpu *cpu_events, **pevent, *bp; |
| 508 | long err; | 511 | long err; |
| @@ -515,7 +518,8 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr, | |||
| 515 | get_online_cpus(); | 518 | get_online_cpus(); |
| 516 | for_each_online_cpu(cpu) { | 519 | for_each_online_cpu(cpu) { |
| 517 | pevent = per_cpu_ptr(cpu_events, cpu); | 520 | pevent = per_cpu_ptr(cpu_events, cpu); |
| 518 | bp = perf_event_create_kernel_counter(attr, cpu, NULL, triggered); | 521 | bp = perf_event_create_kernel_counter(attr, cpu, NULL, |
| 522 | triggered, context); | ||
| 519 | 523 | ||
| 520 | *pevent = bp; | 524 | *pevent = bp; |
| 521 | 525 | ||
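register_user_hw_breakpoint() and register_wide_hw_breakpoint() simply forward the new context pointer to perf_event_create_kernel_counter(). A hedged sketch of a caller of the updated helper; the watched address, breakpoint length, cookie and function names are placeholders rather than anything taken from the patch.

/*
 * Hedged sketch: watching one word of a task's memory with the updated
 * register_user_hw_breakpoint(). Only the extra context argument comes
 * from this change.
 */
#include <linux/hw_breakpoint.h>
#include <linux/perf_event.h>
#include <linux/sched.h>

static void bp_triggered(struct perf_event *bp,
                         struct perf_sample_data *data,
                         struct pt_regs *regs)
{
        void *cookie = bp->overflow_handler_context;    /* the 'context' arg */

        (void)cookie;   /* ... react to the write ... */
}

static struct perf_event *watch_word(struct task_struct *tsk,
                                     unsigned long addr, void *cookie)
{
        struct perf_event_attr attr;

        hw_breakpoint_init(&attr);
        attr.bp_addr = addr;
        attr.bp_len  = HW_BREAKPOINT_LEN_4;
        attr.bp_type = HW_BREAKPOINT_W;

        return register_user_hw_breakpoint(&attr, bp_triggered, cookie, tsk);
}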
diff --git a/kernel/events/internal.h b/kernel/events/internal.h new file mode 100644 index 000000000000..09097dd8116c --- /dev/null +++ b/kernel/events/internal.h | |||
| @@ -0,0 +1,96 @@ | |||
| 1 | #ifndef _KERNEL_EVENTS_INTERNAL_H | ||
| 2 | #define _KERNEL_EVENTS_INTERNAL_H | ||
| 3 | |||
| 4 | #define RING_BUFFER_WRITABLE 0x01 | ||
| 5 | |||
| 6 | struct ring_buffer { | ||
| 7 | atomic_t refcount; | ||
| 8 | struct rcu_head rcu_head; | ||
| 9 | #ifdef CONFIG_PERF_USE_VMALLOC | ||
| 10 | struct work_struct work; | ||
| 11 | int page_order; /* allocation order */ | ||
| 12 | #endif | ||
| 13 | int nr_pages; /* nr of data pages */ | ||
| 14 | int writable; /* are we writable */ | ||
| 15 | |||
| 16 | atomic_t poll; /* POLL_ for wakeups */ | ||
| 17 | |||
| 18 | local_t head; /* write position */ | ||
| 19 | local_t nest; /* nested writers */ | ||
| 20 | local_t events; /* event limit */ | ||
| 21 | local_t wakeup; /* wakeup stamp */ | ||
| 22 | local_t lost; /* nr records lost */ | ||
| 23 | |||
| 24 | long watermark; /* wakeup watermark */ | ||
| 25 | |||
| 26 | struct perf_event_mmap_page *user_page; | ||
| 27 | void *data_pages[0]; | ||
| 28 | }; | ||
| 29 | |||
| 30 | extern void rb_free(struct ring_buffer *rb); | ||
| 31 | extern struct ring_buffer * | ||
| 32 | rb_alloc(int nr_pages, long watermark, int cpu, int flags); | ||
| 33 | extern void perf_event_wakeup(struct perf_event *event); | ||
| 34 | |||
| 35 | extern void | ||
| 36 | perf_event_header__init_id(struct perf_event_header *header, | ||
| 37 | struct perf_sample_data *data, | ||
| 38 | struct perf_event *event); | ||
| 39 | extern void | ||
| 40 | perf_event__output_id_sample(struct perf_event *event, | ||
| 41 | struct perf_output_handle *handle, | ||
| 42 | struct perf_sample_data *sample); | ||
| 43 | |||
| 44 | extern struct page * | ||
| 45 | perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff); | ||
| 46 | |||
| 47 | #ifdef CONFIG_PERF_USE_VMALLOC | ||
| 48 | /* | ||
| 49 | * Back perf_mmap() with vmalloc memory. | ||
| 50 | * | ||
| 51 | * Required for architectures that have d-cache aliasing issues. | ||
| 52 | */ | ||
| 53 | |||
| 54 | static inline int page_order(struct ring_buffer *rb) | ||
| 55 | { | ||
| 56 | return rb->page_order; | ||
| 57 | } | ||
| 58 | |||
| 59 | #else | ||
| 60 | |||
| 61 | static inline int page_order(struct ring_buffer *rb) | ||
| 62 | { | ||
| 63 | return 0; | ||
| 64 | } | ||
| 65 | #endif | ||
| 66 | |||
| 67 | static unsigned long perf_data_size(struct ring_buffer *rb) | ||
| 68 | { | ||
| 69 | return rb->nr_pages << (PAGE_SHIFT + page_order(rb)); | ||
| 70 | } | ||
| 71 | |||
| 72 | static inline void | ||
| 73 | __output_copy(struct perf_output_handle *handle, | ||
| 74 | const void *buf, unsigned int len) | ||
| 75 | { | ||
| 76 | do { | ||
| 77 | unsigned long size = min_t(unsigned long, handle->size, len); | ||
| 78 | |||
| 79 | memcpy(handle->addr, buf, size); | ||
| 80 | |||
| 81 | len -= size; | ||
| 82 | handle->addr += size; | ||
| 83 | buf += size; | ||
| 84 | handle->size -= size; | ||
| 85 | if (!handle->size) { | ||
| 86 | struct ring_buffer *rb = handle->rb; | ||
| 87 | |||
| 88 | handle->page++; | ||
| 89 | handle->page &= rb->nr_pages - 1; | ||
| 90 | handle->addr = rb->data_pages[handle->page]; | ||
| 91 | handle->size = PAGE_SIZE << page_order(rb); | ||
| 92 | } | ||
| 93 | } while (len); | ||
| 94 | } | ||
| 95 | |||
| 96 | #endif /* _KERNEL_EVENTS_INTERNAL_H */ | ||
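Two invariants in the new header are easy to miss: perf_data_size() is nr_pages shifted left by (PAGE_SHIFT + page_order), and __output_copy() wraps handle->page with & (nr_pages - 1), which only works because nr_pages is a power of two. A small stand-alone illustration of that arithmetic (user-space C; the page size and page count are assumed values, not part of the patch):

/*
 * Hedged illustration of the size and wrap arithmetic only.
 */
#include <stdio.h>

int main(void)
{
        unsigned long page_shift = 12;                  /* 4 KiB pages      */
        unsigned long nr_pages   = 8;                   /* must be 2^n      */
        unsigned long data_size  = nr_pages << page_shift;
        unsigned long page       = nr_pages - 1;        /* last data page   */

        /* __output_copy() advances and wraps the page index like this */
        page = (page + 1) & (nr_pages - 1);

        printf("data area: %lu bytes, page after last: %lu\n",
               data_size, page);
        return 0;
}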
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c new file mode 100644 index 000000000000..a2a29205cc0f --- /dev/null +++ b/kernel/events/ring_buffer.c | |||
| @@ -0,0 +1,380 @@ | |||
| 1 | /* | ||
| 2 | * Performance events ring-buffer code: | ||
| 3 | * | ||
| 4 | * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> | ||
| 5 | * Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar | ||
| 6 | * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | ||
| 7 | * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> | ||
| 8 | * | ||
| 9 | * For licensing details see kernel-base/COPYING | ||
| 10 | */ | ||
| 11 | |||
| 12 | #include <linux/perf_event.h> | ||
| 13 | #include <linux/vmalloc.h> | ||
| 14 | #include <linux/slab.h> | ||
| 15 | |||
| 16 | #include "internal.h" | ||
| 17 | |||
| 18 | static bool perf_output_space(struct ring_buffer *rb, unsigned long tail, | ||
| 19 | unsigned long offset, unsigned long head) | ||
| 20 | { | ||
| 21 | unsigned long mask; | ||
| 22 | |||
| 23 | if (!rb->writable) | ||
| 24 | return true; | ||
| 25 | |||
| 26 | mask = perf_data_size(rb) - 1; | ||
| 27 | |||
| 28 | offset = (offset - tail) & mask; | ||
| 29 | head = (head - tail) & mask; | ||
| 30 | |||
| 31 | if ((int)(head - offset) < 0) | ||
| 32 | return false; | ||
| 33 | |||
| 34 | return true; | ||
| 35 | } | ||
| 36 | |||
| 37 | static void perf_output_wakeup(struct perf_output_handle *handle) | ||
| 38 | { | ||
| 39 | atomic_set(&handle->rb->poll, POLL_IN); | ||
| 40 | |||
| 41 | handle->event->pending_wakeup = 1; | ||
| 42 | irq_work_queue(&handle->event->pending); | ||
| 43 | } | ||
| 44 | |||
| 45 | /* | ||
| 46 | * We need to ensure a later event_id doesn't publish a head when a former | ||
| 47 | * event isn't done writing. However since we need to deal with NMIs we | ||
| 48 | * cannot fully serialize things. | ||
| 49 | * | ||
| 50 | * We only publish the head (and generate a wakeup) when the outer-most | ||
| 51 | * event completes. | ||
| 52 | */ | ||
| 53 | static void perf_output_get_handle(struct perf_output_handle *handle) | ||
| 54 | { | ||
| 55 | struct ring_buffer *rb = handle->rb; | ||
| 56 | |||
| 57 | preempt_disable(); | ||
| 58 | local_inc(&rb->nest); | ||
| 59 | handle->wakeup = local_read(&rb->wakeup); | ||
| 60 | } | ||
| 61 | |||
| 62 | static void perf_output_put_handle(struct perf_output_handle *handle) | ||
| 63 | { | ||
| 64 | struct ring_buffer *rb = handle->rb; | ||
| 65 | unsigned long head; | ||
| 66 | |||
| 67 | again: | ||
| 68 | head = local_read(&rb->head); | ||
| 69 | |||
| 70 | /* | ||
| 71 | * IRQ/NMI can happen here, which means we can miss a head update. | ||
| 72 | */ | ||
| 73 | |||
| 74 | if (!local_dec_and_test(&rb->nest)) | ||
| 75 | goto out; | ||
| 76 | |||
| 77 | /* | ||
| 78 | * Publish the known good head. Rely on the full barrier implied | ||
| 79 | * by atomic_dec_and_test() order the rb->head read and this | ||
| 80 | * write. | ||
| 81 | */ | ||
| 82 | rb->user_page->data_head = head; | ||
| 83 | |||
| 84 | /* | ||
| 85 | * Now check if we missed an update, rely on the (compiler) | ||
| 86 | * barrier in atomic_dec_and_test() to re-read rb->head. | ||
| 87 | */ | ||
| 88 | if (unlikely(head != local_read(&rb->head))) { | ||
| 89 | local_inc(&rb->nest); | ||
| 90 | goto again; | ||
| 91 | } | ||
| 92 | |||
| 93 | if (handle->wakeup != local_read(&rb->wakeup)) | ||
| 94 | perf_output_wakeup(handle); | ||
| 95 | |||
| 96 | out: | ||
| 97 | preempt_enable(); | ||
| 98 | } | ||
| 99 | |||
| 100 | int perf_output_begin(struct perf_output_handle *handle, | ||
| 101 | struct perf_event *event, unsigned int size) | ||
| 102 | { | ||
| 103 | struct ring_buffer *rb; | ||
| 104 | unsigned long tail, offset, head; | ||
| 105 | int have_lost; | ||
| 106 | struct perf_sample_data sample_data; | ||
| 107 | struct { | ||
| 108 | struct perf_event_header header; | ||
| 109 | u64 id; | ||
| 110 | u64 lost; | ||
| 111 | } lost_event; | ||
| 112 | |||
| 113 | rcu_read_lock(); | ||
| 114 | /* | ||
| 115 | * For inherited events we send all the output towards the parent. | ||
| 116 | */ | ||
| 117 | if (event->parent) | ||
| 118 | event = event->parent; | ||
| 119 | |||
| 120 | rb = rcu_dereference(event->rb); | ||
| 121 | if (!rb) | ||
| 122 | goto out; | ||
| 123 | |||
| 124 | handle->rb = rb; | ||
| 125 | handle->event = event; | ||
| 126 | |||
| 127 | if (!rb->nr_pages) | ||
| 128 | goto out; | ||
| 129 | |||
| 130 | have_lost = local_read(&rb->lost); | ||
| 131 | if (have_lost) { | ||
| 132 | lost_event.header.size = sizeof(lost_event); | ||
| 133 | perf_event_header__init_id(&lost_event.header, &sample_data, | ||
| 134 | event); | ||
| 135 | size += lost_event.header.size; | ||
| 136 | } | ||
| 137 | |||
| 138 | perf_output_get_handle(handle); | ||
| 139 | |||
| 140 | do { | ||
| 141 | /* | ||
| 142 | * Userspace could choose to issue a mb() before updating the | ||
| 143 | * tail pointer. So that all reads will be completed before the | ||
| 144 | * write is issued. | ||
| 145 | */ | ||
| 146 | tail = ACCESS_ONCE(rb->user_page->data_tail); | ||
| 147 | smp_rmb(); | ||
| 148 | offset = head = local_read(&rb->head); | ||
| 149 | head += size; | ||
| 150 | if (unlikely(!perf_output_space(rb, tail, offset, head))) | ||
| 151 | goto fail; | ||
| 152 | } while (local_cmpxchg(&rb->head, offset, head) != offset); | ||
| 153 | |||
| 154 | if (head - local_read(&rb->wakeup) > rb->watermark) | ||
| 155 | local_add(rb->watermark, &rb->wakeup); | ||
| 156 | |||
| 157 | handle->page = offset >> (PAGE_SHIFT + page_order(rb)); | ||
| 158 | handle->page &= rb->nr_pages - 1; | ||
| 159 | handle->size = offset & ((PAGE_SIZE << page_order(rb)) - 1); | ||
| 160 | handle->addr = rb->data_pages[handle->page]; | ||
| 161 | handle->addr += handle->size; | ||
| 162 | handle->size = (PAGE_SIZE << page_order(rb)) - handle->size; | ||
| 163 | |||
| 164 | if (have_lost) { | ||
| 165 | lost_event.header.type = PERF_RECORD_LOST; | ||
| 166 | lost_event.header.misc = 0; | ||
| 167 | lost_event.id = event->id; | ||
| 168 | lost_event.lost = local_xchg(&rb->lost, 0); | ||
| 169 | |||
| 170 | perf_output_put(handle, lost_event); | ||
| 171 | perf_event__output_id_sample(event, handle, &sample_data); | ||
| 172 | } | ||
| 173 | |||
| 174 | return 0; | ||
| 175 | |||
| 176 | fail: | ||
| 177 | local_inc(&rb->lost); | ||
| 178 | perf_output_put_handle(handle); | ||
| 179 | out: | ||
| 180 | rcu_read_unlock(); | ||
| 181 | |||
| 182 | return -ENOSPC; | ||
| 183 | } | ||
| 184 | |||
| 185 | void perf_output_copy(struct perf_output_handle *handle, | ||
| 186 | const void *buf, unsigned int len) | ||
| 187 | { | ||
| 188 | __output_copy(handle, buf, len); | ||
| 189 | } | ||
| 190 | |||
| 191 | void perf_output_end(struct perf_output_handle *handle) | ||
| 192 | { | ||
| 193 | perf_output_put_handle(handle); | ||
| 194 | rcu_read_unlock(); | ||
| 195 | } | ||
| 196 | |||
| 197 | static void | ||
| 198 | ring_buffer_init(struct ring_buffer *rb, long watermark, int flags) | ||
| 199 | { | ||
| 200 | long max_size = perf_data_size(rb); | ||
| 201 | |||
| 202 | if (watermark) | ||
| 203 | rb->watermark = min(max_size, watermark); | ||
| 204 | |||
| 205 | if (!rb->watermark) | ||
| 206 | rb->watermark = max_size / 2; | ||
| 207 | |||
| 208 | if (flags & RING_BUFFER_WRITABLE) | ||
| 209 | rb->writable = 1; | ||
| 210 | |||
| 211 | atomic_set(&rb->refcount, 1); | ||
| 212 | } | ||
| 213 | |||
| 214 | #ifndef CONFIG_PERF_USE_VMALLOC | ||
| 215 | |||
| 216 | /* | ||
| 217 | * Back perf_mmap() with regular GFP_KERNEL-0 pages. | ||
| 218 | */ | ||
| 219 | |||
| 220 | struct page * | ||
| 221 | perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff) | ||
| 222 | { | ||
| 223 | if (pgoff > rb->nr_pages) | ||
| 224 | return NULL; | ||
| 225 | |||
| 226 | if (pgoff == 0) | ||
| 227 | return virt_to_page(rb->user_page); | ||
| 228 | |||
| 229 | return virt_to_page(rb->data_pages[pgoff - 1]); | ||
| 230 | } | ||
| 231 | |||
| 232 | static void *perf_mmap_alloc_page(int cpu) | ||
| 233 | { | ||
| 234 | struct page *page; | ||
| 235 | int node; | ||
| 236 | |||
| 237 | node = (cpu == -1) ? cpu : cpu_to_node(cpu); | ||
| 238 | page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); | ||
| 239 | if (!page) | ||
| 240 | return NULL; | ||
| 241 | |||
| 242 | return page_address(page); | ||
| 243 | } | ||
| 244 | |||
| 245 | struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags) | ||
| 246 | { | ||
| 247 | struct ring_buffer *rb; | ||
| 248 | unsigned long size; | ||
| 249 | int i; | ||
| 250 | |||
| 251 | size = sizeof(struct ring_buffer); | ||
| 252 | size += nr_pages * sizeof(void *); | ||
| 253 | |||
| 254 | rb = kzalloc(size, GFP_KERNEL); | ||
| 255 | if (!rb) | ||
| 256 | goto fail; | ||
| 257 | |||
| 258 | rb->user_page = perf_mmap_alloc_page(cpu); | ||
| 259 | if (!rb->user_page) | ||
| 260 | goto fail_user_page; | ||
| 261 | |||
| 262 | for (i = 0; i < nr_pages; i++) { | ||
| 263 | rb->data_pages[i] = perf_mmap_alloc_page(cpu); | ||
| 264 | if (!rb->data_pages[i]) | ||
| 265 | goto fail_data_pages; | ||
| 266 | } | ||
| 267 | |||
| 268 | rb->nr_pages = nr_pages; | ||
| 269 | |||
| 270 | ring_buffer_init(rb, watermark, flags); | ||
| 271 | |||
| 272 | return rb; | ||
| 273 | |||
| 274 | fail_data_pages: | ||
| 275 | for (i--; i >= 0; i--) | ||
| 276 | free_page((unsigned long)rb->data_pages[i]); | ||
| 277 | |||
| 278 | free_page((unsigned long)rb->user_page); | ||
| 279 | |||
| 280 | fail_user_page: | ||
| 281 | kfree(rb); | ||
| 282 | |||
| 283 | fail: | ||
| 284 | return NULL; | ||
| 285 | } | ||
| 286 | |||
| 287 | static void perf_mmap_free_page(unsigned long addr) | ||
| 288 | { | ||
| 289 | struct page *page = virt_to_page((void *)addr); | ||
| 290 | |||
| 291 | page->mapping = NULL; | ||
| 292 | __free_page(page); | ||
| 293 | } | ||
| 294 | |||
| 295 | void rb_free(struct ring_buffer *rb) | ||
| 296 | { | ||
| 297 | int i; | ||
| 298 | |||
| 299 | perf_mmap_free_page((unsigned long)rb->user_page); | ||
| 300 | for (i = 0; i < rb->nr_pages; i++) | ||
| 301 | perf_mmap_free_page((unsigned long)rb->data_pages[i]); | ||
| 302 | kfree(rb); | ||
| 303 | } | ||
| 304 | |||
| 305 | #else | ||
| 306 | |||
| 307 | struct page * | ||
| 308 | perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff) | ||
| 309 | { | ||
| 310 | if (pgoff > (1UL << page_order(rb))) | ||
| 311 | return NULL; | ||
| 312 | |||
| 313 | return vmalloc_to_page((void *)rb->user_page + pgoff * PAGE_SIZE); | ||
| 314 | } | ||
| 315 | |||
| 316 | static void perf_mmap_unmark_page(void *addr) | ||
| 317 | { | ||
| 318 | struct page *page = vmalloc_to_page(addr); | ||
| 319 | |||
| 320 | page->mapping = NULL; | ||
| 321 | } | ||
| 322 | |||
| 323 | static void rb_free_work(struct work_struct *work) | ||
| 324 | { | ||
| 325 | struct ring_buffer *rb; | ||
| 326 | void *base; | ||
| 327 | int i, nr; | ||
| 328 | |||
| 329 | rb = container_of(work, struct ring_buffer, work); | ||
| 330 | nr = 1 << page_order(rb); | ||
| 331 | |||
| 332 | base = rb->user_page; | ||
| 333 | for (i = 0; i < nr + 1; i++) | ||
| 334 | perf_mmap_unmark_page(base + (i * PAGE_SIZE)); | ||
| 335 | |||
| 336 | vfree(base); | ||
| 337 | kfree(rb); | ||
| 338 | } | ||
| 339 | |||
| 340 | void rb_free(struct ring_buffer *rb) | ||
| 341 | { | ||
| 342 | schedule_work(&rb->work); | ||
| 343 | } | ||
| 344 | |||
| 345 | struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags) | ||
| 346 | { | ||
| 347 | struct ring_buffer *rb; | ||
| 348 | unsigned long size; | ||
| 349 | void *all_buf; | ||
| 350 | |||
| 351 | size = sizeof(struct ring_buffer); | ||
| 352 | size += sizeof(void *); | ||
| 353 | |||
| 354 | rb = kzalloc(size, GFP_KERNEL); | ||
| 355 | if (!rb) | ||
| 356 | goto fail; | ||
| 357 | |||
| 358 | INIT_WORK(&rb->work, rb_free_work); | ||
| 359 | |||
| 360 | all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE); | ||
| 361 | if (!all_buf) | ||
| 362 | goto fail_all_buf; | ||
| 363 | |||
| 364 | rb->user_page = all_buf; | ||
| 365 | rb->data_pages[0] = all_buf + PAGE_SIZE; | ||
| 366 | rb->page_order = ilog2(nr_pages); | ||
| 367 | rb->nr_pages = 1; | ||
| 368 | |||
| 369 | ring_buffer_init(rb, watermark, flags); | ||
| 370 | |||
| 371 | return rb; | ||
| 372 | |||
| 373 | fail_all_buf: | ||
| 374 | kfree(rb); | ||
| 375 | |||
| 376 | fail: | ||
| 377 | return NULL; | ||
| 378 | } | ||
| 379 | |||
| 380 | #endif | ||
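perf_output_begin() trusts user space to advance data_tail only after it has finished reading, and perf_output_put_handle() publishes data_head only from the outer-most nested writer. A hedged sketch of the matching user-space consumer loop follows; the mmap layout (one perf_event_mmap_page followed by the data pages) and the power-of-two data size are the usual perf conventions rather than anything this patch introduces, and a real parser would still have to reassemble records that straddle the wrap boundary.

/*
 * Hedged user-space sketch of the consumer side of the data_head/data_tail
 * protocol. Barriers use GCC builtins for brevity; error handling and
 * record parsing are omitted.
 */
#include <linux/perf_event.h>

static void drain(struct perf_event_mmap_page *meta, char *data,
                  __u64 data_size,
                  void (*consume)(const char *buf, __u64 len))
{
        __u64 head = meta->data_head;
        __u64 tail = meta->data_tail;

        __sync_synchronize();           /* read data_head before the data    */

        while (tail != head) {
                __u64 off = tail & (data_size - 1);     /* power-of-two size */
                __u64 len = head - tail;

                if (off + len > data_size)      /* split at the wrap point   */
                        len = data_size - off;

                consume(data + off, len);
                tail += len;
        }

        __sync_synchronize();           /* finish reads before freeing space */
        meta->data_tail = tail;         /* lets perf_output_space() proceed  */
}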
diff --git a/kernel/exit.c b/kernel/exit.c index 20a406471525..2913b3509d42 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
| @@ -85,7 +85,6 @@ static void __exit_signal(struct task_struct *tsk) | |||
| 85 | struct tty_struct *uninitialized_var(tty); | 85 | struct tty_struct *uninitialized_var(tty); |
| 86 | 86 | ||
| 87 | sighand = rcu_dereference_check(tsk->sighand, | 87 | sighand = rcu_dereference_check(tsk->sighand, |
| 88 | rcu_read_lock_held() || | ||
| 89 | lockdep_tasklist_lock_is_held()); | 88 | lockdep_tasklist_lock_is_held()); |
| 90 | spin_lock(&sighand->siglock); | 89 | spin_lock(&sighand->siglock); |
| 91 | 90 | ||
| @@ -169,7 +168,6 @@ void release_task(struct task_struct * p) | |||
| 169 | struct task_struct *leader; | 168 | struct task_struct *leader; |
| 170 | int zap_leader; | 169 | int zap_leader; |
| 171 | repeat: | 170 | repeat: |
| 172 | tracehook_prepare_release_task(p); | ||
| 173 | /* don't need to get the RCU readlock here - the process is dead and | 171 | /* don't need to get the RCU readlock here - the process is dead and |
| 174 | * can't be modifying its own credentials. But shut RCU-lockdep up */ | 172 | * can't be modifying its own credentials. But shut RCU-lockdep up */ |
| 175 | rcu_read_lock(); | 173 | rcu_read_lock(); |
| @@ -179,7 +177,7 @@ repeat: | |||
| 179 | proc_flush_task(p); | 177 | proc_flush_task(p); |
| 180 | 178 | ||
| 181 | write_lock_irq(&tasklist_lock); | 179 | write_lock_irq(&tasklist_lock); |
| 182 | tracehook_finish_release_task(p); | 180 | ptrace_release_task(p); |
| 183 | __exit_signal(p); | 181 | __exit_signal(p); |
| 184 | 182 | ||
| 185 | /* | 183 | /* |
| @@ -190,22 +188,12 @@ repeat: | |||
| 190 | zap_leader = 0; | 188 | zap_leader = 0; |
| 191 | leader = p->group_leader; | 189 | leader = p->group_leader; |
| 192 | if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) { | 190 | if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) { |
| 193 | BUG_ON(task_detached(leader)); | ||
| 194 | do_notify_parent(leader, leader->exit_signal); | ||
| 195 | /* | 191 | /* |
| 196 | * If we were the last child thread and the leader has | 192 | * If we were the last child thread and the leader has |
| 197 | * exited already, and the leader's parent ignores SIGCHLD, | 193 | * exited already, and the leader's parent ignores SIGCHLD, |
| 198 | * then we are the one who should release the leader. | 194 | * then we are the one who should release the leader. |
| 199 | * | ||
| 200 | * do_notify_parent() will have marked it self-reaping in | ||
| 201 | * that case. | ||
| 202 | */ | ||
| 203 | zap_leader = task_detached(leader); | ||
| 204 | |||
| 205 | /* | ||
| 206 | * This maintains the invariant that release_task() | ||
| 207 | * only runs on a task in EXIT_DEAD, just for sanity. | ||
| 208 | */ | 195 | */ |
| 196 | zap_leader = do_notify_parent(leader, leader->exit_signal); | ||
| 209 | if (zap_leader) | 197 | if (zap_leader) |
| 210 | leader->exit_state = EXIT_DEAD; | 198 | leader->exit_state = EXIT_DEAD; |
| 211 | } | 199 | } |
| @@ -277,18 +265,16 @@ int is_current_pgrp_orphaned(void) | |||
| 277 | return retval; | 265 | return retval; |
| 278 | } | 266 | } |
| 279 | 267 | ||
| 280 | static int has_stopped_jobs(struct pid *pgrp) | 268 | static bool has_stopped_jobs(struct pid *pgrp) |
| 281 | { | 269 | { |
| 282 | int retval = 0; | ||
| 283 | struct task_struct *p; | 270 | struct task_struct *p; |
| 284 | 271 | ||
| 285 | do_each_pid_task(pgrp, PIDTYPE_PGID, p) { | 272 | do_each_pid_task(pgrp, PIDTYPE_PGID, p) { |
| 286 | if (!task_is_stopped(p)) | 273 | if (p->signal->flags & SIGNAL_STOP_STOPPED) |
| 287 | continue; | 274 | return true; |
| 288 | retval = 1; | ||
| 289 | break; | ||
| 290 | } while_each_pid_task(pgrp, PIDTYPE_PGID, p); | 275 | } while_each_pid_task(pgrp, PIDTYPE_PGID, p); |
| 291 | return retval; | 276 | |
| 277 | return false; | ||
| 292 | } | 278 | } |
| 293 | 279 | ||
| 294 | /* | 280 | /* |
| @@ -561,29 +547,28 @@ void exit_files(struct task_struct *tsk) | |||
| 561 | 547 | ||
| 562 | #ifdef CONFIG_MM_OWNER | 548 | #ifdef CONFIG_MM_OWNER |
| 563 | /* | 549 | /* |
| 564 | * Task p is exiting and it owned mm, lets find a new owner for it | 550 | * A task is exiting. If it owned this mm, find a new owner for the mm. |
| 565 | */ | 551 | */ |
| 566 | static inline int | ||
| 567 | mm_need_new_owner(struct mm_struct *mm, struct task_struct *p) | ||
| 568 | { | ||
| 569 | /* | ||
| 570 | * If there are other users of the mm and the owner (us) is exiting | ||
| 571 | * we need to find a new owner to take on the responsibility. | ||
| 572 | */ | ||
| 573 | if (atomic_read(&mm->mm_users) <= 1) | ||
| 574 | return 0; | ||
| 575 | if (mm->owner != p) | ||
| 576 | return 0; | ||
| 577 | return 1; | ||
| 578 | } | ||
| 579 | |||
| 580 | void mm_update_next_owner(struct mm_struct *mm) | 552 | void mm_update_next_owner(struct mm_struct *mm) |
| 581 | { | 553 | { |
| 582 | struct task_struct *c, *g, *p = current; | 554 | struct task_struct *c, *g, *p = current; |
| 583 | 555 | ||
| 584 | retry: | 556 | retry: |
| 585 | if (!mm_need_new_owner(mm, p)) | 557 | /* |
| 558 | * If the exiting or execing task is not the owner, it's | ||
| 559 | * someone else's problem. | ||
| 560 | */ | ||
| 561 | if (mm->owner != p) | ||
| 586 | return; | 562 | return; |
| 563 | /* | ||
| 564 | * The current owner is exiting/execing and there are no other | ||
| 565 | * candidates. Do not leave the mm pointing to a possibly | ||
| 566 | * freed task structure. | ||
| 567 | */ | ||
| 568 | if (atomic_read(&mm->mm_users) <= 1) { | ||
| 569 | mm->owner = NULL; | ||
| 570 | return; | ||
| 571 | } | ||
| 587 | 572 | ||
| 588 | read_lock(&tasklist_lock); | 573 | read_lock(&tasklist_lock); |
| 589 | /* | 574 | /* |
| @@ -752,7 +737,7 @@ static void reparent_leader(struct task_struct *father, struct task_struct *p, | |||
| 752 | { | 737 | { |
| 753 | list_move_tail(&p->sibling, &p->real_parent->children); | 738 | list_move_tail(&p->sibling, &p->real_parent->children); |
| 754 | 739 | ||
| 755 | if (task_detached(p)) | 740 | if (p->exit_state == EXIT_DEAD) |
| 756 | return; | 741 | return; |
| 757 | /* | 742 | /* |
| 758 | * If this is a threaded reparent there is no need to | 743 | * If this is a threaded reparent there is no need to |
| @@ -765,10 +750,9 @@ static void reparent_leader(struct task_struct *father, struct task_struct *p, | |||
| 765 | p->exit_signal = SIGCHLD; | 750 | p->exit_signal = SIGCHLD; |
| 766 | 751 | ||
| 767 | /* If it has exited notify the new parent about this child's death. */ | 752 | /* If it has exited notify the new parent about this child's death. */ |
| 768 | if (!task_ptrace(p) && | 753 | if (!p->ptrace && |
| 769 | p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) { | 754 | p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) { |
| 770 | do_notify_parent(p, p->exit_signal); | 755 | if (do_notify_parent(p, p->exit_signal)) { |
| 771 | if (task_detached(p)) { | ||
| 772 | p->exit_state = EXIT_DEAD; | 756 | p->exit_state = EXIT_DEAD; |
| 773 | list_move_tail(&p->sibling, dead); | 757 | list_move_tail(&p->sibling, dead); |
| 774 | } | 758 | } |
| @@ -795,7 +779,7 @@ static void forget_original_parent(struct task_struct *father) | |||
| 795 | do { | 779 | do { |
| 796 | t->real_parent = reaper; | 780 | t->real_parent = reaper; |
| 797 | if (t->parent == father) { | 781 | if (t->parent == father) { |
| 798 | BUG_ON(task_ptrace(t)); | 782 | BUG_ON(t->ptrace); |
| 799 | t->parent = t->real_parent; | 783 | t->parent = t->real_parent; |
| 800 | } | 784 | } |
| 801 | if (t->pdeath_signal) | 785 | if (t->pdeath_signal) |
| @@ -820,8 +804,7 @@ static void forget_original_parent(struct task_struct *father) | |||
| 820 | */ | 804 | */ |
| 821 | static void exit_notify(struct task_struct *tsk, int group_dead) | 805 | static void exit_notify(struct task_struct *tsk, int group_dead) |
| 822 | { | 806 | { |
| 823 | int signal; | 807 | bool autoreap; |
| 824 | void *cookie; | ||
| 825 | 808 | ||
| 826 | /* | 809 | /* |
| 827 | * This does two things: | 810 | * This does two things: |
| @@ -852,26 +835,33 @@ static void exit_notify(struct task_struct *tsk, int group_dead) | |||
| 852 | * we have changed execution domain as these two values started | 835 | * we have changed execution domain as these two values started |
| 853 | * the same after a fork. | 836 | * the same after a fork. |
| 854 | */ | 837 | */ |
| 855 | if (tsk->exit_signal != SIGCHLD && !task_detached(tsk) && | 838 | if (thread_group_leader(tsk) && tsk->exit_signal != SIGCHLD && |
| 856 | (tsk->parent_exec_id != tsk->real_parent->self_exec_id || | 839 | (tsk->parent_exec_id != tsk->real_parent->self_exec_id || |
| 857 | tsk->self_exec_id != tsk->parent_exec_id)) | 840 | tsk->self_exec_id != tsk->parent_exec_id)) |
| 858 | tsk->exit_signal = SIGCHLD; | 841 | tsk->exit_signal = SIGCHLD; |
| 859 | 842 | ||
| 860 | signal = tracehook_notify_death(tsk, &cookie, group_dead); | 843 | if (unlikely(tsk->ptrace)) { |
| 861 | if (signal >= 0) | 844 | int sig = thread_group_leader(tsk) && |
| 862 | signal = do_notify_parent(tsk, signal); | 845 | thread_group_empty(tsk) && |
| 846 | !ptrace_reparented(tsk) ? | ||
| 847 | tsk->exit_signal : SIGCHLD; | ||
| 848 | autoreap = do_notify_parent(tsk, sig); | ||
| 849 | } else if (thread_group_leader(tsk)) { | ||
| 850 | autoreap = thread_group_empty(tsk) && | ||
| 851 | do_notify_parent(tsk, tsk->exit_signal); | ||
| 852 | } else { | ||
| 853 | autoreap = true; | ||
| 854 | } | ||
| 863 | 855 | ||
| 864 | tsk->exit_state = signal == DEATH_REAP ? EXIT_DEAD : EXIT_ZOMBIE; | 856 | tsk->exit_state = autoreap ? EXIT_DEAD : EXIT_ZOMBIE; |
| 865 | 857 | ||
| 866 | /* mt-exec, de_thread() is waiting for group leader */ | 858 | /* mt-exec, de_thread() is waiting for group leader */ |
| 867 | if (unlikely(tsk->signal->notify_count < 0)) | 859 | if (unlikely(tsk->signal->notify_count < 0)) |
| 868 | wake_up_process(tsk->signal->group_exit_task); | 860 | wake_up_process(tsk->signal->group_exit_task); |
| 869 | write_unlock_irq(&tasklist_lock); | 861 | write_unlock_irq(&tasklist_lock); |
| 870 | 862 | ||
| 871 | tracehook_report_death(tsk, signal, cookie, group_dead); | ||
| 872 | |||
| 873 | /* If the process is dead, release it - nobody will wait for it */ | 863 | /* If the process is dead, release it - nobody will wait for it */ |
| 874 | if (signal == DEATH_REAP) | 864 | if (autoreap) |
| 875 | release_task(tsk); | 865 | release_task(tsk); |
| 876 | } | 866 | } |
| 877 | 867 | ||
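The exit_notify() rewrite above replaces the old signal/DEATH_REAP dance with a single boolean: do_notify_parent() now reports whether the parent ignores the notification, i.e. whether the task may self-reap. A distilled, non-literal restatement of the non-ptraced thread-group-leader case; sub-threads simply autoreap without notifying anyone, locking is taken for granted, and the helper name is invented:

/*
 * Hedged distillation, not literal patch code.
 */
#include <linux/sched.h>

static void leader_notify_and_maybe_reap(struct task_struct *tsk)
{
        bool autoreap = thread_group_empty(tsk) &&
                        do_notify_parent(tsk, tsk->exit_signal);

        tsk->exit_state = autoreap ? EXIT_DEAD : EXIT_ZOMBIE;
        if (autoreap)
                release_task(tsk);      /* nobody will wait for it */
}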
| @@ -907,7 +897,6 @@ NORET_TYPE void do_exit(long code) | |||
| 907 | 897 | ||
| 908 | profile_task_exit(tsk); | 898 | profile_task_exit(tsk); |
| 909 | 899 | ||
| 910 | WARN_ON(atomic_read(&tsk->fs_excl)); | ||
| 911 | WARN_ON(blk_needs_flush_plug(tsk)); | 900 | WARN_ON(blk_needs_flush_plug(tsk)); |
| 912 | 901 | ||
| 913 | if (unlikely(in_interrupt())) | 902 | if (unlikely(in_interrupt())) |
| @@ -924,7 +913,7 @@ NORET_TYPE void do_exit(long code) | |||
| 924 | */ | 913 | */ |
| 925 | set_fs(USER_DS); | 914 | set_fs(USER_DS); |
| 926 | 915 | ||
| 927 | tracehook_report_exit(&code); | 916 | ptrace_event(PTRACE_EVENT_EXIT, code); |
| 928 | 917 | ||
| 929 | validate_creds_for_do_exit(tsk); | 918 | validate_creds_for_do_exit(tsk); |
| 930 | 919 | ||
| @@ -991,6 +980,7 @@ NORET_TYPE void do_exit(long code) | |||
| 991 | trace_sched_process_exit(tsk); | 980 | trace_sched_process_exit(tsk); |
| 992 | 981 | ||
| 993 | exit_sem(tsk); | 982 | exit_sem(tsk); |
| 983 | exit_shm(tsk); | ||
| 994 | exit_files(tsk); | 984 | exit_files(tsk); |
| 995 | exit_fs(tsk); | 985 | exit_fs(tsk); |
| 996 | check_stack_usage(); | 986 | check_stack_usage(); |
| @@ -1236,9 +1226,9 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) | |||
| 1236 | traced = ptrace_reparented(p); | 1226 | traced = ptrace_reparented(p); |
| 1237 | /* | 1227 | /* |
| 1238 | * It can be ptraced but not reparented, check | 1228 | * It can be ptraced but not reparented, check |
| 1239 | * !task_detached() to filter out sub-threads. | 1229 | * thread_group_leader() to filter out sub-threads. |
| 1240 | */ | 1230 | */ |
| 1241 | if (likely(!traced) && likely(!task_detached(p))) { | 1231 | if (likely(!traced) && thread_group_leader(p)) { |
| 1242 | struct signal_struct *psig; | 1232 | struct signal_struct *psig; |
| 1243 | struct signal_struct *sig; | 1233 | struct signal_struct *sig; |
| 1244 | unsigned long maxrss; | 1234 | unsigned long maxrss; |
| @@ -1346,16 +1336,13 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) | |||
| 1346 | /* We dropped tasklist, ptracer could die and untrace */ | 1336 | /* We dropped tasklist, ptracer could die and untrace */ |
| 1347 | ptrace_unlink(p); | 1337 | ptrace_unlink(p); |
| 1348 | /* | 1338 | /* |
| 1349 | * If this is not a detached task, notify the parent. | 1339 | * If this is not a sub-thread, notify the parent. |
| 1350 | * If it's still not detached after that, don't release | 1340 | * If parent wants a zombie, don't release it now. |
| 1351 | * it now. | ||
| 1352 | */ | 1341 | */ |
| 1353 | if (!task_detached(p)) { | 1342 | if (thread_group_leader(p) && |
| 1354 | do_notify_parent(p, p->exit_signal); | 1343 | !do_notify_parent(p, p->exit_signal)) { |
| 1355 | if (!task_detached(p)) { | 1344 | p->exit_state = EXIT_ZOMBIE; |
| 1356 | p->exit_state = EXIT_ZOMBIE; | 1345 | p = NULL; |
| 1357 | p = NULL; | ||
| 1358 | } | ||
| 1359 | } | 1346 | } |
| 1360 | write_unlock_irq(&tasklist_lock); | 1347 | write_unlock_irq(&tasklist_lock); |
| 1361 | } | 1348 | } |
| @@ -1368,7 +1355,8 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) | |||
| 1368 | static int *task_stopped_code(struct task_struct *p, bool ptrace) | 1355 | static int *task_stopped_code(struct task_struct *p, bool ptrace) |
| 1369 | { | 1356 | { |
| 1370 | if (ptrace) { | 1357 | if (ptrace) { |
| 1371 | if (task_is_stopped_or_traced(p)) | 1358 | if (task_is_stopped_or_traced(p) && |
| 1359 | !(p->jobctl & JOBCTL_LISTENING)) | ||
| 1372 | return &p->exit_code; | 1360 | return &p->exit_code; |
| 1373 | } else { | 1361 | } else { |
| 1374 | if (p->signal->flags & SIGNAL_STOP_STOPPED) | 1362 | if (p->signal->flags & SIGNAL_STOP_STOPPED) |
| @@ -1564,7 +1552,7 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace, | |||
| 1564 | * Notification and reaping will be cascaded to the real | 1552 | * Notification and reaping will be cascaded to the real |
| 1565 | * parent when the ptracer detaches. | 1553 | * parent when the ptracer detaches. |
| 1566 | */ | 1554 | */ |
| 1567 | if (likely(!ptrace) && unlikely(task_ptrace(p))) { | 1555 | if (likely(!ptrace) && unlikely(p->ptrace)) { |
| 1568 | /* it will become visible, clear notask_error */ | 1556 | /* it will become visible, clear notask_error */ |
| 1569 | wo->notask_error = 0; | 1557 | wo->notask_error = 0; |
| 1570 | return 0; | 1558 | return 0; |
| @@ -1607,8 +1595,7 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace, | |||
| 1607 | * own children, it should create a separate process which | 1595 | * own children, it should create a separate process which |
| 1608 | * takes the role of real parent. | 1596 | * takes the role of real parent. |
| 1609 | */ | 1597 | */ |
| 1610 | if (likely(!ptrace) && task_ptrace(p) && | 1598 | if (likely(!ptrace) && p->ptrace && !ptrace_reparented(p)) |
| 1611 | same_thread_group(p->parent, p->real_parent)) | ||
| 1612 | return 0; | 1599 | return 0; |
| 1613 | 1600 | ||
| 1614 | /* | 1601 | /* |
diff --git a/kernel/fork.c b/kernel/fork.c index 0276c30401a0..e7ceaca89609 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
| @@ -37,7 +37,6 @@ | |||
| 37 | #include <linux/swap.h> | 37 | #include <linux/swap.h> |
| 38 | #include <linux/syscalls.h> | 38 | #include <linux/syscalls.h> |
| 39 | #include <linux/jiffies.h> | 39 | #include <linux/jiffies.h> |
| 40 | #include <linux/tracehook.h> | ||
| 41 | #include <linux/futex.h> | 40 | #include <linux/futex.h> |
| 42 | #include <linux/compat.h> | 41 | #include <linux/compat.h> |
| 43 | #include <linux/kthread.h> | 42 | #include <linux/kthread.h> |
| @@ -81,7 +80,7 @@ | |||
| 81 | * Protected counters by write_lock_irq(&tasklist_lock) | 80 | * Protected counters by write_lock_irq(&tasklist_lock) |
| 82 | */ | 81 | */ |
| 83 | unsigned long total_forks; /* Handle normal Linux uptimes. */ | 82 | unsigned long total_forks; /* Handle normal Linux uptimes. */ |
| 84 | int nr_threads; /* The idle threads do not count.. */ | 83 | int nr_threads; /* The idle threads do not count.. */ |
| 85 | 84 | ||
| 86 | int max_threads; /* tunable limit on nr_threads */ | 85 | int max_threads; /* tunable limit on nr_threads */ |
| 87 | 86 | ||
| @@ -233,7 +232,7 @@ void __init fork_init(unsigned long mempages) | |||
| 233 | /* | 232 | /* |
| 234 | * we need to allow at least 20 threads to boot a system | 233 | * we need to allow at least 20 threads to boot a system |
| 235 | */ | 234 | */ |
| 236 | if(max_threads < 20) | 235 | if (max_threads < 20) |
| 237 | max_threads = 20; | 236 | max_threads = 20; |
| 238 | 237 | ||
| 239 | init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2; | 238 | init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2; |
| @@ -269,7 +268,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) | |||
| 269 | return NULL; | 268 | return NULL; |
| 270 | } | 269 | } |
| 271 | 270 | ||
| 272 | err = arch_dup_task_struct(tsk, orig); | 271 | err = arch_dup_task_struct(tsk, orig); |
| 273 | if (err) | 272 | if (err) |
| 274 | goto out; | 273 | goto out; |
| 275 | 274 | ||
| @@ -289,9 +288,11 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) | |||
| 289 | tsk->stack_canary = get_random_int(); | 288 | tsk->stack_canary = get_random_int(); |
| 290 | #endif | 289 | #endif |
| 291 | 290 | ||
| 292 | /* One for us, one for whoever does the "release_task()" (usually parent) */ | 291 | /* |
| 293 | atomic_set(&tsk->usage,2); | 292 | * One for us, one for whoever does the "release_task()" (usually |
| 294 | atomic_set(&tsk->fs_excl, 0); | 293 | * parent) |
| 294 | */ | ||
| 295 | atomic_set(&tsk->usage, 2); | ||
| 295 | #ifdef CONFIG_BLK_DEV_IO_TRACE | 296 | #ifdef CONFIG_BLK_DEV_IO_TRACE |
| 296 | tsk->btrace_seq = 0; | 297 | tsk->btrace_seq = 0; |
| 297 | #endif | 298 | #endif |
| @@ -439,7 +440,7 @@ fail_nomem: | |||
| 439 | goto out; | 440 | goto out; |
| 440 | } | 441 | } |
| 441 | 442 | ||
| 442 | static inline int mm_alloc_pgd(struct mm_struct * mm) | 443 | static inline int mm_alloc_pgd(struct mm_struct *mm) |
| 443 | { | 444 | { |
| 444 | mm->pgd = pgd_alloc(mm); | 445 | mm->pgd = pgd_alloc(mm); |
| 445 | if (unlikely(!mm->pgd)) | 446 | if (unlikely(!mm->pgd)) |
| @@ -447,7 +448,7 @@ static inline int mm_alloc_pgd(struct mm_struct * mm) | |||
| 447 | return 0; | 448 | return 0; |
| 448 | } | 449 | } |
| 449 | 450 | ||
| 450 | static inline void mm_free_pgd(struct mm_struct * mm) | 451 | static inline void mm_free_pgd(struct mm_struct *mm) |
| 451 | { | 452 | { |
| 452 | pgd_free(mm, mm->pgd); | 453 | pgd_free(mm, mm->pgd); |
| 453 | } | 454 | } |
| @@ -484,7 +485,7 @@ static void mm_init_aio(struct mm_struct *mm) | |||
| 484 | #endif | 485 | #endif |
| 485 | } | 486 | } |
| 486 | 487 | ||
| 487 | static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) | 488 | static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) |
| 488 | { | 489 | { |
| 489 | atomic_set(&mm->mm_users, 1); | 490 | atomic_set(&mm->mm_users, 1); |
| 490 | atomic_set(&mm->mm_count, 1); | 491 | atomic_set(&mm->mm_count, 1); |
| @@ -515,9 +516,9 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) | |||
| 515 | /* | 516 | /* |
| 516 | * Allocate and initialize an mm_struct. | 517 | * Allocate and initialize an mm_struct. |
| 517 | */ | 518 | */ |
| 518 | struct mm_struct * mm_alloc(void) | 519 | struct mm_struct *mm_alloc(void) |
| 519 | { | 520 | { |
| 520 | struct mm_struct * mm; | 521 | struct mm_struct *mm; |
| 521 | 522 | ||
| 522 | mm = allocate_mm(); | 523 | mm = allocate_mm(); |
| 523 | if (!mm) | 524 | if (!mm) |
| @@ -585,7 +586,7 @@ void added_exe_file_vma(struct mm_struct *mm) | |||
| 585 | void removed_exe_file_vma(struct mm_struct *mm) | 586 | void removed_exe_file_vma(struct mm_struct *mm) |
| 586 | { | 587 | { |
| 587 | mm->num_exe_file_vmas--; | 588 | mm->num_exe_file_vmas--; |
| 588 | if ((mm->num_exe_file_vmas == 0) && mm->exe_file){ | 589 | if ((mm->num_exe_file_vmas == 0) && mm->exe_file) { |
| 589 | fput(mm->exe_file); | 590 | fput(mm->exe_file); |
| 590 | mm->exe_file = NULL; | 591 | mm->exe_file = NULL; |
| 591 | } | 592 | } |
| @@ -777,9 +778,9 @@ fail_nocontext: | |||
| 777 | return NULL; | 778 | return NULL; |
| 778 | } | 779 | } |
| 779 | 780 | ||
| 780 | static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) | 781 | static int copy_mm(unsigned long clone_flags, struct task_struct *tsk) |
| 781 | { | 782 | { |
| 782 | struct mm_struct * mm, *oldmm; | 783 | struct mm_struct *mm, *oldmm; |
| 783 | int retval; | 784 | int retval; |
| 784 | 785 | ||
| 785 | tsk->min_flt = tsk->maj_flt = 0; | 786 | tsk->min_flt = tsk->maj_flt = 0; |
| @@ -846,7 +847,7 @@ static int copy_fs(unsigned long clone_flags, struct task_struct *tsk) | |||
| 846 | return 0; | 847 | return 0; |
| 847 | } | 848 | } |
| 848 | 849 | ||
| 849 | static int copy_files(unsigned long clone_flags, struct task_struct * tsk) | 850 | static int copy_files(unsigned long clone_flags, struct task_struct *tsk) |
| 850 | { | 851 | { |
| 851 | struct files_struct *oldf, *newf; | 852 | struct files_struct *oldf, *newf; |
| 852 | int error = 0; | 853 | int error = 0; |
| @@ -1013,7 +1014,7 @@ static void rt_mutex_init_task(struct task_struct *p) | |||
| 1013 | { | 1014 | { |
| 1014 | raw_spin_lock_init(&p->pi_lock); | 1015 | raw_spin_lock_init(&p->pi_lock); |
| 1015 | #ifdef CONFIG_RT_MUTEXES | 1016 | #ifdef CONFIG_RT_MUTEXES |
| 1016 | plist_head_init_raw(&p->pi_waiters, &p->pi_lock); | 1017 | plist_head_init(&p->pi_waiters); |
| 1017 | p->pi_blocked_on = NULL; | 1018 | p->pi_blocked_on = NULL; |
| 1018 | #endif | 1019 | #endif |
| 1019 | } | 1020 | } |
| @@ -1168,13 +1169,17 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
| 1168 | cgroup_fork(p); | 1169 | cgroup_fork(p); |
| 1169 | #ifdef CONFIG_NUMA | 1170 | #ifdef CONFIG_NUMA |
| 1170 | p->mempolicy = mpol_dup(p->mempolicy); | 1171 | p->mempolicy = mpol_dup(p->mempolicy); |
| 1171 | if (IS_ERR(p->mempolicy)) { | 1172 | if (IS_ERR(p->mempolicy)) { |
| 1172 | retval = PTR_ERR(p->mempolicy); | 1173 | retval = PTR_ERR(p->mempolicy); |
| 1173 | p->mempolicy = NULL; | 1174 | p->mempolicy = NULL; |
| 1174 | goto bad_fork_cleanup_cgroup; | 1175 | goto bad_fork_cleanup_cgroup; |
| 1175 | } | 1176 | } |
| 1176 | mpol_fix_fork_child_flag(p); | 1177 | mpol_fix_fork_child_flag(p); |
| 1177 | #endif | 1178 | #endif |
| 1179 | #ifdef CONFIG_CPUSETS | ||
| 1180 | p->cpuset_mem_spread_rotor = NUMA_NO_NODE; | ||
| 1181 | p->cpuset_slab_spread_rotor = NUMA_NO_NODE; | ||
| 1182 | #endif | ||
| 1178 | #ifdef CONFIG_TRACE_IRQFLAGS | 1183 | #ifdef CONFIG_TRACE_IRQFLAGS |
| 1179 | p->irq_events = 0; | 1184 | p->irq_events = 0; |
| 1180 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW | 1185 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW |
| @@ -1214,25 +1219,33 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
| 1214 | retval = perf_event_init_task(p); | 1219 | retval = perf_event_init_task(p); |
| 1215 | if (retval) | 1220 | if (retval) |
| 1216 | goto bad_fork_cleanup_policy; | 1221 | goto bad_fork_cleanup_policy; |
| 1217 | 1222 | retval = audit_alloc(p); | |
| 1218 | if ((retval = audit_alloc(p))) | 1223 | if (retval) |
| 1219 | goto bad_fork_cleanup_policy; | 1224 | goto bad_fork_cleanup_policy; |
| 1220 | /* copy all the process information */ | 1225 | /* copy all the process information */ |
| 1221 | if ((retval = copy_semundo(clone_flags, p))) | 1226 | retval = copy_semundo(clone_flags, p); |
| 1227 | if (retval) | ||
| 1222 | goto bad_fork_cleanup_audit; | 1228 | goto bad_fork_cleanup_audit; |
| 1223 | if ((retval = copy_files(clone_flags, p))) | 1229 | retval = copy_files(clone_flags, p); |
| 1230 | if (retval) | ||
| 1224 | goto bad_fork_cleanup_semundo; | 1231 | goto bad_fork_cleanup_semundo; |
| 1225 | if ((retval = copy_fs(clone_flags, p))) | 1232 | retval = copy_fs(clone_flags, p); |
| 1233 | if (retval) | ||
| 1226 | goto bad_fork_cleanup_files; | 1234 | goto bad_fork_cleanup_files; |
| 1227 | if ((retval = copy_sighand(clone_flags, p))) | 1235 | retval = copy_sighand(clone_flags, p); |
| 1236 | if (retval) | ||
| 1228 | goto bad_fork_cleanup_fs; | 1237 | goto bad_fork_cleanup_fs; |
| 1229 | if ((retval = copy_signal(clone_flags, p))) | 1238 | retval = copy_signal(clone_flags, p); |
| 1239 | if (retval) | ||
| 1230 | goto bad_fork_cleanup_sighand; | 1240 | goto bad_fork_cleanup_sighand; |
| 1231 | if ((retval = copy_mm(clone_flags, p))) | 1241 | retval = copy_mm(clone_flags, p); |
| 1242 | if (retval) | ||
| 1232 | goto bad_fork_cleanup_signal; | 1243 | goto bad_fork_cleanup_signal; |
| 1233 | if ((retval = copy_namespaces(clone_flags, p))) | 1244 | retval = copy_namespaces(clone_flags, p); |
| 1245 | if (retval) | ||
| 1234 | goto bad_fork_cleanup_mm; | 1246 | goto bad_fork_cleanup_mm; |
| 1235 | if ((retval = copy_io(clone_flags, p))) | 1247 | retval = copy_io(clone_flags, p); |
| 1248 | if (retval) | ||
| 1236 | goto bad_fork_cleanup_namespaces; | 1249 | goto bad_fork_cleanup_namespaces; |
| 1237 | retval = copy_thread(clone_flags, stack_start, stack_size, p, regs); | 1250 | retval = copy_thread(clone_flags, stack_start, stack_size, p, regs); |
| 1238 | if (retval) | 1251 | if (retval) |
| @@ -1254,7 +1267,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
| 1254 | /* | 1267 | /* |
| 1255 | * Clear TID on mm_release()? | 1268 | * Clear TID on mm_release()? |
| 1256 | */ | 1269 | */ |
| 1257 | p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL; | 1270 | p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL; |
| 1258 | #ifdef CONFIG_BLOCK | 1271 | #ifdef CONFIG_BLOCK |
| 1259 | p->plug = NULL; | 1272 | p->plug = NULL; |
| 1260 | #endif | 1273 | #endif |
| @@ -1322,7 +1335,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
| 1322 | * it's process group. | 1335 | * it's process group. |
| 1323 | * A fatal signal pending means that current will exit, so the new | 1336 | * A fatal signal pending means that current will exit, so the new |
| 1324 | * thread can't slip out of an OOM kill (or normal SIGKILL). | 1337 | * thread can't slip out of an OOM kill (or normal SIGKILL). |
| 1325 | */ | 1338 | */ |
| 1326 | recalc_sigpending(); | 1339 | recalc_sigpending(); |
| 1327 | if (signal_pending(current)) { | 1340 | if (signal_pending(current)) { |
| 1328 | spin_unlock(¤t->sighand->siglock); | 1341 | spin_unlock(¤t->sighand->siglock); |
| @@ -1340,7 +1353,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
| 1340 | } | 1353 | } |
| 1341 | 1354 | ||
| 1342 | if (likely(p->pid)) { | 1355 | if (likely(p->pid)) { |
| 1343 | tracehook_finish_clone(p, clone_flags, trace); | 1356 | ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); |
| 1344 | 1357 | ||
| 1345 | if (thread_group_leader(p)) { | 1358 | if (thread_group_leader(p)) { |
| 1346 | if (is_child_reaper(pid)) | 1359 | if (is_child_reaper(pid)) |
| @@ -1481,10 +1494,22 @@ long do_fork(unsigned long clone_flags, | |||
| 1481 | } | 1494 | } |
| 1482 | 1495 | ||
| 1483 | /* | 1496 | /* |
| 1484 | * When called from kernel_thread, don't do user tracing stuff. | 1497 | * Determine whether and which event to report to ptracer. When |
| 1498 | * called from kernel_thread or CLONE_UNTRACED is explicitly | ||
| 1499 | * requested, no event is reported; otherwise, report if the event | ||
| 1500 | * for the type of forking is enabled. | ||
| 1485 | */ | 1501 | */ |
| 1486 | if (likely(user_mode(regs))) | 1502 | if (likely(user_mode(regs)) && !(clone_flags & CLONE_UNTRACED)) { |
| 1487 | trace = tracehook_prepare_clone(clone_flags); | 1503 | if (clone_flags & CLONE_VFORK) |
| 1504 | trace = PTRACE_EVENT_VFORK; | ||
| 1505 | else if ((clone_flags & CSIGNAL) != SIGCHLD) | ||
| 1506 | trace = PTRACE_EVENT_CLONE; | ||
| 1507 | else | ||
| 1508 | trace = PTRACE_EVENT_FORK; | ||
| 1509 | |||
| 1510 | if (likely(!ptrace_event_enabled(current, trace))) | ||
| 1511 | trace = 0; | ||
| 1512 | } | ||
| 1488 | 1513 | ||
| 1489 | p = copy_process(clone_flags, stack_start, regs, stack_size, | 1514 | p = copy_process(clone_flags, stack_start, regs, stack_size, |
| 1490 | child_tidptr, NULL, trace); | 1515 | child_tidptr, NULL, trace); |
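The hunk above open-codes what tracehook_prepare_clone() used to hide: pick the ptrace event from the clone flags, and drop it again if the tracer has not enabled that event. A minimal sketch of the same decision as a standalone helper (select_fork_event() is an invented name, and the user_mode(regs) check is omitted for brevity):

	/* Hypothetical helper mirroring the logic added to do_fork() above. */
	static int select_fork_event(unsigned long clone_flags)
	{
		int trace = 0;

		if (!(clone_flags & CLONE_UNTRACED)) {
			if (clone_flags & CLONE_VFORK)
				trace = PTRACE_EVENT_VFORK;
			else if ((clone_flags & CSIGNAL) != SIGCHLD)
				trace = PTRACE_EVENT_CLONE;
			else
				trace = PTRACE_EVENT_FORK;

			/* tracer did not ask for this event: report nothing */
			if (!ptrace_event_enabled(current, trace))
				trace = 0;
		}
		return trace;
	}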
| @@ -1508,26 +1533,26 @@ long do_fork(unsigned long clone_flags, | |||
| 1508 | } | 1533 | } |
| 1509 | 1534 | ||
| 1510 | audit_finish_fork(p); | 1535 | audit_finish_fork(p); |
| 1511 | tracehook_report_clone(regs, clone_flags, nr, p); | ||
| 1512 | 1536 | ||
| 1513 | /* | 1537 | /* |
| 1514 | * We set PF_STARTING at creation in case tracing wants to | 1538 | * We set PF_STARTING at creation in case tracing wants to |
| 1515 | * use this to distinguish a fully live task from one that | 1539 | * use this to distinguish a fully live task from one that |
| 1516 | * hasn't gotten to tracehook_report_clone() yet. Now we | 1540 | * hasn't finished SIGSTOP raising yet. Now we clear it |
| 1517 | * clear it and set the child going. | 1541 | * and set the child going. |
| 1518 | */ | 1542 | */ |
| 1519 | p->flags &= ~PF_STARTING; | 1543 | p->flags &= ~PF_STARTING; |
| 1520 | 1544 | ||
| 1521 | wake_up_new_task(p); | 1545 | wake_up_new_task(p); |
| 1522 | 1546 | ||
| 1523 | tracehook_report_clone_complete(trace, regs, | 1547 | /* forking complete and child started to run, tell ptracer */ |
| 1524 | clone_flags, nr, p); | 1548 | if (unlikely(trace)) |
| 1549 | ptrace_event(trace, nr); | ||
| 1525 | 1550 | ||
| 1526 | if (clone_flags & CLONE_VFORK) { | 1551 | if (clone_flags & CLONE_VFORK) { |
| 1527 | freezer_do_not_count(); | 1552 | freezer_do_not_count(); |
| 1528 | wait_for_completion(&vfork); | 1553 | wait_for_completion(&vfork); |
| 1529 | freezer_count(); | 1554 | freezer_count(); |
| 1530 | tracehook_report_vfork_done(p, nr); | 1555 | ptrace_event(PTRACE_EVENT_VFORK_DONE, nr); |
| 1531 | } | 1556 | } |
| 1532 | } else { | 1557 | } else { |
| 1533 | nr = PTR_ERR(p); | 1558 | nr = PTR_ERR(p); |
| @@ -1574,6 +1599,7 @@ void __init proc_caches_init(void) | |||
| 1574 | SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); | 1599 | SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); |
| 1575 | vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC); | 1600 | vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC); |
| 1576 | mmap_init(); | 1601 | mmap_init(); |
| 1602 | nsproxy_cache_init(); | ||
| 1577 | } | 1603 | } |
| 1578 | 1604 | ||
| 1579 | /* | 1605 | /* |
| @@ -1670,12 +1696,14 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) | |||
| 1670 | */ | 1696 | */ |
| 1671 | if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM)) | 1697 | if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM)) |
| 1672 | do_sysvsem = 1; | 1698 | do_sysvsem = 1; |
| 1673 | if ((err = unshare_fs(unshare_flags, &new_fs))) | 1699 | err = unshare_fs(unshare_flags, &new_fs); |
| 1700 | if (err) | ||
| 1674 | goto bad_unshare_out; | 1701 | goto bad_unshare_out; |
| 1675 | if ((err = unshare_fd(unshare_flags, &new_fd))) | 1702 | err = unshare_fd(unshare_flags, &new_fd); |
| 1703 | if (err) | ||
| 1676 | goto bad_unshare_cleanup_fs; | 1704 | goto bad_unshare_cleanup_fs; |
| 1677 | if ((err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, | 1705 | err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, new_fs); |
| 1678 | new_fs))) | 1706 | if (err) |
| 1679 | goto bad_unshare_cleanup_fd; | 1707 | goto bad_unshare_cleanup_fd; |
| 1680 | 1708 | ||
| 1681 | if (new_fs || new_fd || do_sysvsem || new_nsproxy) { | 1709 | if (new_fs || new_fd || do_sysvsem || new_nsproxy) { |
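The remaining fork.c and unshare() hunks are style-only conversions: scripts/checkpatch.pl complains about assignments inside if conditions, so the one-liner idiom is split into an assignment followed by a separate test. Schematically, with foo() standing in for any of the copy_*/unshare_* helpers:

	/* old style, flagged by checkpatch */
	if ((err = foo()))
		goto out;

	/* new style used throughout the hunks above */
	err = foo();
	if (err)
		goto out;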
diff --git a/kernel/futex.c b/kernel/futex.c index fe28dc282eae..11cbe052b2e8 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
| @@ -218,6 +218,8 @@ static void drop_futex_key_refs(union futex_key *key) | |||
| 218 | * @uaddr: virtual address of the futex | 218 | * @uaddr: virtual address of the futex |
| 219 | * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED | 219 | * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED |
| 220 | * @key: address where result is stored. | 220 | * @key: address where result is stored. |
| 221 | * @rw: mapping needs to be read/write (values: VERIFY_READ, | ||
| 222 | * VERIFY_WRITE) | ||
| 221 | * | 223 | * |
| 222 | * Returns a negative error code or 0 | 224 | * Returns a negative error code or 0 |
| 223 | * The key words are stored in *key on success. | 225 | * The key words are stored in *key on success. |
| @@ -229,12 +231,12 @@ static void drop_futex_key_refs(union futex_key *key) | |||
| 229 | * lock_page() might sleep, the caller should not hold a spinlock. | 231 | * lock_page() might sleep, the caller should not hold a spinlock. |
| 230 | */ | 232 | */ |
| 231 | static int | 233 | static int |
| 232 | get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key) | 234 | get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) |
| 233 | { | 235 | { |
| 234 | unsigned long address = (unsigned long)uaddr; | 236 | unsigned long address = (unsigned long)uaddr; |
| 235 | struct mm_struct *mm = current->mm; | 237 | struct mm_struct *mm = current->mm; |
| 236 | struct page *page, *page_head; | 238 | struct page *page, *page_head; |
| 237 | int err; | 239 | int err, ro = 0; |
| 238 | 240 | ||
| 239 | /* | 241 | /* |
| 240 | * The futex address must be "naturally" aligned. | 242 | * The futex address must be "naturally" aligned. |
| @@ -262,8 +264,18 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key) | |||
| 262 | 264 | ||
| 263 | again: | 265 | again: |
| 264 | err = get_user_pages_fast(address, 1, 1, &page); | 266 | err = get_user_pages_fast(address, 1, 1, &page); |
| 267 | /* | ||
| 268 | * If write access is not required (eg. FUTEX_WAIT), try | ||
| 269 | * and get read-only access. | ||
| 270 | */ | ||
| 271 | if (err == -EFAULT && rw == VERIFY_READ) { | ||
| 272 | err = get_user_pages_fast(address, 1, 0, &page); | ||
| 273 | ro = 1; | ||
| 274 | } | ||
| 265 | if (err < 0) | 275 | if (err < 0) |
| 266 | return err; | 276 | return err; |
| 277 | else | ||
| 278 | err = 0; | ||
| 267 | 279 | ||
| 268 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 280 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
| 269 | page_head = page; | 281 | page_head = page; |
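The read-only retry above only happens when the caller asked for VERIFY_READ access (e.g. FUTEX_WAIT), so operations that must store to the futex word still fail early. A stripped-down sketch of the control flow, with the page and THP handling omitted, so purely illustrative:

	int err, ro = 0;

	/* first try a writable mapping, as before */
	err = get_user_pages_fast(address, 1, 1 /* write */, &page);
	if (err == -EFAULT && rw == VERIFY_READ) {
		/* e.g. waiting on a futex in a read-only shared mapping */
		err = get_user_pages_fast(address, 1, 0 /* read-only */, &page);
		ro = 1;
	}
	if (err < 0)
		return err;

	/*
	 * later: a read-only PageAnon (private) page is rejected with -EFAULT,
	 * since its value can never change and futex ops on it make no sense
	 */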
| @@ -305,6 +317,13 @@ again: | |||
| 305 | if (!page_head->mapping) { | 317 | if (!page_head->mapping) { |
| 306 | unlock_page(page_head); | 318 | unlock_page(page_head); |
| 307 | put_page(page_head); | 319 | put_page(page_head); |
| 320 | /* | ||
| 321 | * ZERO_PAGE pages don't have a mapping. Avoid a busy loop | ||
| 322 | * trying to find one. RW mapping would have COW'd (and thus | ||
| 323 | * have a mapping) so this page is RO and won't ever change. | ||
| 324 | */ | ||
| 325 | if ((page_head == ZERO_PAGE(address))) | ||
| 326 | return -EFAULT; | ||
| 308 | goto again; | 327 | goto again; |
| 309 | } | 328 | } |
| 310 | 329 | ||
| @@ -316,6 +335,15 @@ again: | |||
| 316 | * the object not the particular process. | 335 | * the object not the particular process. |
| 317 | */ | 336 | */ |
| 318 | if (PageAnon(page_head)) { | 337 | if (PageAnon(page_head)) { |
| 338 | /* | ||
| 339 | * A RO anonymous page will never change and thus doesn't make | ||
| 340 | * sense for futex operations. | ||
| 341 | */ | ||
| 342 | if (ro) { | ||
| 343 | err = -EFAULT; | ||
| 344 | goto out; | ||
| 345 | } | ||
| 346 | |||
| 319 | key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */ | 347 | key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */ |
| 320 | key->private.mm = mm; | 348 | key->private.mm = mm; |
| 321 | key->private.address = address; | 349 | key->private.address = address; |
| @@ -327,9 +355,10 @@ again: | |||
| 327 | 355 | ||
| 328 | get_futex_key_refs(key); | 356 | get_futex_key_refs(key); |
| 329 | 357 | ||
| 358 | out: | ||
| 330 | unlock_page(page_head); | 359 | unlock_page(page_head); |
| 331 | put_page(page_head); | 360 | put_page(page_head); |
| 332 | return 0; | 361 | return err; |
| 333 | } | 362 | } |
| 334 | 363 | ||
| 335 | static inline void put_futex_key(union futex_key *key) | 364 | static inline void put_futex_key(union futex_key *key) |
| @@ -355,8 +384,8 @@ static int fault_in_user_writeable(u32 __user *uaddr) | |||
| 355 | int ret; | 384 | int ret; |
| 356 | 385 | ||
| 357 | down_read(&mm->mmap_sem); | 386 | down_read(&mm->mmap_sem); |
| 358 | ret = get_user_pages(current, mm, (unsigned long)uaddr, | 387 | ret = fixup_user_fault(current, mm, (unsigned long)uaddr, |
| 359 | 1, 1, 0, NULL, NULL); | 388 | FAULT_FLAG_WRITE); |
| 360 | up_read(&mm->mmap_sem); | 389 | up_read(&mm->mmap_sem); |
| 361 | 390 | ||
| 362 | return ret < 0 ? ret : 0; | 391 | return ret < 0 ? ret : 0; |
| @@ -940,7 +969,7 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset) | |||
| 940 | if (!bitset) | 969 | if (!bitset) |
| 941 | return -EINVAL; | 970 | return -EINVAL; |
| 942 | 971 | ||
| 943 | ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key); | 972 | ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_READ); |
| 944 | if (unlikely(ret != 0)) | 973 | if (unlikely(ret != 0)) |
| 945 | goto out; | 974 | goto out; |
| 946 | 975 | ||
| @@ -986,10 +1015,10 @@ futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2, | |||
| 986 | int ret, op_ret; | 1015 | int ret, op_ret; |
| 987 | 1016 | ||
| 988 | retry: | 1017 | retry: |
| 989 | ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1); | 1018 | ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ); |
| 990 | if (unlikely(ret != 0)) | 1019 | if (unlikely(ret != 0)) |
| 991 | goto out; | 1020 | goto out; |
| 992 | ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2); | 1021 | ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE); |
| 993 | if (unlikely(ret != 0)) | 1022 | if (unlikely(ret != 0)) |
| 994 | goto out_put_key1; | 1023 | goto out_put_key1; |
| 995 | 1024 | ||
| @@ -1243,10 +1272,11 @@ retry: | |||
| 1243 | pi_state = NULL; | 1272 | pi_state = NULL; |
| 1244 | } | 1273 | } |
| 1245 | 1274 | ||
| 1246 | ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1); | 1275 | ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ); |
| 1247 | if (unlikely(ret != 0)) | 1276 | if (unlikely(ret != 0)) |
| 1248 | goto out; | 1277 | goto out; |
| 1249 | ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2); | 1278 | ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, |
| 1279 | requeue_pi ? VERIFY_WRITE : VERIFY_READ); | ||
| 1250 | if (unlikely(ret != 0)) | 1280 | if (unlikely(ret != 0)) |
| 1251 | goto out_put_key1; | 1281 | goto out_put_key1; |
| 1252 | 1282 | ||
| @@ -1790,7 +1820,7 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, | |||
| 1790 | * while the syscall executes. | 1820 | * while the syscall executes. |
| 1791 | */ | 1821 | */ |
| 1792 | retry: | 1822 | retry: |
| 1793 | ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key); | 1823 | ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, VERIFY_READ); |
| 1794 | if (unlikely(ret != 0)) | 1824 | if (unlikely(ret != 0)) |
| 1795 | return ret; | 1825 | return ret; |
| 1796 | 1826 | ||
| @@ -1941,7 +1971,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect, | |||
| 1941 | } | 1971 | } |
| 1942 | 1972 | ||
| 1943 | retry: | 1973 | retry: |
| 1944 | ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key); | 1974 | ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, VERIFY_WRITE); |
| 1945 | if (unlikely(ret != 0)) | 1975 | if (unlikely(ret != 0)) |
| 1946 | goto out; | 1976 | goto out; |
| 1947 | 1977 | ||
| @@ -2060,7 +2090,7 @@ retry: | |||
| 2060 | if ((uval & FUTEX_TID_MASK) != vpid) | 2090 | if ((uval & FUTEX_TID_MASK) != vpid) |
| 2061 | return -EPERM; | 2091 | return -EPERM; |
| 2062 | 2092 | ||
| 2063 | ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key); | 2093 | ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_WRITE); |
| 2064 | if (unlikely(ret != 0)) | 2094 | if (unlikely(ret != 0)) |
| 2065 | goto out; | 2095 | goto out; |
| 2066 | 2096 | ||
| @@ -2249,7 +2279,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, | |||
| 2249 | debug_rt_mutex_init_waiter(&rt_waiter); | 2279 | debug_rt_mutex_init_waiter(&rt_waiter); |
| 2250 | rt_waiter.task = NULL; | 2280 | rt_waiter.task = NULL; |
| 2251 | 2281 | ||
| 2252 | ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2); | 2282 | ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE); |
| 2253 | if (unlikely(ret != 0)) | 2283 | if (unlikely(ret != 0)) |
| 2254 | goto out; | 2284 | goto out; |
| 2255 | 2285 | ||
| @@ -2697,7 +2727,7 @@ static int __init futex_init(void) | |||
| 2697 | futex_cmpxchg_enabled = 1; | 2727 | futex_cmpxchg_enabled = 1; |
| 2698 | 2728 | ||
| 2699 | for (i = 0; i < ARRAY_SIZE(futex_queues); i++) { | 2729 | for (i = 0; i < ARRAY_SIZE(futex_queues); i++) { |
| 2700 | plist_head_init(&futex_queues[i].chain, &futex_queues[i].lock); | 2730 | plist_head_init(&futex_queues[i].chain); |
| 2701 | spin_lock_init(&futex_queues[i].lock); | 2731 | spin_lock_init(&futex_queues[i].lock); |
| 2702 | } | 2732 | } |
| 2703 | 2733 | ||
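plist_head_init() lost its spinlock argument in this series, so the per-bucket lock is now initialized on its own line. A minimal sketch of the new bucket setup, assuming the usual futex_hash_bucket layout of a lock plus a priority list:

	struct futex_hash_bucket {
		spinlock_t lock;
		struct plist_head chain;
	};

	static void futex_bucket_init(struct futex_hash_bucket *hb)
	{
		plist_head_init(&hb->chain);	/* no lock argument any more */
		spin_lock_init(&hb->lock);
	}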
diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig index b8cadf70b1fb..a92028196cc1 100644 --- a/kernel/gcov/Kconfig +++ b/kernel/gcov/Kconfig | |||
| @@ -2,7 +2,8 @@ menu "GCOV-based kernel profiling" | |||
| 2 | 2 | ||
| 3 | config GCOV_KERNEL | 3 | config GCOV_KERNEL |
| 4 | bool "Enable gcov-based kernel profiling" | 4 | bool "Enable gcov-based kernel profiling" |
| 5 | depends on DEBUG_FS && CONSTRUCTORS | 5 | depends on DEBUG_FS |
| 6 | select CONSTRUCTORS if !UML | ||
| 6 | default n | 7 | default n |
| 7 | ---help--- | 8 | ---help--- |
| 8 | This option enables gcov-based code profiling (e.g. for code coverage | 9 | This option enables gcov-based code profiling (e.g. for code coverage |
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index d1d051b38e0b..5a38bf4de641 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig | |||
| @@ -52,6 +52,10 @@ config IRQ_EDGE_EOI_HANDLER | |||
| 52 | config GENERIC_IRQ_CHIP | 52 | config GENERIC_IRQ_CHIP |
| 53 | bool | 53 | bool |
| 54 | 54 | ||
| 55 | # Generic irq_domain hw <--> linux irq number translation | ||
| 56 | config IRQ_DOMAIN | ||
| 57 | bool | ||
| 58 | |||
| 55 | # Support forced irq threading | 59 | # Support forced irq threading |
| 56 | config IRQ_FORCED_THREADING | 60 | config IRQ_FORCED_THREADING |
| 57 | bool | 61 | bool |
diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile index 73290056cfb6..fff17381f0af 100644 --- a/kernel/irq/Makefile +++ b/kernel/irq/Makefile | |||
| @@ -2,6 +2,7 @@ | |||
| 2 | obj-y := irqdesc.o handle.o manage.o spurious.o resend.o chip.o dummychip.o devres.o | 2 | obj-y := irqdesc.o handle.o manage.o spurious.o resend.o chip.o dummychip.o devres.o |
| 3 | obj-$(CONFIG_GENERIC_IRQ_CHIP) += generic-chip.o | 3 | obj-$(CONFIG_GENERIC_IRQ_CHIP) += generic-chip.o |
| 4 | obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o | 4 | obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o |
| 5 | obj-$(CONFIG_IRQ_DOMAIN) += irqdomain.o | ||
| 5 | obj-$(CONFIG_PROC_FS) += proc.o | 6 | obj-$(CONFIG_PROC_FS) += proc.o |
| 6 | obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o | 7 | obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o |
| 7 | obj-$(CONFIG_PM_SLEEP) += pm.o | 8 | obj-$(CONFIG_PM_SLEEP) += pm.o |
diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c index 1ef4ffcdfa55..bd8e788d71e0 100644 --- a/kernel/irq/devres.c +++ b/kernel/irq/devres.c | |||
| @@ -87,8 +87,8 @@ void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id) | |||
| 87 | { | 87 | { |
| 88 | struct irq_devres match_data = { irq, dev_id }; | 88 | struct irq_devres match_data = { irq, dev_id }; |
| 89 | 89 | ||
| 90 | free_irq(irq, dev_id); | ||
| 91 | WARN_ON(devres_destroy(dev, devm_irq_release, devm_irq_match, | 90 | WARN_ON(devres_destroy(dev, devm_irq_release, devm_irq_match, |
| 92 | &match_data)); | 91 | &match_data)); |
| 92 | free_irq(irq, dev_id); | ||
| 93 | } | 93 | } |
| 94 | EXPORT_SYMBOL(devm_free_irq); | 94 | EXPORT_SYMBOL(devm_free_irq); |
diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index 31a9db711906..3a2cab407b93 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c | |||
| @@ -101,10 +101,10 @@ void irq_gc_unmask_enable_reg(struct irq_data *d) | |||
| 101 | } | 101 | } |
| 102 | 102 | ||
| 103 | /** | 103 | /** |
| 104 | * irq_gc_ack - Ack pending interrupt | 104 | * irq_gc_ack_set_bit - Ack pending interrupt via setting bit |
| 105 | * @d: irq_data | 105 | * @d: irq_data |
| 106 | */ | 106 | */ |
| 107 | void irq_gc_ack(struct irq_data *d) | 107 | void irq_gc_ack_set_bit(struct irq_data *d) |
| 108 | { | 108 | { |
| 109 | struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); | 109 | struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); |
| 110 | u32 mask = 1 << (d->irq - gc->irq_base); | 110 | u32 mask = 1 << (d->irq - gc->irq_base); |
| @@ -115,6 +115,20 @@ void irq_gc_ack(struct irq_data *d) | |||
| 115 | } | 115 | } |
| 116 | 116 | ||
| 117 | /** | 117 | /** |
| 118 | * irq_gc_ack_clr_bit - Ack pending interrupt via clearing bit | ||
| 119 | * @d: irq_data | ||
| 120 | */ | ||
| 121 | void irq_gc_ack_clr_bit(struct irq_data *d) | ||
| 122 | { | ||
| 123 | struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); | ||
| 124 | u32 mask = ~(1 << (d->irq - gc->irq_base)); | ||
| 125 | |||
| 126 | irq_gc_lock(gc); | ||
| 127 | irq_reg_writel(mask, gc->reg_base + cur_regs(d)->ack); | ||
| 128 | irq_gc_unlock(gc); | ||
| 129 | } | ||
| 130 | |||
| 131 | /** | ||
| 118 | * irq_gc_mask_disable_reg_and_ack- Mask and ack pending interrupt | 132 | * irq_gc_mask_disable_reg_and_ack- Mask and ack pending interrupt |
| 119 | * @d: irq_data | 133 | * @d: irq_data |
| 120 | */ | 134 | */ |
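irq_gc_ack() grows two flavours because generic-chip users disagree on ack semantics: some controllers ack a source by writing a 1 to its bit, others by writing a 0. A hedged sketch of selecting one of them while instantiating a generic chip (the register offsets and the 32-interrupt count are invented for illustration):

	static void __init foo_init_irq(void __iomem *base, unsigned int irq_base)
	{
		struct irq_chip_generic *gc;
		struct irq_chip_type *ct;

		gc = irq_alloc_generic_chip("FOO", 1, irq_base, base,
					    handle_level_irq);
		ct = gc->chip_types;

		ct->regs.ack  = 0x10;			/* invented offsets */
		ct->regs.mask = 0x14;
		ct->chip.irq_ack    = irq_gc_ack_set_bit; /* or irq_gc_ack_clr_bit */
		ct->chip.irq_mask   = irq_gc_mask_set_bit;
		ct->chip.irq_unmask = irq_gc_mask_clr_bit;

		irq_setup_generic_chip(gc, IRQ_MSK(32), 0, IRQ_NOREQUEST, 0);
	}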
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 90cb55f6d7eb..470d08c82bbe 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c | |||
| @@ -133,12 +133,6 @@ handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action) | |||
| 133 | switch (res) { | 133 | switch (res) { |
| 134 | case IRQ_WAKE_THREAD: | 134 | case IRQ_WAKE_THREAD: |
| 135 | /* | 135 | /* |
| 136 | * Set result to handled so the spurious check | ||
| 137 | * does not trigger. | ||
| 138 | */ | ||
| 139 | res = IRQ_HANDLED; | ||
| 140 | |||
| 141 | /* | ||
| 142 | * Catch drivers which return WAKE_THREAD but | 136 | * Catch drivers which return WAKE_THREAD but |
| 143 | * did not set up a thread function | 137 | * did not set up a thread function |
| 144 | */ | 138 | */ |
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 886e80347b32..4c60a50e66b2 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c | |||
| @@ -257,13 +257,11 @@ int __init early_irq_init(void) | |||
| 257 | count = ARRAY_SIZE(irq_desc); | 257 | count = ARRAY_SIZE(irq_desc); |
| 258 | 258 | ||
| 259 | for (i = 0; i < count; i++) { | 259 | for (i = 0; i < count; i++) { |
| 260 | desc[i].irq_data.irq = i; | ||
| 261 | desc[i].irq_data.chip = &no_irq_chip; | ||
| 262 | desc[i].kstat_irqs = alloc_percpu(unsigned int); | 260 | desc[i].kstat_irqs = alloc_percpu(unsigned int); |
| 263 | irq_settings_clr_and_set(desc, ~0, _IRQ_DEFAULT_INIT_FLAGS); | 261 | alloc_masks(&desc[i], GFP_KERNEL, node); |
| 264 | alloc_masks(desc + i, GFP_KERNEL, node); | 262 | raw_spin_lock_init(&desc[i].lock); |
| 265 | desc_smp_init(desc + i, node); | ||
| 266 | lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); | 263 | lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); |
| 264 | desc_set_defaults(i, &desc[i], node); | ||
| 267 | } | 265 | } |
| 268 | return arch_early_irq_init(); | 266 | return arch_early_irq_init(); |
| 269 | } | 267 | } |
| @@ -346,6 +344,12 @@ irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node) | |||
| 346 | if (!cnt) | 344 | if (!cnt) |
| 347 | return -EINVAL; | 345 | return -EINVAL; |
| 348 | 346 | ||
| 347 | if (irq >= 0) { | ||
| 348 | if (from > irq) | ||
| 349 | return -EINVAL; | ||
| 350 | from = irq; | ||
| 351 | } | ||
| 352 | |||
| 349 | mutex_lock(&sparse_irq_lock); | 353 | mutex_lock(&sparse_irq_lock); |
| 350 | 354 | ||
| 351 | start = bitmap_find_next_zero_area(allocated_irqs, IRQ_BITMAP_BITS, | 355 | start = bitmap_find_next_zero_area(allocated_irqs, IRQ_BITMAP_BITS, |
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c new file mode 100644 index 000000000000..d5828da3fd38 --- /dev/null +++ b/kernel/irq/irqdomain.c | |||
| @@ -0,0 +1,180 @@ | |||
| 1 | #include <linux/irq.h> | ||
| 2 | #include <linux/irqdomain.h> | ||
| 3 | #include <linux/module.h> | ||
| 4 | #include <linux/mutex.h> | ||
| 5 | #include <linux/of.h> | ||
| 6 | #include <linux/of_address.h> | ||
| 7 | #include <linux/slab.h> | ||
| 8 | |||
| 9 | static LIST_HEAD(irq_domain_list); | ||
| 10 | static DEFINE_MUTEX(irq_domain_mutex); | ||
| 11 | |||
| 12 | /** | ||
| 13 | * irq_domain_add() - Register an irq_domain | ||
| 14 | * @domain: ptr to initialized irq_domain structure | ||
| 15 | * | ||
| 16 | * Registers an irq_domain structure. The irq_domain must at a minimum be | ||
| 17 | * initialized with an ops structure pointer, and either a ->to_irq hook or | ||
| 18 | * a valid irq_base value. Everything else is optional. | ||
| 19 | */ | ||
| 20 | void irq_domain_add(struct irq_domain *domain) | ||
| 21 | { | ||
| 22 | struct irq_data *d; | ||
| 23 | int hwirq; | ||
| 24 | |||
| 25 | /* | ||
| 26 | * This assumes that the irq_domain owner has already allocated | ||
| 27 | * the irq_descs. This block will be removed when support for dynamic | ||
| 28 | * allocation of irq_descs is added to irq_domain. | ||
| 29 | */ | ||
| 30 | for (hwirq = 0; hwirq < domain->nr_irq; hwirq++) { | ||
| 31 | d = irq_get_irq_data(irq_domain_to_irq(domain, hwirq)); | ||
| 32 | if (!d || d->domain) { | ||
| 33 | /* things are broken; just report, don't clean up */ | ||
| 34 | WARN(1, "error: irq_desc already assigned to a domain"); | ||
| 35 | return; | ||
| 36 | } | ||
| 37 | d->domain = domain; | ||
| 38 | d->hwirq = hwirq; | ||
| 39 | } | ||
| 40 | |||
| 41 | mutex_lock(&irq_domain_mutex); | ||
| 42 | list_add(&domain->list, &irq_domain_list); | ||
| 43 | mutex_unlock(&irq_domain_mutex); | ||
| 44 | } | ||
| 45 | |||
| 46 | /** | ||
| 47 | * irq_domain_del() - Unregister an irq_domain | ||
| 48 | * @domain: ptr to registered irq_domain. | ||
| 49 | */ | ||
| 50 | void irq_domain_del(struct irq_domain *domain) | ||
| 51 | { | ||
| 52 | struct irq_data *d; | ||
| 53 | int hwirq; | ||
| 54 | |||
| 55 | mutex_lock(&irq_domain_mutex); | ||
| 56 | list_del(&domain->list); | ||
| 57 | mutex_unlock(&irq_domain_mutex); | ||
| 58 | |||
| 59 | /* Clear the irq_domain assignments */ | ||
| 60 | for (hwirq = 0; hwirq < domain->nr_irq; hwirq++) { | ||
| 61 | d = irq_get_irq_data(irq_domain_to_irq(domain, hwirq)); | ||
| 62 | d->domain = NULL; | ||
| 63 | } | ||
| 64 | } | ||
| 65 | |||
| 66 | #if defined(CONFIG_OF_IRQ) | ||
| 67 | /** | ||
| 68 | * irq_create_of_mapping() - Map a linux irq number from a DT interrupt spec | ||
| 69 | * | ||
| 70 | * Used by the device tree interrupt mapping code to translate a device tree | ||
| 71 | * interrupt specifier to a valid linux irq number. Returns either a valid | ||
| 72 | * linux IRQ number or 0. | ||
| 73 | * | ||
| 74 | * When the caller no longer need the irq number returned by this function it | ||
| 75 | * should arrange to call irq_dispose_mapping(). | ||
| 76 | */ | ||
| 77 | unsigned int irq_create_of_mapping(struct device_node *controller, | ||
| 78 | const u32 *intspec, unsigned int intsize) | ||
| 79 | { | ||
| 80 | struct irq_domain *domain; | ||
| 81 | unsigned long hwirq; | ||
| 82 | unsigned int irq, type; | ||
| 83 | int rc = -EINVAL; | ||
| 84 | |||
| 85 | /* Find a domain which can translate the irq spec */ | ||
| 86 | mutex_lock(&irq_domain_mutex); | ||
| 87 | list_for_each_entry(domain, &irq_domain_list, list) { | ||
| 88 | if (!domain->ops->dt_translate) | ||
| 89 | continue; | ||
| 90 | rc = domain->ops->dt_translate(domain, controller, | ||
| 91 | intspec, intsize, &hwirq, &type); | ||
| 92 | if (rc == 0) | ||
| 93 | break; | ||
| 94 | } | ||
| 95 | mutex_unlock(&irq_domain_mutex); | ||
| 96 | |||
| 97 | if (rc != 0) | ||
| 98 | return 0; | ||
| 99 | |||
| 100 | irq = irq_domain_to_irq(domain, hwirq); | ||
| 101 | if (type != IRQ_TYPE_NONE) | ||
| 102 | irq_set_irq_type(irq, type); | ||
| 103 | pr_debug("%s: mapped hwirq=%i to irq=%i, flags=%x\n", | ||
| 104 | controller->full_name, (int)hwirq, irq, type); | ||
| 105 | return irq; | ||
| 106 | } | ||
| 107 | EXPORT_SYMBOL_GPL(irq_create_of_mapping); | ||
| 108 | |||
| 109 | /** | ||
| 110 | * irq_dispose_mapping() - Discard a mapping created by irq_create_of_mapping() | ||
| 111 | * @irq: linux irq number to be discarded | ||
| 112 | * | ||
| 113 | * Calling this function indicates the caller no longer needs a reference to | ||
| 114 | * the linux irq number returned by a prior call to irq_create_of_mapping(). | ||
| 115 | */ | ||
| 116 | void irq_dispose_mapping(unsigned int irq) | ||
| 117 | { | ||
| 118 | /* | ||
| 119 | * nothing yet; will be filled when support for dynamic allocation of | ||
| 120 | * irq_descs is added to irq_domain | ||
| 121 | */ | ||
| 122 | } | ||
| 123 | EXPORT_SYMBOL_GPL(irq_dispose_mapping); | ||
| 124 | |||
| 125 | int irq_domain_simple_dt_translate(struct irq_domain *d, | ||
| 126 | struct device_node *controller, | ||
| 127 | const u32 *intspec, unsigned int intsize, | ||
| 128 | unsigned long *out_hwirq, unsigned int *out_type) | ||
| 129 | { | ||
| 130 | if (d->of_node != controller) | ||
| 131 | return -EINVAL; | ||
| 132 | if (intsize < 1) | ||
| 133 | return -EINVAL; | ||
| 134 | |||
| 135 | *out_hwirq = intspec[0]; | ||
| 136 | *out_type = IRQ_TYPE_NONE; | ||
| 137 | if (intsize > 1) | ||
| 138 | *out_type = intspec[1] & IRQ_TYPE_SENSE_MASK; | ||
| 139 | return 0; | ||
| 140 | } | ||
| 141 | |||
| 142 | struct irq_domain_ops irq_domain_simple_ops = { | ||
| 143 | .dt_translate = irq_domain_simple_dt_translate, | ||
| 144 | }; | ||
| 145 | EXPORT_SYMBOL_GPL(irq_domain_simple_ops); | ||
| 146 | |||
| 147 | /** | ||
| 148 | * irq_domain_create_simple() - Set up a 'simple' translation range | ||
| 149 | */ | ||
| 150 | void irq_domain_add_simple(struct device_node *controller, int irq_base) | ||
| 151 | { | ||
| 152 | struct irq_domain *domain; | ||
| 153 | |||
| 154 | domain = kzalloc(sizeof(*domain), GFP_KERNEL); | ||
| 155 | if (!domain) { | ||
| 156 | WARN_ON(1); | ||
| 157 | return; | ||
| 158 | } | ||
| 159 | |||
| 160 | domain->irq_base = irq_base; | ||
| 161 | domain->of_node = of_node_get(controller); | ||
| 162 | domain->ops = &irq_domain_simple_ops; | ||
| 163 | irq_domain_add(domain); | ||
| 164 | } | ||
| 165 | EXPORT_SYMBOL_GPL(irq_domain_add_simple); | ||
| 166 | |||
| 167 | void irq_domain_generate_simple(const struct of_device_id *match, | ||
| 168 | u64 phys_base, unsigned int irq_start) | ||
| 169 | { | ||
| 170 | struct device_node *node; | ||
| 171 | pr_info("looking for phys_base=%llx, irq_start=%i\n", | ||
| 172 | (unsigned long long) phys_base, (int) irq_start); | ||
| 173 | node = of_find_matching_node_by_address(NULL, match, phys_base); | ||
| 174 | if (node) | ||
| 175 | irq_domain_add_simple(node, irq_start); | ||
| 176 | else | ||
| 177 | pr_info("no node found\n"); | ||
| 178 | } | ||
| 179 | EXPORT_SYMBOL_GPL(irq_domain_generate_simple); | ||
| 180 | #endif /* CONFIG_OF_IRQ */ | ||
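The new irqdomain.c only handles the 'simple' linear case so far: the controller driver still allocates its own irq_descs, and the domain merely records the of_node and the hwirq-to-Linux-irq offset so that device tree interrupt specifiers can be translated. A hedged sketch of how an interrupt controller driver might hook in (error handling trimmed, foo_intc_init is an invented name):

	static void __init foo_intc_init(struct device_node *node)
	{
		int irq_base;

		/* allocate a block of 32 irq_descs; -1 means "any free range" */
		irq_base = irq_alloc_descs(-1, 0, 32, numa_node_id());
		if (irq_base < 0)
			return;

		/* with irq_domain_simple_ops, hwirq N maps to irq_base + N */
		irq_domain_add_simple(node, irq_base);
	}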
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index f7ce0021e1c4..0a7840aeb0fb 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c | |||
| @@ -491,6 +491,9 @@ int irq_set_irq_wake(unsigned int irq, unsigned int on) | |||
| 491 | struct irq_desc *desc = irq_get_desc_buslock(irq, &flags); | 491 | struct irq_desc *desc = irq_get_desc_buslock(irq, &flags); |
| 492 | int ret = 0; | 492 | int ret = 0; |
| 493 | 493 | ||
| 494 | if (!desc) | ||
| 495 | return -EINVAL; | ||
| 496 | |||
| 494 | /* wakeup-capable irqs can be shared between drivers that | 497 | /* wakeup-capable irqs can be shared between drivers that |
| 495 | * don't need to have the same sleep mode behaviors. | 498 | * don't need to have the same sleep mode behaviors. |
| 496 | */ | 499 | */ |
| @@ -723,13 +726,16 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { } | |||
| 723 | * context. So we need to disable bh here to avoid deadlocks and other | 726 | * context. So we need to disable bh here to avoid deadlocks and other |
| 724 | * side effects. | 727 | * side effects. |
| 725 | */ | 728 | */ |
| 726 | static void | 729 | static irqreturn_t |
| 727 | irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action) | 730 | irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action) |
| 728 | { | 731 | { |
| 732 | irqreturn_t ret; | ||
| 733 | |||
| 729 | local_bh_disable(); | 734 | local_bh_disable(); |
| 730 | action->thread_fn(action->irq, action->dev_id); | 735 | ret = action->thread_fn(action->irq, action->dev_id); |
| 731 | irq_finalize_oneshot(desc, action, false); | 736 | irq_finalize_oneshot(desc, action, false); |
| 732 | local_bh_enable(); | 737 | local_bh_enable(); |
| 738 | return ret; | ||
| 733 | } | 739 | } |
| 734 | 740 | ||
| 735 | /* | 741 | /* |
| @@ -737,10 +743,14 @@ irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action) | |||
| 737 | * preemtible - many of them need to sleep and wait for slow busses to | 743 | * preemtible - many of them need to sleep and wait for slow busses to |
| 738 | * complete. | 744 | * complete. |
| 739 | */ | 745 | */ |
| 740 | static void irq_thread_fn(struct irq_desc *desc, struct irqaction *action) | 746 | static irqreturn_t irq_thread_fn(struct irq_desc *desc, |
| 747 | struct irqaction *action) | ||
| 741 | { | 748 | { |
| 742 | action->thread_fn(action->irq, action->dev_id); | 749 | irqreturn_t ret; |
| 750 | |||
| 751 | ret = action->thread_fn(action->irq, action->dev_id); | ||
| 743 | irq_finalize_oneshot(desc, action, false); | 752 | irq_finalize_oneshot(desc, action, false); |
| 753 | return ret; | ||
| 744 | } | 754 | } |
| 745 | 755 | ||
| 746 | /* | 756 | /* |
| @@ -753,7 +763,8 @@ static int irq_thread(void *data) | |||
| 753 | }; | 763 | }; |
| 754 | struct irqaction *action = data; | 764 | struct irqaction *action = data; |
| 755 | struct irq_desc *desc = irq_to_desc(action->irq); | 765 | struct irq_desc *desc = irq_to_desc(action->irq); |
| 756 | void (*handler_fn)(struct irq_desc *desc, struct irqaction *action); | 766 | irqreturn_t (*handler_fn)(struct irq_desc *desc, |
| 767 | struct irqaction *action); | ||
| 757 | int wake; | 768 | int wake; |
| 758 | 769 | ||
| 759 | if (force_irqthreads & test_bit(IRQTF_FORCED_THREAD, | 770 | if (force_irqthreads & test_bit(IRQTF_FORCED_THREAD, |
| @@ -783,8 +794,12 @@ static int irq_thread(void *data) | |||
| 783 | desc->istate |= IRQS_PENDING; | 794 | desc->istate |= IRQS_PENDING; |
| 784 | raw_spin_unlock_irq(&desc->lock); | 795 | raw_spin_unlock_irq(&desc->lock); |
| 785 | } else { | 796 | } else { |
| 797 | irqreturn_t action_ret; | ||
| 798 | |||
| 786 | raw_spin_unlock_irq(&desc->lock); | 799 | raw_spin_unlock_irq(&desc->lock); |
| 787 | handler_fn(desc, action); | 800 | action_ret = handler_fn(desc, action); |
| 801 | if (!noirqdebug) | ||
| 802 | note_interrupt(action->irq, desc, action_ret); | ||
| 788 | } | 803 | } |
| 789 | 804 | ||
| 790 | wake = atomic_dec_and_test(&desc->threads_active); | 805 | wake = atomic_dec_and_test(&desc->threads_active); |
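Because the threaded handler's return value is now passed to note_interrupt() (when noirqdebug is off), a thread function that keeps returning IRQ_NONE is subject to the same spurious-interrupt accounting as a primary handler. A minimal sketch of the two halves of a threaded handler under the new rules (the foo_* helpers are invented):

	static irqreturn_t foo_quick_check(int irq, void *dev_id)
	{
		if (!foo_irq_pending(dev_id))	/* invented helper */
			return IRQ_NONE;	/* counted as unhandled */
		return IRQ_WAKE_THREAD;		/* note_interrupt() now skips this */
	}

	static irqreturn_t foo_thread_fn(int irq, void *dev_id)
	{
		foo_process_events(dev_id);	/* invented helper, may sleep */
		return IRQ_HANDLED;		/* also fed to note_interrupt() now */
	}

	/* request_threaded_irq(irq, foo_quick_check, foo_thread_fn, 0, "foo", dev); */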
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index dfbd550401b2..aa57d5da18c1 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c | |||
| @@ -167,6 +167,13 @@ out: | |||
| 167 | jiffies + POLL_SPURIOUS_IRQ_INTERVAL); | 167 | jiffies + POLL_SPURIOUS_IRQ_INTERVAL); |
| 168 | } | 168 | } |
| 169 | 169 | ||
| 170 | static inline int bad_action_ret(irqreturn_t action_ret) | ||
| 171 | { | ||
| 172 | if (likely(action_ret <= (IRQ_HANDLED | IRQ_WAKE_THREAD))) | ||
| 173 | return 0; | ||
| 174 | return 1; | ||
| 175 | } | ||
| 176 | |||
| 170 | /* | 177 | /* |
| 171 | * If 99,900 of the previous 100,000 interrupts have not been handled | 178 | * If 99,900 of the previous 100,000 interrupts have not been handled |
| 172 | * then assume that the IRQ is stuck in some manner. Drop a diagnostic | 179 | * then assume that the IRQ is stuck in some manner. Drop a diagnostic |
| @@ -182,7 +189,7 @@ __report_bad_irq(unsigned int irq, struct irq_desc *desc, | |||
| 182 | struct irqaction *action; | 189 | struct irqaction *action; |
| 183 | unsigned long flags; | 190 | unsigned long flags; |
| 184 | 191 | ||
| 185 | if (action_ret != IRQ_HANDLED && action_ret != IRQ_NONE) { | 192 | if (bad_action_ret(action_ret)) { |
| 186 | printk(KERN_ERR "irq event %d: bogus return value %x\n", | 193 | printk(KERN_ERR "irq event %d: bogus return value %x\n", |
| 187 | irq, action_ret); | 194 | irq, action_ret); |
| 188 | } else { | 195 | } else { |
| @@ -201,10 +208,11 @@ __report_bad_irq(unsigned int irq, struct irq_desc *desc, | |||
| 201 | raw_spin_lock_irqsave(&desc->lock, flags); | 208 | raw_spin_lock_irqsave(&desc->lock, flags); |
| 202 | action = desc->action; | 209 | action = desc->action; |
| 203 | while (action) { | 210 | while (action) { |
| 204 | printk(KERN_ERR "[<%p>]", action->handler); | 211 | printk(KERN_ERR "[<%p>] %pf", action->handler, action->handler); |
| 205 | print_symbol(" (%s)", | 212 | if (action->thread_fn) |
| 206 | (unsigned long)action->handler); | 213 | printk(KERN_CONT " threaded [<%p>] %pf", |
| 207 | printk("\n"); | 214 | action->thread_fn, action->thread_fn); |
| 215 | printk(KERN_CONT "\n"); | ||
| 208 | action = action->next; | 216 | action = action->next; |
| 209 | } | 217 | } |
| 210 | raw_spin_unlock_irqrestore(&desc->lock, flags); | 218 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
| @@ -262,7 +270,16 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc, | |||
| 262 | if (desc->istate & IRQS_POLL_INPROGRESS) | 270 | if (desc->istate & IRQS_POLL_INPROGRESS) |
| 263 | return; | 271 | return; |
| 264 | 272 | ||
| 265 | if (unlikely(action_ret != IRQ_HANDLED)) { | 273 | /* we get here again via the threaded handler */ |
| 274 | if (action_ret == IRQ_WAKE_THREAD) | ||
| 275 | return; | ||
| 276 | |||
| 277 | if (bad_action_ret(action_ret)) { | ||
| 278 | report_bad_irq(irq, desc, action_ret); | ||
| 279 | return; | ||
| 280 | } | ||
| 281 | |||
| 282 | if (unlikely(action_ret == IRQ_NONE)) { | ||
| 266 | /* | 283 | /* |
| 267 | * If we are seeing only the odd spurious IRQ caused by | 284 | * If we are seeing only the odd spurious IRQ caused by |
| 268 | * bus asynchronicity then don't eventually trigger an error, | 285 | * bus asynchronicity then don't eventually trigger an error, |
| @@ -274,8 +291,6 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc, | |||
| 274 | else | 291 | else |
| 275 | desc->irqs_unhandled++; | 292 | desc->irqs_unhandled++; |
| 276 | desc->last_unhandled = jiffies; | 293 | desc->last_unhandled = jiffies; |
| 277 | if (unlikely(action_ret != IRQ_NONE)) | ||
| 278 | report_bad_irq(irq, desc, action_ret); | ||
| 279 | } | 294 | } |
| 280 | 295 | ||
| 281 | if (unlikely(try_misrouted_irq(irq, desc, action_ret))) { | 296 | if (unlikely(try_misrouted_irq(irq, desc, action_ret))) { |
diff --git a/kernel/jump_label.c b/kernel/jump_label.c index fa27e750dbc0..a8ce45097f3d 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c | |||
| @@ -375,15 +375,19 @@ int jump_label_text_reserved(void *start, void *end) | |||
| 375 | 375 | ||
| 376 | static void jump_label_update(struct jump_label_key *key, int enable) | 376 | static void jump_label_update(struct jump_label_key *key, int enable) |
| 377 | { | 377 | { |
| 378 | struct jump_entry *entry = key->entries; | 378 | struct jump_entry *entry = key->entries, *stop = __stop___jump_table; |
| 379 | |||
| 380 | /* if there are no users, entry can be NULL */ | ||
| 381 | if (entry) | ||
| 382 | __jump_label_update(key, entry, __stop___jump_table, enable); | ||
| 383 | 379 | ||
| 384 | #ifdef CONFIG_MODULES | 380 | #ifdef CONFIG_MODULES |
| 381 | struct module *mod = __module_address((jump_label_t)key); | ||
| 382 | |||
| 385 | __jump_label_mod_update(key, enable); | 383 | __jump_label_mod_update(key, enable); |
| 384 | |||
| 385 | if (mod) | ||
| 386 | stop = mod->jump_entries + mod->num_jump_entries; | ||
| 386 | #endif | 387 | #endif |
| 388 | /* if there are no users, entry can be NULL */ | ||
| 389 | if (entry) | ||
| 390 | __jump_label_update(key, entry, stop, enable); | ||
| 387 | } | 391 | } |
| 388 | 392 | ||
| 389 | #endif | 393 | #endif |
diff --git a/kernel/kexec.c b/kernel/kexec.c index 8d814cbc8109..296fbc84d659 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c | |||
| @@ -1095,7 +1095,7 @@ size_t crash_get_memory_size(void) | |||
| 1095 | size_t size = 0; | 1095 | size_t size = 0; |
| 1096 | mutex_lock(&kexec_mutex); | 1096 | mutex_lock(&kexec_mutex); |
| 1097 | if (crashk_res.end != crashk_res.start) | 1097 | if (crashk_res.end != crashk_res.start) |
| 1098 | size = crashk_res.end - crashk_res.start + 1; | 1098 | size = resource_size(&crashk_res); |
| 1099 | mutex_unlock(&kexec_mutex); | 1099 | mutex_unlock(&kexec_mutex); |
| 1100 | return size; | 1100 | return size; |
| 1101 | } | 1101 | } |
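resource_size() is the stock helper for the end - start + 1 arithmetic and avoids the classic off-by-one; its definition in include/linux/ioport.h is essentially:

	static inline resource_size_t resource_size(const struct resource *res)
	{
		return res->end - res->start + 1;
	}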
diff --git a/kernel/kmod.c b/kernel/kmod.c index ad6a81c58b44..ddc7644c1305 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c | |||
| @@ -156,12 +156,6 @@ static int ____call_usermodehelper(void *data) | |||
| 156 | */ | 156 | */ |
| 157 | set_user_nice(current, 0); | 157 | set_user_nice(current, 0); |
| 158 | 158 | ||
| 159 | if (sub_info->init) { | ||
| 160 | retval = sub_info->init(sub_info); | ||
| 161 | if (retval) | ||
| 162 | goto fail; | ||
| 163 | } | ||
| 164 | |||
| 165 | retval = -ENOMEM; | 159 | retval = -ENOMEM; |
| 166 | new = prepare_kernel_cred(current); | 160 | new = prepare_kernel_cred(current); |
| 167 | if (!new) | 161 | if (!new) |
| @@ -173,6 +167,14 @@ static int ____call_usermodehelper(void *data) | |||
| 173 | new->cap_inheritable); | 167 | new->cap_inheritable); |
| 174 | spin_unlock(&umh_sysctl_lock); | 168 | spin_unlock(&umh_sysctl_lock); |
| 175 | 169 | ||
| 170 | if (sub_info->init) { | ||
| 171 | retval = sub_info->init(sub_info, new); | ||
| 172 | if (retval) { | ||
| 173 | abort_creds(new); | ||
| 174 | goto fail; | ||
| 175 | } | ||
| 176 | } | ||
| 177 | |||
| 176 | commit_creds(new); | 178 | commit_creds(new); |
| 177 | 179 | ||
| 178 | retval = kernel_execve(sub_info->path, | 180 | retval = kernel_execve(sub_info->path, |
| @@ -272,7 +274,7 @@ static void __call_usermodehelper(struct work_struct *work) | |||
| 272 | * (used for preventing user land processes from being created after the user | 274 | * (used for preventing user land processes from being created after the user |
| 273 | * land has been frozen during a system-wide hibernation or suspend operation). | 275 | * land has been frozen during a system-wide hibernation or suspend operation). |
| 274 | */ | 276 | */ |
| 275 | static int usermodehelper_disabled; | 277 | static int usermodehelper_disabled = 1; |
| 276 | 278 | ||
| 277 | /* Number of helpers running */ | 279 | /* Number of helpers running */ |
| 278 | static atomic_t running_helpers = ATOMIC_INIT(0); | 280 | static atomic_t running_helpers = ATOMIC_INIT(0); |
| @@ -388,7 +390,7 @@ EXPORT_SYMBOL(call_usermodehelper_setup); | |||
| 388 | * context in which call_usermodehelper_exec is called. | 390 | * context in which call_usermodehelper_exec is called. |
| 389 | */ | 391 | */ |
| 390 | void call_usermodehelper_setfns(struct subprocess_info *info, | 392 | void call_usermodehelper_setfns(struct subprocess_info *info, |
| 391 | int (*init)(struct subprocess_info *info), | 393 | int (*init)(struct subprocess_info *info, struct cred *new), |
| 392 | void (*cleanup)(struct subprocess_info *info), | 394 | void (*cleanup)(struct subprocess_info *info), |
| 393 | void *data) | 395 | void *data) |
| 394 | { | 396 | { |
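With this change the init() hook runs in the helper task after prepare_kernel_cred(), receives the not-yet-committed credentials, and can veto the exec by returning non-zero (in which case the creds are aborted). A hedged sketch of a caller, assuming this kernel's call_usermodehelper_setup()/_exec() interfaces and an invented helper path:

	static int foo_umh_init(struct subprocess_info *info, struct cred *new)
	{
		/* adjust the prepared creds here: fsuid, keyrings, caps, ... */
		return 0;	/* non-zero aborts the creds and the helper */
	}

	static int foo_run_helper(void)
	{
		char *argv[] = { "/sbin/foo-helper", NULL };	/* invented path */
		char *envp[] = { "HOME=/", "PATH=/sbin:/bin", NULL };
		struct subprocess_info *info;

		info = call_usermodehelper_setup(argv[0], argv, envp, GFP_KERNEL);
		if (!info)
			return -ENOMEM;

		call_usermodehelper_setfns(info, foo_umh_init, NULL, NULL);
		return call_usermodehelper_exec(info, UMH_WAIT_PROC);
	}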
diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 77981813a1e7..b30fd54eb985 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c | |||
| @@ -1255,19 +1255,29 @@ static int __kprobes in_kprobes_functions(unsigned long addr) | |||
| 1255 | /* | 1255 | /* |
| 1256 | * If we have a symbol_name argument, look it up and add the offset field | 1256 | * If we have a symbol_name argument, look it up and add the offset field |
| 1257 | * to it. This way, we can specify a relative address to a symbol. | 1257 | * to it. This way, we can specify a relative address to a symbol. |
| 1258 | * This returns encoded errors if it fails to look up symbol or invalid | ||
| 1259 | * combination of parameters. | ||
| 1258 | */ | 1260 | */ |
| 1259 | static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p) | 1261 | static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p) |
| 1260 | { | 1262 | { |
| 1261 | kprobe_opcode_t *addr = p->addr; | 1263 | kprobe_opcode_t *addr = p->addr; |
| 1264 | |||
| 1265 | if ((p->symbol_name && p->addr) || | ||
| 1266 | (!p->symbol_name && !p->addr)) | ||
| 1267 | goto invalid; | ||
| 1268 | |||
| 1262 | if (p->symbol_name) { | 1269 | if (p->symbol_name) { |
| 1263 | if (addr) | ||
| 1264 | return NULL; | ||
| 1265 | kprobe_lookup_name(p->symbol_name, addr); | 1270 | kprobe_lookup_name(p->symbol_name, addr); |
| 1271 | if (!addr) | ||
| 1272 | return ERR_PTR(-ENOENT); | ||
| 1266 | } | 1273 | } |
| 1267 | 1274 | ||
| 1268 | if (!addr) | 1275 | addr = (kprobe_opcode_t *)(((char *)addr) + p->offset); |
| 1269 | return NULL; | 1276 | if (addr) |
| 1270 | return (kprobe_opcode_t *)(((char *)addr) + p->offset); | 1277 | return addr; |
| 1278 | |||
| 1279 | invalid: | ||
| 1280 | return ERR_PTR(-EINVAL); | ||
| 1271 | } | 1281 | } |
| 1272 | 1282 | ||
| 1273 | /* Check passed kprobe is valid and return kprobe in kprobe_table. */ | 1283 | /* Check passed kprobe is valid and return kprobe in kprobe_table. */ |
| @@ -1311,8 +1321,8 @@ int __kprobes register_kprobe(struct kprobe *p) | |||
| 1311 | kprobe_opcode_t *addr; | 1321 | kprobe_opcode_t *addr; |
| 1312 | 1322 | ||
| 1313 | addr = kprobe_addr(p); | 1323 | addr = kprobe_addr(p); |
| 1314 | if (!addr) | 1324 | if (IS_ERR(addr)) |
| 1315 | return -EINVAL; | 1325 | return PTR_ERR(addr); |
| 1316 | p->addr = addr; | 1326 | p->addr = addr; |
| 1317 | 1327 | ||
| 1318 | ret = check_kprobe_rereg(p); | 1328 | ret = check_kprobe_rereg(p); |
| @@ -1335,6 +1345,8 @@ int __kprobes register_kprobe(struct kprobe *p) | |||
| 1335 | */ | 1345 | */ |
| 1336 | probed_mod = __module_text_address((unsigned long) p->addr); | 1346 | probed_mod = __module_text_address((unsigned long) p->addr); |
| 1337 | if (probed_mod) { | 1347 | if (probed_mod) { |
| 1348 | /* Return -ENOENT if fail. */ | ||
| 1349 | ret = -ENOENT; | ||
| 1338 | /* | 1350 | /* |
| 1339 | * We must hold a refcount of the probed module while updating | 1351 | * We must hold a refcount of the probed module while updating |
| 1340 | * its code to prohibit unexpected unloading. | 1352 | * its code to prohibit unexpected unloading. |
| @@ -1351,6 +1363,7 @@ int __kprobes register_kprobe(struct kprobe *p) | |||
| 1351 | module_put(probed_mod); | 1363 | module_put(probed_mod); |
| 1352 | goto fail_with_jump_label; | 1364 | goto fail_with_jump_label; |
| 1353 | } | 1365 | } |
| 1366 | /* ret will be updated by following code */ | ||
| 1354 | } | 1367 | } |
| 1355 | preempt_enable(); | 1368 | preempt_enable(); |
| 1356 | jump_label_unlock(); | 1369 | jump_label_unlock(); |
| @@ -1399,7 +1412,7 @@ out: | |||
| 1399 | fail_with_jump_label: | 1412 | fail_with_jump_label: |
| 1400 | preempt_enable(); | 1413 | preempt_enable(); |
| 1401 | jump_label_unlock(); | 1414 | jump_label_unlock(); |
| 1402 | return -EINVAL; | 1415 | return ret; |
| 1403 | } | 1416 | } |
| 1404 | EXPORT_SYMBOL_GPL(register_kprobe); | 1417 | EXPORT_SYMBOL_GPL(register_kprobe); |
| 1405 | 1418 | ||
| @@ -1686,8 +1699,8 @@ int __kprobes register_kretprobe(struct kretprobe *rp) | |||
| 1686 | 1699 | ||
| 1687 | if (kretprobe_blacklist_size) { | 1700 | if (kretprobe_blacklist_size) { |
| 1688 | addr = kprobe_addr(&rp->kp); | 1701 | addr = kprobe_addr(&rp->kp); |
| 1689 | if (!addr) | 1702 | if (IS_ERR(addr)) |
| 1690 | return -EINVAL; | 1703 | return PTR_ERR(addr); |
| 1691 | 1704 | ||
| 1692 | for (i = 0; kretprobe_blacklist[i].name != NULL; i++) { | 1705 | for (i = 0; kretprobe_blacklist[i].name != NULL; i++) { |
| 1693 | if (kretprobe_blacklist[i].addr == addr) | 1706 | if (kretprobe_blacklist[i].addr == addr) |
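For users of the API nothing changes in shape; register_kprobe() simply returns a more precise error now (-ENOENT when the symbol cannot be resolved, -EINVAL when both or neither of symbol_name and addr are given). A minimal registration sketch in the style of samples/kprobes:

	static int handler_pre(struct kprobe *p, struct pt_regs *regs)
	{
		pr_info("do_fork entered\n");
		return 0;
	}

	static struct kprobe kp = {
		.symbol_name	= "do_fork",	/* exactly one of symbol_name/addr */
		.pre_handler	= handler_pre,
	};

	static int __init foo_kprobe_init(void)
	{
		int ret = register_kprobe(&kp);

		if (ret < 0)	/* e.g. -ENOENT for an unresolvable symbol */
			pr_err("register_kprobe failed: %d\n", ret);
		return ret;
	}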
diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 63437d065ac8..8c24294e477f 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c | |||
| @@ -2468,6 +2468,9 @@ mark_held_locks(struct task_struct *curr, enum mark_type mark) | |||
| 2468 | 2468 | ||
| 2469 | BUG_ON(usage_bit >= LOCK_USAGE_STATES); | 2469 | BUG_ON(usage_bit >= LOCK_USAGE_STATES); |
| 2470 | 2470 | ||
| 2471 | if (hlock_class(hlock)->key == __lockdep_no_validate__.subkeys) | ||
| 2472 | continue; | ||
| 2473 | |||
| 2471 | if (!mark_lock(curr, hlock, usage_bit)) | 2474 | if (!mark_lock(curr, hlock, usage_bit)) |
| 2472 | return 0; | 2475 | return 0; |
| 2473 | } | 2476 | } |
| @@ -2478,34 +2481,13 @@ mark_held_locks(struct task_struct *curr, enum mark_type mark) | |||
| 2478 | /* | 2481 | /* |
| 2479 | * Hardirqs will be enabled: | 2482 | * Hardirqs will be enabled: |
| 2480 | */ | 2483 | */ |
| 2481 | void trace_hardirqs_on_caller(unsigned long ip) | 2484 | static void __trace_hardirqs_on_caller(unsigned long ip) |
| 2482 | { | 2485 | { |
| 2483 | struct task_struct *curr = current; | 2486 | struct task_struct *curr = current; |
| 2484 | 2487 | ||
| 2485 | time_hardirqs_on(CALLER_ADDR0, ip); | ||
| 2486 | |||
| 2487 | if (unlikely(!debug_locks || current->lockdep_recursion)) | ||
| 2488 | return; | ||
| 2489 | |||
| 2490 | if (DEBUG_LOCKS_WARN_ON(unlikely(early_boot_irqs_disabled))) | ||
| 2491 | return; | ||
| 2492 | |||
| 2493 | if (unlikely(curr->hardirqs_enabled)) { | ||
| 2494 | /* | ||
| 2495 | * Neither irq nor preemption are disabled here | ||
| 2496 | * so this is racy by nature but losing one hit | ||
| 2497 | * in a stat is not a big deal. | ||
| 2498 | */ | ||
| 2499 | __debug_atomic_inc(redundant_hardirqs_on); | ||
| 2500 | return; | ||
| 2501 | } | ||
| 2502 | /* we'll do an OFF -> ON transition: */ | 2488 | /* we'll do an OFF -> ON transition: */ |
| 2503 | curr->hardirqs_enabled = 1; | 2489 | curr->hardirqs_enabled = 1; |
| 2504 | 2490 | ||
| 2505 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) | ||
| 2506 | return; | ||
| 2507 | if (DEBUG_LOCKS_WARN_ON(current->hardirq_context)) | ||
| 2508 | return; | ||
| 2509 | /* | 2491 | /* |
| 2510 | * We are going to turn hardirqs on, so set the | 2492 | * We are going to turn hardirqs on, so set the |
| 2511 | * usage bit for all held locks: | 2493 | * usage bit for all held locks: |
| @@ -2525,6 +2507,37 @@ void trace_hardirqs_on_caller(unsigned long ip) | |||
| 2525 | curr->hardirq_enable_event = ++curr->irq_events; | 2507 | curr->hardirq_enable_event = ++curr->irq_events; |
| 2526 | debug_atomic_inc(hardirqs_on_events); | 2508 | debug_atomic_inc(hardirqs_on_events); |
| 2527 | } | 2509 | } |
| 2510 | |||
| 2511 | void trace_hardirqs_on_caller(unsigned long ip) | ||
| 2512 | { | ||
| 2513 | time_hardirqs_on(CALLER_ADDR0, ip); | ||
| 2514 | |||
| 2515 | if (unlikely(!debug_locks || current->lockdep_recursion)) | ||
| 2516 | return; | ||
| 2517 | |||
| 2518 | if (unlikely(current->hardirqs_enabled)) { | ||
| 2519 | /* | ||
| 2520 | * Neither irq nor preemption are disabled here | ||
| 2521 | * so this is racy by nature but losing one hit | ||
| 2522 | * in a stat is not a big deal. | ||
| 2523 | */ | ||
| 2524 | __debug_atomic_inc(redundant_hardirqs_on); | ||
| 2525 | return; | ||
| 2526 | } | ||
| 2527 | |||
| 2528 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) | ||
| 2529 | return; | ||
| 2530 | |||
| 2531 | if (DEBUG_LOCKS_WARN_ON(unlikely(early_boot_irqs_disabled))) | ||
| 2532 | return; | ||
| 2533 | |||
| 2534 | if (DEBUG_LOCKS_WARN_ON(current->hardirq_context)) | ||
| 2535 | return; | ||
| 2536 | |||
| 2537 | current->lockdep_recursion = 1; | ||
| 2538 | __trace_hardirqs_on_caller(ip); | ||
| 2539 | current->lockdep_recursion = 0; | ||
| 2540 | } | ||
| 2528 | EXPORT_SYMBOL(trace_hardirqs_on_caller); | 2541 | EXPORT_SYMBOL(trace_hardirqs_on_caller); |
| 2529 | 2542 | ||
| 2530 | void trace_hardirqs_on(void) | 2543 | void trace_hardirqs_on(void) |
| @@ -2574,7 +2587,7 @@ void trace_softirqs_on(unsigned long ip) | |||
| 2574 | { | 2587 | { |
| 2575 | struct task_struct *curr = current; | 2588 | struct task_struct *curr = current; |
| 2576 | 2589 | ||
| 2577 | if (unlikely(!debug_locks)) | 2590 | if (unlikely(!debug_locks || current->lockdep_recursion)) |
| 2578 | return; | 2591 | return; |
| 2579 | 2592 | ||
| 2580 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) | 2593 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) |
| @@ -2585,6 +2598,7 @@ void trace_softirqs_on(unsigned long ip) | |||
| 2585 | return; | 2598 | return; |
| 2586 | } | 2599 | } |
| 2587 | 2600 | ||
| 2601 | current->lockdep_recursion = 1; | ||
| 2588 | /* | 2602 | /* |
| 2589 | * We'll do an OFF -> ON transition: | 2603 | * We'll do an OFF -> ON transition: |
| 2590 | */ | 2604 | */ |
| @@ -2599,6 +2613,7 @@ void trace_softirqs_on(unsigned long ip) | |||
| 2599 | */ | 2613 | */ |
| 2600 | if (curr->hardirqs_enabled) | 2614 | if (curr->hardirqs_enabled) |
| 2601 | mark_held_locks(curr, SOFTIRQ); | 2615 | mark_held_locks(curr, SOFTIRQ); |
| 2616 | current->lockdep_recursion = 0; | ||
| 2602 | } | 2617 | } |
| 2603 | 2618 | ||
| 2604 | /* | 2619 | /* |
| @@ -2608,7 +2623,7 @@ void trace_softirqs_off(unsigned long ip) | |||
| 2608 | { | 2623 | { |
| 2609 | struct task_struct *curr = current; | 2624 | struct task_struct *curr = current; |
| 2610 | 2625 | ||
| 2611 | if (unlikely(!debug_locks)) | 2626 | if (unlikely(!debug_locks || current->lockdep_recursion)) |
| 2612 | return; | 2627 | return; |
| 2613 | 2628 | ||
| 2614 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) | 2629 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) |
| @@ -2859,10 +2874,7 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this, | |||
| 2859 | void lockdep_init_map(struct lockdep_map *lock, const char *name, | 2874 | void lockdep_init_map(struct lockdep_map *lock, const char *name, |
| 2860 | struct lock_class_key *key, int subclass) | 2875 | struct lock_class_key *key, int subclass) |
| 2861 | { | 2876 | { |
| 2862 | int i; | 2877 | memset(lock, 0, sizeof(*lock)); |
| 2863 | |||
| 2864 | for (i = 0; i < NR_LOCKDEP_CACHING_CLASSES; i++) | ||
| 2865 | lock->class_cache[i] = NULL; | ||
| 2866 | 2878 | ||
| 2867 | #ifdef CONFIG_LOCK_STAT | 2879 | #ifdef CONFIG_LOCK_STAT |
| 2868 | lock->cpu = raw_smp_processor_id(); | 2880 | lock->cpu = raw_smp_processor_id(); |
| @@ -3426,7 +3438,7 @@ int lock_is_held(struct lockdep_map *lock) | |||
| 3426 | int ret = 0; | 3438 | int ret = 0; |
| 3427 | 3439 | ||
| 3428 | if (unlikely(current->lockdep_recursion)) | 3440 | if (unlikely(current->lockdep_recursion)) |
| 3429 | return ret; | 3441 | return 1; /* avoid false negative lockdep_assert_held() */ |
| 3430 | 3442 | ||
| 3431 | raw_local_irq_save(flags); | 3443 | raw_local_irq_save(flags); |
| 3432 | check_flags(flags); | 3444 | check_flags(flags); |
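The lock_is_held() change is aimed at lockdep_assert_held(): while lockdep is recursing it now reports 'held' rather than 'not held', so assertions placed in code that lockdep itself may reach do not fire spuriously. Typical assertion usage, for reference (foo_lock and foo_update are invented names):

	static DEFINE_SPINLOCK(foo_lock);

	static void foo_update(void)		/* caller must hold foo_lock */
	{
		lockdep_assert_held(&foo_lock);	/* ends up in lock_is_held() */
		/* ... modify state protected by foo_lock ... */
	}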
diff --git a/kernel/module.c b/kernel/module.c index 795bdc7f5c3f..04379f92f843 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
| @@ -545,9 +545,9 @@ static void setup_modinfo_##field(struct module *mod, const char *s) \ | |||
| 545 | mod->field = kstrdup(s, GFP_KERNEL); \ | 545 | mod->field = kstrdup(s, GFP_KERNEL); \ |
| 546 | } \ | 546 | } \ |
| 547 | static ssize_t show_modinfo_##field(struct module_attribute *mattr, \ | 547 | static ssize_t show_modinfo_##field(struct module_attribute *mattr, \ |
| 548 | struct module *mod, char *buffer) \ | 548 | struct module_kobject *mk, char *buffer) \ |
| 549 | { \ | 549 | { \ |
| 550 | return sprintf(buffer, "%s\n", mod->field); \ | 550 | return sprintf(buffer, "%s\n", mk->mod->field); \ |
| 551 | } \ | 551 | } \ |
| 552 | static int modinfo_##field##_exists(struct module *mod) \ | 552 | static int modinfo_##field##_exists(struct module *mod) \ |
| 553 | { \ | 553 | { \ |
| @@ -902,9 +902,9 @@ void symbol_put_addr(void *addr) | |||
| 902 | EXPORT_SYMBOL_GPL(symbol_put_addr); | 902 | EXPORT_SYMBOL_GPL(symbol_put_addr); |
| 903 | 903 | ||
| 904 | static ssize_t show_refcnt(struct module_attribute *mattr, | 904 | static ssize_t show_refcnt(struct module_attribute *mattr, |
| 905 | struct module *mod, char *buffer) | 905 | struct module_kobject *mk, char *buffer) |
| 906 | { | 906 | { |
| 907 | return sprintf(buffer, "%u\n", module_refcount(mod)); | 907 | return sprintf(buffer, "%u\n", module_refcount(mk->mod)); |
| 908 | } | 908 | } |
| 909 | 909 | ||
| 910 | static struct module_attribute refcnt = { | 910 | static struct module_attribute refcnt = { |
| @@ -952,11 +952,11 @@ static inline int module_unload_init(struct module *mod) | |||
| 952 | #endif /* CONFIG_MODULE_UNLOAD */ | 952 | #endif /* CONFIG_MODULE_UNLOAD */ |
| 953 | 953 | ||
| 954 | static ssize_t show_initstate(struct module_attribute *mattr, | 954 | static ssize_t show_initstate(struct module_attribute *mattr, |
| 955 | struct module *mod, char *buffer) | 955 | struct module_kobject *mk, char *buffer) |
| 956 | { | 956 | { |
| 957 | const char *state = "unknown"; | 957 | const char *state = "unknown"; |
| 958 | 958 | ||
| 959 | switch (mod->state) { | 959 | switch (mk->mod->state) { |
| 960 | case MODULE_STATE_LIVE: | 960 | case MODULE_STATE_LIVE: |
| 961 | state = "live"; | 961 | state = "live"; |
| 962 | break; | 962 | break; |
| @@ -975,10 +975,27 @@ static struct module_attribute initstate = { | |||
| 975 | .show = show_initstate, | 975 | .show = show_initstate, |
| 976 | }; | 976 | }; |
| 977 | 977 | ||
| 978 | static ssize_t store_uevent(struct module_attribute *mattr, | ||
| 979 | struct module_kobject *mk, | ||
| 980 | const char *buffer, size_t count) | ||
| 981 | { | ||
| 982 | enum kobject_action action; | ||
| 983 | |||
| 984 | if (kobject_action_type(buffer, count, &action) == 0) | ||
| 985 | kobject_uevent(&mk->kobj, action); | ||
| 986 | return count; | ||
| 987 | } | ||
| 988 | |||
| 989 | struct module_attribute module_uevent = { | ||
| 990 | .attr = { .name = "uevent", .mode = 0200 }, | ||
| 991 | .store = store_uevent, | ||
| 992 | }; | ||
| 993 | |||
| 978 | static struct module_attribute *modinfo_attrs[] = { | 994 | static struct module_attribute *modinfo_attrs[] = { |
| 979 | &modinfo_version, | 995 | &modinfo_version, |
| 980 | &modinfo_srcversion, | 996 | &modinfo_srcversion, |
| 981 | &initstate, | 997 | &initstate, |
| 998 | &module_uevent, | ||
| 982 | #ifdef CONFIG_MODULE_UNLOAD | 999 | #ifdef CONFIG_MODULE_UNLOAD |
| 983 | &refcnt, | 1000 | &refcnt, |
| 984 | #endif | 1001 | #endif |
| @@ -1187,7 +1204,7 @@ struct module_sect_attrs | |||
| 1187 | }; | 1204 | }; |
| 1188 | 1205 | ||
| 1189 | static ssize_t module_sect_show(struct module_attribute *mattr, | 1206 | static ssize_t module_sect_show(struct module_attribute *mattr, |
| 1190 | struct module *mod, char *buf) | 1207 | struct module_kobject *mk, char *buf) |
| 1191 | { | 1208 | { |
| 1192 | struct module_sect_attr *sattr = | 1209 | struct module_sect_attr *sattr = |
| 1193 | container_of(mattr, struct module_sect_attr, mattr); | 1210 | container_of(mattr, struct module_sect_attr, mattr); |
| @@ -1697,6 +1714,15 @@ static void unset_module_core_ro_nx(struct module *mod) { } | |||
| 1697 | static void unset_module_init_ro_nx(struct module *mod) { } | 1714 | static void unset_module_init_ro_nx(struct module *mod) { } |
| 1698 | #endif | 1715 | #endif |
| 1699 | 1716 | ||
| 1717 | void __weak module_free(struct module *mod, void *module_region) | ||
| 1718 | { | ||
| 1719 | vfree(module_region); | ||
| 1720 | } | ||
| 1721 | |||
| 1722 | void __weak module_arch_cleanup(struct module *mod) | ||
| 1723 | { | ||
| 1724 | } | ||
| 1725 | |||
| 1700 | /* Free a module, remove from lists, etc. */ | 1726 | /* Free a module, remove from lists, etc. */ |
| 1701 | static void free_module(struct module *mod) | 1727 | static void free_module(struct module *mod) |
| 1702 | { | 1728 | { |
| @@ -1851,6 +1877,26 @@ static int simplify_symbols(struct module *mod, const struct load_info *info) | |||
| 1851 | return ret; | 1877 | return ret; |
| 1852 | } | 1878 | } |
| 1853 | 1879 | ||
| 1880 | int __weak apply_relocate(Elf_Shdr *sechdrs, | ||
| 1881 | const char *strtab, | ||
| 1882 | unsigned int symindex, | ||
| 1883 | unsigned int relsec, | ||
| 1884 | struct module *me) | ||
| 1885 | { | ||
| 1886 | pr_err("module %s: REL relocation unsupported\n", me->name); | ||
| 1887 | return -ENOEXEC; | ||
| 1888 | } | ||
| 1889 | |||
| 1890 | int __weak apply_relocate_add(Elf_Shdr *sechdrs, | ||
| 1891 | const char *strtab, | ||
| 1892 | unsigned int symindex, | ||
| 1893 | unsigned int relsec, | ||
| 1894 | struct module *me) | ||
| 1895 | { | ||
| 1896 | pr_err("module %s: RELA relocation unsupported\n", me->name); | ||
| 1897 | return -ENOEXEC; | ||
| 1898 | } | ||
| 1899 | |||
| 1854 | static int apply_relocations(struct module *mod, const struct load_info *info) | 1900 | static int apply_relocations(struct module *mod, const struct load_info *info) |
| 1855 | { | 1901 | { |
| 1856 | unsigned int i; | 1902 | unsigned int i; |
| @@ -2235,6 +2281,11 @@ static void dynamic_debug_remove(struct _ddebug *debug) | |||
| 2235 | ddebug_remove_module(debug->modname); | 2281 | ddebug_remove_module(debug->modname); |
| 2236 | } | 2282 | } |
| 2237 | 2283 | ||
| 2284 | void * __weak module_alloc(unsigned long size) | ||
| 2285 | { | ||
| 2286 | return size == 0 ? NULL : vmalloc_exec(size); | ||
| 2287 | } | ||
| 2288 | |||
| 2238 | static void *module_alloc_update_bounds(unsigned long size) | 2289 | static void *module_alloc_update_bounds(unsigned long size) |
| 2239 | { | 2290 | { |
| 2240 | void *ret = module_alloc(size); | 2291 | void *ret = module_alloc(size); |
| @@ -2645,6 +2696,14 @@ static void flush_module_icache(const struct module *mod) | |||
| 2645 | set_fs(old_fs); | 2696 | set_fs(old_fs); |
| 2646 | } | 2697 | } |
| 2647 | 2698 | ||
| 2699 | int __weak module_frob_arch_sections(Elf_Ehdr *hdr, | ||
| 2700 | Elf_Shdr *sechdrs, | ||
| 2701 | char *secstrings, | ||
| 2702 | struct module *mod) | ||
| 2703 | { | ||
| 2704 | return 0; | ||
| 2705 | } | ||
| 2706 | |||
| 2648 | static struct module *layout_and_allocate(struct load_info *info) | 2707 | static struct module *layout_and_allocate(struct load_info *info) |
| 2649 | { | 2708 | { |
| 2650 | /* Module within temporary copy. */ | 2709 | /* Module within temporary copy. */ |
| @@ -2716,6 +2775,13 @@ static void module_deallocate(struct module *mod, struct load_info *info) | |||
| 2716 | module_free(mod, mod->module_core); | 2775 | module_free(mod, mod->module_core); |
| 2717 | } | 2776 | } |
| 2718 | 2777 | ||
| 2778 | int __weak module_finalize(const Elf_Ehdr *hdr, | ||
| 2779 | const Elf_Shdr *sechdrs, | ||
| 2780 | struct module *me) | ||
| 2781 | { | ||
| 2782 | return 0; | ||
| 2783 | } | ||
| 2784 | |||
| 2719 | static int post_relocation(struct module *mod, const struct load_info *info) | 2785 | static int post_relocation(struct module *mod, const struct load_info *info) |
| 2720 | { | 2786 | { |
| 2721 | /* Sort exception table now relocations are done. */ | 2787 | /* Sort exception table now relocations are done. */ |
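Several of the hunks above give the arch hooks (module_free(), module_arch_cleanup(), apply_relocate(), apply_relocate_add(), module_alloc(), module_frob_arch_sections(), module_finalize()) generic __weak definitions, so an architecture only has to provide the ones it actually customizes. A standalone sketch of the same weak-symbol mechanism, using a hypothetical arch_alloc() in userspace C (GCC/Clang attribute syntax):

/* weak_default.c: a generic fallback, analogous to the __weak module_alloc()
 * and module_finalize() defaults introduced by this patch. */
#include <stdio.h>
#include <stdlib.h>

/* Weak default: used unless some other object file linked into the program
 * provides a strong definition of the same symbol (the "arch override"). */
__attribute__((weak)) void *arch_alloc(size_t size)
{
	printf("generic arch_alloc(%zu)\n", size);
	return size == 0 ? NULL : malloc(size);
}

int main(void)
{
	void *p = arch_alloc(64);	/* resolves to the weak default here */

	free(p);
	return 0;
}

Linking in another object file that defines a non-weak arch_alloc() replaces the default at link time, which is how an architecture's own module code overrides these hooks without any #ifdef in the generic file.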
diff --git a/kernel/notifier.c b/kernel/notifier.c index 2488ba7eb568..8d7b435806c9 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c | |||
| @@ -525,37 +525,6 @@ void srcu_init_notifier_head(struct srcu_notifier_head *nh) | |||
| 525 | } | 525 | } |
| 526 | EXPORT_SYMBOL_GPL(srcu_init_notifier_head); | 526 | EXPORT_SYMBOL_GPL(srcu_init_notifier_head); |
| 527 | 527 | ||
| 528 | /** | ||
| 529 | * register_reboot_notifier - Register function to be called at reboot time | ||
| 530 | * @nb: Info about notifier function to be called | ||
| 531 | * | ||
| 532 | * Registers a function with the list of functions | ||
| 533 | * to be called at reboot time. | ||
| 534 | * | ||
| 535 | * Currently always returns zero, as blocking_notifier_chain_register() | ||
| 536 | * always returns zero. | ||
| 537 | */ | ||
| 538 | int register_reboot_notifier(struct notifier_block *nb) | ||
| 539 | { | ||
| 540 | return blocking_notifier_chain_register(&reboot_notifier_list, nb); | ||
| 541 | } | ||
| 542 | EXPORT_SYMBOL(register_reboot_notifier); | ||
| 543 | |||
| 544 | /** | ||
| 545 | * unregister_reboot_notifier - Unregister previously registered reboot notifier | ||
| 546 | * @nb: Hook to be unregistered | ||
| 547 | * | ||
| 548 | * Unregisters a previously registered reboot | ||
| 549 | * notifier function. | ||
| 550 | * | ||
| 551 | * Returns zero on success, or %-ENOENT on failure. | ||
| 552 | */ | ||
| 553 | int unregister_reboot_notifier(struct notifier_block *nb) | ||
| 554 | { | ||
| 555 | return blocking_notifier_chain_unregister(&reboot_notifier_list, nb); | ||
| 556 | } | ||
| 557 | EXPORT_SYMBOL(unregister_reboot_notifier); | ||
| 558 | |||
| 559 | static ATOMIC_NOTIFIER_HEAD(die_chain); | 528 | static ATOMIC_NOTIFIER_HEAD(die_chain); |
| 560 | 529 | ||
| 561 | int notrace __kprobes notify_die(enum die_val val, const char *str, | 530 | int notrace __kprobes notify_die(enum die_val val, const char *str, |
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index d6a00f3de15d..9aeab4b98c64 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c | |||
| @@ -271,10 +271,8 @@ out: | |||
| 271 | return err; | 271 | return err; |
| 272 | } | 272 | } |
| 273 | 273 | ||
| 274 | static int __init nsproxy_cache_init(void) | 274 | int __init nsproxy_cache_init(void) |
| 275 | { | 275 | { |
| 276 | nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC); | 276 | nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC); |
| 277 | return 0; | 277 | return 0; |
| 278 | } | 278 | } |
| 279 | |||
| 280 | module_init(nsproxy_cache_init); | ||
diff --git a/kernel/panic.c b/kernel/panic.c index 69231670eb95..d7bb6974efb5 100644 --- a/kernel/panic.c +++ b/kernel/panic.c | |||
| @@ -119,6 +119,8 @@ NORET_TYPE void panic(const char * fmt, ...) | |||
| 119 | } | 119 | } |
| 120 | mdelay(PANIC_TIMER_STEP); | 120 | mdelay(PANIC_TIMER_STEP); |
| 121 | } | 121 | } |
| 122 | } | ||
| 123 | if (panic_timeout != 0) { | ||
| 122 | /* | 124 | /* |
| 123 | * This will not be a clean reboot, with everything | 125 | * This will not be a clean reboot, with everything |
| 124 | * shutting down. But if there is a chance of | 126 | * shutting down. But if there is a chance of |
diff --git a/kernel/params.c b/kernel/params.c index ed72e1330862..22df3e0d142a 100644 --- a/kernel/params.c +++ b/kernel/params.c | |||
| @@ -225,8 +225,8 @@ int parse_args(const char *name, | |||
| 225 | int ret; \ | 225 | int ret; \ |
| 226 | \ | 226 | \ |
| 227 | ret = strtolfn(val, 0, &l); \ | 227 | ret = strtolfn(val, 0, &l); \ |
| 228 | if (ret == -EINVAL || ((type)l != l)) \ | 228 | if (ret < 0 || ((type)l != l)) \ |
| 229 | return -EINVAL; \ | 229 | return ret < 0 ? ret : -EINVAL; \ |
| 230 | *((type *)kp->arg) = l; \ | 230 | *((type *)kp->arg) = l; \ |
| 231 | return 0; \ | 231 | return 0; \ |
| 232 | } \ | 232 | } \ |
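The STANDARD_PARAM_DEF() change propagates the string parser's own error code (for example -ERANGE on overflow) instead of folding every failure into -EINVAL, and it now rejects any negative return rather than only -EINVAL. A userspace analog of the intended behavior, using strtol() (illustrative; the kernel side uses the kstrto* helpers):

/* Userspace analog of the STANDARD_PARAM_DEF() fix: report the parser's own
 * error (-ERANGE, -EINVAL, ...) instead of collapsing everything to -EINVAL. */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

static int parse_short(const char *val, short *out)
{
	char *end;
	long l;

	errno = 0;
	l = strtol(val, &end, 0);
	if (errno)
		return -errno;		/* e.g. -ERANGE: pass it through as-is */
	if (end == val || *end != '\0')
		return -EINVAL;		/* not a number at all */
	if ((short)l != l)
		return -EINVAL;		/* fits in long but not in the target type */
	*out = (short)l;
	return 0;
}

int main(void)
{
	short s;

	printf("\"123\"   -> %d\n", parse_short("123", &s));
	printf("\"99999\" -> %d\n", parse_short("99999", &s));	/* -EINVAL: too big for short */
	printf("huge    -> %d\n", parse_short("999999999999999999999", &s));	/* -ERANGE */
	printf("\"abc\"   -> %d\n", parse_short("abc", &s));	/* -EINVAL */
	return 0;
}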
| @@ -511,7 +511,7 @@ struct module_param_attrs | |||
| 511 | #define to_param_attr(n) container_of(n, struct param_attribute, mattr) | 511 | #define to_param_attr(n) container_of(n, struct param_attribute, mattr) |
| 512 | 512 | ||
| 513 | static ssize_t param_attr_show(struct module_attribute *mattr, | 513 | static ssize_t param_attr_show(struct module_attribute *mattr, |
| 514 | struct module *mod, char *buf) | 514 | struct module_kobject *mk, char *buf) |
| 515 | { | 515 | { |
| 516 | int count; | 516 | int count; |
| 517 | struct param_attribute *attribute = to_param_attr(mattr); | 517 | struct param_attribute *attribute = to_param_attr(mattr); |
| @@ -531,7 +531,7 @@ static ssize_t param_attr_show(struct module_attribute *mattr, | |||
| 531 | 531 | ||
| 532 | /* sysfs always hands a nul-terminated string in buf. We rely on that. */ | 532 | /* sysfs always hands a nul-terminated string in buf. We rely on that. */ |
| 533 | static ssize_t param_attr_store(struct module_attribute *mattr, | 533 | static ssize_t param_attr_store(struct module_attribute *mattr, |
| 534 | struct module *owner, | 534 | struct module_kobject *km, |
| 535 | const char *buf, size_t len) | 535 | const char *buf, size_t len) |
| 536 | { | 536 | { |
| 537 | int err; | 537 | int err; |
| @@ -730,6 +730,10 @@ static struct module_kobject * __init locate_module_kobject(const char *name) | |||
| 730 | mk->kobj.kset = module_kset; | 730 | mk->kobj.kset = module_kset; |
| 731 | err = kobject_init_and_add(&mk->kobj, &module_ktype, NULL, | 731 | err = kobject_init_and_add(&mk->kobj, &module_ktype, NULL, |
| 732 | "%s", name); | 732 | "%s", name); |
| 733 | #ifdef CONFIG_MODULES | ||
| 734 | if (!err) | ||
| 735 | err = sysfs_create_file(&mk->kobj, &module_uevent.attr); | ||
| 736 | #endif | ||
| 733 | if (err) { | 737 | if (err) { |
| 734 | kobject_put(&mk->kobj); | 738 | kobject_put(&mk->kobj); |
| 735 | printk(KERN_ERR | 739 | printk(KERN_ERR |
| @@ -807,7 +811,7 @@ static void __init param_sysfs_builtin(void) | |||
| 807 | } | 811 | } |
| 808 | 812 | ||
| 809 | ssize_t __modver_version_show(struct module_attribute *mattr, | 813 | ssize_t __modver_version_show(struct module_attribute *mattr, |
| 810 | struct module *mod, char *buf) | 814 | struct module_kobject *mk, char *buf) |
| 811 | { | 815 | { |
| 812 | struct module_version_attribute *vattr = | 816 | struct module_version_attribute *vattr = |
| 813 | container_of(mattr, struct module_version_attribute, mattr); | 817 | container_of(mattr, struct module_version_attribute, mattr); |
| @@ -852,7 +856,7 @@ static ssize_t module_attr_show(struct kobject *kobj, | |||
| 852 | if (!attribute->show) | 856 | if (!attribute->show) |
| 853 | return -EIO; | 857 | return -EIO; |
| 854 | 858 | ||
| 855 | ret = attribute->show(attribute, mk->mod, buf); | 859 | ret = attribute->show(attribute, mk, buf); |
| 856 | 860 | ||
| 857 | return ret; | 861 | return ret; |
| 858 | } | 862 | } |
| @@ -871,7 +875,7 @@ static ssize_t module_attr_store(struct kobject *kobj, | |||
| 871 | if (!attribute->store) | 875 | if (!attribute->store) |
| 872 | return -EIO; | 876 | return -EIO; |
| 873 | 877 | ||
| 874 | ret = attribute->store(attribute, mk->mod, buf, len); | 878 | ret = attribute->store(attribute, mk, buf, len); |
| 875 | 879 | ||
| 876 | return ret; | 880 | return ret; |
| 877 | } | 881 | } |
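The module_attr_show()/module_attr_store() dispatchers above now pass the module_kobject wrapper instead of mk->mod. The practical reason is that built-in parameters get a module_kobject from locate_module_kobject() but have no struct module behind it, so callbacks that only need the kobject (such as the new store_uevent()) keep working when mk->mod is NULL. A hedged userspace sketch of that dispatch shape (all names here are hypothetical stand-ins, not the kernel structures):

/* Sketch: hand callbacks the wrapper, not the inner pointer, so attributes
 * that do not need the inner object still work when it is absent. */
#include <stdio.h>
#include <sys/types.h>

struct fake_module { const char *name; unsigned int refcnt; };

struct fake_module_kobject {
	const char *kobj_name;
	struct fake_module *mod;	/* NULL for built-in parameters */
};

struct fake_attribute {
	const char *name;
	ssize_t (*show)(struct fake_attribute *, struct fake_module_kobject *, char *);
};

static ssize_t show_refcnt(struct fake_attribute *a,
			   struct fake_module_kobject *mk, char *buf)
{
	if (!mk->mod)
		return -1;		/* meaningless for a built-in */
	return sprintf(buf, "%u\n", mk->mod->refcnt);
}

static ssize_t show_kobj_name(struct fake_attribute *a,
			      struct fake_module_kobject *mk, char *buf)
{
	/* Needs only the wrapper, so it works for built-ins too. */
	return sprintf(buf, "%s\n", mk->kobj_name);
}

int main(void)
{
	struct fake_module m = { "dummy", 2 };
	struct fake_module_kobject loaded = { "dummy", &m };
	struct fake_module_kobject builtin = { "printk", NULL };
	struct fake_attribute refcnt = { "refcnt", show_refcnt };
	struct fake_attribute kname = { "name", show_kobj_name };
	char buf[64];

	if (refcnt.show(&refcnt, &loaded, buf) > 0)
		printf("loaded refcnt: %s", buf);
	if (kname.show(&kname, &builtin, buf) > 0)
		printf("builtin name:  %s", buf);
	if (refcnt.show(&refcnt, &builtin, buf) < 0)
		printf("builtin refcnt: unavailable (mk->mod is NULL)\n");
	return 0;
}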
diff --git a/kernel/pid.c b/kernel/pid.c index 57a8346a270e..e432057f3b21 100644 --- a/kernel/pid.c +++ b/kernel/pid.c | |||
| @@ -405,7 +405,6 @@ struct task_struct *pid_task(struct pid *pid, enum pid_type type) | |||
| 405 | if (pid) { | 405 | if (pid) { |
| 406 | struct hlist_node *first; | 406 | struct hlist_node *first; |
| 407 | first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]), | 407 | first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]), |
| 408 | rcu_read_lock_held() || | ||
| 409 | lockdep_tasklist_lock_is_held()); | 408 | lockdep_tasklist_lock_is_held()); |
| 410 | if (first) | 409 | if (first) |
| 411 | result = hlist_entry(first, struct task_struct, pids[(type)].node); | 410 | result = hlist_entry(first, struct task_struct, pids[(type)].node); |
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c index 6824ca7d4d0c..37f05d0f0793 100644 --- a/kernel/pm_qos_params.c +++ b/kernel/pm_qos_params.c | |||
| @@ -74,7 +74,7 @@ static DEFINE_SPINLOCK(pm_qos_lock); | |||
| 74 | static struct pm_qos_object null_pm_qos; | 74 | static struct pm_qos_object null_pm_qos; |
| 75 | static BLOCKING_NOTIFIER_HEAD(cpu_dma_lat_notifier); | 75 | static BLOCKING_NOTIFIER_HEAD(cpu_dma_lat_notifier); |
| 76 | static struct pm_qos_object cpu_dma_pm_qos = { | 76 | static struct pm_qos_object cpu_dma_pm_qos = { |
| 77 | .requests = PLIST_HEAD_INIT(cpu_dma_pm_qos.requests, pm_qos_lock), | 77 | .requests = PLIST_HEAD_INIT(cpu_dma_pm_qos.requests), |
| 78 | .notifiers = &cpu_dma_lat_notifier, | 78 | .notifiers = &cpu_dma_lat_notifier, |
| 79 | .name = "cpu_dma_latency", | 79 | .name = "cpu_dma_latency", |
| 80 | .target_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE, | 80 | .target_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE, |
| @@ -84,7 +84,7 @@ static struct pm_qos_object cpu_dma_pm_qos = { | |||
| 84 | 84 | ||
| 85 | static BLOCKING_NOTIFIER_HEAD(network_lat_notifier); | 85 | static BLOCKING_NOTIFIER_HEAD(network_lat_notifier); |
| 86 | static struct pm_qos_object network_lat_pm_qos = { | 86 | static struct pm_qos_object network_lat_pm_qos = { |
| 87 | .requests = PLIST_HEAD_INIT(network_lat_pm_qos.requests, pm_qos_lock), | 87 | .requests = PLIST_HEAD_INIT(network_lat_pm_qos.requests), |
| 88 | .notifiers = &network_lat_notifier, | 88 | .notifiers = &network_lat_notifier, |
| 89 | .name = "network_latency", | 89 | .name = "network_latency", |
| 90 | .target_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE, | 90 | .target_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE, |
| @@ -95,7 +95,7 @@ static struct pm_qos_object network_lat_pm_qos = { | |||
| 95 | 95 | ||
| 96 | static BLOCKING_NOTIFIER_HEAD(network_throughput_notifier); | 96 | static BLOCKING_NOTIFIER_HEAD(network_throughput_notifier); |
| 97 | static struct pm_qos_object network_throughput_pm_qos = { | 97 | static struct pm_qos_object network_throughput_pm_qos = { |
| 98 | .requests = PLIST_HEAD_INIT(network_throughput_pm_qos.requests, pm_qos_lock), | 98 | .requests = PLIST_HEAD_INIT(network_throughput_pm_qos.requests), |
| 99 | .notifiers = &network_throughput_notifier, | 99 | .notifiers = &network_throughput_notifier, |
| 100 | .name = "network_throughput", | 100 | .name = "network_throughput", |
| 101 | .target_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE, | 101 | .target_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE, |
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 87f4d24b55b0..b1914cb9095c 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig | |||
| @@ -193,8 +193,8 @@ config APM_EMULATION | |||
| 193 | notification of APM "events" (e.g. battery status change). | 193 | notification of APM "events" (e.g. battery status change). |
| 194 | 194 | ||
| 195 | In order to use APM, you will need supporting software. For location | 195 | In order to use APM, you will need supporting software. For location |
| 196 | and more information, read <file:Documentation/power/pm.txt> and the | 196 | and more information, read <file:Documentation/power/apm-acpi.txt> |
| 197 | Battery Powered Linux mini-HOWTO, available from | 197 | and the Battery Powered Linux mini-HOWTO, available from |
| 198 | <http://www.tldp.org/docs.html#howto>. | 198 | <http://www.tldp.org/docs.html#howto>. |
| 199 | 199 | ||
| 200 | This driver does not spin down disk drives (see the hdparm(8) | 200 | This driver does not spin down disk drives (see the hdparm(8) |
| @@ -224,6 +224,10 @@ config PM_OPP | |||
| 224 | implementations a ready to use framework to manage OPPs. | 224 | implementations a ready to use framework to manage OPPs. |
| 225 | For more information, read <file:Documentation/power/opp.txt> | 225 | For more information, read <file:Documentation/power/opp.txt> |
| 226 | 226 | ||
| 227 | config PM_RUNTIME_CLK | 227 | config PM_CLK |
| 228 | def_bool y | 228 | def_bool y |
| 229 | depends on PM_RUNTIME && HAVE_CLK | 229 | depends on PM && HAVE_CLK |
| 230 | |||
| 231 | config PM_GENERIC_DOMAINS | ||
| 232 | bool | ||
| 233 | depends on PM | ||
diff --git a/kernel/power/main.c b/kernel/power/main.c index 2981af4ce7cb..6c601f871964 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c | |||
| @@ -37,8 +37,9 @@ EXPORT_SYMBOL_GPL(unregister_pm_notifier); | |||
| 37 | 37 | ||
| 38 | int pm_notifier_call_chain(unsigned long val) | 38 | int pm_notifier_call_chain(unsigned long val) |
| 39 | { | 39 | { |
| 40 | return (blocking_notifier_call_chain(&pm_chain_head, val, NULL) | 40 | int ret = blocking_notifier_call_chain(&pm_chain_head, val, NULL); |
| 41 | == NOTIFY_BAD) ? -EINVAL : 0; | 41 | |
| 42 | return notifier_to_errno(ret); | ||
| 42 | } | 43 | } |
| 43 | 44 | ||
| 44 | /* If set, devices may be suspended and resumed asynchronously. */ | 45 | /* If set, devices may be suspended and resumed asynchronously. */ |
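pm_notifier_call_chain() previously recognized only NOTIFY_BAD and mapped every refusal to -EINVAL; with notifier_to_errno() a PM notifier can veto a transition with a specific error code encoded via notifier_from_errno(), and the caller gets that code back. A userspace re-implementation of the encode/decode round trip (the real helpers live in include/linux/notifier.h; the constants and arithmetic below mirror them for illustration only):

/* Userspace replica of the notifier errno round trip, showing what
 * pm_notifier_call_chain() now returns. */
#include <errno.h>
#include <stdio.h>

#define NOTIFY_DONE		0x0000
#define NOTIFY_OK		0x0001
#define NOTIFY_STOP_MASK	0x8000
#define NOTIFY_BAD		(NOTIFY_STOP_MASK | 0x0002)

/* Callback side: encode an errno into a notifier return value. */
static int notifier_from_errno(int err)
{
	return err ? (NOTIFY_STOP_MASK | (NOTIFY_OK - err)) : NOTIFY_OK;
}

/* Caller side: decode it again, which is what the new code does. */
static int notifier_to_errno(int ret)
{
	ret &= ~NOTIFY_STOP_MASK;
	return ret > NOTIFY_OK ? NOTIFY_OK - ret : 0;
}

int main(void)
{
	int ret = notifier_from_errno(-EBUSY);	/* a notifier vetoes suspend */

	printf("chain value 0x%04x -> errno %d\n", ret, notifier_to_errno(ret));
	printf("NOTIFY_OK           -> errno %d\n", notifier_to_errno(NOTIFY_OK));
	printf("NOTIFY_BAD (legacy) -> errno %d\n", notifier_to_errno(NOTIFY_BAD));
	return 0;
}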
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index ace55889f702..06efa54f93d6 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c | |||
| @@ -1211,7 +1211,11 @@ static void free_unnecessary_pages(void) | |||
| 1211 | to_free_highmem = alloc_highmem - save; | 1211 | to_free_highmem = alloc_highmem - save; |
| 1212 | } else { | 1212 | } else { |
| 1213 | to_free_highmem = 0; | 1213 | to_free_highmem = 0; |
| 1214 | to_free_normal -= save - alloc_highmem; | 1214 | save -= alloc_highmem; |
| 1215 | if (to_free_normal > save) | ||
| 1216 | to_free_normal -= save; | ||
| 1217 | else | ||
| 1218 | to_free_normal = 0; | ||
| 1215 | } | 1219 | } |
| 1216 | 1220 | ||
| 1217 | memory_bm_position_reset(&copy_bm); | 1221 | memory_bm_position_reset(&copy_bm); |
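The snapshot fix above guards the to_free_normal arithmetic: the quantities involved are unsigned page counts, so subtracting more than is left to free would wrap around to an enormous value instead of clamping at zero. A tiny standalone demonstration of the failure mode and the clamped form:

#include <stdio.h>

int main(void)
{
	unsigned long to_free_normal = 10, save = 25, alloc_highmem = 5;

	/* Old arithmetic: 10 - (25 - 5) wraps around instead of going negative. */
	unsigned long wrapped = to_free_normal - (save - alloc_highmem);

	/* Fixed arithmetic from the patch: clamp at zero. */
	unsigned long clamped;

	save -= alloc_highmem;
	clamped = to_free_normal > save ? to_free_normal - save : 0;

	printf("wrapped: %lu\nclamped: %lu\n", wrapped, clamped);
	return 0;
}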
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 1c41ba215419..b6b71ad2208f 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c | |||
| @@ -44,6 +44,7 @@ void suspend_set_ops(const struct platform_suspend_ops *ops) | |||
| 44 | suspend_ops = ops; | 44 | suspend_ops = ops; |
| 45 | mutex_unlock(&pm_mutex); | 45 | mutex_unlock(&pm_mutex); |
| 46 | } | 46 | } |
| 47 | EXPORT_SYMBOL_GPL(suspend_set_ops); | ||
| 47 | 48 | ||
| 48 | bool valid_state(suspend_state_t state) | 49 | bool valid_state(suspend_state_t state) |
| 49 | { | 50 | { |
| @@ -65,6 +66,7 @@ int suspend_valid_only_mem(suspend_state_t state) | |||
| 65 | { | 66 | { |
| 66 | return state == PM_SUSPEND_MEM; | 67 | return state == PM_SUSPEND_MEM; |
| 67 | } | 68 | } |
| 69 | EXPORT_SYMBOL_GPL(suspend_valid_only_mem); | ||
| 68 | 70 | ||
| 69 | static int suspend_test(int level) | 71 | static int suspend_test(int level) |
| 70 | { | 72 | { |
| @@ -126,12 +128,13 @@ void __attribute__ ((weak)) arch_suspend_enable_irqs(void) | |||
| 126 | } | 128 | } |
| 127 | 129 | ||
| 128 | /** | 130 | /** |
| 129 | * suspend_enter - enter the desired system sleep state. | 131 | * suspend_enter - enter the desired system sleep state. |
| 130 | * @state: state to enter | 132 | * @state: State to enter |
| 133 | * @wakeup: Returns information that suspend should not be entered again. | ||
| 131 | * | 134 | * |
| 132 | * This function should be called after devices have been suspended. | 135 | * This function should be called after devices have been suspended. |
| 133 | */ | 136 | */ |
| 134 | static int suspend_enter(suspend_state_t state) | 137 | static int suspend_enter(suspend_state_t state, bool *wakeup) |
| 135 | { | 138 | { |
| 136 | int error; | 139 | int error; |
| 137 | 140 | ||
| @@ -165,7 +168,8 @@ static int suspend_enter(suspend_state_t state) | |||
| 165 | 168 | ||
| 166 | error = syscore_suspend(); | 169 | error = syscore_suspend(); |
| 167 | if (!error) { | 170 | if (!error) { |
| 168 | if (!(suspend_test(TEST_CORE) || pm_wakeup_pending())) { | 171 | *wakeup = pm_wakeup_pending(); |
| 172 | if (!(suspend_test(TEST_CORE) || *wakeup)) { | ||
| 169 | error = suspend_ops->enter(state); | 173 | error = suspend_ops->enter(state); |
| 170 | events_check_enabled = false; | 174 | events_check_enabled = false; |
| 171 | } | 175 | } |
| @@ -199,6 +203,7 @@ static int suspend_enter(suspend_state_t state) | |||
| 199 | int suspend_devices_and_enter(suspend_state_t state) | 203 | int suspend_devices_and_enter(suspend_state_t state) |
| 200 | { | 204 | { |
| 201 | int error; | 205 | int error; |
| 206 | bool wakeup = false; | ||
| 202 | 207 | ||
| 203 | if (!suspend_ops) | 208 | if (!suspend_ops) |
| 204 | return -ENOSYS; | 209 | return -ENOSYS; |
| @@ -220,7 +225,10 @@ int suspend_devices_and_enter(suspend_state_t state) | |||
| 220 | if (suspend_test(TEST_DEVICES)) | 225 | if (suspend_test(TEST_DEVICES)) |
| 221 | goto Recover_platform; | 226 | goto Recover_platform; |
| 222 | 227 | ||
| 223 | error = suspend_enter(state); | 228 | do { |
| 229 | error = suspend_enter(state, &wakeup); | ||
| 230 | } while (!error && !wakeup | ||
| 231 | && suspend_ops->suspend_again && suspend_ops->suspend_again()); | ||
| 224 | 232 | ||
| 225 | Resume_devices: | 233 | Resume_devices: |
| 226 | suspend_test_start(); | 234 | suspend_test_start(); |
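The new loop re-enters suspend for as long as the platform's suspend_again() callback requests it and neither an error nor a wakeup event has occurred, which lets a platform wake briefly (for example to poll a charger) and drop straight back to sleep. A mock of that control flow with hypothetical ops (the real hook is suspend_ops->suspend_again in struct platform_suspend_ops):

/* Mock of the suspend_again retry loop; sleep_ops and the callbacks are
 * hypothetical stand-ins for struct platform_suspend_ops. */
#include <stdbool.h>
#include <stdio.h>

struct sleep_ops {
	int  (*enter)(void);		/* one suspend/resume cycle */
	bool (*suspend_again)(void);	/* does the platform want another? */
};

static int cycles;

static int mock_enter(void)
{
	printf("suspend cycle #%d\n", ++cycles);
	return 0;
}

static bool mock_suspend_again(void)
{
	return cycles < 3;		/* e.g. periodic charger polling */
}

static int enter_with_retry(const struct sleep_ops *ops)
{
	int error;
	bool wakeup = false;		/* a real port sets this from pm_wakeup_pending() */

	do {
		error = ops->enter();
	} while (!error && !wakeup &&
		 ops->suspend_again && ops->suspend_again());

	return error;
}

int main(void)
{
	const struct sleep_ops ops = { mock_enter, mock_suspend_again };

	return enter_with_retry(&ops);
}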
diff --git a/kernel/power/user.c b/kernel/power/user.c index 7d02d33be699..42ddbc6f0de6 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c | |||
| @@ -113,8 +113,10 @@ static int snapshot_open(struct inode *inode, struct file *filp) | |||
| 113 | if (error) | 113 | if (error) |
| 114 | pm_notifier_call_chain(PM_POST_RESTORE); | 114 | pm_notifier_call_chain(PM_POST_RESTORE); |
| 115 | } | 115 | } |
| 116 | if (error) | 116 | if (error) { |
| 117 | free_basic_memory_bitmaps(); | ||
| 117 | atomic_inc(&snapshot_device_available); | 118 | atomic_inc(&snapshot_device_available); |
| 119 | } | ||
| 118 | data->frozen = 0; | 120 | data->frozen = 0; |
| 119 | data->ready = 0; | 121 | data->ready = 0; |
| 120 | data->platform_support = 0; | 122 | data->platform_support = 0; |
diff --git a/kernel/printk.c b/kernel/printk.c index 35185392173f..37dff3429adb 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
| @@ -782,7 +782,7 @@ static inline int can_use_console(unsigned int cpu) | |||
| 782 | static int console_trylock_for_printk(unsigned int cpu) | 782 | static int console_trylock_for_printk(unsigned int cpu) |
| 783 | __releases(&logbuf_lock) | 783 | __releases(&logbuf_lock) |
| 784 | { | 784 | { |
| 785 | int retval = 0; | 785 | int retval = 0, wake = 0; |
| 786 | 786 | ||
| 787 | if (console_trylock()) { | 787 | if (console_trylock()) { |
| 788 | retval = 1; | 788 | retval = 1; |
| @@ -795,12 +795,14 @@ static int console_trylock_for_printk(unsigned int cpu) | |||
| 795 | */ | 795 | */ |
| 796 | if (!can_use_console(cpu)) { | 796 | if (!can_use_console(cpu)) { |
| 797 | console_locked = 0; | 797 | console_locked = 0; |
| 798 | up(&console_sem); | 798 | wake = 1; |
| 799 | retval = 0; | 799 | retval = 0; |
| 800 | } | 800 | } |
| 801 | } | 801 | } |
| 802 | printk_cpu = UINT_MAX; | 802 | printk_cpu = UINT_MAX; |
| 803 | spin_unlock(&logbuf_lock); | 803 | spin_unlock(&logbuf_lock); |
| 804 | if (wake) | ||
| 805 | up(&console_sem); | ||
| 804 | return retval; | 806 | return retval; |
| 805 | } | 807 | } |
| 806 | static const char recursion_bug_msg [] = | 808 | static const char recursion_bug_msg [] = |
| @@ -1242,7 +1244,7 @@ void console_unlock(void) | |||
| 1242 | { | 1244 | { |
| 1243 | unsigned long flags; | 1245 | unsigned long flags; |
| 1244 | unsigned _con_start, _log_end; | 1246 | unsigned _con_start, _log_end; |
| 1245 | unsigned wake_klogd = 0; | 1247 | unsigned wake_klogd = 0, retry = 0; |
| 1246 | 1248 | ||
| 1247 | if (console_suspended) { | 1249 | if (console_suspended) { |
| 1248 | up(&console_sem); | 1250 | up(&console_sem); |
| @@ -1251,6 +1253,7 @@ void console_unlock(void) | |||
| 1251 | 1253 | ||
| 1252 | console_may_schedule = 0; | 1254 | console_may_schedule = 0; |
| 1253 | 1255 | ||
| 1256 | again: | ||
| 1254 | for ( ; ; ) { | 1257 | for ( ; ; ) { |
| 1255 | spin_lock_irqsave(&logbuf_lock, flags); | 1258 | spin_lock_irqsave(&logbuf_lock, flags); |
| 1256 | wake_klogd |= log_start - log_end; | 1259 | wake_klogd |= log_start - log_end; |
| @@ -1271,8 +1274,23 @@ void console_unlock(void) | |||
| 1271 | if (unlikely(exclusive_console)) | 1274 | if (unlikely(exclusive_console)) |
| 1272 | exclusive_console = NULL; | 1275 | exclusive_console = NULL; |
| 1273 | 1276 | ||
| 1277 | spin_unlock(&logbuf_lock); | ||
| 1278 | |||
| 1274 | up(&console_sem); | 1279 | up(&console_sem); |
| 1280 | |||
| 1281 | /* | ||
| 1282 | * Someone could have filled up the buffer again, so re-check if there's | ||
| 1283 | * something to flush. In case we cannot trylock the console_sem again, | ||
| 1284 | * there's a new owner and the console_unlock() from them will do the | ||
| 1285 | * flush, no worries. | ||
| 1286 | */ | ||
| 1287 | spin_lock(&logbuf_lock); | ||
| 1288 | if (con_start != log_end) | ||
| 1289 | retry = 1; | ||
| 1275 | spin_unlock_irqrestore(&logbuf_lock, flags); | 1290 | spin_unlock_irqrestore(&logbuf_lock, flags); |
| 1291 | if (retry && console_trylock()) | ||
| 1292 | goto again; | ||
| 1293 | |||
| 1276 | if (wake_klogd) | 1294 | if (wake_klogd) |
| 1277 | wake_up_klogd(); | 1295 | wake_up_klogd(); |
| 1278 | } | 1296 | } |
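console_unlock() now drops console_sem first and only afterwards re-checks whether the log buffer refilled; if it did and the trylock succeeds, the function loops back, otherwise the new semaphore owner is the one who will flush. The same unlock, re-check, conditionally-retry shape with a pthread mutex (illustrative only; the real code also holds logbuf_lock around the re-check):

/* The unlock/re-check/retry shape of the new console_unlock(), acted out with
 * a pthread mutex; "pending" stands in for con_start != log_end. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t console_lock = PTHREAD_MUTEX_INITIALIZER;
static int pending = 3;
static bool raced;

static void flush_pending(void)
{
	while (pending > 0)
		printf("flushing record, %d left\n", pending--);
}

static void console_unlock_like(void)
{
again:
	flush_pending();		/* drain while holding the lock */

	if (!raced) {			/* simulate a printk racing in right here */
		raced = true;
		pending = 1;
	}

	pthread_mutex_unlock(&console_lock);

	/*
	 * More work may have been queued between the drain and the unlock.
	 * If so and the trylock succeeds, flush again; otherwise the new
	 * lock owner will do it when it unlocks.
	 */
	if (pending > 0 && pthread_mutex_trylock(&console_lock) == 0)
		goto again;
}

int main(void)
{
	pthread_mutex_lock(&console_lock);
	console_unlock_like();
	return 0;
}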
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 2df115790cd9..9de3ecfd20f9 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
| @@ -23,8 +23,15 @@ | |||
| 23 | #include <linux/uaccess.h> | 23 | #include <linux/uaccess.h> |
| 24 | #include <linux/regset.h> | 24 | #include <linux/regset.h> |
| 25 | #include <linux/hw_breakpoint.h> | 25 | #include <linux/hw_breakpoint.h> |
| 26 | #include <linux/cn_proc.h> | ||
| 26 | 27 | ||
| 27 | 28 | ||
| 29 | static int ptrace_trapping_sleep_fn(void *flags) | ||
| 30 | { | ||
| 31 | schedule(); | ||
| 32 | return 0; | ||
| 33 | } | ||
| 34 | |||
| 28 | /* | 35 | /* |
| 29 | * ptrace a task: make the debugger its new parent and | 36 | * ptrace a task: make the debugger its new parent and |
| 30 | * move it to the ptrace list. | 37 | * move it to the ptrace list. |
| @@ -77,13 +84,20 @@ void __ptrace_unlink(struct task_struct *child) | |||
| 77 | spin_lock(&child->sighand->siglock); | 84 | spin_lock(&child->sighand->siglock); |
| 78 | 85 | ||
| 79 | /* | 86 | /* |
| 80 | * Reinstate GROUP_STOP_PENDING if group stop is in effect and | 87 | * Clear all pending traps and TRAPPING. TRAPPING should be |
| 88 | * cleared regardless of JOBCTL_STOP_PENDING. Do it explicitly. | ||
| 89 | */ | ||
| 90 | task_clear_jobctl_pending(child, JOBCTL_TRAP_MASK); | ||
| 91 | task_clear_jobctl_trapping(child); | ||
| 92 | |||
| 93 | /* | ||
| 94 | * Reinstate JOBCTL_STOP_PENDING if group stop is in effect and | ||
| 81 | * @child isn't dead. | 95 | * @child isn't dead. |
| 82 | */ | 96 | */ |
| 83 | if (!(child->flags & PF_EXITING) && | 97 | if (!(child->flags & PF_EXITING) && |
| 84 | (child->signal->flags & SIGNAL_STOP_STOPPED || | 98 | (child->signal->flags & SIGNAL_STOP_STOPPED || |
| 85 | child->signal->group_stop_count)) | 99 | child->signal->group_stop_count)) |
| 86 | child->group_stop |= GROUP_STOP_PENDING; | 100 | child->jobctl |= JOBCTL_STOP_PENDING; |
| 87 | 101 | ||
| 88 | /* | 102 | /* |
| 89 | * If transition to TASK_STOPPED is pending or in TASK_TRACED, kick | 103 | * If transition to TASK_STOPPED is pending or in TASK_TRACED, kick |
| @@ -91,16 +105,30 @@ void __ptrace_unlink(struct task_struct *child) | |||
| 91 | * is in TASK_TRACED; otherwise, we might unduly disrupt | 105 | * is in TASK_TRACED; otherwise, we might unduly disrupt |
| 92 | * TASK_KILLABLE sleeps. | 106 | * TASK_KILLABLE sleeps. |
| 93 | */ | 107 | */ |
| 94 | if (child->group_stop & GROUP_STOP_PENDING || task_is_traced(child)) | 108 | if (child->jobctl & JOBCTL_STOP_PENDING || task_is_traced(child)) |
| 95 | signal_wake_up(child, task_is_traced(child)); | 109 | signal_wake_up(child, task_is_traced(child)); |
| 96 | 110 | ||
| 97 | spin_unlock(&child->sighand->siglock); | 111 | spin_unlock(&child->sighand->siglock); |
| 98 | } | 112 | } |
| 99 | 113 | ||
| 100 | /* | 114 | /** |
| 101 | * Check that we have indeed attached to the thing.. | 115 | * ptrace_check_attach - check whether ptracee is ready for ptrace operation |
| 116 | * @child: ptracee to check for | ||
| 117 | * @ignore_state: don't check whether @child is currently %TASK_TRACED | ||
| 118 | * | ||
| 119 | * Check whether @child is being ptraced by %current and ready for further | ||
| 120 | * ptrace operations. If @ignore_state is %false, @child also should be in | ||
| 121 | * %TASK_TRACED state and on return the child is guaranteed to be traced | ||
| 122 | * and not executing. If @ignore_state is %true, @child can be in any | ||
| 123 | * state. | ||
| 124 | * | ||
| 125 | * CONTEXT: | ||
| 126 | * Grabs and releases tasklist_lock and @child->sighand->siglock. | ||
| 127 | * | ||
| 128 | * RETURNS: | ||
| 129 | * 0 on success, -ESRCH if %child is not ready. | ||
| 102 | */ | 130 | */ |
| 103 | int ptrace_check_attach(struct task_struct *child, int kill) | 131 | int ptrace_check_attach(struct task_struct *child, bool ignore_state) |
| 104 | { | 132 | { |
| 105 | int ret = -ESRCH; | 133 | int ret = -ESRCH; |
| 106 | 134 | ||
| @@ -119,13 +147,14 @@ int ptrace_check_attach(struct task_struct *child, int kill) | |||
| 119 | */ | 147 | */ |
| 120 | spin_lock_irq(&child->sighand->siglock); | 148 | spin_lock_irq(&child->sighand->siglock); |
| 121 | WARN_ON_ONCE(task_is_stopped(child)); | 149 | WARN_ON_ONCE(task_is_stopped(child)); |
| 122 | if (task_is_traced(child) || kill) | 150 | if (ignore_state || (task_is_traced(child) && |
| 151 | !(child->jobctl & JOBCTL_LISTENING))) | ||
| 123 | ret = 0; | 152 | ret = 0; |
| 124 | spin_unlock_irq(&child->sighand->siglock); | 153 | spin_unlock_irq(&child->sighand->siglock); |
| 125 | } | 154 | } |
| 126 | read_unlock(&tasklist_lock); | 155 | read_unlock(&tasklist_lock); |
| 127 | 156 | ||
| 128 | if (!ret && !kill) | 157 | if (!ret && !ignore_state) |
| 129 | ret = wait_task_inactive(child, TASK_TRACED) ? 0 : -ESRCH; | 158 | ret = wait_task_inactive(child, TASK_TRACED) ? 0 : -ESRCH; |
| 130 | 159 | ||
| 131 | /* All systems go.. */ | 160 | /* All systems go.. */ |
| @@ -182,11 +211,28 @@ bool ptrace_may_access(struct task_struct *task, unsigned int mode) | |||
| 182 | return !err; | 211 | return !err; |
| 183 | } | 212 | } |
| 184 | 213 | ||
| 185 | static int ptrace_attach(struct task_struct *task) | 214 | static int ptrace_attach(struct task_struct *task, long request, |
| 215 | unsigned long flags) | ||
| 186 | { | 216 | { |
| 187 | bool wait_trap = false; | 217 | bool seize = (request == PTRACE_SEIZE); |
| 188 | int retval; | 218 | int retval; |
| 189 | 219 | ||
| 220 | /* | ||
| 221 | * SEIZE will enable new ptrace behaviors which will be implemented | ||
| 222 | * gradually. SEIZE_DEVEL is used to prevent applications | ||
| 223 | * expecting full SEIZE behaviors trapping on kernel commits which | ||
| 224 | * are still in the process of implementing them. | ||
| 225 | * | ||
| 226 | * Only test programs for new ptrace behaviors being implemented | ||
| 227 | * should set SEIZE_DEVEL. If unset, SEIZE will fail with -EIO. | ||
| 228 | * | ||
| 229 | * Once SEIZE behaviors are completely implemented, this flag and | ||
| 230 | * the following test will be removed. | ||
| 231 | */ | ||
| 232 | retval = -EIO; | ||
| 233 | if (seize && !(flags & PTRACE_SEIZE_DEVEL)) | ||
| 234 | goto out; | ||
| 235 | |||
| 190 | audit_ptrace(task); | 236 | audit_ptrace(task); |
| 191 | 237 | ||
| 192 | retval = -EPERM; | 238 | retval = -EPERM; |
| @@ -218,16 +264,21 @@ static int ptrace_attach(struct task_struct *task) | |||
| 218 | goto unlock_tasklist; | 264 | goto unlock_tasklist; |
| 219 | 265 | ||
| 220 | task->ptrace = PT_PTRACED; | 266 | task->ptrace = PT_PTRACED; |
| 267 | if (seize) | ||
| 268 | task->ptrace |= PT_SEIZED; | ||
| 221 | if (task_ns_capable(task, CAP_SYS_PTRACE)) | 269 | if (task_ns_capable(task, CAP_SYS_PTRACE)) |
| 222 | task->ptrace |= PT_PTRACE_CAP; | 270 | task->ptrace |= PT_PTRACE_CAP; |
| 223 | 271 | ||
| 224 | __ptrace_link(task, current); | 272 | __ptrace_link(task, current); |
| 225 | send_sig_info(SIGSTOP, SEND_SIG_FORCED, task); | 273 | |
| 274 | /* SEIZE doesn't trap tracee on attach */ | ||
| 275 | if (!seize) | ||
| 276 | send_sig_info(SIGSTOP, SEND_SIG_FORCED, task); | ||
| 226 | 277 | ||
| 227 | spin_lock(&task->sighand->siglock); | 278 | spin_lock(&task->sighand->siglock); |
| 228 | 279 | ||
| 229 | /* | 280 | /* |
| 230 | * If the task is already STOPPED, set GROUP_STOP_PENDING and | 281 | * If the task is already STOPPED, set JOBCTL_TRAP_STOP and |
| 231 | * TRAPPING, and kick it so that it transits to TRACED. TRAPPING | 282 | * TRAPPING, and kick it so that it transits to TRACED. TRAPPING |
| 232 | * will be cleared if the child completes the transition or any | 283 | * will be cleared if the child completes the transition or any |
| 233 | * event which clears the group stop states happens. We'll wait | 284 | * event which clears the group stop states happens. We'll wait |
| @@ -243,11 +294,9 @@ static int ptrace_attach(struct task_struct *task) | |||
| 243 | * The following task_is_stopped() test is safe as both transitions | 294 | * The following task_is_stopped() test is safe as both transitions |
| 244 | * in and out of STOPPED are protected by siglock. | 295 | * in and out of STOPPED are protected by siglock. |
| 245 | */ | 296 | */ |
| 246 | if (task_is_stopped(task)) { | 297 | if (task_is_stopped(task) && |
| 247 | task->group_stop |= GROUP_STOP_PENDING | GROUP_STOP_TRAPPING; | 298 | task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING)) |
| 248 | signal_wake_up(task, 1); | 299 | signal_wake_up(task, 1); |
| 249 | wait_trap = true; | ||
| 250 | } | ||
| 251 | 300 | ||
| 252 | spin_unlock(&task->sighand->siglock); | 301 | spin_unlock(&task->sighand->siglock); |
| 253 | 302 | ||
| @@ -257,9 +306,12 @@ unlock_tasklist: | |||
| 257 | unlock_creds: | 306 | unlock_creds: |
| 258 | mutex_unlock(&task->signal->cred_guard_mutex); | 307 | mutex_unlock(&task->signal->cred_guard_mutex); |
| 259 | out: | 308 | out: |
| 260 | if (wait_trap) | 309 | if (!retval) { |
| 261 | wait_event(current->signal->wait_chldexit, | 310 | wait_on_bit(&task->jobctl, JOBCTL_TRAPPING_BIT, |
| 262 | !(task->group_stop & GROUP_STOP_TRAPPING)); | 311 | ptrace_trapping_sleep_fn, TASK_UNINTERRUPTIBLE); |
| 312 | proc_ptrace_connector(task, PTRACE_ATTACH); | ||
| 313 | } | ||
| 314 | |||
| 263 | return retval; | 315 | return retval; |
| 264 | } | 316 | } |
| 265 | 317 | ||
| @@ -322,25 +374,27 @@ static int ignoring_children(struct sighand_struct *sigh) | |||
| 322 | */ | 374 | */ |
| 323 | static bool __ptrace_detach(struct task_struct *tracer, struct task_struct *p) | 375 | static bool __ptrace_detach(struct task_struct *tracer, struct task_struct *p) |
| 324 | { | 376 | { |
| 377 | bool dead; | ||
| 378 | |||
| 325 | __ptrace_unlink(p); | 379 | __ptrace_unlink(p); |
| 326 | 380 | ||
| 327 | if (p->exit_state == EXIT_ZOMBIE) { | 381 | if (p->exit_state != EXIT_ZOMBIE) |
| 328 | if (!task_detached(p) && thread_group_empty(p)) { | 382 | return false; |
| 329 | if (!same_thread_group(p->real_parent, tracer)) | 383 | |
| 330 | do_notify_parent(p, p->exit_signal); | 384 | dead = !thread_group_leader(p); |
| 331 | else if (ignoring_children(tracer->sighand)) { | 385 | |
| 332 | __wake_up_parent(p, tracer); | 386 | if (!dead && thread_group_empty(p)) { |
| 333 | p->exit_signal = -1; | 387 | if (!same_thread_group(p->real_parent, tracer)) |
| 334 | } | 388 | dead = do_notify_parent(p, p->exit_signal); |
| 335 | } | 389 | else if (ignoring_children(tracer->sighand)) { |
| 336 | if (task_detached(p)) { | 390 | __wake_up_parent(p, tracer); |
| 337 | /* Mark it as in the process of being reaped. */ | 391 | dead = true; |
| 338 | p->exit_state = EXIT_DEAD; | ||
| 339 | return true; | ||
| 340 | } | 392 | } |
| 341 | } | 393 | } |
| 342 | 394 | /* Mark it as in the process of being reaped. */ | |
| 343 | return false; | 395 | if (dead) |
| 396 | p->exit_state = EXIT_DEAD; | ||
| 397 | return dead; | ||
| 344 | } | 398 | } |
| 345 | 399 | ||
| 346 | static int ptrace_detach(struct task_struct *child, unsigned int data) | 400 | static int ptrace_detach(struct task_struct *child, unsigned int data) |
| @@ -365,6 +419,7 @@ static int ptrace_detach(struct task_struct *child, unsigned int data) | |||
| 365 | } | 419 | } |
| 366 | write_unlock_irq(&tasklist_lock); | 420 | write_unlock_irq(&tasklist_lock); |
| 367 | 421 | ||
| 422 | proc_ptrace_connector(child, PTRACE_DETACH); | ||
| 368 | if (unlikely(dead)) | 423 | if (unlikely(dead)) |
| 369 | release_task(child); | 424 | release_task(child); |
| 370 | 425 | ||
| @@ -611,10 +666,12 @@ static int ptrace_regset(struct task_struct *task, int req, unsigned int type, | |||
| 611 | int ptrace_request(struct task_struct *child, long request, | 666 | int ptrace_request(struct task_struct *child, long request, |
| 612 | unsigned long addr, unsigned long data) | 667 | unsigned long addr, unsigned long data) |
| 613 | { | 668 | { |
| 669 | bool seized = child->ptrace & PT_SEIZED; | ||
| 614 | int ret = -EIO; | 670 | int ret = -EIO; |
| 615 | siginfo_t siginfo; | 671 | siginfo_t siginfo, *si; |
| 616 | void __user *datavp = (void __user *) data; | 672 | void __user *datavp = (void __user *) data; |
| 617 | unsigned long __user *datalp = datavp; | 673 | unsigned long __user *datalp = datavp; |
| 674 | unsigned long flags; | ||
| 618 | 675 | ||
| 619 | switch (request) { | 676 | switch (request) { |
| 620 | case PTRACE_PEEKTEXT: | 677 | case PTRACE_PEEKTEXT: |
| @@ -647,6 +704,62 @@ int ptrace_request(struct task_struct *child, long request, | |||
| 647 | ret = ptrace_setsiginfo(child, &siginfo); | 704 | ret = ptrace_setsiginfo(child, &siginfo); |
| 648 | break; | 705 | break; |
| 649 | 706 | ||
| 707 | case PTRACE_INTERRUPT: | ||
| 708 | /* | ||
| 709 | * Stop tracee without any side-effect on signal or job | ||
| 710 | * control. At least one trap is guaranteed to happen | ||
| 711 | * after this request. If @child is already trapped, the | ||
| 712 | * current trap is not disturbed and another trap will | ||
| 713 | * happen after the current trap is ended with PTRACE_CONT. | ||
| 714 | * | ||
| 715 | * The actual trap might not be PTRACE_EVENT_STOP trap but | ||
| 716 | * the pending condition is cleared regardless. | ||
| 717 | */ | ||
| 718 | if (unlikely(!seized || !lock_task_sighand(child, &flags))) | ||
| 719 | break; | ||
| 720 | |||
| 721 | /* | ||
| 722 | * INTERRUPT doesn't disturb existing trap sans one | ||
| 723 | * exception. If ptracer issued LISTEN for the current | ||
| 724 | * STOP, this INTERRUPT should clear LISTEN and re-trap | ||
| 725 | * tracee into STOP. | ||
| 726 | */ | ||
| 727 | if (likely(task_set_jobctl_pending(child, JOBCTL_TRAP_STOP))) | ||
| 728 | signal_wake_up(child, child->jobctl & JOBCTL_LISTENING); | ||
| 729 | |||
| 730 | unlock_task_sighand(child, &flags); | ||
| 731 | ret = 0; | ||
| 732 | break; | ||
| 733 | |||
| 734 | case PTRACE_LISTEN: | ||
| 735 | /* | ||
| 736 | * Listen for events. Tracee must be in STOP. It's not | ||
| 737 | * resumed per-se but is not considered to be in TRACED by | ||
| 738 | * wait(2) or ptrace(2). If an async event (e.g. group | ||
| 739 | * stop state change) happens, tracee will enter STOP trap | ||
| 740 | * again. Alternatively, ptracer can issue INTERRUPT to | ||
| 741 | * finish listening and re-trap tracee into STOP. | ||
| 742 | */ | ||
| 743 | if (unlikely(!seized || !lock_task_sighand(child, &flags))) | ||
| 744 | break; | ||
| 745 | |||
| 746 | si = child->last_siginfo; | ||
| 747 | if (unlikely(!si || si->si_code >> 8 != PTRACE_EVENT_STOP)) | ||
| 748 | break; | ||
| 749 | |||
| 750 | child->jobctl |= JOBCTL_LISTENING; | ||
| 751 | |||
| 752 | /* | ||
| 753 | * If NOTIFY is set, it means event happened between start | ||
| 754 | * of this trap and now. Trigger re-trap immediately. | ||
| 755 | */ | ||
| 756 | if (child->jobctl & JOBCTL_TRAP_NOTIFY) | ||
| 757 | signal_wake_up(child, true); | ||
| 758 | |||
| 759 | unlock_task_sighand(child, &flags); | ||
| 760 | ret = 0; | ||
| 761 | break; | ||
| 762 | |||
| 650 | case PTRACE_DETACH: /* detach a process that was attached. */ | 763 | case PTRACE_DETACH: /* detach a process that was attached. */ |
| 651 | ret = ptrace_detach(child, data); | 764 | ret = ptrace_detach(child, data); |
| 652 | break; | 765 | break; |
| @@ -761,8 +874,8 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr, | |||
| 761 | goto out; | 874 | goto out; |
| 762 | } | 875 | } |
| 763 | 876 | ||
| 764 | if (request == PTRACE_ATTACH) { | 877 | if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) { |
| 765 | ret = ptrace_attach(child); | 878 | ret = ptrace_attach(child, request, data); |
| 766 | /* | 879 | /* |
| 767 | * Some architectures need to do book-keeping after | 880 | * Some architectures need to do book-keeping after |
| 768 | * a ptrace attach. | 881 | * a ptrace attach. |
| @@ -772,7 +885,8 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr, | |||
| 772 | goto out_put_task_struct; | 885 | goto out_put_task_struct; |
| 773 | } | 886 | } |
| 774 | 887 | ||
| 775 | ret = ptrace_check_attach(child, request == PTRACE_KILL); | 888 | ret = ptrace_check_attach(child, request == PTRACE_KILL || |
| 889 | request == PTRACE_INTERRUPT); | ||
| 776 | if (ret < 0) | 890 | if (ret < 0) |
| 777 | goto out_put_task_struct; | 891 | goto out_put_task_struct; |
| 778 | 892 | ||
| @@ -903,8 +1017,8 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, | |||
| 903 | goto out; | 1017 | goto out; |
| 904 | } | 1018 | } |
| 905 | 1019 | ||
| 906 | if (request == PTRACE_ATTACH) { | 1020 | if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) { |
| 907 | ret = ptrace_attach(child); | 1021 | ret = ptrace_attach(child, request, data); |
| 908 | /* | 1022 | /* |
| 909 | * Some architectures need to do book-keeping after | 1023 | * Some architectures need to do book-keeping after |
| 910 | * a ptrace attach. | 1024 | * a ptrace attach. |
| @@ -914,7 +1028,8 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, | |||
| 914 | goto out_put_task_struct; | 1028 | goto out_put_task_struct; |
| 915 | } | 1029 | } |
| 916 | 1030 | ||
| 917 | ret = ptrace_check_attach(child, request == PTRACE_KILL); | 1031 | ret = ptrace_check_attach(child, request == PTRACE_KILL || |
| 1032 | request == PTRACE_INTERRUPT); | ||
| 918 | if (!ret) | 1033 | if (!ret) |
| 919 | ret = compat_arch_ptrace(child, request, addr, data); | 1034 | ret = compat_arch_ptrace(child, request, addr, data); |
| 920 | 1035 | ||
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 7784bd216b6a..ddddb320be61 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c | |||
| @@ -37,7 +37,7 @@ | |||
| 37 | #include <linux/smp.h> | 37 | #include <linux/smp.h> |
| 38 | #include <linux/interrupt.h> | 38 | #include <linux/interrupt.h> |
| 39 | #include <linux/sched.h> | 39 | #include <linux/sched.h> |
| 40 | #include <asm/atomic.h> | 40 | #include <linux/atomic.h> |
| 41 | #include <linux/bitops.h> | 41 | #include <linux/bitops.h> |
| 42 | #include <linux/percpu.h> | 42 | #include <linux/percpu.h> |
| 43 | #include <linux/notifier.h> | 43 | #include <linux/notifier.h> |
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 2e138db03382..98f51b13bb7e 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c | |||
| @@ -33,7 +33,7 @@ | |||
| 33 | #include <linux/rcupdate.h> | 33 | #include <linux/rcupdate.h> |
| 34 | #include <linux/interrupt.h> | 34 | #include <linux/interrupt.h> |
| 35 | #include <linux/sched.h> | 35 | #include <linux/sched.h> |
| 36 | #include <asm/atomic.h> | 36 | #include <linux/atomic.h> |
| 37 | #include <linux/bitops.h> | 37 | #include <linux/bitops.h> |
| 38 | #include <linux/completion.h> | 38 | #include <linux/completion.h> |
| 39 | #include <linux/moduleparam.h> | 39 | #include <linux/moduleparam.h> |
| @@ -941,7 +941,6 @@ static void rcu_torture_timer(unsigned long unused) | |||
| 941 | idx = cur_ops->readlock(); | 941 | idx = cur_ops->readlock(); |
| 942 | completed = cur_ops->completed(); | 942 | completed = cur_ops->completed(); |
| 943 | p = rcu_dereference_check(rcu_torture_current, | 943 | p = rcu_dereference_check(rcu_torture_current, |
| 944 | rcu_read_lock_held() || | ||
| 945 | rcu_read_lock_bh_held() || | 944 | rcu_read_lock_bh_held() || |
| 946 | rcu_read_lock_sched_held() || | 945 | rcu_read_lock_sched_held() || |
| 947 | srcu_read_lock_held(&srcu_ctl)); | 946 | srcu_read_lock_held(&srcu_ctl)); |
| @@ -1002,7 +1001,6 @@ rcu_torture_reader(void *arg) | |||
| 1002 | idx = cur_ops->readlock(); | 1001 | idx = cur_ops->readlock(); |
| 1003 | completed = cur_ops->completed(); | 1002 | completed = cur_ops->completed(); |
| 1004 | p = rcu_dereference_check(rcu_torture_current, | 1003 | p = rcu_dereference_check(rcu_torture_current, |
| 1005 | rcu_read_lock_held() || | ||
| 1006 | rcu_read_lock_bh_held() || | 1004 | rcu_read_lock_bh_held() || |
| 1007 | rcu_read_lock_sched_held() || | 1005 | rcu_read_lock_sched_held() || |
| 1008 | srcu_read_lock_held(&srcu_ctl)); | 1006 | srcu_read_lock_held(&srcu_ctl)); |
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 89419ff92e99..ba06207b1dd3 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
| @@ -84,10 +84,35 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); | |||
| 84 | 84 | ||
| 85 | static struct rcu_state *rcu_state; | 85 | static struct rcu_state *rcu_state; |
| 86 | 86 | ||
| 87 | /* | ||
| 88 | * The rcu_scheduler_active variable transitions from zero to one just | ||
| 89 | * before the first task is spawned. So when this variable is zero, RCU | ||
| 90 | * can assume that there is but one task, allowing RCU to (for example) | ||
| 91 | * optimized synchronize_sched() to a simple barrier(). When this variable | ||
| 92 | * is one, RCU must actually do all the hard work required to detect real | ||
| 93 | * grace periods. This variable is also used to suppress boot-time false | ||
| 94 | * positives from lockdep-RCU error checking. | ||
| 95 | */ | ||
| 87 | int rcu_scheduler_active __read_mostly; | 96 | int rcu_scheduler_active __read_mostly; |
| 88 | EXPORT_SYMBOL_GPL(rcu_scheduler_active); | 97 | EXPORT_SYMBOL_GPL(rcu_scheduler_active); |
| 89 | 98 | ||
| 90 | /* | 99 | /* |
| 100 | * The rcu_scheduler_fully_active variable transitions from zero to one | ||
| 101 | * during the early_initcall() processing, which is after the scheduler | ||
| 102 | * is capable of creating new tasks. So RCU processing (for example, | ||
| 103 | * creating tasks for RCU priority boosting) must be delayed until after | ||
| 104 | * rcu_scheduler_fully_active transitions from zero to one. We also | ||
| 105 | * currently delay invocation of any RCU callbacks until after this point. | ||
| 106 | * | ||
| 107 | * It might later prove better for people registering RCU callbacks during | ||
| 108 | * early boot to take responsibility for these callbacks, but one step at | ||
| 109 | * a time. | ||
| 110 | */ | ||
| 111 | static int rcu_scheduler_fully_active __read_mostly; | ||
| 112 | |||
| 113 | #ifdef CONFIG_RCU_BOOST | ||
| 114 | |||
| 115 | /* | ||
| 91 | * Control variables for per-CPU and per-rcu_node kthreads. These | 116 | * Control variables for per-CPU and per-rcu_node kthreads. These |
| 92 | * handle all flavors of RCU. | 117 | * handle all flavors of RCU. |
| 93 | */ | 118 | */ |
| @@ -96,10 +121,12 @@ DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); | |||
| 96 | DEFINE_PER_CPU(int, rcu_cpu_kthread_cpu); | 121 | DEFINE_PER_CPU(int, rcu_cpu_kthread_cpu); |
| 97 | DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); | 122 | DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); |
| 98 | DEFINE_PER_CPU(char, rcu_cpu_has_work); | 123 | DEFINE_PER_CPU(char, rcu_cpu_has_work); |
| 99 | static char rcu_kthreads_spawnable; | 124 | |
| 125 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
| 100 | 126 | ||
| 101 | static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); | 127 | static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); |
| 102 | static void invoke_rcu_cpu_kthread(void); | 128 | static void invoke_rcu_core(void); |
| 129 | static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); | ||
| 103 | 130 | ||
| 104 | #define RCU_KTHREAD_PRIO 1 /* RT priority for per-CPU kthreads. */ | 131 | #define RCU_KTHREAD_PRIO 1 /* RT priority for per-CPU kthreads. */ |
| 105 | 132 | ||
| @@ -1088,14 +1115,8 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) | |||
| 1088 | int need_report = 0; | 1115 | int need_report = 0; |
| 1089 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); | 1116 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); |
| 1090 | struct rcu_node *rnp; | 1117 | struct rcu_node *rnp; |
| 1091 | struct task_struct *t; | ||
| 1092 | 1118 | ||
| 1093 | /* Stop the CPU's kthread. */ | 1119 | rcu_stop_cpu_kthread(cpu); |
| 1094 | t = per_cpu(rcu_cpu_kthread_task, cpu); | ||
| 1095 | if (t != NULL) { | ||
| 1096 | per_cpu(rcu_cpu_kthread_task, cpu) = NULL; | ||
| 1097 | kthread_stop(t); | ||
| 1098 | } | ||
| 1099 | 1120 | ||
| 1100 | /* Exclude any attempts to start a new grace period. */ | 1121 | /* Exclude any attempts to start a new grace period. */ |
| 1101 | raw_spin_lock_irqsave(&rsp->onofflock, flags); | 1122 | raw_spin_lock_irqsave(&rsp->onofflock, flags); |
| @@ -1231,7 +1252,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 1231 | 1252 | ||
| 1232 | /* Re-raise the RCU softirq if there are callbacks remaining. */ | 1253 | /* Re-raise the RCU softirq if there are callbacks remaining. */ |
| 1233 | if (cpu_has_callbacks_ready_to_invoke(rdp)) | 1254 | if (cpu_has_callbacks_ready_to_invoke(rdp)) |
| 1234 | invoke_rcu_cpu_kthread(); | 1255 | invoke_rcu_core(); |
| 1235 | } | 1256 | } |
| 1236 | 1257 | ||
| 1237 | /* | 1258 | /* |
| @@ -1277,7 +1298,7 @@ void rcu_check_callbacks(int cpu, int user) | |||
| 1277 | } | 1298 | } |
| 1278 | rcu_preempt_check_callbacks(cpu); | 1299 | rcu_preempt_check_callbacks(cpu); |
| 1279 | if (rcu_pending(cpu)) | 1300 | if (rcu_pending(cpu)) |
| 1280 | invoke_rcu_cpu_kthread(); | 1301 | invoke_rcu_core(); |
| 1281 | } | 1302 | } |
| 1282 | 1303 | ||
| 1283 | #ifdef CONFIG_SMP | 1304 | #ifdef CONFIG_SMP |
| @@ -1442,13 +1463,14 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 1442 | } | 1463 | } |
| 1443 | 1464 | ||
| 1444 | /* If there are callbacks ready, invoke them. */ | 1465 | /* If there are callbacks ready, invoke them. */ |
| 1445 | rcu_do_batch(rsp, rdp); | 1466 | if (cpu_has_callbacks_ready_to_invoke(rdp)) |
| 1467 | invoke_rcu_callbacks(rsp, rdp); | ||
| 1446 | } | 1468 | } |
| 1447 | 1469 | ||
| 1448 | /* | 1470 | /* |
| 1449 | * Do softirq processing for the current CPU. | 1471 | * Do softirq processing for the current CPU. |
| 1450 | */ | 1472 | */ |
| 1451 | static void rcu_process_callbacks(void) | 1473 | static void rcu_process_callbacks(struct softirq_action *unused) |
| 1452 | { | 1474 | { |
| 1453 | __rcu_process_callbacks(&rcu_sched_state, | 1475 | __rcu_process_callbacks(&rcu_sched_state, |
| 1454 | &__get_cpu_var(rcu_sched_data)); | 1476 | &__get_cpu_var(rcu_sched_data)); |
| @@ -1465,342 +1487,22 @@ static void rcu_process_callbacks(void) | |||
| 1465 | * the current CPU with interrupts disabled, the rcu_cpu_kthread_task | 1487 | * the current CPU with interrupts disabled, the rcu_cpu_kthread_task |
| 1466 | * cannot disappear out from under us. | 1488 | * cannot disappear out from under us. |
| 1467 | */ | 1489 | */ |
| 1468 | static void invoke_rcu_cpu_kthread(void) | 1490 | static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) |
| 1469 | { | ||
| 1470 | unsigned long flags; | ||
| 1471 | |||
| 1472 | local_irq_save(flags); | ||
| 1473 | __this_cpu_write(rcu_cpu_has_work, 1); | ||
| 1474 | if (__this_cpu_read(rcu_cpu_kthread_task) == NULL) { | ||
| 1475 | local_irq_restore(flags); | ||
| 1476 | return; | ||
| 1477 | } | ||
| 1478 | wake_up_process(__this_cpu_read(rcu_cpu_kthread_task)); | ||
| 1479 | local_irq_restore(flags); | ||
| 1480 | } | ||
| 1481 | |||
| 1482 | /* | ||
| 1483 | * Wake up the specified per-rcu_node-structure kthread. | ||
| 1484 | * Because the per-rcu_node kthreads are immortal, we don't need | ||
| 1485 | * to do anything to keep them alive. | ||
| 1486 | */ | ||
| 1487 | static void invoke_rcu_node_kthread(struct rcu_node *rnp) | ||
| 1488 | { | ||
| 1489 | struct task_struct *t; | ||
| 1490 | |||
| 1491 | t = rnp->node_kthread_task; | ||
| 1492 | if (t != NULL) | ||
| 1493 | wake_up_process(t); | ||
| 1494 | } | ||
| 1495 | |||
| 1496 | /* | ||
| 1497 | * Set the specified CPU's kthread to run RT or not, as specified by | ||
| 1498 | * the to_rt argument. The CPU-hotplug locks are held, so the task | ||
| 1499 | * is not going away. | ||
| 1500 | */ | ||
| 1501 | static void rcu_cpu_kthread_setrt(int cpu, int to_rt) | ||
| 1502 | { | ||
| 1503 | int policy; | ||
| 1504 | struct sched_param sp; | ||
| 1505 | struct task_struct *t; | ||
| 1506 | |||
| 1507 | t = per_cpu(rcu_cpu_kthread_task, cpu); | ||
| 1508 | if (t == NULL) | ||
| 1509 | return; | ||
| 1510 | if (to_rt) { | ||
| 1511 | policy = SCHED_FIFO; | ||
| 1512 | sp.sched_priority = RCU_KTHREAD_PRIO; | ||
| 1513 | } else { | ||
| 1514 | policy = SCHED_NORMAL; | ||
| 1515 | sp.sched_priority = 0; | ||
| 1516 | } | ||
| 1517 | sched_setscheduler_nocheck(t, policy, &sp); | ||
| 1518 | } | ||
| 1519 | |||
| 1520 | /* | ||
| 1521 | * Timer handler to initiate the waking up of per-CPU kthreads that | ||
| 1522 | * have yielded the CPU due to excess numbers of RCU callbacks. | ||
| 1523 | * We wake up the per-rcu_node kthread, which in turn will wake up | ||
| 1524 | * the booster kthread. | ||
| 1525 | */ | ||
| 1526 | static void rcu_cpu_kthread_timer(unsigned long arg) | ||
| 1527 | { | ||
| 1528 | struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, arg); | ||
| 1529 | struct rcu_node *rnp = rdp->mynode; | ||
| 1530 | |||
| 1531 | atomic_or(rdp->grpmask, &rnp->wakemask); | ||
| 1532 | invoke_rcu_node_kthread(rnp); | ||
| 1533 | } | ||
| 1534 | |||
| 1535 | /* | ||
| 1536 | * Drop to non-real-time priority and yield, but only after posting a | ||
| 1537 | * timer that will cause us to regain our real-time priority if we | ||
| 1538 | * remain preempted. Either way, we restore our real-time priority | ||
| 1539 | * before returning. | ||
| 1540 | */ | ||
| 1541 | static void rcu_yield(void (*f)(unsigned long), unsigned long arg) | ||
| 1542 | { | ||
| 1543 | struct sched_param sp; | ||
| 1544 | struct timer_list yield_timer; | ||
| 1545 | |||
| 1546 | setup_timer_on_stack(&yield_timer, f, arg); | ||
| 1547 | mod_timer(&yield_timer, jiffies + 2); | ||
| 1548 | sp.sched_priority = 0; | ||
| 1549 | sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp); | ||
| 1550 | set_user_nice(current, 19); | ||
| 1551 | schedule(); | ||
| 1552 | sp.sched_priority = RCU_KTHREAD_PRIO; | ||
| 1553 | sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); | ||
| 1554 | del_timer(&yield_timer); | ||
| 1555 | } | ||
| 1556 | |||
| 1557 | /* | ||
| 1558 | * Handle cases where the rcu_cpu_kthread() ends up on the wrong CPU. | ||
| 1559 | * This can happen while the corresponding CPU is either coming online | ||
| 1560 | * or going offline. We cannot wait until the CPU is fully online | ||
| 1561 | * before starting the kthread, because the various notifier functions | ||
| 1562 | * can wait for RCU grace periods. So we park rcu_cpu_kthread() until | ||
| 1563 | * the corresponding CPU is online. | ||
| 1564 | * | ||
| 1565 | * Return 1 if the kthread needs to stop, 0 otherwise. | ||
| 1566 | * | ||
| 1567 | * Caller must disable bh. This function can momentarily enable it. | ||
| 1568 | */ | ||
| 1569 | static int rcu_cpu_kthread_should_stop(int cpu) | ||
| 1570 | { | ||
| 1571 | while (cpu_is_offline(cpu) || | ||
| 1572 | !cpumask_equal(&current->cpus_allowed, cpumask_of(cpu)) || |||
| 1573 | smp_processor_id() != cpu) { | ||
| 1574 | if (kthread_should_stop()) | ||
| 1575 | return 1; | ||
| 1576 | per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; | ||
| 1577 | per_cpu(rcu_cpu_kthread_cpu, cpu) = raw_smp_processor_id(); | ||
| 1578 | local_bh_enable(); | ||
| 1579 | schedule_timeout_uninterruptible(1); | ||
| 1580 | if (!cpumask_equal(&current->cpus_allowed, cpumask_of(cpu))) |||
| 1581 | set_cpus_allowed_ptr(current, cpumask_of(cpu)); | ||
| 1582 | local_bh_disable(); | ||
| 1583 | } | ||
| 1584 | per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; | ||
| 1585 | return 0; | ||
| 1586 | } | ||
| 1587 | |||
| 1588 | /* | ||
| 1589 | * Per-CPU kernel thread that invokes RCU callbacks. This replaces the | ||
| 1590 | * earlier RCU softirq. | ||
| 1591 | */ | ||
| 1592 | static int rcu_cpu_kthread(void *arg) | ||
| 1593 | { | ||
| 1594 | int cpu = (int)(long)arg; | ||
| 1595 | unsigned long flags; | ||
| 1596 | int spincnt = 0; | ||
| 1597 | unsigned int *statusp = &per_cpu(rcu_cpu_kthread_status, cpu); | ||
| 1598 | char work; | ||
| 1599 | char *workp = &per_cpu(rcu_cpu_has_work, cpu); | ||
| 1600 | |||
| 1601 | for (;;) { | ||
| 1602 | *statusp = RCU_KTHREAD_WAITING; | ||
| 1603 | rcu_wait(*workp != 0 || kthread_should_stop()); | ||
| 1604 | local_bh_disable(); | ||
| 1605 | if (rcu_cpu_kthread_should_stop(cpu)) { | ||
| 1606 | local_bh_enable(); | ||
| 1607 | break; | ||
| 1608 | } | ||
| 1609 | *statusp = RCU_KTHREAD_RUNNING; | ||
| 1610 | per_cpu(rcu_cpu_kthread_loops, cpu)++; | ||
| 1611 | local_irq_save(flags); | ||
| 1612 | work = *workp; | ||
| 1613 | *workp = 0; | ||
| 1614 | local_irq_restore(flags); | ||
| 1615 | if (work) | ||
| 1616 | rcu_process_callbacks(); | ||
| 1617 | local_bh_enable(); | ||
| 1618 | if (*workp != 0) | ||
| 1619 | spincnt++; | ||
| 1620 | else | ||
| 1621 | spincnt = 0; | ||
| 1622 | if (spincnt > 10) { | ||
| 1623 | *statusp = RCU_KTHREAD_YIELDING; | ||
| 1624 | rcu_yield(rcu_cpu_kthread_timer, (unsigned long)cpu); | ||
| 1625 | spincnt = 0; | ||
| 1626 | } | ||
| 1627 | } | ||
| 1628 | *statusp = RCU_KTHREAD_STOPPED; | ||
| 1629 | return 0; | ||
| 1630 | } | ||
| 1631 | |||
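A minimal pthread analog of the rcu_cpu_kthread() control flow shown above: sleep until work is flagged, grab and clear the flag, process it, and back off with sched_yield() after more than ten consecutive busy passes. The names and the empty do_work() body are invented; the kernel version uses rcu_wait(), per-CPU variables and local_bh_disable() rather than a mutex and condition variable.

#include <pthread.h>
#include <sched.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int has_work;
static int should_stop;

static void do_work(void) { /* stand-in for invoking callbacks */ }

static void *worker(void *arg)
{
	int spincnt = 0;

	(void)arg;
	for (;;) {
		pthread_mutex_lock(&lock);
		while (!has_work && !should_stop)	/* WAITING */
			pthread_cond_wait(&cond, &lock);
		if (should_stop) {
			pthread_mutex_unlock(&lock);
			break;				/* STOPPED */
		}
		has_work = 0;				/* grab the work */
		pthread_mutex_unlock(&lock);

		do_work();				/* RUNNING */

		pthread_mutex_lock(&lock);
		spincnt = has_work ? spincnt + 1 : 0;	/* more work already? */
		pthread_mutex_unlock(&lock);
		if (spincnt > 10) {			/* YIELDING */
			sched_yield();
			spincnt = 0;
		}
	}
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, worker, NULL);

	pthread_mutex_lock(&lock);		/* hand the worker one unit of work */
	has_work = 1;
	pthread_cond_signal(&cond);
	pthread_mutex_unlock(&lock);

	pthread_mutex_lock(&lock);		/* ...then ask it to stop */
	should_stop = 1;
	pthread_cond_signal(&cond);
	pthread_mutex_unlock(&lock);

	pthread_join(t, NULL);
	puts("worker stopped");
	return 0;
}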
| 1632 | /* | ||
| 1633 | * Spawn a per-CPU kthread, setting up affinity and priority. | ||
| 1634 | * Because the CPU hotplug lock is held, no other CPU will be attempting | ||
| 1635 | * to manipulate rcu_cpu_kthread_task. There might be another CPU | ||
| 1636 | * attempting to access it during boot, but the locking in kthread_bind() | ||
| 1637 | * will enforce sufficient ordering. | ||
| 1638 | */ | ||
| 1639 | static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu) | ||
| 1640 | { | ||
| 1641 | struct sched_param sp; | ||
| 1642 | struct task_struct *t; | ||
| 1643 | |||
| 1644 | if (!rcu_kthreads_spawnable || | ||
| 1645 | per_cpu(rcu_cpu_kthread_task, cpu) != NULL) | ||
| 1646 | return 0; | ||
| 1647 | t = kthread_create(rcu_cpu_kthread, (void *)(long)cpu, "rcuc%d", cpu); | ||
| 1648 | if (IS_ERR(t)) | ||
| 1649 | return PTR_ERR(t); | ||
| 1650 | kthread_bind(t, cpu); | ||
| 1651 | per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; | ||
| 1652 | WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL); | ||
| 1653 | per_cpu(rcu_cpu_kthread_task, cpu) = t; | ||
| 1654 | sp.sched_priority = RCU_KTHREAD_PRIO; | ||
| 1655 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); | ||
| 1656 | return 0; | ||
| 1657 | } | ||
| 1658 | |||
| 1659 | /* | ||
| 1660 | * Per-rcu_node kthread, which is in charge of waking up the per-CPU | ||
| 1661 | * kthreads when needed. We ignore requests to wake up kthreads | ||
| 1662 | * for offline CPUs, which is OK because force_quiescent_state() | ||
| 1663 | * takes care of this case. | ||
| 1664 | */ | ||
| 1665 | static int rcu_node_kthread(void *arg) | ||
| 1666 | { | 1491 | { |
| 1667 | int cpu; | 1492 | if (unlikely(!ACCESS_ONCE(rcu_scheduler_fully_active))) |
| 1668 | unsigned long flags; | ||
| 1669 | unsigned long mask; | ||
| 1670 | struct rcu_node *rnp = (struct rcu_node *)arg; | ||
| 1671 | struct sched_param sp; | ||
| 1672 | struct task_struct *t; | ||
| 1673 | |||
| 1674 | for (;;) { | ||
| 1675 | rnp->node_kthread_status = RCU_KTHREAD_WAITING; | ||
| 1676 | rcu_wait(atomic_read(&rnp->wakemask) != 0); | ||
| 1677 | rnp->node_kthread_status = RCU_KTHREAD_RUNNING; | ||
| 1678 | raw_spin_lock_irqsave(&rnp->lock, flags); | ||
| 1679 | mask = atomic_xchg(&rnp->wakemask, 0); | ||
| 1680 | rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */ | ||
| 1681 | for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) { | ||
| 1682 | if ((mask & 0x1) == 0) | ||
| 1683 | continue; | ||
| 1684 | preempt_disable(); | ||
| 1685 | t = per_cpu(rcu_cpu_kthread_task, cpu); | ||
| 1686 | if (!cpu_online(cpu) || t == NULL) { | ||
| 1687 | preempt_enable(); | ||
| 1688 | continue; | ||
| 1689 | } | ||
| 1690 | per_cpu(rcu_cpu_has_work, cpu) = 1; | ||
| 1691 | sp.sched_priority = RCU_KTHREAD_PRIO; | ||
| 1692 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); | ||
| 1693 | preempt_enable(); | ||
| 1694 | } | ||
| 1695 | } | ||
| 1696 | /* NOTREACHED */ | ||
| 1697 | rnp->node_kthread_status = RCU_KTHREAD_STOPPED; | ||
| 1698 | return 0; | ||
| 1699 | } | ||
| 1700 | |||
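The wakeup loop in rcu_node_kthread() above walks a word-sized mask holding one bit per CPU of the node's [grplo, grphi] range. A stand-alone sketch of just that bit-walk, with a printf standing in for the per-CPU kthread wakeup:

#include <stdio.h>

static void wake_flagged_cpus(unsigned long mask, int grplo, int grphi)
{
	int cpu;

	for (cpu = grplo; cpu <= grphi; cpu++, mask >>= 1) {
		if ((mask & 0x1) == 0)
			continue;	/* this CPU did not request a wakeup */
		printf("waking per-CPU kthread for CPU %d\n", cpu);
	}
}

int main(void)
{
	/* CPUs 0, 2 and 3 of a node covering CPUs 0..3 asked to be woken. */
	wake_flagged_cpus(0x0d, 0, 3);
	return 0;
}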
| 1701 | /* | ||
| 1702 | * Set the per-rcu_node kthread's affinity to cover all CPUs that are | ||
| 1703 | * served by the rcu_node in question. The CPU hotplug lock is still | ||
| 1704 | * held, so the value of rnp->qsmaskinit will be stable. | ||
| 1705 | * | ||
| 1706 | * We don't include outgoingcpu in the affinity set; use -1 if there is | ||
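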
| 1707 | * no outgoing CPU. If there are no CPUs left in the affinity set, | ||
| 1708 | * this function allows the kthread to execute on any CPU. | ||
| 1709 | */ | ||
| 1710 | static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) | ||
| 1711 | { | ||
| 1712 | cpumask_var_t cm; | ||
| 1713 | int cpu; | ||
| 1714 | unsigned long mask = rnp->qsmaskinit; | ||
| 1715 | |||
| 1716 | if (rnp->node_kthread_task == NULL) | ||
| 1717 | return; | 1493 | return; |
| 1718 | if (!alloc_cpumask_var(&cm, GFP_KERNEL)) | 1494 | if (likely(!rsp->boost)) { |
| 1495 | rcu_do_batch(rsp, rdp); | ||
| 1719 | return; | 1496 | return; |
| 1720 | cpumask_clear(cm); | ||
| 1721 | for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) | ||
| 1722 | if ((mask & 0x1) && cpu != outgoingcpu) | ||
| 1723 | cpumask_set_cpu(cpu, cm); | ||
| 1724 | if (cpumask_weight(cm) == 0) { | ||
| 1725 | cpumask_setall(cm); | ||
| 1726 | for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) | ||
| 1727 | cpumask_clear_cpu(cpu, cm); | ||
| 1728 | WARN_ON_ONCE(cpumask_weight(cm) == 0); | ||
| 1729 | } | 1497 | } |
| 1730 | set_cpus_allowed_ptr(rnp->node_kthread_task, cm); | 1498 | invoke_rcu_callbacks_kthread(); |
| 1731 | rcu_boost_kthread_setaffinity(rnp, cm); | ||
| 1732 | free_cpumask_var(cm); | ||
| 1733 | } | 1499 | } |
| 1734 | 1500 | ||
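A user-space analog of the affinity computation above, using the GNU cpu_set_t API: cover the node's CPU range, leave out the outgoing CPU (-1 meaning "none", as in the kernel code), and fall back to "anywhere outside the node" if nothing is left. The helper name is invented, and the CPU numbers in main() may need adjusting to the machine at hand.

#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <string.h>

static int set_node_affinity(pthread_t t, int grplo, int grphi, int outgoingcpu)
{
	cpu_set_t cm;
	int cpu;

	CPU_ZERO(&cm);
	for (cpu = grplo; cpu <= grphi; cpu++)
		if (cpu != outgoingcpu)
			CPU_SET(cpu, &cm);
	if (CPU_COUNT(&cm) == 0) {
		/* Whole node excluded: allow any CPU outside the node instead. */
		for (cpu = 0; cpu < CPU_SETSIZE; cpu++)
			CPU_SET(cpu, &cm);
		for (cpu = grplo; cpu <= grphi; cpu++)
			CPU_CLR(cpu, &cm);
	}
	return pthread_setaffinity_np(t, sizeof(cm), &cm);
}

int main(void)
{
	/* Keep this thread on CPUs 0..3, except for outgoing CPU 2. */
	int err = set_node_affinity(pthread_self(), 0, 3, 2);

	if (err)
		fprintf(stderr, "setaffinity failed: %s\n", strerror(err));
	return 0;
}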
| 1735 | /* | 1501 | static void invoke_rcu_core(void) |
| 1736 | * Spawn a per-rcu_node kthread, setting priority and affinity. | ||
| 1737 | * Called during boot before online/offline can happen, or, if | ||
| 1738 | * during runtime, with the main CPU-hotplug locks held. So only | ||
| 1739 | * one of these can be executing at a time. | ||
| 1740 | */ | ||
| 1741 | static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, | ||
| 1742 | struct rcu_node *rnp) | ||
| 1743 | { | 1502 | { |
| 1744 | unsigned long flags; | 1503 | raise_softirq(RCU_SOFTIRQ); |
| 1745 | int rnp_index = rnp - &rsp->node[0]; | ||
| 1746 | struct sched_param sp; | ||
| 1747 | struct task_struct *t; | ||
| 1748 | |||
| 1749 | if (!rcu_kthreads_spawnable || | ||
| 1750 | rnp->qsmaskinit == 0) | ||
| 1751 | return 0; | ||
| 1752 | if (rnp->node_kthread_task == NULL) { | ||
| 1753 | t = kthread_create(rcu_node_kthread, (void *)rnp, | ||
| 1754 | "rcun%d", rnp_index); | ||
| 1755 | if (IS_ERR(t)) | ||
| 1756 | return PTR_ERR(t); | ||
| 1757 | raw_spin_lock_irqsave(&rnp->lock, flags); | ||
| 1758 | rnp->node_kthread_task = t; | ||
| 1759 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||
| 1760 | sp.sched_priority = 99; | ||
| 1761 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); | ||
| 1762 | } | ||
| 1763 | return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index); | ||
| 1764 | } | 1504 | } |
| 1765 | 1505 | ||
| 1766 | static void rcu_wake_one_boost_kthread(struct rcu_node *rnp); | ||
| 1767 | |||
| 1768 | /* | ||
| 1769 | * Spawn all kthreads -- called as soon as the scheduler is running. | ||
| 1770 | */ | ||
| 1771 | static int __init rcu_spawn_kthreads(void) | ||
| 1772 | { | ||
| 1773 | int cpu; | ||
| 1774 | struct rcu_node *rnp; | ||
| 1775 | struct task_struct *t; | ||
| 1776 | |||
| 1777 | rcu_kthreads_spawnable = 1; | ||
| 1778 | for_each_possible_cpu(cpu) { | ||
| 1779 | per_cpu(rcu_cpu_has_work, cpu) = 0; | ||
| 1780 | if (cpu_online(cpu)) { | ||
| 1781 | (void)rcu_spawn_one_cpu_kthread(cpu); | ||
| 1782 | t = per_cpu(rcu_cpu_kthread_task, cpu); | ||
| 1783 | if (t) | ||
| 1784 | wake_up_process(t); | ||
| 1785 | } | ||
| 1786 | } | ||
| 1787 | rnp = rcu_get_root(rcu_state); | ||
| 1788 | (void)rcu_spawn_one_node_kthread(rcu_state, rnp); | ||
| 1789 | if (rnp->node_kthread_task) | ||
| 1790 | wake_up_process(rnp->node_kthread_task); | ||
| 1791 | if (NUM_RCU_NODES > 1) { | ||
| 1792 | rcu_for_each_leaf_node(rcu_state, rnp) { | ||
| 1793 | (void)rcu_spawn_one_node_kthread(rcu_state, rnp); | ||
| 1794 | t = rnp->node_kthread_task; | ||
| 1795 | if (t) | ||
| 1796 | wake_up_process(t); | ||
| 1797 | rcu_wake_one_boost_kthread(rnp); | ||
| 1798 | } | ||
| 1799 | } | ||
| 1800 | return 0; | ||
| 1801 | } | ||
| 1802 | early_initcall(rcu_spawn_kthreads); | ||
| 1803 | |||
| 1804 | static void | 1506 | static void |
| 1805 | __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), | 1507 | __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), |
| 1806 | struct rcu_state *rsp) | 1508 | struct rcu_state *rsp) |
| @@ -2207,44 +1909,6 @@ static void __cpuinit rcu_prepare_cpu(int cpu) | |||
| 2207 | rcu_preempt_init_percpu_data(cpu); | 1909 | rcu_preempt_init_percpu_data(cpu); |
| 2208 | } | 1910 | } |
| 2209 | 1911 | ||
| 2210 | static void __cpuinit rcu_prepare_kthreads(int cpu) | ||
| 2211 | { | ||
| 2212 | struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); | ||
| 2213 | struct rcu_node *rnp = rdp->mynode; | ||
| 2214 | |||
| 2215 | /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */ | ||
| 2216 | if (rcu_kthreads_spawnable) { | ||
| 2217 | (void)rcu_spawn_one_cpu_kthread(cpu); | ||
| 2218 | if (rnp->node_kthread_task == NULL) | ||
| 2219 | (void)rcu_spawn_one_node_kthread(rcu_state, rnp); | ||
| 2220 | } | ||
| 2221 | } | ||
| 2222 | |||
| 2223 | /* | ||
| 2224 | * kthread_create() creates threads in TASK_UNINTERRUPTIBLE state, | ||
| 2225 | * but the RCU threads are woken on demand, and if demand is low this | ||
| 2226 | * could take a while, triggering the hung task watchdog. | ||
| 2227 | * | ||
| 2228 | * In order to avoid this, poke all tasks once the CPU is fully | ||
| 2229 | * up and running. | ||
| 2230 | */ | ||
| 2231 | static void __cpuinit rcu_online_kthreads(int cpu) | ||
| 2232 | { | ||
| 2233 | struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); | ||
| 2234 | struct rcu_node *rnp = rdp->mynode; | ||
| 2235 | struct task_struct *t; | ||
| 2236 | |||
| 2237 | t = per_cpu(rcu_cpu_kthread_task, cpu); | ||
| 2238 | if (t) | ||
| 2239 | wake_up_process(t); | ||
| 2240 | |||
| 2241 | t = rnp->node_kthread_task; | ||
| 2242 | if (t) | ||
| 2243 | wake_up_process(t); | ||
| 2244 | |||
| 2245 | rcu_wake_one_boost_kthread(rnp); | ||
| 2246 | } | ||
| 2247 | |||
| 2248 | /* | 1912 | /* |
| 2249 | * Handle CPU online/offline notification events. | 1913 | * Handle CPU online/offline notification events. |
| 2250 | */ | 1914 | */ |
| @@ -2262,7 +1926,6 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, | |||
| 2262 | rcu_prepare_kthreads(cpu); | 1926 | rcu_prepare_kthreads(cpu); |
| 2263 | break; | 1927 | break; |
| 2264 | case CPU_ONLINE: | 1928 | case CPU_ONLINE: |
| 2265 | rcu_online_kthreads(cpu); | ||
| 2266 | case CPU_DOWN_FAILED: | 1929 | case CPU_DOWN_FAILED: |
| 2267 | rcu_node_kthread_setaffinity(rnp, -1); | 1930 | rcu_node_kthread_setaffinity(rnp, -1); |
| 2268 | rcu_cpu_kthread_setrt(cpu, 1); | 1931 | rcu_cpu_kthread_setrt(cpu, 1); |
| @@ -2410,6 +2073,7 @@ void __init rcu_init(void) | |||
| 2410 | rcu_init_one(&rcu_sched_state, &rcu_sched_data); | 2073 | rcu_init_one(&rcu_sched_state, &rcu_sched_data); |
| 2411 | rcu_init_one(&rcu_bh_state, &rcu_bh_data); | 2074 | rcu_init_one(&rcu_bh_state, &rcu_bh_data); |
| 2412 | __rcu_init_preempt(); | 2075 | __rcu_init_preempt(); |
| 2076 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); | ||
| 2413 | 2077 | ||
| 2414 | /* | 2078 | /* |
| 2415 | * We don't need protection against CPU-hotplug here because | 2079 | * We don't need protection against CPU-hotplug here because |
diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 7b9a08b4aaea..01b2ccda26fb 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h | |||
| @@ -369,6 +369,7 @@ struct rcu_state { | |||
| 369 | /* period because */ | 369 | /* period because */ |
| 370 | /* force_quiescent_state() */ | 370 | /* force_quiescent_state() */ |
| 371 | /* was running. */ | 371 | /* was running. */ |
| 372 | u8 boost; /* Subject to priority boost. */ | ||
| 372 | unsigned long gpnum; /* Current gp number. */ | 373 | unsigned long gpnum; /* Current gp number. */ |
| 373 | unsigned long completed; /* # of last completed gp. */ | 374 | unsigned long completed; /* # of last completed gp. */ |
| 374 | 375 | ||
| @@ -426,6 +427,7 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp); | |||
| 426 | #ifdef CONFIG_HOTPLUG_CPU | 427 | #ifdef CONFIG_HOTPLUG_CPU |
| 427 | static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, | 428 | static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, |
| 428 | unsigned long flags); | 429 | unsigned long flags); |
| 430 | static void rcu_stop_cpu_kthread(int cpu); | ||
| 429 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | 431 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ |
| 430 | static void rcu_print_detail_task_stall(struct rcu_state *rsp); | 432 | static void rcu_print_detail_task_stall(struct rcu_state *rsp); |
| 431 | static void rcu_print_task_stall(struct rcu_node *rnp); | 433 | static void rcu_print_task_stall(struct rcu_node *rnp); |
| @@ -450,11 +452,19 @@ static void rcu_preempt_send_cbs_to_online(void); | |||
| 450 | static void __init __rcu_init_preempt(void); | 452 | static void __init __rcu_init_preempt(void); |
| 451 | static void rcu_needs_cpu_flush(void); | 453 | static void rcu_needs_cpu_flush(void); |
| 452 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); | 454 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); |
| 455 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); | ||
| 456 | static void invoke_rcu_callbacks_kthread(void); | ||
| 457 | #ifdef CONFIG_RCU_BOOST | ||
| 458 | static void rcu_preempt_do_callbacks(void); | ||
| 453 | static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, | 459 | static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, |
| 454 | cpumask_var_t cm); | 460 | cpumask_var_t cm); |
| 455 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); | ||
| 456 | static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | 461 | static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, |
| 457 | struct rcu_node *rnp, | 462 | struct rcu_node *rnp, |
| 458 | int rnp_index); | 463 | int rnp_index); |
| 464 | static void invoke_rcu_node_kthread(struct rcu_node *rnp); | ||
| 465 | static void rcu_yield(void (*f)(unsigned long), unsigned long arg); | ||
| 466 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
| 467 | static void rcu_cpu_kthread_setrt(int cpu, int to_rt); | ||
| 468 | static void __cpuinit rcu_prepare_kthreads(int cpu); | ||
| 459 | 469 | ||
| 460 | #endif /* #ifndef RCU_TREE_NONCORE */ | 470 | #endif /* #ifndef RCU_TREE_NONCORE */ |
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index c8bff3099a89..8aafbb80b8b0 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h | |||
| @@ -68,6 +68,7 @@ struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state); | |||
| 68 | DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); | 68 | DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); |
| 69 | static struct rcu_state *rcu_state = &rcu_preempt_state; | 69 | static struct rcu_state *rcu_state = &rcu_preempt_state; |
| 70 | 70 | ||
| 71 | static void rcu_read_unlock_special(struct task_struct *t); | ||
| 71 | static int rcu_preempted_readers_exp(struct rcu_node *rnp); | 72 | static int rcu_preempted_readers_exp(struct rcu_node *rnp); |
| 72 | 73 | ||
| 73 | /* | 74 | /* |
| @@ -147,7 +148,7 @@ static void rcu_preempt_note_context_switch(int cpu) | |||
| 147 | struct rcu_data *rdp; | 148 | struct rcu_data *rdp; |
| 148 | struct rcu_node *rnp; | 149 | struct rcu_node *rnp; |
| 149 | 150 | ||
| 150 | if (t->rcu_read_lock_nesting && | 151 | if (t->rcu_read_lock_nesting > 0 && |
| 151 | (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { | 152 | (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { |
| 152 | 153 | ||
| 153 | /* Possibly blocking in an RCU read-side critical section. */ | 154 | /* Possibly blocking in an RCU read-side critical section. */ |
| @@ -190,6 +191,14 @@ static void rcu_preempt_note_context_switch(int cpu) | |||
| 190 | rnp->gp_tasks = &t->rcu_node_entry; | 191 | rnp->gp_tasks = &t->rcu_node_entry; |
| 191 | } | 192 | } |
| 192 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 193 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
| 194 | } else if (t->rcu_read_lock_nesting < 0 && | ||
| 195 | t->rcu_read_unlock_special) { | ||
| 196 | |||
| 197 | /* | ||
| 198 | * Complete exit from RCU read-side critical section on | ||
| 199 | * behalf of preempted instance of __rcu_read_unlock(). | ||
| 200 | */ | ||
| 201 | rcu_read_unlock_special(t); | ||
| 193 | } | 202 | } |
| 194 | 203 | ||
| 195 | /* | 204 | /* |
| @@ -284,7 +293,7 @@ static struct list_head *rcu_next_node_entry(struct task_struct *t, | |||
| 284 | * notify RCU core processing or task having blocked during the RCU | 293 | * notify RCU core processing or task having blocked during the RCU |
| 285 | * read-side critical section. | 294 | * read-side critical section. |
| 286 | */ | 295 | */ |
| 287 | static void rcu_read_unlock_special(struct task_struct *t) | 296 | static noinline void rcu_read_unlock_special(struct task_struct *t) |
| 288 | { | 297 | { |
| 289 | int empty; | 298 | int empty; |
| 290 | int empty_exp; | 299 | int empty_exp; |
| @@ -309,7 +318,7 @@ static void rcu_read_unlock_special(struct task_struct *t) | |||
| 309 | } | 318 | } |
| 310 | 319 | ||
| 311 | /* Hardware IRQ handlers cannot block. */ | 320 | /* Hardware IRQ handlers cannot block. */ |
| 312 | if (in_irq()) { | 321 | if (in_irq() || in_serving_softirq()) { |
| 313 | local_irq_restore(flags); | 322 | local_irq_restore(flags); |
| 314 | return; | 323 | return; |
| 315 | } | 324 | } |
| @@ -342,6 +351,11 @@ static void rcu_read_unlock_special(struct task_struct *t) | |||
| 342 | #ifdef CONFIG_RCU_BOOST | 351 | #ifdef CONFIG_RCU_BOOST |
| 343 | if (&t->rcu_node_entry == rnp->boost_tasks) | 352 | if (&t->rcu_node_entry == rnp->boost_tasks) |
| 344 | rnp->boost_tasks = np; | 353 | rnp->boost_tasks = np; |
| 354 | /* Snapshot and clear ->rcu_boosted with rcu_node lock held. */ | ||
| 355 | if (t->rcu_boosted) { | ||
| 356 | special |= RCU_READ_UNLOCK_BOOSTED; | ||
| 357 | t->rcu_boosted = 0; | ||
| 358 | } | ||
| 345 | #endif /* #ifdef CONFIG_RCU_BOOST */ | 359 | #endif /* #ifdef CONFIG_RCU_BOOST */ |
| 346 | t->rcu_blocked_node = NULL; | 360 | t->rcu_blocked_node = NULL; |
| 347 | 361 | ||
| @@ -358,7 +372,6 @@ static void rcu_read_unlock_special(struct task_struct *t) | |||
| 358 | #ifdef CONFIG_RCU_BOOST | 372 | #ifdef CONFIG_RCU_BOOST |
| 359 | /* Unboost if we were boosted. */ | 373 | /* Unboost if we were boosted. */ |
| 360 | if (special & RCU_READ_UNLOCK_BOOSTED) { | 374 | if (special & RCU_READ_UNLOCK_BOOSTED) { |
| 361 | t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BOOSTED; | ||
| 362 | rt_mutex_unlock(t->rcu_boost_mutex); | 375 | rt_mutex_unlock(t->rcu_boost_mutex); |
| 363 | t->rcu_boost_mutex = NULL; | 376 | t->rcu_boost_mutex = NULL; |
| 364 | } | 377 | } |
| @@ -387,13 +400,22 @@ void __rcu_read_unlock(void) | |||
| 387 | struct task_struct *t = current; | 400 | struct task_struct *t = current; |
| 388 | 401 | ||
| 389 | barrier(); /* needed if we ever invoke rcu_read_unlock in rcutree.c */ | 402 | barrier(); /* needed if we ever invoke rcu_read_unlock in rcutree.c */ |
| 390 | --t->rcu_read_lock_nesting; | 403 | if (t->rcu_read_lock_nesting != 1) |
| 391 | barrier(); /* decrement before load of ->rcu_read_unlock_special */ | 404 | --t->rcu_read_lock_nesting; |
| 392 | if (t->rcu_read_lock_nesting == 0 && | 405 | else { |
| 393 | unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) | 406 | t->rcu_read_lock_nesting = INT_MIN; |
| 394 | rcu_read_unlock_special(t); | 407 | barrier(); /* assign before ->rcu_read_unlock_special load */ |
| 408 | if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) | ||
| 409 | rcu_read_unlock_special(t); | ||
| 410 | barrier(); /* ->rcu_read_unlock_special load before assign */ | ||
| 411 | t->rcu_read_lock_nesting = 0; | ||
| 412 | } | ||
| 395 | #ifdef CONFIG_PROVE_LOCKING | 413 | #ifdef CONFIG_PROVE_LOCKING |
| 396 | WARN_ON_ONCE(ACCESS_ONCE(t->rcu_read_lock_nesting) < 0); | 414 | { |
| 415 | int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting); | ||
| 416 | |||
| 417 | WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2); | ||
| 418 | } | ||
| 397 | #endif /* #ifdef CONFIG_PROVE_LOCKING */ | 419 | #endif /* #ifdef CONFIG_PROVE_LOCKING */ |
| 398 | } | 420 | } |
| 399 | EXPORT_SYMBOL_GPL(__rcu_read_unlock); | 421 | EXPORT_SYMBOL_GPL(__rcu_read_unlock); |
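The __rcu_read_unlock() change above parks the nesting counter at INT_MIN while the outermost unlock is running, so code that interrupts that window can tell "inside a reader" (positive) from "outermost unlock in flight" (negative). The toy below models only that counter state machine — single-threaded, with invented helper names, and without the barriers, per-task state or interrupt handling of the real code:

#include <limits.h>
#include <stdio.h>

static int nesting;		/* models t->rcu_read_lock_nesting */
static int special_pending;	/* models t->rcu_read_unlock_special */

static void read_lock(void)
{
	nesting++;
}

static void handle_special(void)
{
	special_pending = 0;	/* stand-in for rcu_read_unlock_special() */
}

static void read_unlock(void)
{
	if (nesting != 1) {
		--nesting;		/* nested: just decrement */
	} else {
		nesting = INT_MIN;	/* sentinel: outermost unlock in flight */
		if (special_pending)
			handle_special();
		nesting = 0;		/* fully outside the read-side section */
	}
}

/* What an interrupting context would conclude from the counter. */
static void observe(const char *when)
{
	if (nesting > 0)
		printf("%s: inside a reader (nesting=%d)\n", when, nesting);
	else if (nesting < 0)
		printf("%s: outermost unlock in progress\n", when);
	else
		printf("%s: not in a reader\n", when);
}

int main(void)
{
	observe("start");
	read_lock();
	read_lock();
	observe("nested");
	read_unlock();
	observe("after inner unlock");
	special_pending = 1;
	read_unlock();
	observe("after outer unlock");
	return 0;
}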
| @@ -589,7 +611,8 @@ static void rcu_preempt_check_callbacks(int cpu) | |||
| 589 | rcu_preempt_qs(cpu); | 611 | rcu_preempt_qs(cpu); |
| 590 | return; | 612 | return; |
| 591 | } | 613 | } |
| 592 | if (per_cpu(rcu_preempt_data, cpu).qs_pending) | 614 | if (t->rcu_read_lock_nesting > 0 && |
| 615 | per_cpu(rcu_preempt_data, cpu).qs_pending) | ||
| 593 | t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; | 616 | t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; |
| 594 | } | 617 | } |
| 595 | 618 | ||
| @@ -602,6 +625,15 @@ static void rcu_preempt_process_callbacks(void) | |||
| 602 | &__get_cpu_var(rcu_preempt_data)); | 625 | &__get_cpu_var(rcu_preempt_data)); |
| 603 | } | 626 | } |
| 604 | 627 | ||
| 628 | #ifdef CONFIG_RCU_BOOST | ||
| 629 | |||
| 630 | static void rcu_preempt_do_callbacks(void) | ||
| 631 | { | ||
| 632 | rcu_do_batch(&rcu_preempt_state, &__get_cpu_var(rcu_preempt_data)); | ||
| 633 | } | ||
| 634 | |||
| 635 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
| 636 | |||
| 605 | /* | 637 | /* |
| 606 | * Queue a preemptible-RCU callback for invocation after a grace period. | 638 | * Queue a preemptible-RCU callback for invocation after a grace period. |
| 607 | */ | 639 | */ |
| @@ -686,9 +718,12 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp) | |||
| 686 | 718 | ||
| 687 | raw_spin_lock_irqsave(&rnp->lock, flags); | 719 | raw_spin_lock_irqsave(&rnp->lock, flags); |
| 688 | for (;;) { | 720 | for (;;) { |
| 689 | if (!sync_rcu_preempt_exp_done(rnp)) | 721 | if (!sync_rcu_preempt_exp_done(rnp)) { |
| 722 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||
| 690 | break; | 723 | break; |
| 724 | } | ||
| 691 | if (rnp->parent == NULL) { | 725 | if (rnp->parent == NULL) { |
| 726 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||
| 692 | wake_up(&sync_rcu_preempt_exp_wq); | 727 | wake_up(&sync_rcu_preempt_exp_wq); |
| 693 | break; | 728 | break; |
| 694 | } | 729 | } |
| @@ -698,7 +733,6 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp) | |||
| 698 | raw_spin_lock(&rnp->lock); /* irqs already disabled */ | 733 | raw_spin_lock(&rnp->lock); /* irqs already disabled */ |
| 699 | rnp->expmask &= ~mask; | 734 | rnp->expmask &= ~mask; |
| 700 | } | 735 | } |
| 701 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||
| 702 | } | 736 | } |
| 703 | 737 | ||
| 704 | /* | 738 | /* |
| @@ -1165,7 +1199,7 @@ static int rcu_boost(struct rcu_node *rnp) | |||
| 1165 | t = container_of(tb, struct task_struct, rcu_node_entry); | 1199 | t = container_of(tb, struct task_struct, rcu_node_entry); |
| 1166 | rt_mutex_init_proxy_locked(&mtx, t); | 1200 | rt_mutex_init_proxy_locked(&mtx, t); |
| 1167 | t->rcu_boost_mutex = &mtx; | 1201 | t->rcu_boost_mutex = &mtx; |
| 1168 | t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED; | 1202 | t->rcu_boosted = 1; |
| 1169 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1203 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
| 1170 | rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */ | 1204 | rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */ |
| 1171 | rt_mutex_unlock(&mtx); /* Keep lockdep happy. */ | 1205 | rt_mutex_unlock(&mtx); /* Keep lockdep happy. */ |
| @@ -1249,6 +1283,23 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) | |||
| 1249 | } | 1283 | } |
| 1250 | 1284 | ||
| 1251 | /* | 1285 | /* |
| 1286 | * Wake up the per-CPU kthread to invoke RCU callbacks. | ||
| 1287 | */ | ||
| 1288 | static void invoke_rcu_callbacks_kthread(void) | ||
| 1289 | { | ||
| 1290 | unsigned long flags; | ||
| 1291 | |||
| 1292 | local_irq_save(flags); | ||
| 1293 | __this_cpu_write(rcu_cpu_has_work, 1); | ||
| 1294 | if (__this_cpu_read(rcu_cpu_kthread_task) == NULL) { | ||
| 1295 | local_irq_restore(flags); | ||
| 1296 | return; | ||
| 1297 | } | ||
| 1298 | wake_up_process(__this_cpu_read(rcu_cpu_kthread_task)); | ||
| 1299 | local_irq_restore(flags); | ||
| 1300 | } | ||
| 1301 | |||
| 1302 | /* | ||
| 1252 | * Set the affinity of the boost kthread. The CPU-hotplug locks are | 1303 | * Set the affinity of the boost kthread. The CPU-hotplug locks are |
| 1253 | * held, so no one should be messing with the existence of the boost | 1304 | * held, so no one should be messing with the existence of the boost |
| 1254 | * kthread. | 1305 | * kthread. |
| @@ -1288,6 +1339,7 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | |||
| 1288 | 1339 | ||
| 1289 | if (&rcu_preempt_state != rsp) | 1340 | if (&rcu_preempt_state != rsp) |
| 1290 | return 0; | 1341 | return 0; |
| 1342 | rsp->boost = 1; | ||
| 1291 | if (rnp->boost_kthread_task != NULL) | 1343 | if (rnp->boost_kthread_task != NULL) |
| 1292 | return 0; | 1344 | return 0; |
| 1293 | t = kthread_create(rcu_boost_kthread, (void *)rnp, | 1345 | t = kthread_create(rcu_boost_kthread, (void *)rnp, |
| @@ -1299,13 +1351,372 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | |||
| 1299 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1351 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
| 1300 | sp.sched_priority = RCU_KTHREAD_PRIO; | 1352 | sp.sched_priority = RCU_KTHREAD_PRIO; |
| 1301 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); | 1353 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); |
| 1354 | wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ | ||
| 1302 | return 0; | 1355 | return 0; |
| 1303 | } | 1356 | } |
| 1304 | 1357 | ||
| 1305 | static void __cpuinit rcu_wake_one_boost_kthread(struct rcu_node *rnp) | 1358 | #ifdef CONFIG_HOTPLUG_CPU |
| 1359 | |||
| 1360 | /* | ||
| 1361 | * Stop RCU's per-CPU kthread when its CPU goes offline. | ||
| 1362 | */ | ||
| 1363 | static void rcu_stop_cpu_kthread(int cpu) | ||
| 1364 | { | ||
| 1365 | struct task_struct *t; | ||
| 1366 | |||
| 1367 | /* Stop the CPU's kthread. */ | ||
| 1368 | t = per_cpu(rcu_cpu_kthread_task, cpu); | ||
| 1369 | if (t != NULL) { | ||
| 1370 | per_cpu(rcu_cpu_kthread_task, cpu) = NULL; | ||
| 1371 | kthread_stop(t); | ||
| 1372 | } | ||
| 1373 | } | ||
| 1374 | |||
| 1375 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | ||
| 1376 | |||
| 1377 | static void rcu_kthread_do_work(void) | ||
| 1306 | { | 1378 | { |
| 1307 | if (rnp->boost_kthread_task) | 1379 | rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); |
| 1308 | wake_up_process(rnp->boost_kthread_task); | 1380 | rcu_do_batch(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); |
| 1381 | rcu_preempt_do_callbacks(); | ||
| 1382 | } | ||
| 1383 | |||
| 1384 | /* | ||
| 1385 | * Wake up the specified per-rcu_node-structure kthread. | ||
| 1386 | * Because the per-rcu_node kthreads are immortal, we don't need | ||
| 1387 | * to do anything to keep them alive. | ||
| 1388 | */ | ||
| 1389 | static void invoke_rcu_node_kthread(struct rcu_node *rnp) | ||
| 1390 | { | ||
| 1391 | struct task_struct *t; | ||
| 1392 | |||
| 1393 | t = rnp->node_kthread_task; | ||
| 1394 | if (t != NULL) | ||
| 1395 | wake_up_process(t); | ||
| 1396 | } | ||
| 1397 | |||
| 1398 | /* | ||
| 1399 | * Set the specified CPU's kthread to run RT or not, as specified by | ||
| 1400 | * the to_rt argument. The CPU-hotplug locks are held, so the task | ||
| 1401 | * is not going away. | ||
| 1402 | */ | ||
| 1403 | static void rcu_cpu_kthread_setrt(int cpu, int to_rt) | ||
| 1404 | { | ||
| 1405 | int policy; | ||
| 1406 | struct sched_param sp; | ||
| 1407 | struct task_struct *t; | ||
| 1408 | |||
| 1409 | t = per_cpu(rcu_cpu_kthread_task, cpu); | ||
| 1410 | if (t == NULL) | ||
| 1411 | return; | ||
| 1412 | if (to_rt) { | ||
| 1413 | policy = SCHED_FIFO; | ||
| 1414 | sp.sched_priority = RCU_KTHREAD_PRIO; | ||
| 1415 | } else { | ||
| 1416 | policy = SCHED_NORMAL; | ||
| 1417 | sp.sched_priority = 0; | ||
| 1418 | } | ||
| 1419 | sched_setscheduler_nocheck(t, policy, &sp); | ||
| 1420 | } | ||
| 1421 | |||
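rcu_cpu_kthread_setrt() above is essentially a scheduler-class switch; the user-space counterpart with pthreads is shown below (SCHED_OTHER plays the role of SCHED_NORMAL, the priority value 1 is arbitrary, and the SCHED_FIFO case needs CAP_SYS_NICE or root):

#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <string.h>

static int thread_setrt(pthread_t t, int to_rt)
{
	struct sched_param sp;
	int policy;

	if (to_rt) {
		policy = SCHED_FIFO;
		sp.sched_priority = 1;
	} else {
		policy = SCHED_OTHER;	/* SCHED_NORMAL, in kernel terms */
		sp.sched_priority = 0;	/* required value for the normal class */
	}
	return pthread_setschedparam(t, policy, &sp);
}

int main(void)
{
	int err = thread_setrt(pthread_self(), 1);

	if (err)
		fprintf(stderr, "SCHED_FIFO failed: %s (needs privilege?)\n",
			strerror(err));
	thread_setrt(pthread_self(), 0);	/* back to the normal class */
	return 0;
}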
| 1422 | /* | ||
| 1423 | * Timer handler to initiate the waking up of per-CPU kthreads that | ||
| 1424 | * have yielded the CPU due to excess numbers of RCU callbacks. | ||
| 1425 | * We wake up the per-rcu_node kthread, which in turn will wake up | ||
| 1426 | * the booster kthread. | ||
| 1427 | */ | ||
| 1428 | static void rcu_cpu_kthread_timer(unsigned long arg) | ||
| 1429 | { | ||
| 1430 | struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, arg); | ||
| 1431 | struct rcu_node *rnp = rdp->mynode; | ||
| 1432 | |||
| 1433 | atomic_or(rdp->grpmask, &rnp->wakemask); | ||
| 1434 | invoke_rcu_node_kthread(rnp); | ||
| 1435 | } | ||
| 1436 | |||
| 1437 | /* | ||
| 1438 | * Drop to non-real-time priority and yield, but only after posting a | ||
| 1439 | * timer that will cause us to regain our real-time priority if we | ||
| 1440 | * remain preempted. Either way, we restore our real-time priority | ||
| 1441 | * before returning. | ||
| 1442 | */ | ||
| 1443 | static void rcu_yield(void (*f)(unsigned long), unsigned long arg) | ||
| 1444 | { | ||
| 1445 | struct sched_param sp; | ||
| 1446 | struct timer_list yield_timer; | ||
| 1447 | |||
| 1448 | setup_timer_on_stack(&yield_timer, f, arg); | ||
| 1449 | mod_timer(&yield_timer, jiffies + 2); | ||
| 1450 | sp.sched_priority = 0; | ||
| 1451 | sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp); | ||
| 1452 | set_user_nice(current, 19); | ||
| 1453 | schedule(); | ||
| 1454 | sp.sched_priority = RCU_KTHREAD_PRIO; | ||
| 1455 | sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); | ||
| 1456 | del_timer(&yield_timer); | ||
| 1457 | } | ||
| 1458 | |||
| 1459 | /* | ||
| 1460 | * Handle cases where the rcu_cpu_kthread() ends up on the wrong CPU. | ||
| 1461 | * This can happen while the corresponding CPU is either coming online | ||
| 1462 | * or going offline. We cannot wait until the CPU is fully online | ||
| 1463 | * before starting the kthread, because the various notifier functions | ||
| 1464 | * can wait for RCU grace periods. So we park rcu_cpu_kthread() until | ||
| 1465 | * the corresponding CPU is online. | ||
| 1466 | * | ||
| 1467 | * Return 1 if the kthread needs to stop, 0 otherwise. | ||
| 1468 | * | ||
| 1469 | * Caller must disable bh. This function can momentarily enable it. | ||
| 1470 | */ | ||
| 1471 | static int rcu_cpu_kthread_should_stop(int cpu) | ||
| 1472 | { | ||
| 1473 | while (cpu_is_offline(cpu) || | ||
| 1474 | !cpumask_equal(¤t->cpus_allowed, cpumask_of(cpu)) || | ||
| 1475 | smp_processor_id() != cpu) { | ||
| 1476 | if (kthread_should_stop()) | ||
| 1477 | return 1; | ||
| 1478 | per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; | ||
| 1479 | per_cpu(rcu_cpu_kthread_cpu, cpu) = raw_smp_processor_id(); | ||
| 1480 | local_bh_enable(); | ||
| 1481 | schedule_timeout_uninterruptible(1); | ||
| 1482 | if (!cpumask_equal(¤t->cpus_allowed, cpumask_of(cpu))) | ||
| 1483 | set_cpus_allowed_ptr(current, cpumask_of(cpu)); | ||
| 1484 | local_bh_disable(); | ||
| 1485 | } | ||
| 1486 | per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; | ||
| 1487 | return 0; | ||
| 1488 | } | ||
| 1489 | |||
| 1490 | /* | ||
| 1491 | * Per-CPU kernel thread that invokes RCU callbacks. This replaces the | ||
| 1492 | * earlier RCU softirq. | ||
| 1493 | */ | ||
| 1494 | static int rcu_cpu_kthread(void *arg) | ||
| 1495 | { | ||
| 1496 | int cpu = (int)(long)arg; | ||
| 1497 | unsigned long flags; | ||
| 1498 | int spincnt = 0; | ||
| 1499 | unsigned int *statusp = &per_cpu(rcu_cpu_kthread_status, cpu); | ||
| 1500 | char work; | ||
| 1501 | char *workp = &per_cpu(rcu_cpu_has_work, cpu); | ||
| 1502 | |||
| 1503 | for (;;) { | ||
| 1504 | *statusp = RCU_KTHREAD_WAITING; | ||
| 1505 | rcu_wait(*workp != 0 || kthread_should_stop()); | ||
| 1506 | local_bh_disable(); | ||
| 1507 | if (rcu_cpu_kthread_should_stop(cpu)) { | ||
| 1508 | local_bh_enable(); | ||
| 1509 | break; | ||
| 1510 | } | ||
| 1511 | *statusp = RCU_KTHREAD_RUNNING; | ||
| 1512 | per_cpu(rcu_cpu_kthread_loops, cpu)++; | ||
| 1513 | local_irq_save(flags); | ||
| 1514 | work = *workp; | ||
| 1515 | *workp = 0; | ||
| 1516 | local_irq_restore(flags); | ||
| 1517 | if (work) | ||
| 1518 | rcu_kthread_do_work(); | ||
| 1519 | local_bh_enable(); | ||
| 1520 | if (*workp != 0) | ||
| 1521 | spincnt++; | ||
| 1522 | else | ||
| 1523 | spincnt = 0; | ||
| 1524 | if (spincnt > 10) { | ||
| 1525 | *statusp = RCU_KTHREAD_YIELDING; | ||
| 1526 | rcu_yield(rcu_cpu_kthread_timer, (unsigned long)cpu); | ||
| 1527 | spincnt = 0; | ||
| 1528 | } | ||
| 1529 | } | ||
| 1530 | *statusp = RCU_KTHREAD_STOPPED; | ||
| 1531 | return 0; | ||
| 1532 | } | ||
| 1533 | |||
| 1534 | /* | ||
| 1535 | * Spawn a per-CPU kthread, setting up affinity and priority. | ||
| 1536 | * Because the CPU hotplug lock is held, no other CPU will be attempting | ||
| 1537 | * to manipulate rcu_cpu_kthread_task. There might be another CPU | ||
| 1538 | * attempting to access it during boot, but the locking in kthread_bind() | ||
| 1539 | * will enforce sufficient ordering. | ||
| 1540 | * | ||
| 1541 | * Please note that we cannot simply refuse to wake up the per-CPU | ||
| 1542 | * kthread because kthreads are created in TASK_UNINTERRUPTIBLE state, | ||
| 1543 | * which can result in softlockup complaints if the task ends up being | ||
| 1544 | * idle for more than a couple of minutes. | ||
| 1545 | * | ||
| 1546 | * However, please note also that we cannot bind the per-CPU kthread to its | ||
| 1547 | * CPU until that CPU is fully online. We also cannot wait until the | ||
| 1548 | * CPU is fully online before we create its per-CPU kthread, as this would | ||
| 1549 | * deadlock the system when CPU notifiers tried waiting for grace | ||
| 1550 | * periods. So we bind the per-CPU kthread to its CPU only if the CPU | ||
| 1551 | * is online. If its CPU is not yet fully online, then the code in | ||
| 1552 | * rcu_cpu_kthread() will wait until it is fully online, and then do | ||
| 1553 | * the binding. | ||
| 1554 | */ | ||
| 1555 | static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu) | ||
| 1556 | { | ||
| 1557 | struct sched_param sp; | ||
| 1558 | struct task_struct *t; | ||
| 1559 | |||
| 1560 | if (!rcu_scheduler_fully_active || | ||
| 1561 | per_cpu(rcu_cpu_kthread_task, cpu) != NULL) | ||
| 1562 | return 0; | ||
| 1563 | t = kthread_create(rcu_cpu_kthread, (void *)(long)cpu, "rcuc%d", cpu); | ||
| 1564 | if (IS_ERR(t)) | ||
| 1565 | return PTR_ERR(t); | ||
| 1566 | if (cpu_online(cpu)) | ||
| 1567 | kthread_bind(t, cpu); | ||
| 1568 | per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; | ||
| 1569 | WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL); | ||
| 1570 | sp.sched_priority = RCU_KTHREAD_PRIO; | ||
| 1571 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); | ||
| 1572 | per_cpu(rcu_cpu_kthread_task, cpu) = t; | ||
| 1573 | wake_up_process(t); /* Get to TASK_INTERRUPTIBLE quickly. */ | ||
| 1574 | return 0; | ||
| 1575 | } | ||
| 1576 | |||
| 1577 | /* | ||
| 1578 | * Per-rcu_node kthread, which is in charge of waking up the per-CPU | ||
| 1579 | * kthreads when needed. We ignore requests to wake up kthreads | ||
| 1580 | * for offline CPUs, which is OK because force_quiescent_state() | ||
| 1581 | * takes care of this case. | ||
| 1582 | */ | ||
| 1583 | static int rcu_node_kthread(void *arg) | ||
| 1584 | { | ||
| 1585 | int cpu; | ||
| 1586 | unsigned long flags; | ||
| 1587 | unsigned long mask; | ||
| 1588 | struct rcu_node *rnp = (struct rcu_node *)arg; | ||
| 1589 | struct sched_param sp; | ||
| 1590 | struct task_struct *t; | ||
| 1591 | |||
| 1592 | for (;;) { | ||
| 1593 | rnp->node_kthread_status = RCU_KTHREAD_WAITING; | ||
| 1594 | rcu_wait(atomic_read(&rnp->wakemask) != 0); | ||
| 1595 | rnp->node_kthread_status = RCU_KTHREAD_RUNNING; | ||
| 1596 | raw_spin_lock_irqsave(&rnp->lock, flags); | ||
| 1597 | mask = atomic_xchg(&rnp->wakemask, 0); | ||
| 1598 | rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */ | ||
| 1599 | for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) { | ||
| 1600 | if ((mask & 0x1) == 0) | ||
| 1601 | continue; | ||
| 1602 | preempt_disable(); | ||
| 1603 | t = per_cpu(rcu_cpu_kthread_task, cpu); | ||
| 1604 | if (!cpu_online(cpu) || t == NULL) { | ||
| 1605 | preempt_enable(); | ||
| 1606 | continue; | ||
| 1607 | } | ||
| 1608 | per_cpu(rcu_cpu_has_work, cpu) = 1; | ||
| 1609 | sp.sched_priority = RCU_KTHREAD_PRIO; | ||
| 1610 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); | ||
| 1611 | preempt_enable(); | ||
| 1612 | } | ||
| 1613 | } | ||
| 1614 | /* NOTREACHED */ | ||
| 1615 | rnp->node_kthread_status = RCU_KTHREAD_STOPPED; | ||
| 1616 | return 0; | ||
| 1617 | } | ||
| 1618 | |||
| 1619 | /* | ||
| 1620 | * Set the per-rcu_node kthread's affinity to cover all CPUs that are | ||
| 1621 | * served by the rcu_node in question. The CPU hotplug lock is still | ||
| 1622 | * held, so the value of rnp->qsmaskinit will be stable. | ||
| 1623 | * | ||
| 1624 | * We don't include outgoingcpu in the affinity set; use -1 if there is | ||
| 1625 | * no outgoing CPU. If there are no CPUs left in the affinity set, | ||
| 1626 | * this function allows the kthread to execute on any CPU. | ||
| 1627 | */ | ||
| 1628 | static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) | ||
| 1629 | { | ||
| 1630 | cpumask_var_t cm; | ||
| 1631 | int cpu; | ||
| 1632 | unsigned long mask = rnp->qsmaskinit; | ||
| 1633 | |||
| 1634 | if (rnp->node_kthread_task == NULL) | ||
| 1635 | return; | ||
| 1636 | if (!alloc_cpumask_var(&cm, GFP_KERNEL)) | ||
| 1637 | return; | ||
| 1638 | cpumask_clear(cm); | ||
| 1639 | for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) | ||
| 1640 | if ((mask & 0x1) && cpu != outgoingcpu) | ||
| 1641 | cpumask_set_cpu(cpu, cm); | ||
| 1642 | if (cpumask_weight(cm) == 0) { | ||
| 1643 | cpumask_setall(cm); | ||
| 1644 | for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) | ||
| 1645 | cpumask_clear_cpu(cpu, cm); | ||
| 1646 | WARN_ON_ONCE(cpumask_weight(cm) == 0); | ||
| 1647 | } | ||
| 1648 | set_cpus_allowed_ptr(rnp->node_kthread_task, cm); | ||
| 1649 | rcu_boost_kthread_setaffinity(rnp, cm); | ||
| 1650 | free_cpumask_var(cm); | ||
| 1651 | } | ||
| 1652 | |||
| 1653 | /* | ||
| 1654 | * Spawn a per-rcu_node kthread, setting priority and affinity. | ||
| 1655 | * Called during boot before online/offline can happen, or, if | ||
| 1656 | * during runtime, with the main CPU-hotplug locks held. So only | ||
| 1657 | * one of these can be executing at a time. | ||
| 1658 | */ | ||
| 1659 | static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, | ||
| 1660 | struct rcu_node *rnp) | ||
| 1661 | { | ||
| 1662 | unsigned long flags; | ||
| 1663 | int rnp_index = rnp - &rsp->node[0]; | ||
| 1664 | struct sched_param sp; | ||
| 1665 | struct task_struct *t; | ||
| 1666 | |||
| 1667 | if (!rcu_scheduler_fully_active || | ||
| 1668 | rnp->qsmaskinit == 0) | ||
| 1669 | return 0; | ||
| 1670 | if (rnp->node_kthread_task == NULL) { | ||
| 1671 | t = kthread_create(rcu_node_kthread, (void *)rnp, | ||
| 1672 | "rcun%d", rnp_index); | ||
| 1673 | if (IS_ERR(t)) | ||
| 1674 | return PTR_ERR(t); | ||
| 1675 | raw_spin_lock_irqsave(&rnp->lock, flags); | ||
| 1676 | rnp->node_kthread_task = t; | ||
| 1677 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||
| 1678 | sp.sched_priority = 99; | ||
| 1679 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); | ||
| 1680 | wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ | ||
| 1681 | } | ||
| 1682 | return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index); | ||
| 1683 | } | ||
| 1684 | |||
| 1685 | /* | ||
| 1686 | * Spawn all kthreads -- called as soon as the scheduler is running. | ||
| 1687 | */ | ||
| 1688 | static int __init rcu_spawn_kthreads(void) | ||
| 1689 | { | ||
| 1690 | int cpu; | ||
| 1691 | struct rcu_node *rnp; | ||
| 1692 | |||
| 1693 | rcu_scheduler_fully_active = 1; | ||
| 1694 | for_each_possible_cpu(cpu) { | ||
| 1695 | per_cpu(rcu_cpu_has_work, cpu) = 0; | ||
| 1696 | if (cpu_online(cpu)) | ||
| 1697 | (void)rcu_spawn_one_cpu_kthread(cpu); | ||
| 1698 | } | ||
| 1699 | rnp = rcu_get_root(rcu_state); | ||
| 1700 | (void)rcu_spawn_one_node_kthread(rcu_state, rnp); | ||
| 1701 | if (NUM_RCU_NODES > 1) { | ||
| 1702 | rcu_for_each_leaf_node(rcu_state, rnp) | ||
| 1703 | (void)rcu_spawn_one_node_kthread(rcu_state, rnp); | ||
| 1704 | } | ||
| 1705 | return 0; | ||
| 1706 | } | ||
| 1707 | early_initcall(rcu_spawn_kthreads); | ||
| 1708 | |||
| 1709 | static void __cpuinit rcu_prepare_kthreads(int cpu) | ||
| 1710 | { | ||
| 1711 | struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); | ||
| 1712 | struct rcu_node *rnp = rdp->mynode; | ||
| 1713 | |||
| 1714 | /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */ | ||
| 1715 | if (rcu_scheduler_fully_active) { | ||
| 1716 | (void)rcu_spawn_one_cpu_kthread(cpu); | ||
| 1717 | if (rnp->node_kthread_task == NULL) | ||
| 1718 | (void)rcu_spawn_one_node_kthread(rcu_state, rnp); | ||
| 1719 | } | ||
| 1309 | } | 1720 | } |
| 1310 | 1721 | ||
| 1311 | #else /* #ifdef CONFIG_RCU_BOOST */ | 1722 | #else /* #ifdef CONFIG_RCU_BOOST */ |
| @@ -1315,23 +1726,39 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) | |||
| 1315 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1726 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
| 1316 | } | 1727 | } |
| 1317 | 1728 | ||
| 1318 | static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, | 1729 | static void invoke_rcu_callbacks_kthread(void) |
| 1319 | cpumask_var_t cm) | ||
| 1320 | { | 1730 | { |
| 1731 | WARN_ON_ONCE(1); | ||
| 1321 | } | 1732 | } |
| 1322 | 1733 | ||
| 1323 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) | 1734 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) |
| 1324 | { | 1735 | { |
| 1325 | } | 1736 | } |
| 1326 | 1737 | ||
| 1327 | static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | 1738 | #ifdef CONFIG_HOTPLUG_CPU |
| 1328 | struct rcu_node *rnp, | 1739 | |
| 1329 | int rnp_index) | 1740 | static void rcu_stop_cpu_kthread(int cpu) |
| 1741 | { | ||
| 1742 | } | ||
| 1743 | |||
| 1744 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | ||
| 1745 | |||
| 1746 | static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) | ||
| 1747 | { | ||
| 1748 | } | ||
| 1749 | |||
| 1750 | static void rcu_cpu_kthread_setrt(int cpu, int to_rt) | ||
| 1751 | { | ||
| 1752 | } | ||
| 1753 | |||
| 1754 | static int __init rcu_scheduler_really_started(void) | ||
| 1330 | { | 1755 | { |
| 1756 | rcu_scheduler_fully_active = 1; | ||
| 1331 | return 0; | 1757 | return 0; |
| 1332 | } | 1758 | } |
| 1759 | early_initcall(rcu_scheduler_really_started); | ||
| 1333 | 1760 | ||
| 1334 | static void __cpuinit rcu_wake_one_boost_kthread(struct rcu_node *rnp) | 1761 | static void __cpuinit rcu_prepare_kthreads(int cpu) |
| 1335 | { | 1762 | { |
| 1336 | } | 1763 | } |
| 1337 | 1764 | ||
| @@ -1509,7 +1936,7 @@ static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff); | |||
| 1509 | * | 1936 | * |
| 1510 | * Because it is not legal to invoke rcu_process_callbacks() with irqs | 1937 | * Because it is not legal to invoke rcu_process_callbacks() with irqs |
| 1511 | * disabled, we do one pass of force_quiescent_state(), then do a | 1938 | * disabled, we do one pass of force_quiescent_state(), then do a |
| 1512 | * invoke_rcu_cpu_kthread() to cause rcu_process_callbacks() to be invoked | 1939 | * invoke_rcu_core() to cause rcu_process_callbacks() to be invoked |
| 1513 | * later. The per-cpu rcu_dyntick_drain variable controls the sequencing. | 1940 | * later. The per-cpu rcu_dyntick_drain variable controls the sequencing. |
| 1514 | */ | 1941 | */ |
| 1515 | int rcu_needs_cpu(int cpu) | 1942 | int rcu_needs_cpu(int cpu) |
| @@ -1560,7 +1987,7 @@ int rcu_needs_cpu(int cpu) | |||
| 1560 | 1987 | ||
| 1561 | /* If RCU callbacks are still pending, RCU still needs this CPU. */ | 1988 | /* If RCU callbacks are still pending, RCU still needs this CPU. */ |
| 1562 | if (c) | 1989 | if (c) |
| 1563 | invoke_rcu_cpu_kthread(); | 1990 | invoke_rcu_core(); |
| 1564 | return c; | 1991 | return c; |
| 1565 | } | 1992 | } |
| 1566 | 1993 | ||
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index 9678cc3650f5..3b0c0986afc0 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c | |||
| @@ -31,7 +31,7 @@ | |||
| 31 | #include <linux/rcupdate.h> | 31 | #include <linux/rcupdate.h> |
| 32 | #include <linux/interrupt.h> | 32 | #include <linux/interrupt.h> |
| 33 | #include <linux/sched.h> | 33 | #include <linux/sched.h> |
| 34 | #include <asm/atomic.h> | 34 | #include <linux/atomic.h> |
| 35 | #include <linux/bitops.h> | 35 | #include <linux/bitops.h> |
| 36 | #include <linux/module.h> | 36 | #include <linux/module.h> |
| 37 | #include <linux/completion.h> | 37 | #include <linux/completion.h> |
| @@ -46,6 +46,8 @@ | |||
| 46 | #define RCU_TREE_NONCORE | 46 | #define RCU_TREE_NONCORE |
| 47 | #include "rcutree.h" | 47 | #include "rcutree.h" |
| 48 | 48 | ||
| 49 | #ifdef CONFIG_RCU_BOOST | ||
| 50 | |||
| 49 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status); | 51 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status); |
| 50 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_cpu); | 52 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_cpu); |
| 51 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); | 53 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); |
| @@ -58,6 +60,8 @@ static char convert_kthread_status(unsigned int kthread_status) | |||
| 58 | return "SRWOY"[kthread_status]; | 60 | return "SRWOY"[kthread_status]; |
| 59 | } | 61 | } |
| 60 | 62 | ||
| 63 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
| 64 | |||
| 61 | static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) | 65 | static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) |
| 62 | { | 66 | { |
| 63 | if (!rdp->beenonline) | 67 | if (!rdp->beenonline) |
| @@ -76,7 +80,7 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) | |||
| 76 | rdp->dynticks_fqs); | 80 | rdp->dynticks_fqs); |
| 77 | #endif /* #ifdef CONFIG_NO_HZ */ | 81 | #endif /* #ifdef CONFIG_NO_HZ */ |
| 78 | seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi); | 82 | seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi); |
| 79 | seq_printf(m, " ql=%ld qs=%c%c%c%c kt=%d/%c/%d ktl=%x b=%ld", | 83 | seq_printf(m, " ql=%ld qs=%c%c%c%c", |
| 80 | rdp->qlen, | 84 | rdp->qlen, |
| 81 | ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] != | 85 | ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] != |
| 82 | rdp->nxttail[RCU_NEXT_TAIL]], | 86 | rdp->nxttail[RCU_NEXT_TAIL]], |
| @@ -84,13 +88,16 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) | |||
| 84 | rdp->nxttail[RCU_NEXT_READY_TAIL]], | 88 | rdp->nxttail[RCU_NEXT_READY_TAIL]], |
| 85 | ".W"[rdp->nxttail[RCU_DONE_TAIL] != | 89 | ".W"[rdp->nxttail[RCU_DONE_TAIL] != |
| 86 | rdp->nxttail[RCU_WAIT_TAIL]], | 90 | rdp->nxttail[RCU_WAIT_TAIL]], |
| 87 | ".D"[&rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]], | 91 | ".D"[&rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]]); |
| 92 | #ifdef CONFIG_RCU_BOOST | ||
| 93 | seq_printf(m, " kt=%d/%c/%d ktl=%x", | ||
| 88 | per_cpu(rcu_cpu_has_work, rdp->cpu), | 94 | per_cpu(rcu_cpu_has_work, rdp->cpu), |
| 89 | convert_kthread_status(per_cpu(rcu_cpu_kthread_status, | 95 | convert_kthread_status(per_cpu(rcu_cpu_kthread_status, |
| 90 | rdp->cpu)), | 96 | rdp->cpu)), |
| 91 | per_cpu(rcu_cpu_kthread_cpu, rdp->cpu), | 97 | per_cpu(rcu_cpu_kthread_cpu, rdp->cpu), |
| 92 | per_cpu(rcu_cpu_kthread_loops, rdp->cpu) & 0xffff, | 98 | per_cpu(rcu_cpu_kthread_loops, rdp->cpu) & 0xffff); |
| 93 | rdp->blimit); | 99 | #endif /* #ifdef CONFIG_RCU_BOOST */ |
| 100 | seq_printf(m, " b=%ld", rdp->blimit); | ||
| 94 | seq_printf(m, " ci=%lu co=%lu ca=%lu\n", | 101 | seq_printf(m, " ci=%lu co=%lu ca=%lu\n", |
| 95 | rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted); | 102 | rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted); |
| 96 | } | 103 | } |
| @@ -147,18 +154,21 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp) | |||
| 147 | rdp->dynticks_fqs); | 154 | rdp->dynticks_fqs); |
| 148 | #endif /* #ifdef CONFIG_NO_HZ */ | 155 | #endif /* #ifdef CONFIG_NO_HZ */ |
| 149 | seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi); | 156 | seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi); |
| 150 | seq_printf(m, ",%ld,\"%c%c%c%c\",%d,\"%c\",%ld", rdp->qlen, | 157 | seq_printf(m, ",%ld,\"%c%c%c%c\"", rdp->qlen, |
| 151 | ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] != | 158 | ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] != |
| 152 | rdp->nxttail[RCU_NEXT_TAIL]], | 159 | rdp->nxttail[RCU_NEXT_TAIL]], |
| 153 | ".R"[rdp->nxttail[RCU_WAIT_TAIL] != | 160 | ".R"[rdp->nxttail[RCU_WAIT_TAIL] != |
| 154 | rdp->nxttail[RCU_NEXT_READY_TAIL]], | 161 | rdp->nxttail[RCU_NEXT_READY_TAIL]], |
| 155 | ".W"[rdp->nxttail[RCU_DONE_TAIL] != | 162 | ".W"[rdp->nxttail[RCU_DONE_TAIL] != |
| 156 | rdp->nxttail[RCU_WAIT_TAIL]], | 163 | rdp->nxttail[RCU_WAIT_TAIL]], |
| 157 | ".D"[&rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]], | 164 | ".D"[&rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]]); |
| 165 | #ifdef CONFIG_RCU_BOOST | ||
| 166 | seq_printf(m, ",%d,\"%c\"", | ||
| 158 | per_cpu(rcu_cpu_has_work, rdp->cpu), | 167 | per_cpu(rcu_cpu_has_work, rdp->cpu), |
| 159 | convert_kthread_status(per_cpu(rcu_cpu_kthread_status, | 168 | convert_kthread_status(per_cpu(rcu_cpu_kthread_status, |
| 160 | rdp->cpu)), | 169 | rdp->cpu))); |
| 161 | rdp->blimit); | 170 | #endif /* #ifdef CONFIG_RCU_BOOST */ |
| 171 | seq_printf(m, ",%ld", rdp->blimit); | ||
| 162 | seq_printf(m, ",%lu,%lu,%lu\n", | 172 | seq_printf(m, ",%lu,%lu,%lu\n", |
| 163 | rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted); | 173 | rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted); |
| 164 | } | 174 | } |
| @@ -169,7 +179,11 @@ static int show_rcudata_csv(struct seq_file *m, void *unused) | |||
| 169 | #ifdef CONFIG_NO_HZ | 179 | #ifdef CONFIG_NO_HZ |
| 170 | seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\","); | 180 | seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\","); |
| 171 | #endif /* #ifdef CONFIG_NO_HZ */ | 181 | #endif /* #ifdef CONFIG_NO_HZ */ |
| 172 | seq_puts(m, "\"of\",\"ri\",\"ql\",\"b\",\"ci\",\"co\",\"ca\"\n"); | 182 | seq_puts(m, "\"of\",\"ri\",\"ql\",\"qs\""); |
| 183 | #ifdef CONFIG_RCU_BOOST | ||
| 184 | seq_puts(m, "\"kt\",\"ktl\""); | ||
| 185 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
| 186 | seq_puts(m, ",\"b\",\"ci\",\"co\",\"ca\"\n"); | ||
| 173 | #ifdef CONFIG_TREE_PREEMPT_RCU | 187 | #ifdef CONFIG_TREE_PREEMPT_RCU |
| 174 | seq_puts(m, "\"rcu_preempt:\"\n"); | 188 | seq_puts(m, "\"rcu_preempt:\"\n"); |
| 175 | PRINT_RCU_DATA(rcu_preempt_data, print_one_rcu_data_csv, m); | 189 | PRINT_RCU_DATA(rcu_preempt_data, print_one_rcu_data_csv, m); |
diff --git a/kernel/resource.c b/kernel/resource.c index 798e2fae2a06..3b3cedc52592 100644 --- a/kernel/resource.c +++ b/kernel/resource.c | |||
| @@ -38,6 +38,14 @@ struct resource iomem_resource = { | |||
| 38 | }; | 38 | }; |
| 39 | EXPORT_SYMBOL(iomem_resource); | 39 | EXPORT_SYMBOL(iomem_resource); |
| 40 | 40 | ||
| 41 | /* constraints to be met while allocating resources */ | ||
| 42 | struct resource_constraint { | ||
| 43 | resource_size_t min, max, align; | ||
| 44 | resource_size_t (*alignf)(void *, const struct resource *, | ||
| 45 | resource_size_t, resource_size_t); | ||
| 46 | void *alignf_data; | ||
| 47 | }; | ||
| 48 | |||
| 41 | static DEFINE_RWLOCK(resource_lock); | 49 | static DEFINE_RWLOCK(resource_lock); |
| 42 | 50 | ||
| 43 | static void *r_next(struct seq_file *m, void *v, loff_t *pos) | 51 | static void *r_next(struct seq_file *m, void *v, loff_t *pos) |
| @@ -384,16 +392,13 @@ static bool resource_contains(struct resource *res1, struct resource *res2) | |||
| 384 | } | 392 | } |
| 385 | 393 | ||
| 386 | /* | 394 | /* |
| 387 | * Find empty slot in the resource tree given range and alignment. | 395 | * Find empty slot in the resource tree with the given range and |
| 396 | * alignment constraints | ||
| 388 | */ | 397 | */ |
| 389 | static int find_resource(struct resource *root, struct resource *new, | 398 | static int __find_resource(struct resource *root, struct resource *old, |
| 390 | resource_size_t size, resource_size_t min, | 399 | struct resource *new, |
| 391 | resource_size_t max, resource_size_t align, | 400 | resource_size_t size, |
| 392 | resource_size_t (*alignf)(void *, | 401 | struct resource_constraint *constraint) |
| 393 | const struct resource *, | ||
| 394 | resource_size_t, | ||
| 395 | resource_size_t), | ||
| 396 | void *alignf_data) | ||
| 397 | { | 402 | { |
| 398 | struct resource *this = root->child; | 403 | struct resource *this = root->child; |
| 399 | struct resource tmp = *new, avail, alloc; | 404 | struct resource tmp = *new, avail, alloc; |
| @@ -404,25 +409,26 @@ static int find_resource(struct resource *root, struct resource *new, | |||
| 404 | * Skip past an allocated resource that starts at 0, since the assignment | 409 | * Skip past an allocated resource that starts at 0, since the assignment |
| 405 | * of this->start - 1 to tmp->end below would cause an underflow. | 410 | * of this->start - 1 to tmp->end below would cause an underflow. |
| 406 | */ | 411 | */ |
| 407 | if (this && this->start == 0) { | 412 | if (this && this->start == root->start) { |
| 408 | tmp.start = this->end + 1; | 413 | tmp.start = (this == old) ? old->start : this->end + 1; |
| 409 | this = this->sibling; | 414 | this = this->sibling; |
| 410 | } | 415 | } |
| 411 | for(;;) { | 416 | for(;;) { |
| 412 | if (this) | 417 | if (this) |
| 413 | tmp.end = this->start - 1; | 418 | tmp.end = (this == old) ? this->end : this->start - 1; |
| 414 | else | 419 | else |
| 415 | tmp.end = root->end; | 420 | tmp.end = root->end; |
| 416 | 421 | ||
| 417 | resource_clip(&tmp, min, max); | 422 | resource_clip(&tmp, constraint->min, constraint->max); |
| 418 | arch_remove_reservations(&tmp); | 423 | arch_remove_reservations(&tmp); |
| 419 | 424 | ||
| 420 | /* Check for overflow after ALIGN() */ | 425 | /* Check for overflow after ALIGN() */ |
| 421 | avail = *new; | 426 | avail = *new; |
| 422 | avail.start = ALIGN(tmp.start, align); | 427 | avail.start = ALIGN(tmp.start, constraint->align); |
| 423 | avail.end = tmp.end; | 428 | avail.end = tmp.end; |
| 424 | if (avail.start >= tmp.start) { | 429 | if (avail.start >= tmp.start) { |
| 425 | alloc.start = alignf(alignf_data, &avail, size, align); | 430 | alloc.start = constraint->alignf(constraint->alignf_data, &avail, |
| 431 | size, constraint->align); | ||
| 426 | alloc.end = alloc.start + size - 1; | 432 | alloc.end = alloc.start + size - 1; |
| 427 | if (resource_contains(&avail, &alloc)) { | 433 | if (resource_contains(&avail, &alloc)) { |
| 428 | new->start = alloc.start; | 434 | new->start = alloc.start; |
| @@ -432,14 +438,75 @@ static int find_resource(struct resource *root, struct resource *new, | |||
| 432 | } | 438 | } |
| 433 | if (!this) | 439 | if (!this) |
| 434 | break; | 440 | break; |
| 435 | tmp.start = this->end + 1; | 441 | if (this != old) |
| 442 | tmp.start = this->end + 1; | ||
| 436 | this = this->sibling; | 443 | this = this->sibling; |
| 437 | } | 444 | } |
| 438 | return -EBUSY; | 445 | return -EBUSY; |
| 439 | } | 446 | } |
| 440 | 447 | ||
| 448 | /* | ||
| 449 | * Find empty slot in the resource tree given range and alignment. | ||
| 450 | */ | ||
| 451 | static int find_resource(struct resource *root, struct resource *new, | ||
| 452 | resource_size_t size, | ||
| 453 | struct resource_constraint *constraint) | ||
| 454 | { | ||
| 455 | return __find_resource(root, NULL, new, size, constraint); | ||
| 456 | } | ||
| 457 | |||
| 441 | /** | 458 | /** |
| 442 | * allocate_resource - allocate empty slot in the resource tree given range & alignment | 459 | * reallocate_resource - allocate a slot in the resource tree given range & alignment. |
| 460 | * The resource will be relocated if the new size cannot be reallocated in the | ||
| 461 | * current location. | ||
| 462 | * | ||
| 463 | * @root: root resource descriptor | ||
| 464 | * @old: resource descriptor desired by caller | ||
| 465 | * @newsize: new size of the resource descriptor | ||
| 466 | * @constraint: the size and alignment constraints to be met. | ||
| 467 | */ | ||
| 468 | int reallocate_resource(struct resource *root, struct resource *old, | ||
| 469 | resource_size_t newsize, | ||
| 470 | struct resource_constraint *constraint) | ||
| 471 | { | ||
| 472 | int err=0; | ||
| 473 | struct resource new = *old; | ||
| 474 | struct resource *conflict; | ||
| 475 | |||
| 476 | write_lock(&resource_lock); | ||
| 477 | |||
| 478 | if ((err = __find_resource(root, old, &new, newsize, constraint))) | ||
| 479 | goto out; | ||
| 480 | |||
| 481 | if (resource_contains(&new, old)) { | ||
| 482 | old->start = new.start; | ||
| 483 | old->end = new.end; | ||
| 484 | goto out; | ||
| 485 | } | ||
| 486 | |||
| 487 | if (old->child) { | ||
| 488 | err = -EBUSY; | ||
| 489 | goto out; | ||
| 490 | } | ||
| 491 | |||
| 492 | if (resource_contains(old, &new)) { | ||
| 493 | old->start = new.start; | ||
| 494 | old->end = new.end; | ||
| 495 | } else { | ||
| 496 | __release_resource(old); | ||
| 497 | *old = new; | ||
| 498 | conflict = __request_resource(root, old); | ||
| 499 | BUG_ON(conflict); | ||
| 500 | } | ||
| 501 | out: | ||
| 502 | write_unlock(&resource_lock); | ||
| 503 | return err; | ||
| 504 | } | ||
| 505 | |||
| 506 | |||
| 507 | /** | ||
| 508 | * allocate_resource - allocate empty slot in the resource tree given range & alignment. | ||
| 509 | * The resource will be reallocated with a new size if it was already allocated | ||
| 443 | * @root: root resource descriptor | 510 | * @root: root resource descriptor |
| 444 | * @new: resource descriptor desired by caller | 511 | * @new: resource descriptor desired by caller |
| 445 | * @size: requested resource region size | 512 | * @size: requested resource region size |
| @@ -459,12 +526,25 @@ int allocate_resource(struct resource *root, struct resource *new, | |||
| 459 | void *alignf_data) | 526 | void *alignf_data) |
| 460 | { | 527 | { |
| 461 | int err; | 528 | int err; |
| 529 | struct resource_constraint constraint; | ||
| 462 | 530 | ||
| 463 | if (!alignf) | 531 | if (!alignf) |
| 464 | alignf = simple_align_resource; | 532 | alignf = simple_align_resource; |
| 465 | 533 | ||
| 534 | constraint.min = min; | ||
| 535 | constraint.max = max; | ||
| 536 | constraint.align = align; | ||
| 537 | constraint.alignf = alignf; | ||
| 538 | constraint.alignf_data = alignf_data; | ||
| 539 | |||
| 540 | if ( new->parent ) { | ||
| 541 | /* resource is already allocated, try reallocating with | ||
| 542 | the new constraints */ | ||
| 543 | return reallocate_resource(root, new, size, &constraint); | ||
| 544 | } | ||
| 545 | |||
| 466 | write_lock(&resource_lock); | 546 | write_lock(&resource_lock); |
| 467 | err = find_resource(root, new, size, min, max, align, alignf, alignf_data); | 547 | err = find_resource(root, new, size, &constraint); |
| 468 | if (err >= 0 && __request_resource(root, new)) | 548 | if (err >= 0 && __request_resource(root, new)) |
| 469 | err = -EBUSY; | 549 | err = -EBUSY; |
| 470 | write_unlock(&resource_lock); | 550 | write_unlock(&resource_lock); |
| @@ -473,6 +553,27 @@ int allocate_resource(struct resource *root, struct resource *new, | |||
| 473 | 553 | ||
| 474 | EXPORT_SYMBOL(allocate_resource); | 554 | EXPORT_SYMBOL(allocate_resource); |
| 475 | 555 | ||
| 556 | /** | ||
| 557 | * lookup_resource - find an existing resource by a resource start address | ||
| 558 | * @root: root resource descriptor | ||
| 559 | * @start: resource start address | ||
| 560 | * | ||
| 561 | * Returns a pointer to the resource if found, NULL otherwise | ||
| 562 | */ | ||
| 563 | struct resource *lookup_resource(struct resource *root, resource_size_t start) | ||
| 564 | { | ||
| 565 | struct resource *res; | ||
| 566 | |||
| 567 | read_lock(&resource_lock); | ||
| 568 | for (res = root->child; res; res = res->sibling) { | ||
| 569 | if (res->start == start) | ||
| 570 | break; | ||
| 571 | } | ||
| 572 | read_unlock(&resource_lock); | ||
| 573 | |||
| 574 | return res; | ||
| 575 | } | ||
| 576 | |||
| 476 | /* | 577 | /* |
| 477 | * Insert a resource into the resource tree. If successful, return NULL, | 578 | * Insert a resource into the resource tree. If successful, return NULL, |
| 478 | * otherwise return the conflicting resource (compare to __request_resource()) | 579 | * otherwise return the conflicting resource (compare to __request_resource()) |
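For orientation, a minimal caller-side sketch of the two kernel/resource.c additions above: reallocate_resource() is reached through allocate_resource() whenever the passed-in resource already has a parent, and lookup_resource() walks the root's children for a matching start address. The helper names, the window limits and the 4K alignment below are hypothetical, not taken from the patch.

#include <linux/ioport.h>

/* Hypothetical helper: grow (or relocate) an already-requested region.
 * Because res->parent is non-NULL, allocate_resource() falls through to
 * reallocate_resource() with these constraints instead of searching for
 * a fresh slot. */
static int grow_region(struct resource *res, resource_size_t new_size)
{
	return allocate_resource(&iomem_resource, res, new_size,
				 0, iomem_resource.end,	/* min/max window */
				 4096,			/* alignment      */
				 NULL, NULL);		/* default alignf */
}

/* Hypothetical lookup: fetch a previously requested region by its start. */
static struct resource *find_region(resource_size_t start)
{
	return lookup_resource(&iomem_resource, start);
}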
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c index ab449117aaf2..255e1662acdb 100644 --- a/kernel/rtmutex.c +++ b/kernel/rtmutex.c | |||
| @@ -890,7 +890,7 @@ void __rt_mutex_init(struct rt_mutex *lock, const char *name) | |||
| 890 | { | 890 | { |
| 891 | lock->owner = NULL; | 891 | lock->owner = NULL; |
| 892 | raw_spin_lock_init(&lock->wait_lock); | 892 | raw_spin_lock_init(&lock->wait_lock); |
| 893 | plist_head_init_raw(&lock->wait_list, &lock->wait_lock); | 893 | plist_head_init(&lock->wait_list); |
| 894 | 894 | ||
| 895 | debug_rt_mutex_init(lock, name); | 895 | debug_rt_mutex_init(lock, name); |
| 896 | } | 896 | } |
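The rtmutex hunk above follows an API change in the priority-list code: plist_head_init_raw() used to record the raw spinlock protecting the list, while the new plist_head_init() takes only the head. A minimal sketch of a caller converted the same way (struct my_queue is hypothetical):

#include <linux/plist.h>
#include <linux/spinlock.h>

struct my_queue {			/* hypothetical plist user */
	raw_spinlock_t lock;
	struct plist_head head;
};

static void my_queue_init(struct my_queue *q)
{
	raw_spin_lock_init(&q->lock);
	/* was: plist_head_init_raw(&q->head, &q->lock); */
	plist_head_init(&q->head);
}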
diff --git a/kernel/rwsem.c b/kernel/rwsem.c index cae050b05f5e..9f48f3d82e9b 100644 --- a/kernel/rwsem.c +++ b/kernel/rwsem.c | |||
| @@ -11,7 +11,7 @@ | |||
| 11 | #include <linux/rwsem.h> | 11 | #include <linux/rwsem.h> |
| 12 | 12 | ||
| 13 | #include <asm/system.h> | 13 | #include <asm/system.h> |
| 14 | #include <asm/atomic.h> | 14 | #include <linux/atomic.h> |
| 15 | 15 | ||
| 16 | /* | 16 | /* |
| 17 | * lock for reading | 17 | * lock for reading |
| @@ -117,15 +117,6 @@ void down_read_nested(struct rw_semaphore *sem, int subclass) | |||
| 117 | 117 | ||
| 118 | EXPORT_SYMBOL(down_read_nested); | 118 | EXPORT_SYMBOL(down_read_nested); |
| 119 | 119 | ||
| 120 | void down_read_non_owner(struct rw_semaphore *sem) | ||
| 121 | { | ||
| 122 | might_sleep(); | ||
| 123 | |||
| 124 | __down_read(sem); | ||
| 125 | } | ||
| 126 | |||
| 127 | EXPORT_SYMBOL(down_read_non_owner); | ||
| 128 | |||
| 129 | void down_write_nested(struct rw_semaphore *sem, int subclass) | 120 | void down_write_nested(struct rw_semaphore *sem, int subclass) |
| 130 | { | 121 | { |
| 131 | might_sleep(); | 122 | might_sleep(); |
| @@ -136,13 +127,6 @@ void down_write_nested(struct rw_semaphore *sem, int subclass) | |||
| 136 | 127 | ||
| 137 | EXPORT_SYMBOL(down_write_nested); | 128 | EXPORT_SYMBOL(down_write_nested); |
| 138 | 129 | ||
| 139 | void up_read_non_owner(struct rw_semaphore *sem) | ||
| 140 | { | ||
| 141 | __up_read(sem); | ||
| 142 | } | ||
| 143 | |||
| 144 | EXPORT_SYMBOL(up_read_non_owner); | ||
| 145 | |||
| 146 | #endif | 130 | #endif |
| 147 | 131 | ||
| 148 | 132 | ||
diff --git a/kernel/sched.c b/kernel/sched.c index cbb3a0eee58e..ccacdbdecf45 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
| @@ -75,6 +75,9 @@ | |||
| 75 | #include <asm/tlb.h> | 75 | #include <asm/tlb.h> |
| 76 | #include <asm/irq_regs.h> | 76 | #include <asm/irq_regs.h> |
| 77 | #include <asm/mutex.h> | 77 | #include <asm/mutex.h> |
| 78 | #ifdef CONFIG_PARAVIRT | ||
| 79 | #include <asm/paravirt.h> | ||
| 80 | #endif | ||
| 78 | 81 | ||
| 79 | #include "sched_cpupri.h" | 82 | #include "sched_cpupri.h" |
| 80 | #include "workqueue_sched.h" | 83 | #include "workqueue_sched.h" |
| @@ -124,7 +127,7 @@ | |||
| 124 | 127 | ||
| 125 | static inline int rt_policy(int policy) | 128 | static inline int rt_policy(int policy) |
| 126 | { | 129 | { |
| 127 | if (unlikely(policy == SCHED_FIFO || policy == SCHED_RR)) | 130 | if (policy == SCHED_FIFO || policy == SCHED_RR) |
| 128 | return 1; | 131 | return 1; |
| 129 | return 0; | 132 | return 0; |
| 130 | } | 133 | } |
| @@ -292,8 +295,8 @@ static DEFINE_SPINLOCK(task_group_lock); | |||
| 292 | * (The default weight is 1024 - so there's no practical | 295 | * (The default weight is 1024 - so there's no practical |
| 293 | * limitation from this.) | 296 | * limitation from this.) |
| 294 | */ | 297 | */ |
| 295 | #define MIN_SHARES 2 | 298 | #define MIN_SHARES (1UL << 1) |
| 296 | #define MAX_SHARES (1UL << (18 + SCHED_LOAD_RESOLUTION)) | 299 | #define MAX_SHARES (1UL << 18) |
| 297 | 300 | ||
| 298 | static int root_task_group_load = ROOT_TASK_GROUP_LOAD; | 301 | static int root_task_group_load = ROOT_TASK_GROUP_LOAD; |
| 299 | #endif | 302 | #endif |
| @@ -422,6 +425,7 @@ struct rt_rq { | |||
| 422 | */ | 425 | */ |
| 423 | struct root_domain { | 426 | struct root_domain { |
| 424 | atomic_t refcount; | 427 | atomic_t refcount; |
| 428 | atomic_t rto_count; | ||
| 425 | struct rcu_head rcu; | 429 | struct rcu_head rcu; |
| 426 | cpumask_var_t span; | 430 | cpumask_var_t span; |
| 427 | cpumask_var_t online; | 431 | cpumask_var_t online; |
| @@ -431,7 +435,6 @@ struct root_domain { | |||
| 431 | * one runnable RT task. | 435 | * one runnable RT task. |
| 432 | */ | 436 | */ |
| 433 | cpumask_var_t rto_mask; | 437 | cpumask_var_t rto_mask; |
| 434 | atomic_t rto_count; | ||
| 435 | struct cpupri cpupri; | 438 | struct cpupri cpupri; |
| 436 | }; | 439 | }; |
| 437 | 440 | ||
| @@ -528,6 +531,12 @@ struct rq { | |||
| 528 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | 531 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING |
| 529 | u64 prev_irq_time; | 532 | u64 prev_irq_time; |
| 530 | #endif | 533 | #endif |
| 534 | #ifdef CONFIG_PARAVIRT | ||
| 535 | u64 prev_steal_time; | ||
| 536 | #endif | ||
| 537 | #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING | ||
| 538 | u64 prev_steal_time_rq; | ||
| 539 | #endif | ||
| 531 | 540 | ||
| 532 | /* calc_load related fields */ | 541 | /* calc_load related fields */ |
| 533 | unsigned long calc_load_update; | 542 | unsigned long calc_load_update; |
| @@ -581,7 +590,6 @@ static inline int cpu_of(struct rq *rq) | |||
| 581 | 590 | ||
| 582 | #define rcu_dereference_check_sched_domain(p) \ | 591 | #define rcu_dereference_check_sched_domain(p) \ |
| 583 | rcu_dereference_check((p), \ | 592 | rcu_dereference_check((p), \ |
| 584 | rcu_read_lock_held() || \ | ||
| 585 | lockdep_is_held(&sched_domains_mutex)) | 593 | lockdep_is_held(&sched_domains_mutex)) |
| 586 | 594 | ||
| 587 | /* | 595 | /* |
| @@ -605,10 +613,10 @@ static inline int cpu_of(struct rq *rq) | |||
| 605 | /* | 613 | /* |
| 606 | * Return the group to which this tasks belongs. | 614 | * Return the group to which this tasks belongs. |
| 607 | * | 615 | * |
| 608 | * We use task_subsys_state_check() and extend the RCU verification | 616 | * We use task_subsys_state_check() and extend the RCU verification with |
| 609 | * with lockdep_is_held(&p->pi_lock) because cpu_cgroup_attach() | 617 | * pi->lock and rq->lock because cpu_cgroup_attach() holds those locks for each |
| 610 | * holds that lock for each task it moves into the cgroup. Therefore | 618 | * task it moves into the cgroup. Therefore by holding either of those locks, |
| 611 | * by holding that lock, we pin the task to the current cgroup. | 619 | * we pin the task to the current cgroup. |
| 612 | */ | 620 | */ |
| 613 | static inline struct task_group *task_group(struct task_struct *p) | 621 | static inline struct task_group *task_group(struct task_struct *p) |
| 614 | { | 622 | { |
| @@ -616,7 +624,8 @@ static inline struct task_group *task_group(struct task_struct *p) | |||
| 616 | struct cgroup_subsys_state *css; | 624 | struct cgroup_subsys_state *css; |
| 617 | 625 | ||
| 618 | css = task_subsys_state_check(p, cpu_cgroup_subsys_id, | 626 | css = task_subsys_state_check(p, cpu_cgroup_subsys_id, |
| 619 | lockdep_is_held(&p->pi_lock)); | 627 | lockdep_is_held(&p->pi_lock) || |
| 628 | lockdep_is_held(&task_rq(p)->lock)); | ||
| 620 | tg = container_of(css, struct task_group, css); | 629 | tg = container_of(css, struct task_group, css); |
| 621 | 630 | ||
| 622 | return autogroup_task_group(p, tg); | 631 | return autogroup_task_group(p, tg); |
| @@ -1567,38 +1576,6 @@ static unsigned long cpu_avg_load_per_task(int cpu) | |||
| 1567 | return rq->avg_load_per_task; | 1576 | return rq->avg_load_per_task; |
| 1568 | } | 1577 | } |
| 1569 | 1578 | ||
| 1570 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
| 1571 | |||
| 1572 | /* | ||
| 1573 | * Compute the cpu's hierarchical load factor for each task group. | ||
| 1574 | * This needs to be done in a top-down fashion because the load of a child | ||
| 1575 | * group is a fraction of its parents load. | ||
| 1576 | */ | ||
| 1577 | static int tg_load_down(struct task_group *tg, void *data) | ||
| 1578 | { | ||
| 1579 | unsigned long load; | ||
| 1580 | long cpu = (long)data; | ||
| 1581 | |||
| 1582 | if (!tg->parent) { | ||
| 1583 | load = cpu_rq(cpu)->load.weight; | ||
| 1584 | } else { | ||
| 1585 | load = tg->parent->cfs_rq[cpu]->h_load; | ||
| 1586 | load *= tg->se[cpu]->load.weight; | ||
| 1587 | load /= tg->parent->cfs_rq[cpu]->load.weight + 1; | ||
| 1588 | } | ||
| 1589 | |||
| 1590 | tg->cfs_rq[cpu]->h_load = load; | ||
| 1591 | |||
| 1592 | return 0; | ||
| 1593 | } | ||
| 1594 | |||
| 1595 | static void update_h_load(long cpu) | ||
| 1596 | { | ||
| 1597 | walk_tg_tree(tg_load_down, tg_nop, (void *)cpu); | ||
| 1598 | } | ||
| 1599 | |||
| 1600 | #endif | ||
| 1601 | |||
| 1602 | #ifdef CONFIG_PREEMPT | 1579 | #ifdef CONFIG_PREEMPT |
| 1603 | 1580 | ||
| 1604 | static void double_rq_lock(struct rq *rq1, struct rq *rq2); | 1581 | static void double_rq_lock(struct rq *rq1, struct rq *rq2); |
| @@ -1952,10 +1929,28 @@ void account_system_vtime(struct task_struct *curr) | |||
| 1952 | } | 1929 | } |
| 1953 | EXPORT_SYMBOL_GPL(account_system_vtime); | 1930 | EXPORT_SYMBOL_GPL(account_system_vtime); |
| 1954 | 1931 | ||
| 1955 | static void update_rq_clock_task(struct rq *rq, s64 delta) | 1932 | #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ |
| 1933 | |||
| 1934 | #ifdef CONFIG_PARAVIRT | ||
| 1935 | static inline u64 steal_ticks(u64 steal) | ||
| 1956 | { | 1936 | { |
| 1957 | s64 irq_delta; | 1937 | if (unlikely(steal > NSEC_PER_SEC)) |
| 1938 | return div_u64(steal, TICK_NSEC); | ||
| 1958 | 1939 | ||
| 1940 | return __iter_div_u64_rem(steal, TICK_NSEC, &steal); | ||
| 1941 | } | ||
| 1942 | #endif | ||
| 1943 | |||
| 1944 | static void update_rq_clock_task(struct rq *rq, s64 delta) | ||
| 1945 | { | ||
| 1946 | /* | ||
| 1947 | * In theory, the compile should just see 0 here, and optimize out the call | ||
| 1948 | * to sched_rt_avg_update. But I don't trust it... | ||
| 1949 | */ | ||
| 1950 | #if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING) | ||
| 1951 | s64 steal = 0, irq_delta = 0; | ||
| 1952 | #endif | ||
| 1953 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | ||
| 1959 | irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time; | 1954 | irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time; |
| 1960 | 1955 | ||
| 1961 | /* | 1956 | /* |
| @@ -1978,12 +1973,35 @@ static void update_rq_clock_task(struct rq *rq, s64 delta) | |||
| 1978 | 1973 | ||
| 1979 | rq->prev_irq_time += irq_delta; | 1974 | rq->prev_irq_time += irq_delta; |
| 1980 | delta -= irq_delta; | 1975 | delta -= irq_delta; |
| 1976 | #endif | ||
| 1977 | #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING | ||
| 1978 | if (static_branch((¶virt_steal_rq_enabled))) { | ||
| 1979 | u64 st; | ||
| 1980 | |||
| 1981 | steal = paravirt_steal_clock(cpu_of(rq)); | ||
| 1982 | steal -= rq->prev_steal_time_rq; | ||
| 1983 | |||
| 1984 | if (unlikely(steal > delta)) | ||
| 1985 | steal = delta; | ||
| 1986 | |||
| 1987 | st = steal_ticks(steal); | ||
| 1988 | steal = st * TICK_NSEC; | ||
| 1989 | |||
| 1990 | rq->prev_steal_time_rq += steal; | ||
| 1991 | |||
| 1992 | delta -= steal; | ||
| 1993 | } | ||
| 1994 | #endif | ||
| 1995 | |||
| 1981 | rq->clock_task += delta; | 1996 | rq->clock_task += delta; |
| 1982 | 1997 | ||
| 1983 | if (irq_delta && sched_feat(NONIRQ_POWER)) | 1998 | #if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING) |
| 1984 | sched_rt_avg_update(rq, irq_delta); | 1999 | if ((irq_delta + steal) && sched_feat(NONTASK_POWER)) |
| 2000 | sched_rt_avg_update(rq, irq_delta + steal); | ||
| 2001 | #endif | ||
| 1985 | } | 2002 | } |
| 1986 | 2003 | ||
| 2004 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | ||
| 1987 | static int irqtime_account_hi_update(void) | 2005 | static int irqtime_account_hi_update(void) |
| 1988 | { | 2006 | { |
| 1989 | struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; | 2007 | struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; |
| @@ -2018,12 +2036,7 @@ static int irqtime_account_si_update(void) | |||
| 2018 | 2036 | ||
| 2019 | #define sched_clock_irqtime (0) | 2037 | #define sched_clock_irqtime (0) |
| 2020 | 2038 | ||
| 2021 | static void update_rq_clock_task(struct rq *rq, s64 delta) | 2039 | #endif |
| 2022 | { | ||
| 2023 | rq->clock_task += delta; | ||
| 2024 | } | ||
| 2025 | |||
| 2026 | #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ | ||
| 2027 | 2040 | ||
| 2028 | #include "sched_idletask.c" | 2041 | #include "sched_idletask.c" |
| 2029 | #include "sched_fair.c" | 2042 | #include "sched_fair.c" |
| @@ -2200,6 +2213,16 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) | |||
| 2200 | !(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE)); | 2213 | !(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE)); |
| 2201 | 2214 | ||
| 2202 | #ifdef CONFIG_LOCKDEP | 2215 | #ifdef CONFIG_LOCKDEP |
| 2216 | /* | ||
| 2217 | * The caller should hold either p->pi_lock or rq->lock, when changing | ||
| 2218 | * a task's CPU. ->pi_lock for waking tasks, rq->lock for runnable tasks. | ||
| 2219 | * | ||
| 2220 | * sched_move_task() holds both and thus holding either pins the cgroup, | ||
| 2221 | * see set_task_rq(). | ||
| 2222 | * | ||
| 2223 | * Furthermore, all task_rq users should acquire both locks, see | ||
| 2224 | * task_rq_lock(). | ||
| 2225 | */ | ||
| 2203 | WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) || | 2226 | WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) || |
| 2204 | lockdep_is_held(&task_rq(p)->lock))); | 2227 | lockdep_is_held(&task_rq(p)->lock))); |
| 2205 | #endif | 2228 | #endif |
| @@ -2209,7 +2232,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) | |||
| 2209 | 2232 | ||
| 2210 | if (task_cpu(p) != new_cpu) { | 2233 | if (task_cpu(p) != new_cpu) { |
| 2211 | p->se.nr_migrations++; | 2234 | p->se.nr_migrations++; |
| 2212 | perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 1, NULL, 0); | 2235 | perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, NULL, 0); |
| 2213 | } | 2236 | } |
| 2214 | 2237 | ||
| 2215 | __set_task_cpu(p, new_cpu); | 2238 | __set_task_cpu(p, new_cpu); |
| @@ -2447,6 +2470,10 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags) | |||
| 2447 | } | 2470 | } |
| 2448 | rcu_read_unlock(); | 2471 | rcu_read_unlock(); |
| 2449 | } | 2472 | } |
| 2473 | |||
| 2474 | if (wake_flags & WF_MIGRATED) | ||
| 2475 | schedstat_inc(p, se.statistics.nr_wakeups_migrate); | ||
| 2476 | |||
| 2450 | #endif /* CONFIG_SMP */ | 2477 | #endif /* CONFIG_SMP */ |
| 2451 | 2478 | ||
| 2452 | schedstat_inc(rq, ttwu_count); | 2479 | schedstat_inc(rq, ttwu_count); |
| @@ -2455,9 +2482,6 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags) | |||
| 2455 | if (wake_flags & WF_SYNC) | 2482 | if (wake_flags & WF_SYNC) |
| 2456 | schedstat_inc(p, se.statistics.nr_wakeups_sync); | 2483 | schedstat_inc(p, se.statistics.nr_wakeups_sync); |
| 2457 | 2484 | ||
| 2458 | if (cpu != task_cpu(p)) | ||
| 2459 | schedstat_inc(p, se.statistics.nr_wakeups_migrate); | ||
| 2460 | |||
| 2461 | #endif /* CONFIG_SCHEDSTATS */ | 2485 | #endif /* CONFIG_SCHEDSTATS */ |
| 2462 | } | 2486 | } |
| 2463 | 2487 | ||
| @@ -2485,7 +2509,7 @@ ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) | |||
| 2485 | if (p->sched_class->task_woken) | 2509 | if (p->sched_class->task_woken) |
| 2486 | p->sched_class->task_woken(rq, p); | 2510 | p->sched_class->task_woken(rq, p); |
| 2487 | 2511 | ||
| 2488 | if (unlikely(rq->idle_stamp)) { | 2512 | if (rq->idle_stamp) { |
| 2489 | u64 delta = rq->clock - rq->idle_stamp; | 2513 | u64 delta = rq->clock - rq->idle_stamp; |
| 2490 | u64 max = 2*sysctl_sched_migration_cost; | 2514 | u64 max = 2*sysctl_sched_migration_cost; |
| 2491 | 2515 | ||
| @@ -2532,13 +2556,9 @@ static int ttwu_remote(struct task_struct *p, int wake_flags) | |||
| 2532 | } | 2556 | } |
| 2533 | 2557 | ||
| 2534 | #ifdef CONFIG_SMP | 2558 | #ifdef CONFIG_SMP |
| 2535 | static void sched_ttwu_pending(void) | 2559 | static void sched_ttwu_do_pending(struct task_struct *list) |
| 2536 | { | 2560 | { |
| 2537 | struct rq *rq = this_rq(); | 2561 | struct rq *rq = this_rq(); |
| 2538 | struct task_struct *list = xchg(&rq->wake_list, NULL); | ||
| 2539 | |||
| 2540 | if (!list) | ||
| 2541 | return; | ||
| 2542 | 2562 | ||
| 2543 | raw_spin_lock(&rq->lock); | 2563 | raw_spin_lock(&rq->lock); |
| 2544 | 2564 | ||
| @@ -2551,9 +2571,45 @@ static void sched_ttwu_pending(void) | |||
| 2551 | raw_spin_unlock(&rq->lock); | 2571 | raw_spin_unlock(&rq->lock); |
| 2552 | } | 2572 | } |
| 2553 | 2573 | ||
| 2574 | #ifdef CONFIG_HOTPLUG_CPU | ||
| 2575 | |||
| 2576 | static void sched_ttwu_pending(void) | ||
| 2577 | { | ||
| 2578 | struct rq *rq = this_rq(); | ||
| 2579 | struct task_struct *list = xchg(&rq->wake_list, NULL); | ||
| 2580 | |||
| 2581 | if (!list) | ||
| 2582 | return; | ||
| 2583 | |||
| 2584 | sched_ttwu_do_pending(list); | ||
| 2585 | } | ||
| 2586 | |||
| 2587 | #endif /* CONFIG_HOTPLUG_CPU */ | ||
| 2588 | |||
| 2554 | void scheduler_ipi(void) | 2589 | void scheduler_ipi(void) |
| 2555 | { | 2590 | { |
| 2556 | sched_ttwu_pending(); | 2591 | struct rq *rq = this_rq(); |
| 2592 | struct task_struct *list = xchg(&rq->wake_list, NULL); | ||
| 2593 | |||
| 2594 | if (!list) | ||
| 2595 | return; | ||
| 2596 | |||
| 2597 | /* | ||
| 2598 | * Not all reschedule IPI handlers call irq_enter/irq_exit, since | ||
| 2599 | * traditionally all their work was done from the interrupt return | ||
| 2600 | * path. Now that we actually do some work, we need to make sure | ||
| 2601 | * we do call them. | ||
| 2602 | * | ||
| 2603 | * Some archs already do call them, luckily irq_enter/exit nest | ||
| 2604 | * properly. | ||
| 2605 | * | ||
| 2606 | * Arguably we should visit all archs and update all handlers, | ||
| 2607 | * however a fair share of IPIs are still resched only so this would | ||
| 2608 | * somewhat pessimize the simple resched case. | ||
| 2609 | */ | ||
| 2610 | irq_enter(); | ||
| 2611 | sched_ttwu_do_pending(list); | ||
| 2612 | irq_exit(); | ||
| 2557 | } | 2613 | } |
| 2558 | 2614 | ||
| 2559 | static void ttwu_queue_remote(struct task_struct *p, int cpu) | 2615 | static void ttwu_queue_remote(struct task_struct *p, int cpu) |
| @@ -2600,6 +2656,7 @@ static void ttwu_queue(struct task_struct *p, int cpu) | |||
| 2600 | 2656 | ||
| 2601 | #if defined(CONFIG_SMP) | 2657 | #if defined(CONFIG_SMP) |
| 2602 | if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) { | 2658 | if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) { |
| 2659 | sched_clock_cpu(cpu); /* sync clocks x-cpu */ | ||
| 2603 | ttwu_queue_remote(p, cpu); | 2660 | ttwu_queue_remote(p, cpu); |
| 2604 | return; | 2661 | return; |
| 2605 | } | 2662 | } |
| @@ -2674,8 +2731,10 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) | |||
| 2674 | p->sched_class->task_waking(p); | 2731 | p->sched_class->task_waking(p); |
| 2675 | 2732 | ||
| 2676 | cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags); | 2733 | cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags); |
| 2677 | if (task_cpu(p) != cpu) | 2734 | if (task_cpu(p) != cpu) { |
| 2735 | wake_flags |= WF_MIGRATED; | ||
| 2678 | set_task_cpu(p, cpu); | 2736 | set_task_cpu(p, cpu); |
| 2737 | } | ||
| 2679 | #endif /* CONFIG_SMP */ | 2738 | #endif /* CONFIG_SMP */ |
| 2680 | 2739 | ||
| 2681 | ttwu_queue(p, cpu); | 2740 | ttwu_queue(p, cpu); |
| @@ -2839,7 +2898,7 @@ void sched_fork(struct task_struct *p) | |||
| 2839 | #if defined(CONFIG_SMP) | 2898 | #if defined(CONFIG_SMP) |
| 2840 | p->on_cpu = 0; | 2899 | p->on_cpu = 0; |
| 2841 | #endif | 2900 | #endif |
| 2842 | #ifdef CONFIG_PREEMPT | 2901 | #ifdef CONFIG_PREEMPT_COUNT |
| 2843 | /* Want to start with kernel preemption disabled. */ | 2902 | /* Want to start with kernel preemption disabled. */ |
| 2844 | task_thread_info(p)->preempt_count = 1; | 2903 | task_thread_info(p)->preempt_count = 1; |
| 2845 | #endif | 2904 | #endif |
| @@ -3830,6 +3889,25 @@ void account_idle_time(cputime_t cputime) | |||
| 3830 | cpustat->idle = cputime64_add(cpustat->idle, cputime64); | 3889 | cpustat->idle = cputime64_add(cpustat->idle, cputime64); |
| 3831 | } | 3890 | } |
| 3832 | 3891 | ||
| 3892 | static __always_inline bool steal_account_process_tick(void) | ||
| 3893 | { | ||
| 3894 | #ifdef CONFIG_PARAVIRT | ||
| 3895 | if (static_branch(¶virt_steal_enabled)) { | ||
| 3896 | u64 steal, st = 0; | ||
| 3897 | |||
| 3898 | steal = paravirt_steal_clock(smp_processor_id()); | ||
| 3899 | steal -= this_rq()->prev_steal_time; | ||
| 3900 | |||
| 3901 | st = steal_ticks(steal); | ||
| 3902 | this_rq()->prev_steal_time += st * TICK_NSEC; | ||
| 3903 | |||
| 3904 | account_steal_time(st); | ||
| 3905 | return st; | ||
| 3906 | } | ||
| 3907 | #endif | ||
| 3908 | return false; | ||
| 3909 | } | ||
| 3910 | |||
| 3833 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING | 3911 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING |
| 3834 | 3912 | ||
| 3835 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | 3913 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING |
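As a quick check on the arithmetic in steal_ticks() and steal_account_process_tick() above (numbers made up, assuming HZ=1000 so TICK_NSEC is 1,000,000 ns): a steal delta of 3.5 ms accounts three full ticks, and the 0.5 ms remainder stays pending because prev_steal_time only advances by whole ticks. A stand-alone C sketch of that step:

/* Stand-alone sketch of the steal accounting step above (not kernel code). */
#include <stdint.h>
#include <stdio.h>

#define TICK_NSEC 1000000ULL			/* assumes HZ=1000 */

int main(void)
{
	uint64_t steal_clock = 3500000;		/* hypothetical paravirt_steal_clock() */
	uint64_t prev_steal_time = 0;		/* rq->prev_steal_time */

	uint64_t steal = steal_clock - prev_steal_time;
	uint64_t st = steal / TICK_NSEC;	/* steal_ticks() -> 3 ticks */

	prev_steal_time += st * TICK_NSEC;	/* consume 3,000,000 ns */

	printf("account_steal_time(%llu); %llu ns carried to the next tick\n",
	       (unsigned long long)st,
	       (unsigned long long)(steal_clock - prev_steal_time));
	return 0;
}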
| @@ -3861,6 +3939,9 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick, | |||
| 3861 | cputime64_t tmp = cputime_to_cputime64(cputime_one_jiffy); | 3939 | cputime64_t tmp = cputime_to_cputime64(cputime_one_jiffy); |
| 3862 | struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; | 3940 | struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; |
| 3863 | 3941 | ||
| 3942 | if (steal_account_process_tick()) | ||
| 3943 | return; | ||
| 3944 | |||
| 3864 | if (irqtime_account_hi_update()) { | 3945 | if (irqtime_account_hi_update()) { |
| 3865 | cpustat->irq = cputime64_add(cpustat->irq, tmp); | 3946 | cpustat->irq = cputime64_add(cpustat->irq, tmp); |
| 3866 | } else if (irqtime_account_si_update()) { | 3947 | } else if (irqtime_account_si_update()) { |
| @@ -3914,6 +3995,9 @@ void account_process_tick(struct task_struct *p, int user_tick) | |||
| 3914 | return; | 3995 | return; |
| 3915 | } | 3996 | } |
| 3916 | 3997 | ||
| 3998 | if (steal_account_process_tick()) | ||
| 3999 | return; | ||
| 4000 | |||
| 3917 | if (user_tick) | 4001 | if (user_tick) |
| 3918 | account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); | 4002 | account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); |
| 3919 | else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET)) | 4003 | else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET)) |
| @@ -4291,11 +4375,8 @@ EXPORT_SYMBOL(schedule); | |||
| 4291 | 4375 | ||
| 4292 | static inline bool owner_running(struct mutex *lock, struct task_struct *owner) | 4376 | static inline bool owner_running(struct mutex *lock, struct task_struct *owner) |
| 4293 | { | 4377 | { |
| 4294 | bool ret = false; | ||
| 4295 | |||
| 4296 | rcu_read_lock(); | ||
| 4297 | if (lock->owner != owner) | 4378 | if (lock->owner != owner) |
| 4298 | goto fail; | 4379 | return false; |
| 4299 | 4380 | ||
| 4300 | /* | 4381 | /* |
| 4301 | * Ensure we emit the owner->on_cpu, dereference _after_ checking | 4382 | * Ensure we emit the owner->on_cpu, dereference _after_ checking |
| @@ -4305,11 +4386,7 @@ static inline bool owner_running(struct mutex *lock, struct task_struct *owner) | |||
| 4305 | */ | 4386 | */ |
| 4306 | barrier(); | 4387 | barrier(); |
| 4307 | 4388 | ||
| 4308 | ret = owner->on_cpu; | 4389 | return owner->on_cpu; |
| 4309 | fail: | ||
| 4310 | rcu_read_unlock(); | ||
| 4311 | |||
| 4312 | return ret; | ||
| 4313 | } | 4390 | } |
| 4314 | 4391 | ||
| 4315 | /* | 4392 | /* |
| @@ -4321,21 +4398,21 @@ int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner) | |||
| 4321 | if (!sched_feat(OWNER_SPIN)) | 4398 | if (!sched_feat(OWNER_SPIN)) |
| 4322 | return 0; | 4399 | return 0; |
| 4323 | 4400 | ||
| 4401 | rcu_read_lock(); | ||
| 4324 | while (owner_running(lock, owner)) { | 4402 | while (owner_running(lock, owner)) { |
| 4325 | if (need_resched()) | 4403 | if (need_resched()) |
| 4326 | return 0; | 4404 | break; |
| 4327 | 4405 | ||
| 4328 | arch_mutex_cpu_relax(); | 4406 | arch_mutex_cpu_relax(); |
| 4329 | } | 4407 | } |
| 4408 | rcu_read_unlock(); | ||
| 4330 | 4409 | ||
| 4331 | /* | 4410 | /* |
| 4332 | * If the owner changed to another task there is likely | 4411 | * We break out the loop above on need_resched() and when the |
| 4333 | * heavy contention, stop spinning. | 4412 | * owner changed, which is a sign for heavy contention. Return |
| 4413 | * success only when lock->owner is NULL. | ||
| 4334 | */ | 4414 | */ |
| 4335 | if (lock->owner) | 4415 | return lock->owner == NULL; |
| 4336 | return 0; | ||
| 4337 | |||
| 4338 | return 1; | ||
| 4339 | } | 4416 | } |
| 4340 | #endif | 4417 | #endif |
| 4341 | 4418 | ||
| @@ -6542,7 +6619,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, | |||
| 6542 | break; | 6619 | break; |
| 6543 | } | 6620 | } |
| 6544 | 6621 | ||
| 6545 | if (!group->cpu_power) { | 6622 | if (!group->sgp->power) { |
| 6546 | printk(KERN_CONT "\n"); | 6623 | printk(KERN_CONT "\n"); |
| 6547 | printk(KERN_ERR "ERROR: domain->cpu_power not " | 6624 | printk(KERN_ERR "ERROR: domain->cpu_power not " |
| 6548 | "set\n"); | 6625 | "set\n"); |
| @@ -6566,9 +6643,9 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, | |||
| 6566 | cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); | 6643 | cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); |
| 6567 | 6644 | ||
| 6568 | printk(KERN_CONT " %s", str); | 6645 | printk(KERN_CONT " %s", str); |
| 6569 | if (group->cpu_power != SCHED_POWER_SCALE) { | 6646 | if (group->sgp->power != SCHED_POWER_SCALE) { |
| 6570 | printk(KERN_CONT " (cpu_power = %d)", | 6647 | printk(KERN_CONT " (cpu_power = %d)", |
| 6571 | group->cpu_power); | 6648 | group->sgp->power); |
| 6572 | } | 6649 | } |
| 6573 | 6650 | ||
| 6574 | group = group->next; | 6651 | group = group->next; |
| @@ -6759,11 +6836,39 @@ static struct root_domain *alloc_rootdomain(void) | |||
| 6759 | return rd; | 6836 | return rd; |
| 6760 | } | 6837 | } |
| 6761 | 6838 | ||
| 6839 | static void free_sched_groups(struct sched_group *sg, int free_sgp) | ||
| 6840 | { | ||
| 6841 | struct sched_group *tmp, *first; | ||
| 6842 | |||
| 6843 | if (!sg) | ||
| 6844 | return; | ||
| 6845 | |||
| 6846 | first = sg; | ||
| 6847 | do { | ||
| 6848 | tmp = sg->next; | ||
| 6849 | |||
| 6850 | if (free_sgp && atomic_dec_and_test(&sg->sgp->ref)) | ||
| 6851 | kfree(sg->sgp); | ||
| 6852 | |||
| 6853 | kfree(sg); | ||
| 6854 | sg = tmp; | ||
| 6855 | } while (sg != first); | ||
| 6856 | } | ||
| 6857 | |||
| 6762 | static void free_sched_domain(struct rcu_head *rcu) | 6858 | static void free_sched_domain(struct rcu_head *rcu) |
| 6763 | { | 6859 | { |
| 6764 | struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu); | 6860 | struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu); |
| 6765 | if (atomic_dec_and_test(&sd->groups->ref)) | 6861 | |
| 6862 | /* | ||
| 6863 | * If its an overlapping domain it has private groups, iterate and | ||
| 6864 | * nuke them all. | ||
| 6865 | */ | ||
| 6866 | if (sd->flags & SD_OVERLAP) { | ||
| 6867 | free_sched_groups(sd->groups, 1); | ||
| 6868 | } else if (atomic_dec_and_test(&sd->groups->ref)) { | ||
| 6869 | kfree(sd->groups->sgp); | ||
| 6766 | kfree(sd->groups); | 6870 | kfree(sd->groups); |
| 6871 | } | ||
| 6767 | kfree(sd); | 6872 | kfree(sd); |
| 6768 | } | 6873 | } |
| 6769 | 6874 | ||
| @@ -6930,6 +7035,7 @@ int sched_smt_power_savings = 0, sched_mc_power_savings = 0; | |||
| 6930 | struct sd_data { | 7035 | struct sd_data { |
| 6931 | struct sched_domain **__percpu sd; | 7036 | struct sched_domain **__percpu sd; |
| 6932 | struct sched_group **__percpu sg; | 7037 | struct sched_group **__percpu sg; |
| 7038 | struct sched_group_power **__percpu sgp; | ||
| 6933 | }; | 7039 | }; |
| 6934 | 7040 | ||
| 6935 | struct s_data { | 7041 | struct s_data { |
| @@ -6949,15 +7055,73 @@ struct sched_domain_topology_level; | |||
| 6949 | typedef struct sched_domain *(*sched_domain_init_f)(struct sched_domain_topology_level *tl, int cpu); | 7055 | typedef struct sched_domain *(*sched_domain_init_f)(struct sched_domain_topology_level *tl, int cpu); |
| 6950 | typedef const struct cpumask *(*sched_domain_mask_f)(int cpu); | 7056 | typedef const struct cpumask *(*sched_domain_mask_f)(int cpu); |
| 6951 | 7057 | ||
| 7058 | #define SDTL_OVERLAP 0x01 | ||
| 7059 | |||
| 6952 | struct sched_domain_topology_level { | 7060 | struct sched_domain_topology_level { |
| 6953 | sched_domain_init_f init; | 7061 | sched_domain_init_f init; |
| 6954 | sched_domain_mask_f mask; | 7062 | sched_domain_mask_f mask; |
| 7063 | int flags; | ||
| 6955 | struct sd_data data; | 7064 | struct sd_data data; |
| 6956 | }; | 7065 | }; |
| 6957 | 7066 | ||
| 6958 | /* | 7067 | static int |
| 6959 | * Assumes the sched_domain tree is fully constructed | 7068 | build_overlap_sched_groups(struct sched_domain *sd, int cpu) |
| 6960 | */ | 7069 | { |
| 7070 | struct sched_group *first = NULL, *last = NULL, *groups = NULL, *sg; | ||
| 7071 | const struct cpumask *span = sched_domain_span(sd); | ||
| 7072 | struct cpumask *covered = sched_domains_tmpmask; | ||
| 7073 | struct sd_data *sdd = sd->private; | ||
| 7074 | struct sched_domain *child; | ||
| 7075 | int i; | ||
| 7076 | |||
| 7077 | cpumask_clear(covered); | ||
| 7078 | |||
| 7079 | for_each_cpu(i, span) { | ||
| 7080 | struct cpumask *sg_span; | ||
| 7081 | |||
| 7082 | if (cpumask_test_cpu(i, covered)) | ||
| 7083 | continue; | ||
| 7084 | |||
| 7085 | sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(), | ||
| 7086 | GFP_KERNEL, cpu_to_node(i)); | ||
| 7087 | |||
| 7088 | if (!sg) | ||
| 7089 | goto fail; | ||
| 7090 | |||
| 7091 | sg_span = sched_group_cpus(sg); | ||
| 7092 | |||
| 7093 | child = *per_cpu_ptr(sdd->sd, i); | ||
| 7094 | if (child->child) { | ||
| 7095 | child = child->child; | ||
| 7096 | cpumask_copy(sg_span, sched_domain_span(child)); | ||
| 7097 | } else | ||
| 7098 | cpumask_set_cpu(i, sg_span); | ||
| 7099 | |||
| 7100 | cpumask_or(covered, covered, sg_span); | ||
| 7101 | |||
| 7102 | sg->sgp = *per_cpu_ptr(sdd->sgp, cpumask_first(sg_span)); | ||
| 7103 | atomic_inc(&sg->sgp->ref); | ||
| 7104 | |||
| 7105 | if (cpumask_test_cpu(cpu, sg_span)) | ||
| 7106 | groups = sg; | ||
| 7107 | |||
| 7108 | if (!first) | ||
| 7109 | first = sg; | ||
| 7110 | if (last) | ||
| 7111 | last->next = sg; | ||
| 7112 | last = sg; | ||
| 7113 | last->next = first; | ||
| 7114 | } | ||
| 7115 | sd->groups = groups; | ||
| 7116 | |||
| 7117 | return 0; | ||
| 7118 | |||
| 7119 | fail: | ||
| 7120 | free_sched_groups(first, 0); | ||
| 7121 | |||
| 7122 | return -ENOMEM; | ||
| 7123 | } | ||
| 7124 | |||
| 6961 | static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg) | 7125 | static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg) |
| 6962 | { | 7126 | { |
| 6963 | struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu); | 7127 | struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu); |
| @@ -6966,24 +7130,24 @@ static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg) | |||
| 6966 | if (child) | 7130 | if (child) |
| 6967 | cpu = cpumask_first(sched_domain_span(child)); | 7131 | cpu = cpumask_first(sched_domain_span(child)); |
| 6968 | 7132 | ||
| 6969 | if (sg) | 7133 | if (sg) { |
| 6970 | *sg = *per_cpu_ptr(sdd->sg, cpu); | 7134 | *sg = *per_cpu_ptr(sdd->sg, cpu); |
| 7135 | (*sg)->sgp = *per_cpu_ptr(sdd->sgp, cpu); | ||
| 7136 | atomic_set(&(*sg)->sgp->ref, 1); /* for claim_allocations */ | ||
| 7137 | } | ||
| 6971 | 7138 | ||
| 6972 | return cpu; | 7139 | return cpu; |
| 6973 | } | 7140 | } |
| 6974 | 7141 | ||
| 6975 | /* | 7142 | /* |
| 6976 | * build_sched_groups takes the cpumask we wish to span, and a pointer | ||
| 6977 | * to a function which identifies what group(along with sched group) a CPU | ||
| 6978 | * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids | ||
| 6979 | * (due to the fact that we keep track of groups covered with a struct cpumask). | ||
| 6980 | * | ||
| 6981 | * build_sched_groups will build a circular linked list of the groups | 7143 | * build_sched_groups will build a circular linked list of the groups |
| 6982 | * covered by the given span, and will set each group's ->cpumask correctly, | 7144 | * covered by the given span, and will set each group's ->cpumask correctly, |
| 6983 | * and ->cpu_power to 0. | 7145 | * and ->cpu_power to 0. |
| 7146 | * | ||
| 7147 | * Assumes the sched_domain tree is fully constructed | ||
| 6984 | */ | 7148 | */ |
| 6985 | static void | 7149 | static int |
| 6986 | build_sched_groups(struct sched_domain *sd) | 7150 | build_sched_groups(struct sched_domain *sd, int cpu) |
| 6987 | { | 7151 | { |
| 6988 | struct sched_group *first = NULL, *last = NULL; | 7152 | struct sched_group *first = NULL, *last = NULL; |
| 6989 | struct sd_data *sdd = sd->private; | 7153 | struct sd_data *sdd = sd->private; |
| @@ -6991,6 +7155,12 @@ build_sched_groups(struct sched_domain *sd) | |||
| 6991 | struct cpumask *covered; | 7155 | struct cpumask *covered; |
| 6992 | int i; | 7156 | int i; |
| 6993 | 7157 | ||
| 7158 | get_group(cpu, sdd, &sd->groups); | ||
| 7159 | atomic_inc(&sd->groups->ref); | ||
| 7160 | |||
| 7161 | if (cpu != cpumask_first(sched_domain_span(sd))) | ||
| 7162 | return 0; | ||
| 7163 | |||
| 6994 | lockdep_assert_held(&sched_domains_mutex); | 7164 | lockdep_assert_held(&sched_domains_mutex); |
| 6995 | covered = sched_domains_tmpmask; | 7165 | covered = sched_domains_tmpmask; |
| 6996 | 7166 | ||
| @@ -7005,7 +7175,7 @@ build_sched_groups(struct sched_domain *sd) | |||
| 7005 | continue; | 7175 | continue; |
| 7006 | 7176 | ||
| 7007 | cpumask_clear(sched_group_cpus(sg)); | 7177 | cpumask_clear(sched_group_cpus(sg)); |
| 7008 | sg->cpu_power = 0; | 7178 | sg->sgp->power = 0; |
| 7009 | 7179 | ||
| 7010 | for_each_cpu(j, span) { | 7180 | for_each_cpu(j, span) { |
| 7011 | if (get_group(j, sdd, NULL) != group) | 7181 | if (get_group(j, sdd, NULL) != group) |
| @@ -7022,6 +7192,8 @@ build_sched_groups(struct sched_domain *sd) | |||
| 7022 | last = sg; | 7192 | last = sg; |
| 7023 | } | 7193 | } |
| 7024 | last->next = first; | 7194 | last->next = first; |
| 7195 | |||
| 7196 | return 0; | ||
| 7025 | } | 7197 | } |
| 7026 | 7198 | ||
| 7027 | /* | 7199 | /* |
| @@ -7036,12 +7208,17 @@ build_sched_groups(struct sched_domain *sd) | |||
| 7036 | */ | 7208 | */ |
| 7037 | static void init_sched_groups_power(int cpu, struct sched_domain *sd) | 7209 | static void init_sched_groups_power(int cpu, struct sched_domain *sd) |
| 7038 | { | 7210 | { |
| 7039 | WARN_ON(!sd || !sd->groups); | 7211 | struct sched_group *sg = sd->groups; |
| 7040 | 7212 | ||
| 7041 | if (cpu != group_first_cpu(sd->groups)) | 7213 | WARN_ON(!sd || !sg); |
| 7042 | return; | ||
| 7043 | 7214 | ||
| 7044 | sd->groups->group_weight = cpumask_weight(sched_group_cpus(sd->groups)); | 7215 | do { |
| 7216 | sg->group_weight = cpumask_weight(sched_group_cpus(sg)); | ||
| 7217 | sg = sg->next; | ||
| 7218 | } while (sg != sd->groups); | ||
| 7219 | |||
| 7220 | if (cpu != group_first_cpu(sg)) | ||
| 7221 | return; | ||
| 7045 | 7222 | ||
| 7046 | update_group_power(sd, cpu); | 7223 | update_group_power(sd, cpu); |
| 7047 | } | 7224 | } |
| @@ -7162,15 +7339,15 @@ static enum s_alloc __visit_domain_allocation_hell(struct s_data *d, | |||
| 7162 | static void claim_allocations(int cpu, struct sched_domain *sd) | 7339 | static void claim_allocations(int cpu, struct sched_domain *sd) |
| 7163 | { | 7340 | { |
| 7164 | struct sd_data *sdd = sd->private; | 7341 | struct sd_data *sdd = sd->private; |
| 7165 | struct sched_group *sg = sd->groups; | ||
| 7166 | 7342 | ||
| 7167 | WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd); | 7343 | WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd); |
| 7168 | *per_cpu_ptr(sdd->sd, cpu) = NULL; | 7344 | *per_cpu_ptr(sdd->sd, cpu) = NULL; |
| 7169 | 7345 | ||
| 7170 | if (cpu == cpumask_first(sched_group_cpus(sg))) { | 7346 | if (atomic_read(&(*per_cpu_ptr(sdd->sg, cpu))->ref)) |
| 7171 | WARN_ON_ONCE(*per_cpu_ptr(sdd->sg, cpu) != sg); | ||
| 7172 | *per_cpu_ptr(sdd->sg, cpu) = NULL; | 7347 | *per_cpu_ptr(sdd->sg, cpu) = NULL; |
| 7173 | } | 7348 | |
| 7349 | if (atomic_read(&(*per_cpu_ptr(sdd->sgp, cpu))->ref)) | ||
| 7350 | *per_cpu_ptr(sdd->sgp, cpu) = NULL; | ||
| 7174 | } | 7351 | } |
| 7175 | 7352 | ||
| 7176 | #ifdef CONFIG_SCHED_SMT | 7353 | #ifdef CONFIG_SCHED_SMT |
| @@ -7195,7 +7372,7 @@ static struct sched_domain_topology_level default_topology[] = { | |||
| 7195 | #endif | 7372 | #endif |
| 7196 | { sd_init_CPU, cpu_cpu_mask, }, | 7373 | { sd_init_CPU, cpu_cpu_mask, }, |
| 7197 | #ifdef CONFIG_NUMA | 7374 | #ifdef CONFIG_NUMA |
| 7198 | { sd_init_NODE, cpu_node_mask, }, | 7375 | { sd_init_NODE, cpu_node_mask, SDTL_OVERLAP, }, |
| 7199 | { sd_init_ALLNODES, cpu_allnodes_mask, }, | 7376 | { sd_init_ALLNODES, cpu_allnodes_mask, }, |
| 7200 | #endif | 7377 | #endif |
| 7201 | { NULL, }, | 7378 | { NULL, }, |
| @@ -7219,9 +7396,14 @@ static int __sdt_alloc(const struct cpumask *cpu_map) | |||
| 7219 | if (!sdd->sg) | 7396 | if (!sdd->sg) |
| 7220 | return -ENOMEM; | 7397 | return -ENOMEM; |
| 7221 | 7398 | ||
| 7399 | sdd->sgp = alloc_percpu(struct sched_group_power *); | ||
| 7400 | if (!sdd->sgp) | ||
| 7401 | return -ENOMEM; | ||
| 7402 | |||
| 7222 | for_each_cpu(j, cpu_map) { | 7403 | for_each_cpu(j, cpu_map) { |
| 7223 | struct sched_domain *sd; | 7404 | struct sched_domain *sd; |
| 7224 | struct sched_group *sg; | 7405 | struct sched_group *sg; |
| 7406 | struct sched_group_power *sgp; | ||
| 7225 | 7407 | ||
| 7226 | sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(), | 7408 | sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(), |
| 7227 | GFP_KERNEL, cpu_to_node(j)); | 7409 | GFP_KERNEL, cpu_to_node(j)); |
| @@ -7236,6 +7418,13 @@ static int __sdt_alloc(const struct cpumask *cpu_map) | |||
| 7236 | return -ENOMEM; | 7418 | return -ENOMEM; |
| 7237 | 7419 | ||
| 7238 | *per_cpu_ptr(sdd->sg, j) = sg; | 7420 | *per_cpu_ptr(sdd->sg, j) = sg; |
| 7421 | |||
| 7422 | sgp = kzalloc_node(sizeof(struct sched_group_power), | ||
| 7423 | GFP_KERNEL, cpu_to_node(j)); | ||
| 7424 | if (!sgp) | ||
| 7425 | return -ENOMEM; | ||
| 7426 | |||
| 7427 | *per_cpu_ptr(sdd->sgp, j) = sgp; | ||
| 7239 | } | 7428 | } |
| 7240 | } | 7429 | } |
| 7241 | 7430 | ||
| @@ -7251,11 +7440,15 @@ static void __sdt_free(const struct cpumask *cpu_map) | |||
| 7251 | struct sd_data *sdd = &tl->data; | 7440 | struct sd_data *sdd = &tl->data; |
| 7252 | 7441 | ||
| 7253 | for_each_cpu(j, cpu_map) { | 7442 | for_each_cpu(j, cpu_map) { |
| 7254 | kfree(*per_cpu_ptr(sdd->sd, j)); | 7443 | struct sched_domain *sd = *per_cpu_ptr(sdd->sd, j); |
| 7444 | if (sd && (sd->flags & SD_OVERLAP)) | ||
| 7445 | free_sched_groups(sd->groups, 0); | ||
| 7255 | kfree(*per_cpu_ptr(sdd->sg, j)); | 7446 | kfree(*per_cpu_ptr(sdd->sg, j)); |
| 7447 | kfree(*per_cpu_ptr(sdd->sgp, j)); | ||
| 7256 | } | 7448 | } |
| 7257 | free_percpu(sdd->sd); | 7449 | free_percpu(sdd->sd); |
| 7258 | free_percpu(sdd->sg); | 7450 | free_percpu(sdd->sg); |
| 7451 | free_percpu(sdd->sgp); | ||
| 7259 | } | 7452 | } |
| 7260 | } | 7453 | } |
| 7261 | 7454 | ||
| @@ -7301,8 +7494,13 @@ static int build_sched_domains(const struct cpumask *cpu_map, | |||
| 7301 | struct sched_domain_topology_level *tl; | 7494 | struct sched_domain_topology_level *tl; |
| 7302 | 7495 | ||
| 7303 | sd = NULL; | 7496 | sd = NULL; |
| 7304 | for (tl = sched_domain_topology; tl->init; tl++) | 7497 | for (tl = sched_domain_topology; tl->init; tl++) { |
| 7305 | sd = build_sched_domain(tl, &d, cpu_map, attr, sd, i); | 7498 | sd = build_sched_domain(tl, &d, cpu_map, attr, sd, i); |
| 7499 | if (tl->flags & SDTL_OVERLAP || sched_feat(FORCE_SD_OVERLAP)) | ||
| 7500 | sd->flags |= SD_OVERLAP; | ||
| 7501 | if (cpumask_equal(cpu_map, sched_domain_span(sd))) | ||
| 7502 | break; | ||
| 7503 | } | ||
| 7306 | 7504 | ||
| 7307 | while (sd->child) | 7505 | while (sd->child) |
| 7308 | sd = sd->child; | 7506 | sd = sd->child; |
| @@ -7314,13 +7512,13 @@ static int build_sched_domains(const struct cpumask *cpu_map, | |||
| 7314 | for_each_cpu(i, cpu_map) { | 7512 | for_each_cpu(i, cpu_map) { |
| 7315 | for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) { | 7513 | for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) { |
| 7316 | sd->span_weight = cpumask_weight(sched_domain_span(sd)); | 7514 | sd->span_weight = cpumask_weight(sched_domain_span(sd)); |
| 7317 | get_group(i, sd->private, &sd->groups); | 7515 | if (sd->flags & SD_OVERLAP) { |
| 7318 | atomic_inc(&sd->groups->ref); | 7516 | if (build_overlap_sched_groups(sd, i)) |
| 7319 | 7517 | goto error; | |
| 7320 | if (i != cpumask_first(sched_domain_span(sd))) | 7518 | } else { |
| 7321 | continue; | 7519 | if (build_sched_groups(sd, i)) |
| 7322 | 7520 | goto error; | |
| 7323 | build_sched_groups(sd); | 7521 | } |
| 7324 | } | 7522 | } |
| 7325 | } | 7523 | } |
| 7326 | 7524 | ||
| @@ -7730,18 +7928,14 @@ int in_sched_functions(unsigned long addr) | |||
| 7730 | && addr < (unsigned long)__sched_text_end); | 7928 | && addr < (unsigned long)__sched_text_end); |
| 7731 | } | 7929 | } |
| 7732 | 7930 | ||
| 7733 | static void init_cfs_rq(struct cfs_rq *cfs_rq, struct rq *rq) | 7931 | static void init_cfs_rq(struct cfs_rq *cfs_rq) |
| 7734 | { | 7932 | { |
| 7735 | cfs_rq->tasks_timeline = RB_ROOT; | 7933 | cfs_rq->tasks_timeline = RB_ROOT; |
| 7736 | INIT_LIST_HEAD(&cfs_rq->tasks); | 7934 | INIT_LIST_HEAD(&cfs_rq->tasks); |
| 7737 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
| 7738 | cfs_rq->rq = rq; | ||
| 7739 | /* allow initial update_cfs_load() to truncate */ | ||
| 7740 | #ifdef CONFIG_SMP | ||
| 7741 | cfs_rq->load_stamp = 1; | ||
| 7742 | #endif | ||
| 7743 | #endif | ||
| 7744 | cfs_rq->min_vruntime = (u64)(-(1LL << 20)); | 7935 | cfs_rq->min_vruntime = (u64)(-(1LL << 20)); |
| 7936 | #ifndef CONFIG_64BIT | ||
| 7937 | cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime; | ||
| 7938 | #endif | ||
| 7745 | } | 7939 | } |
| 7746 | 7940 | ||
| 7747 | static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq) | 7941 | static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq) |
| @@ -7757,27 +7951,18 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq) | |||
| 7757 | /* delimiter for bitsearch: */ | 7951 | /* delimiter for bitsearch: */ |
| 7758 | __set_bit(MAX_RT_PRIO, array->bitmap); | 7952 | __set_bit(MAX_RT_PRIO, array->bitmap); |
| 7759 | 7953 | ||
| 7760 | #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED | 7954 | #if defined CONFIG_SMP |
| 7761 | rt_rq->highest_prio.curr = MAX_RT_PRIO; | 7955 | rt_rq->highest_prio.curr = MAX_RT_PRIO; |
| 7762 | #ifdef CONFIG_SMP | ||
| 7763 | rt_rq->highest_prio.next = MAX_RT_PRIO; | 7956 | rt_rq->highest_prio.next = MAX_RT_PRIO; |
| 7764 | #endif | ||
| 7765 | #endif | ||
| 7766 | #ifdef CONFIG_SMP | ||
| 7767 | rt_rq->rt_nr_migratory = 0; | 7957 | rt_rq->rt_nr_migratory = 0; |
| 7768 | rt_rq->overloaded = 0; | 7958 | rt_rq->overloaded = 0; |
| 7769 | plist_head_init_raw(&rt_rq->pushable_tasks, &rq->lock); | 7959 | plist_head_init(&rt_rq->pushable_tasks); |
| 7770 | #endif | 7960 | #endif |
| 7771 | 7961 | ||
| 7772 | rt_rq->rt_time = 0; | 7962 | rt_rq->rt_time = 0; |
| 7773 | rt_rq->rt_throttled = 0; | 7963 | rt_rq->rt_throttled = 0; |
| 7774 | rt_rq->rt_runtime = 0; | 7964 | rt_rq->rt_runtime = 0; |
| 7775 | raw_spin_lock_init(&rt_rq->rt_runtime_lock); | 7965 | raw_spin_lock_init(&rt_rq->rt_runtime_lock); |
| 7776 | |||
| 7777 | #ifdef CONFIG_RT_GROUP_SCHED | ||
| 7778 | rt_rq->rt_nr_boosted = 0; | ||
| 7779 | rt_rq->rq = rq; | ||
| 7780 | #endif | ||
| 7781 | } | 7966 | } |
| 7782 | 7967 | ||
| 7783 | #ifdef CONFIG_FAIR_GROUP_SCHED | 7968 | #ifdef CONFIG_FAIR_GROUP_SCHED |
| @@ -7786,11 +7971,17 @@ static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, | |||
| 7786 | struct sched_entity *parent) | 7971 | struct sched_entity *parent) |
| 7787 | { | 7972 | { |
| 7788 | struct rq *rq = cpu_rq(cpu); | 7973 | struct rq *rq = cpu_rq(cpu); |
| 7789 | tg->cfs_rq[cpu] = cfs_rq; | 7974 | |
| 7790 | init_cfs_rq(cfs_rq, rq); | ||
| 7791 | cfs_rq->tg = tg; | 7975 | cfs_rq->tg = tg; |
| 7976 | cfs_rq->rq = rq; | ||
| 7977 | #ifdef CONFIG_SMP | ||
| 7978 | /* allow initial update_cfs_load() to truncate */ | ||
| 7979 | cfs_rq->load_stamp = 1; | ||
| 7980 | #endif | ||
| 7792 | 7981 | ||
| 7982 | tg->cfs_rq[cpu] = cfs_rq; | ||
| 7793 | tg->se[cpu] = se; | 7983 | tg->se[cpu] = se; |
| 7984 | |||
| 7794 | /* se could be NULL for root_task_group */ | 7985 | /* se could be NULL for root_task_group */ |
| 7795 | if (!se) | 7986 | if (!se) |
| 7796 | return; | 7987 | return; |
| @@ -7813,12 +8004,14 @@ static void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq, | |||
| 7813 | { | 8004 | { |
| 7814 | struct rq *rq = cpu_rq(cpu); | 8005 | struct rq *rq = cpu_rq(cpu); |
| 7815 | 8006 | ||
| 7816 | tg->rt_rq[cpu] = rt_rq; | 8007 | rt_rq->highest_prio.curr = MAX_RT_PRIO; |
| 7817 | init_rt_rq(rt_rq, rq); | 8008 | rt_rq->rt_nr_boosted = 0; |
| 8009 | rt_rq->rq = rq; | ||
| 7818 | rt_rq->tg = tg; | 8010 | rt_rq->tg = tg; |
| 7819 | rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime; | ||
| 7820 | 8011 | ||
| 8012 | tg->rt_rq[cpu] = rt_rq; | ||
| 7821 | tg->rt_se[cpu] = rt_se; | 8013 | tg->rt_se[cpu] = rt_se; |
| 8014 | |||
| 7822 | if (!rt_se) | 8015 | if (!rt_se) |
| 7823 | return; | 8016 | return; |
| 7824 | 8017 | ||
| @@ -7900,7 +8093,7 @@ void __init sched_init(void) | |||
| 7900 | rq->nr_running = 0; | 8093 | rq->nr_running = 0; |
| 7901 | rq->calc_load_active = 0; | 8094 | rq->calc_load_active = 0; |
| 7902 | rq->calc_load_update = jiffies + LOAD_FREQ; | 8095 | rq->calc_load_update = jiffies + LOAD_FREQ; |
| 7903 | init_cfs_rq(&rq->cfs, rq); | 8096 | init_cfs_rq(&rq->cfs); |
| 7904 | init_rt_rq(&rq->rt, rq); | 8097 | init_rt_rq(&rq->rt, rq); |
| 7905 | #ifdef CONFIG_FAIR_GROUP_SCHED | 8098 | #ifdef CONFIG_FAIR_GROUP_SCHED |
| 7906 | root_task_group.shares = root_task_group_load; | 8099 | root_task_group.shares = root_task_group_load; |
| @@ -7971,7 +8164,7 @@ void __init sched_init(void) | |||
| 7971 | #endif | 8164 | #endif |
| 7972 | 8165 | ||
| 7973 | #ifdef CONFIG_RT_MUTEXES | 8166 | #ifdef CONFIG_RT_MUTEXES |
| 7974 | plist_head_init_raw(&init_task.pi_waiters, &init_task.pi_lock); | 8167 | plist_head_init(&init_task.pi_waiters); |
| 7975 | #endif | 8168 | #endif |
| 7976 | 8169 | ||
| 7977 | /* | 8170 | /* |
| @@ -8014,7 +8207,7 @@ void __init sched_init(void) | |||
| 8014 | scheduler_running = 1; | 8207 | scheduler_running = 1; |
| 8015 | } | 8208 | } |
| 8016 | 8209 | ||
| 8017 | #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP | 8210 | #ifdef CONFIG_DEBUG_ATOMIC_SLEEP |
| 8018 | static inline int preempt_count_equals(int preempt_offset) | 8211 | static inline int preempt_count_equals(int preempt_offset) |
| 8019 | { | 8212 | { |
| 8020 | int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth(); | 8213 | int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth(); |
| @@ -8024,7 +8217,6 @@ static inline int preempt_count_equals(int preempt_offset) | |||
| 8024 | 8217 | ||
| 8025 | void __might_sleep(const char *file, int line, int preempt_offset) | 8218 | void __might_sleep(const char *file, int line, int preempt_offset) |
| 8026 | { | 8219 | { |
| 8027 | #ifdef in_atomic | ||
| 8028 | static unsigned long prev_jiffy; /* ratelimiting */ | 8220 | static unsigned long prev_jiffy; /* ratelimiting */ |
| 8029 | 8221 | ||
| 8030 | if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) || | 8222 | if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) || |
| @@ -8046,7 +8238,6 @@ void __might_sleep(const char *file, int line, int preempt_offset) | |||
| 8046 | if (irqs_disabled()) | 8238 | if (irqs_disabled()) |
| 8047 | print_irqtrace_events(current); | 8239 | print_irqtrace_events(current); |
| 8048 | dump_stack(); | 8240 | dump_stack(); |
| 8049 | #endif | ||
| 8050 | } | 8241 | } |
| 8051 | EXPORT_SYMBOL(__might_sleep); | 8242 | EXPORT_SYMBOL(__might_sleep); |
| 8052 | #endif | 8243 | #endif |
| @@ -8205,6 +8396,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) | |||
| 8205 | if (!se) | 8396 | if (!se) |
| 8206 | goto err_free_rq; | 8397 | goto err_free_rq; |
| 8207 | 8398 | ||
| 8399 | init_cfs_rq(cfs_rq); | ||
| 8208 | init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]); | 8400 | init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]); |
| 8209 | } | 8401 | } |
| 8210 | 8402 | ||
| @@ -8232,7 +8424,7 @@ static inline void unregister_fair_sched_group(struct task_group *tg, int cpu) | |||
| 8232 | list_del_leaf_cfs_rq(tg->cfs_rq[cpu]); | 8424 | list_del_leaf_cfs_rq(tg->cfs_rq[cpu]); |
| 8233 | raw_spin_unlock_irqrestore(&rq->lock, flags); | 8425 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
| 8234 | } | 8426 | } |
| 8235 | #else /* !CONFG_FAIR_GROUP_SCHED */ | 8427 | #else /* !CONFIG_FAIR_GROUP_SCHED */ |
| 8236 | static inline void free_fair_sched_group(struct task_group *tg) | 8428 | static inline void free_fair_sched_group(struct task_group *tg) |
| 8237 | { | 8429 | { |
| 8238 | } | 8430 | } |
| @@ -8253,7 +8445,8 @@ static void free_rt_sched_group(struct task_group *tg) | |||
| 8253 | { | 8445 | { |
| 8254 | int i; | 8446 | int i; |
| 8255 | 8447 | ||
| 8256 | destroy_rt_bandwidth(&tg->rt_bandwidth); | 8448 | if (tg->rt_se) |
| 8449 | destroy_rt_bandwidth(&tg->rt_bandwidth); | ||
| 8257 | 8450 | ||
| 8258 | for_each_possible_cpu(i) { | 8451 | for_each_possible_cpu(i) { |
| 8259 | if (tg->rt_rq) | 8452 | if (tg->rt_rq) |
| @@ -8294,6 +8487,8 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent) | |||
| 8294 | if (!rt_se) | 8487 | if (!rt_se) |
| 8295 | goto err_free_rq; | 8488 | goto err_free_rq; |
| 8296 | 8489 | ||
| 8490 | init_rt_rq(rt_rq, cpu_rq(i)); | ||
| 8491 | rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime; | ||
| 8297 | init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]); | 8492 | init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]); |
| 8298 | } | 8493 | } |
| 8299 | 8494 | ||
| @@ -8435,10 +8630,7 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) | |||
| 8435 | if (!tg->se[0]) | 8630 | if (!tg->se[0]) |
| 8436 | return -EINVAL; | 8631 | return -EINVAL; |
| 8437 | 8632 | ||
| 8438 | if (shares < MIN_SHARES) | 8633 | shares = clamp(shares, scale_load(MIN_SHARES), scale_load(MAX_SHARES)); |
| 8439 | shares = MIN_SHARES; | ||
| 8440 | else if (shares > MAX_SHARES) | ||
| 8441 | shares = MAX_SHARES; | ||
| 8442 | 8634 | ||
| 8443 | mutex_lock(&shares_mutex); | 8635 | mutex_lock(&shares_mutex); |
| 8444 | if (tg->shares == shares) | 8636 | if (tg->shares == shares) |
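The last hunk replaces sched_group_set_shares()'s open-coded bounds checks with a single clamp(); with scale_load() being a plain shift by SCHED_LOAD_RESOLUTION (a no-op while the higher resolution is disabled), the accepted cpu.shares range works out to [2, 262144]. A stand-alone illustration of the clamp (values hypothetical):

/* Stand-alone illustration of the new shares clamp (not kernel code). */
#include <stdio.h>

#define MIN_SHARES (1UL << 1)		/* 2      */
#define MAX_SHARES (1UL << 18)		/* 262144 */
#define clamp_val(v, lo, hi) ((v) < (lo) ? (lo) : ((v) > (hi) ? (hi) : (v)))

int main(void)
{
	unsigned long requests[] = { 0, 1024, 1UL << 20 };
	int i;

	for (i = 0; i < 3; i++)
		printf("cpu.shares %lu -> %lu\n", requests[i],
		       clamp_val(requests[i], MIN_SHARES, MAX_SHARES));
	/* prints: 0 -> 2, 1024 -> 1024, 1048576 -> 262144 */
	return 0;
}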
diff --git a/kernel/sched_autogroup.h b/kernel/sched_autogroup.h index 05577055cfca..c2f0e7248dca 100644 --- a/kernel/sched_autogroup.h +++ b/kernel/sched_autogroup.h | |||
| @@ -13,6 +13,7 @@ struct autogroup { | |||
| 13 | int nice; | 13 | int nice; |
| 14 | }; | 14 | }; |
| 15 | 15 | ||
| 16 | static inline bool task_group_is_autogroup(struct task_group *tg); | ||
| 16 | static inline struct task_group * | 17 | static inline struct task_group * |
| 17 | autogroup_task_group(struct task_struct *p, struct task_group *tg); | 18 | autogroup_task_group(struct task_struct *p, struct task_group *tg); |
| 18 | 19 | ||
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 433491c2dc8f..bc8ee9993814 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
| @@ -135,14 +135,6 @@ static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp) | |||
| 135 | return grp->my_q; | 135 | return grp->my_q; |
| 136 | } | 136 | } |
| 137 | 137 | ||
| 138 | /* Given a group's cfs_rq on one cpu, return its corresponding cfs_rq on | ||
| 139 | * another cpu ('this_cpu') | ||
| 140 | */ | ||
| 141 | static inline struct cfs_rq *cpu_cfs_rq(struct cfs_rq *cfs_rq, int this_cpu) | ||
| 142 | { | ||
| 143 | return cfs_rq->tg->cfs_rq[this_cpu]; | ||
| 144 | } | ||
| 145 | |||
| 146 | static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq) | 138 | static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq) |
| 147 | { | 139 | { |
| 148 | if (!cfs_rq->on_list) { | 140 | if (!cfs_rq->on_list) { |
| @@ -271,11 +263,6 @@ static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp) | |||
| 271 | return NULL; | 263 | return NULL; |
| 272 | } | 264 | } |
| 273 | 265 | ||
| 274 | static inline struct cfs_rq *cpu_cfs_rq(struct cfs_rq *cfs_rq, int this_cpu) | ||
| 275 | { | ||
| 276 | return &cpu_rq(this_cpu)->cfs; | ||
| 277 | } | ||
| 278 | |||
| 279 | static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq) | 266 | static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq) |
| 280 | { | 267 | { |
| 281 | } | 268 | } |
| @@ -334,11 +321,6 @@ static inline int entity_before(struct sched_entity *a, | |||
| 334 | return (s64)(a->vruntime - b->vruntime) < 0; | 321 | return (s64)(a->vruntime - b->vruntime) < 0; |
| 335 | } | 322 | } |
| 336 | 323 | ||
| 337 | static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se) | ||
| 338 | { | ||
| 339 | return se->vruntime - cfs_rq->min_vruntime; | ||
| 340 | } | ||
| 341 | |||
| 342 | static void update_min_vruntime(struct cfs_rq *cfs_rq) | 324 | static void update_min_vruntime(struct cfs_rq *cfs_rq) |
| 343 | { | 325 | { |
| 344 | u64 vruntime = cfs_rq->min_vruntime; | 326 | u64 vruntime = cfs_rq->min_vruntime; |
| @@ -372,7 +354,6 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
| 372 | struct rb_node **link = &cfs_rq->tasks_timeline.rb_node; | 354 | struct rb_node **link = &cfs_rq->tasks_timeline.rb_node; |
| 373 | struct rb_node *parent = NULL; | 355 | struct rb_node *parent = NULL; |
| 374 | struct sched_entity *entry; | 356 | struct sched_entity *entry; |
| 375 | s64 key = entity_key(cfs_rq, se); | ||
| 376 | int leftmost = 1; | 357 | int leftmost = 1; |
| 377 | 358 | ||
| 378 | /* | 359 | /* |
| @@ -385,7 +366,7 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
| 385 | * We dont care about collisions. Nodes with | 366 | * We dont care about collisions. Nodes with |
| 386 | * the same key stay together. | 367 | * the same key stay together. |
| 387 | */ | 368 | */ |
| 388 | if (key < entity_key(cfs_rq, entry)) { | 369 | if (entity_before(se, entry)) { |
| 389 | link = &parent->rb_left; | 370 | link = &parent->rb_left; |
| 390 | } else { | 371 | } else { |
| 391 | link = &parent->rb_right; | 372 | link = &parent->rb_right; |
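The hunks above drop entity_key() and key the rbtree insertion on entity_before() directly. The two tests agree because subtracting cfs_rq->min_vruntime from both vruntimes cannot reorder them, while entity_before()'s signed difference also behaves across u64 wrap. A stand-alone restatement of both forms (names are local to the example):

    #include <stdint.h>
    #include <stdbool.h>

    /* Same test as entity_before(): signed difference of the raw vruntimes. */
    static bool before(uint64_t a_vruntime, uint64_t b_vruntime)
    {
        return (int64_t)(a_vruntime - b_vruntime) < 0;
    }

    /* The removed key-based test, written out: both keys subtract the same
     * min_vruntime, so for entities whose vruntime stays within a bounded
     * window of min_vruntime the two tests order nodes identically. */
    static bool before_by_key(uint64_t a_vruntime, uint64_t b_vruntime,
                              uint64_t min_vruntime)
    {
        int64_t key_a = (int64_t)(a_vruntime - min_vruntime);
        int64_t key_b = (int64_t)(b_vruntime - min_vruntime);

        return key_a < key_b;
    }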
| @@ -1336,7 +1317,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) | |||
| 1336 | } | 1317 | } |
| 1337 | 1318 | ||
| 1338 | for_each_sched_entity(se) { | 1319 | for_each_sched_entity(se) { |
| 1339 | struct cfs_rq *cfs_rq = cfs_rq_of(se); | 1320 | cfs_rq = cfs_rq_of(se); |
| 1340 | 1321 | ||
| 1341 | update_cfs_load(cfs_rq, 0); | 1322 | update_cfs_load(cfs_rq, 0); |
| 1342 | update_cfs_shares(cfs_rq); | 1323 | update_cfs_shares(cfs_rq); |
| @@ -1370,13 +1351,16 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) | |||
| 1370 | */ | 1351 | */ |
| 1371 | if (task_sleep && parent_entity(se)) | 1352 | if (task_sleep && parent_entity(se)) |
| 1372 | set_next_buddy(parent_entity(se)); | 1353 | set_next_buddy(parent_entity(se)); |
| 1354 | |||
| 1355 | /* avoid re-evaluating load for this entity */ | ||
| 1356 | se = parent_entity(se); | ||
| 1373 | break; | 1357 | break; |
| 1374 | } | 1358 | } |
| 1375 | flags |= DEQUEUE_SLEEP; | 1359 | flags |= DEQUEUE_SLEEP; |
| 1376 | } | 1360 | } |
| 1377 | 1361 | ||
| 1378 | for_each_sched_entity(se) { | 1362 | for_each_sched_entity(se) { |
| 1379 | struct cfs_rq *cfs_rq = cfs_rq_of(se); | 1363 | cfs_rq = cfs_rq_of(se); |
| 1380 | 1364 | ||
| 1381 | update_cfs_load(cfs_rq, 0); | 1365 | update_cfs_load(cfs_rq, 0); |
| 1382 | update_cfs_shares(cfs_rq); | 1366 | update_cfs_shares(cfs_rq); |
| @@ -1481,7 +1465,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync) | |||
| 1481 | * effect of the currently running task from the load | 1465 | * effect of the currently running task from the load |
| 1482 | * of the current CPU: | 1466 | * of the current CPU: |
| 1483 | */ | 1467 | */ |
| 1484 | rcu_read_lock(); | ||
| 1485 | if (sync) { | 1468 | if (sync) { |
| 1486 | tg = task_group(current); | 1469 | tg = task_group(current); |
| 1487 | weight = current->se.load.weight; | 1470 | weight = current->se.load.weight; |
| @@ -1517,7 +1500,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync) | |||
| 1517 | balanced = this_eff_load <= prev_eff_load; | 1500 | balanced = this_eff_load <= prev_eff_load; |
| 1518 | } else | 1501 | } else |
| 1519 | balanced = true; | 1502 | balanced = true; |
| 1520 | rcu_read_unlock(); | ||
| 1521 | 1503 | ||
| 1522 | /* | 1504 | /* |
| 1523 | * If the currently running task will sleep within | 1505 | * If the currently running task will sleep within |
| @@ -1585,7 +1567,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, | |||
| 1585 | } | 1567 | } |
| 1586 | 1568 | ||
| 1587 | /* Adjust by relative CPU power of the group */ | 1569 | /* Adjust by relative CPU power of the group */ |
| 1588 | avg_load = (avg_load * SCHED_POWER_SCALE) / group->cpu_power; | 1570 | avg_load = (avg_load * SCHED_POWER_SCALE) / group->sgp->power; |
| 1589 | 1571 | ||
| 1590 | if (local_group) { | 1572 | if (local_group) { |
| 1591 | this_load = avg_load; | 1573 | this_load = avg_load; |
| @@ -1921,8 +1903,8 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ | |||
| 1921 | if (!sched_feat(WAKEUP_PREEMPT)) | 1903 | if (!sched_feat(WAKEUP_PREEMPT)) |
| 1922 | return; | 1904 | return; |
| 1923 | 1905 | ||
| 1924 | update_curr(cfs_rq); | ||
| 1925 | find_matching_se(&se, &pse); | 1906 | find_matching_se(&se, &pse); |
| 1907 | update_curr(cfs_rq_of(se)); | ||
| 1926 | BUG_ON(!pse); | 1908 | BUG_ON(!pse); |
| 1927 | if (wakeup_preempt_entity(se, pse) == 1) { | 1909 | if (wakeup_preempt_entity(se, pse) == 1) { |
| 1928 | /* | 1910 | /* |
| @@ -2231,11 +2213,43 @@ static void update_shares(int cpu) | |||
| 2231 | struct rq *rq = cpu_rq(cpu); | 2213 | struct rq *rq = cpu_rq(cpu); |
| 2232 | 2214 | ||
| 2233 | rcu_read_lock(); | 2215 | rcu_read_lock(); |
| 2216 | /* | ||
| 2217 | * Iterates the task_group tree in a bottom up fashion, see | ||
| 2218 | * list_add_leaf_cfs_rq() for details. | ||
| 2219 | */ | ||
| 2234 | for_each_leaf_cfs_rq(rq, cfs_rq) | 2220 | for_each_leaf_cfs_rq(rq, cfs_rq) |
| 2235 | update_shares_cpu(cfs_rq->tg, cpu); | 2221 | update_shares_cpu(cfs_rq->tg, cpu); |
| 2236 | rcu_read_unlock(); | 2222 | rcu_read_unlock(); |
| 2237 | } | 2223 | } |
| 2238 | 2224 | ||
| 2225 | /* | ||
| 2226 | * Compute the cpu's hierarchical load factor for each task group. | ||
| 2227 | * This needs to be done in a top-down fashion because the load of a child | ||
| 2228 | * group is a fraction of its parents load. | ||
| 2229 | */ | ||
| 2230 | static int tg_load_down(struct task_group *tg, void *data) | ||
| 2231 | { | ||
| 2232 | unsigned long load; | ||
| 2233 | long cpu = (long)data; | ||
| 2234 | |||
| 2235 | if (!tg->parent) { | ||
| 2236 | load = cpu_rq(cpu)->load.weight; | ||
| 2237 | } else { | ||
| 2238 | load = tg->parent->cfs_rq[cpu]->h_load; | ||
| 2239 | load *= tg->se[cpu]->load.weight; | ||
| 2240 | load /= tg->parent->cfs_rq[cpu]->load.weight + 1; | ||
| 2241 | } | ||
| 2242 | |||
| 2243 | tg->cfs_rq[cpu]->h_load = load; | ||
| 2244 | |||
| 2245 | return 0; | ||
| 2246 | } | ||
| 2247 | |||
| 2248 | static void update_h_load(long cpu) | ||
| 2249 | { | ||
| 2250 | walk_tg_tree(tg_load_down, tg_nop, (void *)cpu); | ||
| 2251 | } | ||
| 2252 | |||
| 2239 | static unsigned long | 2253 | static unsigned long |
| 2240 | load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | 2254 | load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, |
| 2241 | unsigned long max_load_move, | 2255 | unsigned long max_load_move, |
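The new tg_load_down() fills in each group's per-cpu hierarchical load top-down: the root group takes the runqueue weight directly, and every child scales its parent's h_load by the child entity's weight over the parent cfs_rq's weight (the +1 guards against a zero divisor). A worked example with made-up weights:

    #include <stdio.h>

    /* Illustrative numbers only; mirrors the tg_load_down() formula above. */
    int main(void)
    {
        unsigned long rq_weight    = 3072; /* cpu_rq(cpu)->load.weight          */
        unsigned long root_h_load  = rq_weight;
        unsigned long child_se_w   = 1024; /* tg->se[cpu]->load.weight          */
        unsigned long parent_cfs_w = 3072; /* parent->cfs_rq[cpu]->load.weight  */
        unsigned long child_h_load = root_h_load * child_se_w / (parent_cfs_w + 1);

        /* 3072 * 1024 / 3073 = 1023: the child carries roughly a third of the
         * CPU's load, matching its share of the parent's weight. */
        printf("child h_load = %lu\n", child_h_load);
        return 0;
    }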
| @@ -2243,14 +2257,12 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
| 2243 | int *all_pinned) | 2257 | int *all_pinned) |
| 2244 | { | 2258 | { |
| 2245 | long rem_load_move = max_load_move; | 2259 | long rem_load_move = max_load_move; |
| 2246 | int busiest_cpu = cpu_of(busiest); | 2260 | struct cfs_rq *busiest_cfs_rq; |
| 2247 | struct task_group *tg; | ||
| 2248 | 2261 | ||
| 2249 | rcu_read_lock(); | 2262 | rcu_read_lock(); |
| 2250 | update_h_load(busiest_cpu); | 2263 | update_h_load(cpu_of(busiest)); |
| 2251 | 2264 | ||
| 2252 | list_for_each_entry_rcu(tg, &task_groups, list) { | 2265 | for_each_leaf_cfs_rq(busiest, busiest_cfs_rq) { |
| 2253 | struct cfs_rq *busiest_cfs_rq = tg->cfs_rq[busiest_cpu]; | ||
| 2254 | unsigned long busiest_h_load = busiest_cfs_rq->h_load; | 2266 | unsigned long busiest_h_load = busiest_cfs_rq->h_load; |
| 2255 | unsigned long busiest_weight = busiest_cfs_rq->load.weight; | 2267 | unsigned long busiest_weight = busiest_cfs_rq->load.weight; |
| 2256 | u64 rem_load, moved_load; | 2268 | u64 rem_load, moved_load; |
| @@ -2631,7 +2643,7 @@ static void update_cpu_power(struct sched_domain *sd, int cpu) | |||
| 2631 | power >>= SCHED_POWER_SHIFT; | 2643 | power >>= SCHED_POWER_SHIFT; |
| 2632 | } | 2644 | } |
| 2633 | 2645 | ||
| 2634 | sdg->cpu_power_orig = power; | 2646 | sdg->sgp->power_orig = power; |
| 2635 | 2647 | ||
| 2636 | if (sched_feat(ARCH_POWER)) | 2648 | if (sched_feat(ARCH_POWER)) |
| 2637 | power *= arch_scale_freq_power(sd, cpu); | 2649 | power *= arch_scale_freq_power(sd, cpu); |
| @@ -2647,7 +2659,7 @@ static void update_cpu_power(struct sched_domain *sd, int cpu) | |||
| 2647 | power = 1; | 2659 | power = 1; |
| 2648 | 2660 | ||
| 2649 | cpu_rq(cpu)->cpu_power = power; | 2661 | cpu_rq(cpu)->cpu_power = power; |
| 2650 | sdg->cpu_power = power; | 2662 | sdg->sgp->power = power; |
| 2651 | } | 2663 | } |
| 2652 | 2664 | ||
| 2653 | static void update_group_power(struct sched_domain *sd, int cpu) | 2665 | static void update_group_power(struct sched_domain *sd, int cpu) |
| @@ -2665,11 +2677,11 @@ static void update_group_power(struct sched_domain *sd, int cpu) | |||
| 2665 | 2677 | ||
| 2666 | group = child->groups; | 2678 | group = child->groups; |
| 2667 | do { | 2679 | do { |
| 2668 | power += group->cpu_power; | 2680 | power += group->sgp->power; |
| 2669 | group = group->next; | 2681 | group = group->next; |
| 2670 | } while (group != child->groups); | 2682 | } while (group != child->groups); |
| 2671 | 2683 | ||
| 2672 | sdg->cpu_power = power; | 2684 | sdg->sgp->power = power; |
| 2673 | } | 2685 | } |
| 2674 | 2686 | ||
| 2675 | /* | 2687 | /* |
| @@ -2691,7 +2703,7 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group) | |||
| 2691 | /* | 2703 | /* |
| 2692 | * If ~90% of the cpu_power is still there, we're good. | 2704 | * If ~90% of the cpu_power is still there, we're good. |
| 2693 | */ | 2705 | */ |
| 2694 | if (group->cpu_power * 32 > group->cpu_power_orig * 29) | 2706 | if (group->sgp->power * 32 > group->sgp->power_orig * 29) |
| 2695 | return 1; | 2707 | return 1; |
| 2696 | 2708 | ||
| 2697 | return 0; | 2709 | return 0; |
| @@ -2771,7 +2783,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, | |||
| 2771 | } | 2783 | } |
| 2772 | 2784 | ||
| 2773 | /* Adjust by relative CPU power of the group */ | 2785 | /* Adjust by relative CPU power of the group */ |
| 2774 | sgs->avg_load = (sgs->group_load*SCHED_POWER_SCALE) / group->cpu_power; | 2786 | sgs->avg_load = (sgs->group_load*SCHED_POWER_SCALE) / group->sgp->power; |
| 2775 | 2787 | ||
| 2776 | /* | 2788 | /* |
| 2777 | * Consider the group unbalanced when the imbalance is larger | 2789 | * Consider the group unbalanced when the imbalance is larger |
| @@ -2788,7 +2800,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, | |||
| 2788 | if ((max_cpu_load - min_cpu_load) >= avg_load_per_task && max_nr_running > 1) | 2800 | if ((max_cpu_load - min_cpu_load) >= avg_load_per_task && max_nr_running > 1) |
| 2789 | sgs->group_imb = 1; | 2801 | sgs->group_imb = 1; |
| 2790 | 2802 | ||
| 2791 | sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, | 2803 | sgs->group_capacity = DIV_ROUND_CLOSEST(group->sgp->power, |
| 2792 | SCHED_POWER_SCALE); | 2804 | SCHED_POWER_SCALE); |
| 2793 | if (!sgs->group_capacity) | 2805 | if (!sgs->group_capacity) |
| 2794 | sgs->group_capacity = fix_small_capacity(sd, group); | 2806 | sgs->group_capacity = fix_small_capacity(sd, group); |
| @@ -2877,7 +2889,7 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu, | |||
| 2877 | return; | 2889 | return; |
| 2878 | 2890 | ||
| 2879 | sds->total_load += sgs.group_load; | 2891 | sds->total_load += sgs.group_load; |
| 2880 | sds->total_pwr += sg->cpu_power; | 2892 | sds->total_pwr += sg->sgp->power; |
| 2881 | 2893 | ||
| 2882 | /* | 2894 | /* |
| 2883 | * In case the child domain prefers tasks go to siblings | 2895 | * In case the child domain prefers tasks go to siblings |
| @@ -2962,7 +2974,7 @@ static int check_asym_packing(struct sched_domain *sd, | |||
| 2962 | if (this_cpu > busiest_cpu) | 2974 | if (this_cpu > busiest_cpu) |
| 2963 | return 0; | 2975 | return 0; |
| 2964 | 2976 | ||
| 2965 | *imbalance = DIV_ROUND_CLOSEST(sds->max_load * sds->busiest->cpu_power, | 2977 | *imbalance = DIV_ROUND_CLOSEST(sds->max_load * sds->busiest->sgp->power, |
| 2966 | SCHED_POWER_SCALE); | 2978 | SCHED_POWER_SCALE); |
| 2967 | return 1; | 2979 | return 1; |
| 2968 | } | 2980 | } |
| @@ -2993,7 +3005,7 @@ static inline void fix_small_imbalance(struct sd_lb_stats *sds, | |||
| 2993 | 3005 | ||
| 2994 | scaled_busy_load_per_task = sds->busiest_load_per_task | 3006 | scaled_busy_load_per_task = sds->busiest_load_per_task |
| 2995 | * SCHED_POWER_SCALE; | 3007 | * SCHED_POWER_SCALE; |
| 2996 | scaled_busy_load_per_task /= sds->busiest->cpu_power; | 3008 | scaled_busy_load_per_task /= sds->busiest->sgp->power; |
| 2997 | 3009 | ||
| 2998 | if (sds->max_load - sds->this_load + scaled_busy_load_per_task >= | 3010 | if (sds->max_load - sds->this_load + scaled_busy_load_per_task >= |
| 2999 | (scaled_busy_load_per_task * imbn)) { | 3011 | (scaled_busy_load_per_task * imbn)) { |
| @@ -3007,28 +3019,28 @@ static inline void fix_small_imbalance(struct sd_lb_stats *sds, | |||
| 3007 | * moving them. | 3019 | * moving them. |
| 3008 | */ | 3020 | */ |
| 3009 | 3021 | ||
| 3010 | pwr_now += sds->busiest->cpu_power * | 3022 | pwr_now += sds->busiest->sgp->power * |
| 3011 | min(sds->busiest_load_per_task, sds->max_load); | 3023 | min(sds->busiest_load_per_task, sds->max_load); |
| 3012 | pwr_now += sds->this->cpu_power * | 3024 | pwr_now += sds->this->sgp->power * |
| 3013 | min(sds->this_load_per_task, sds->this_load); | 3025 | min(sds->this_load_per_task, sds->this_load); |
| 3014 | pwr_now /= SCHED_POWER_SCALE; | 3026 | pwr_now /= SCHED_POWER_SCALE; |
| 3015 | 3027 | ||
| 3016 | /* Amount of load we'd subtract */ | 3028 | /* Amount of load we'd subtract */ |
| 3017 | tmp = (sds->busiest_load_per_task * SCHED_POWER_SCALE) / | 3029 | tmp = (sds->busiest_load_per_task * SCHED_POWER_SCALE) / |
| 3018 | sds->busiest->cpu_power; | 3030 | sds->busiest->sgp->power; |
| 3019 | if (sds->max_load > tmp) | 3031 | if (sds->max_load > tmp) |
| 3020 | pwr_move += sds->busiest->cpu_power * | 3032 | pwr_move += sds->busiest->sgp->power * |
| 3021 | min(sds->busiest_load_per_task, sds->max_load - tmp); | 3033 | min(sds->busiest_load_per_task, sds->max_load - tmp); |
| 3022 | 3034 | ||
| 3023 | /* Amount of load we'd add */ | 3035 | /* Amount of load we'd add */ |
| 3024 | if (sds->max_load * sds->busiest->cpu_power < | 3036 | if (sds->max_load * sds->busiest->sgp->power < |
| 3025 | sds->busiest_load_per_task * SCHED_POWER_SCALE) | 3037 | sds->busiest_load_per_task * SCHED_POWER_SCALE) |
| 3026 | tmp = (sds->max_load * sds->busiest->cpu_power) / | 3038 | tmp = (sds->max_load * sds->busiest->sgp->power) / |
| 3027 | sds->this->cpu_power; | 3039 | sds->this->sgp->power; |
| 3028 | else | 3040 | else |
| 3029 | tmp = (sds->busiest_load_per_task * SCHED_POWER_SCALE) / | 3041 | tmp = (sds->busiest_load_per_task * SCHED_POWER_SCALE) / |
| 3030 | sds->this->cpu_power; | 3042 | sds->this->sgp->power; |
| 3031 | pwr_move += sds->this->cpu_power * | 3043 | pwr_move += sds->this->sgp->power * |
| 3032 | min(sds->this_load_per_task, sds->this_load + tmp); | 3044 | min(sds->this_load_per_task, sds->this_load + tmp); |
| 3033 | pwr_move /= SCHED_POWER_SCALE; | 3045 | pwr_move /= SCHED_POWER_SCALE; |
| 3034 | 3046 | ||
| @@ -3074,7 +3086,7 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu, | |||
| 3074 | 3086 | ||
| 3075 | load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_POWER_SCALE); | 3087 | load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_POWER_SCALE); |
| 3076 | 3088 | ||
| 3077 | load_above_capacity /= sds->busiest->cpu_power; | 3089 | load_above_capacity /= sds->busiest->sgp->power; |
| 3078 | } | 3090 | } |
| 3079 | 3091 | ||
| 3080 | /* | 3092 | /* |
| @@ -3090,8 +3102,8 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu, | |||
| 3090 | max_pull = min(sds->max_load - sds->avg_load, load_above_capacity); | 3102 | max_pull = min(sds->max_load - sds->avg_load, load_above_capacity); |
| 3091 | 3103 | ||
| 3092 | /* How much load to actually move to equalise the imbalance */ | 3104 | /* How much load to actually move to equalise the imbalance */ |
| 3093 | *imbalance = min(max_pull * sds->busiest->cpu_power, | 3105 | *imbalance = min(max_pull * sds->busiest->sgp->power, |
| 3094 | (sds->avg_load - sds->this_load) * sds->this->cpu_power) | 3106 | (sds->avg_load - sds->this_load) * sds->this->sgp->power) |
| 3095 | / SCHED_POWER_SCALE; | 3107 | / SCHED_POWER_SCALE; |
| 3096 | 3108 | ||
| 3097 | /* | 3109 | /* |
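Every group->cpu_power reference in the sched_fair.c hunks above becomes group->sgp->power because the power fields move out of struct sched_group into a separately allocated, reference-counted object that groups with overlapping spans can share; the FORCE_SD_OVERLAP knob added to sched_features.h below belongs to the same series. A rough sketch of the resulting layout, reproduced from memory rather than from this diff, so treat the names and field order as approximate:

    /* Approximate shape only; the authoritative definition lives in the
     * matching include/linux/sched.h change. */
    typedef struct { int counter; } atomic_t;   /* stand-in for the kernel type */

    struct sched_group_power {
        atomic_t ref;            /* shared by groups whose spans overlap     */
        unsigned int power;      /* was sched_group::cpu_power               */
        unsigned int power_orig; /* was sched_group::cpu_power_orig          */
    };

    struct sched_group {
        struct sched_group *next;
        struct sched_group_power *sgp;   /* hence group->sgp->power above */
        /* cpumask and the remaining members are unchanged */
    };

Splitting the power data out is what lets two groups describing the same CPUs point at one shared copy instead of each carrying stale private values.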
diff --git a/kernel/sched_features.h b/kernel/sched_features.h index be40f7371ee1..2e74677cb040 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h | |||
| @@ -61,12 +61,14 @@ SCHED_FEAT(LB_BIAS, 1) | |||
| 61 | SCHED_FEAT(OWNER_SPIN, 1) | 61 | SCHED_FEAT(OWNER_SPIN, 1) |
| 62 | 62 | ||
| 63 | /* | 63 | /* |
| 64 | * Decrement CPU power based on irq activity | 64 | * Decrement CPU power based on time not spent running tasks |
| 65 | */ | 65 | */ |
| 66 | SCHED_FEAT(NONIRQ_POWER, 1) | 66 | SCHED_FEAT(NONTASK_POWER, 1) |
| 67 | 67 | ||
| 68 | /* | 68 | /* |
| 69 | * Queue remote wakeups on the target CPU and process them | 69 | * Queue remote wakeups on the target CPU and process them |
| 70 | * using the scheduler IPI. Reduces rq->lock contention/bounces. | 70 | * using the scheduler IPI. Reduces rq->lock contention/bounces. |
| 71 | */ | 71 | */ |
| 72 | SCHED_FEAT(TTWU_QUEUE, 1) | 72 | SCHED_FEAT(TTWU_QUEUE, 1) |
| 73 | |||
| 74 | SCHED_FEAT(FORCE_SD_OVERLAP, 0) | ||
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 88725c939e0b..97540f0c9e47 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
| @@ -185,11 +185,23 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq) | |||
| 185 | 185 | ||
| 186 | typedef struct task_group *rt_rq_iter_t; | 186 | typedef struct task_group *rt_rq_iter_t; |
| 187 | 187 | ||
| 188 | #define for_each_rt_rq(rt_rq, iter, rq) \ | 188 | static inline struct task_group *next_task_group(struct task_group *tg) |
| 189 | for (iter = list_entry_rcu(task_groups.next, typeof(*iter), list); \ | 189 | { |
| 190 | (&iter->list != &task_groups) && \ | 190 | do { |
| 191 | (rt_rq = iter->rt_rq[cpu_of(rq)]); \ | 191 | tg = list_entry_rcu(tg->list.next, |
| 192 | iter = list_entry_rcu(iter->list.next, typeof(*iter), list)) | 192 | typeof(struct task_group), list); |
| 193 | } while (&tg->list != &task_groups && task_group_is_autogroup(tg)); | ||
| 194 | |||
| 195 | if (&tg->list == &task_groups) | ||
| 196 | tg = NULL; | ||
| 197 | |||
| 198 | return tg; | ||
| 199 | } | ||
| 200 | |||
| 201 | #define for_each_rt_rq(rt_rq, iter, rq) \ | ||
| 202 | for (iter = container_of(&task_groups, typeof(*iter), list); \ | ||
| 203 | (iter = next_task_group(iter)) && \ | ||
| 204 | (rt_rq = iter->rt_rq[cpu_of(rq)]);) | ||
| 193 | 205 | ||
| 194 | static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq) | 206 | static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq) |
| 195 | { | 207 | { |
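The rewritten for_each_rt_rq() still walks the global task_groups list, but now funnels every step through next_task_group(), which skips autogroup entries (hence the task_group_is_autogroup() forward declaration added to sched_autogroup.h above) and returns NULL once the walk is back at the list head. A simplified user-space model of that "iterate, but filter" shape; the types and the autogroup flag are local to the example, not the kernel's:

    #include <stdbool.h>
    #include <stddef.h>

    /* Toy stand-ins for task_group / task_groups. */
    struct group {
        struct group *next;      /* circular list; head is a sentinel */
        bool autogroup;
    };

    static struct group head = { .next = &head };  /* plays the role of task_groups */

    /* Same shape as next_task_group(): advance, skip filtered nodes, and
     * report NULL once we are back at the list head. */
    static struct group *next_group(struct group *g)
    {
        do {
            g = g->next;
        } while (g != &head && g->autogroup);

        return g == &head ? NULL : g;
    }

    #define for_each_group(g) \
        for ((g) = &head; ((g) = next_group(g)) != NULL; )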
| @@ -1096,7 +1108,7 @@ static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flag | |||
| 1096 | * to move current somewhere else, making room for our non-migratable | 1108 | * to move current somewhere else, making room for our non-migratable |
| 1097 | * task. | 1109 | * task. |
| 1098 | */ | 1110 | */ |
| 1099 | if (p->prio == rq->curr->prio && !need_resched()) | 1111 | if (p->prio == rq->curr->prio && !test_tsk_need_resched(rq->curr)) |
| 1100 | check_preempt_equal_prio(rq, p); | 1112 | check_preempt_equal_prio(rq, p); |
| 1101 | #endif | 1113 | #endif |
| 1102 | } | 1114 | } |
| @@ -1126,7 +1138,7 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq) | |||
| 1126 | 1138 | ||
| 1127 | rt_rq = &rq->rt; | 1139 | rt_rq = &rq->rt; |
| 1128 | 1140 | ||
| 1129 | if (unlikely(!rt_rq->rt_nr_running)) | 1141 | if (!rt_rq->rt_nr_running) |
| 1130 | return NULL; | 1142 | return NULL; |
| 1131 | 1143 | ||
| 1132 | if (rt_rq_throttled(rt_rq)) | 1144 | if (rt_rq_throttled(rt_rq)) |
| @@ -1239,6 +1251,10 @@ static int find_lowest_rq(struct task_struct *task) | |||
| 1239 | int this_cpu = smp_processor_id(); | 1251 | int this_cpu = smp_processor_id(); |
| 1240 | int cpu = task_cpu(task); | 1252 | int cpu = task_cpu(task); |
| 1241 | 1253 | ||
| 1254 | /* Make sure the mask is initialized first */ | ||
| 1255 | if (unlikely(!lowest_mask)) | ||
| 1256 | return -1; | ||
| 1257 | |||
| 1242 | if (task->rt.nr_cpus_allowed == 1) | 1258 | if (task->rt.nr_cpus_allowed == 1) |
| 1243 | return -1; /* No other targets possible */ | 1259 | return -1; /* No other targets possible */ |
| 1244 | 1260 | ||
| @@ -1544,7 +1560,7 @@ skip: | |||
| 1544 | static void pre_schedule_rt(struct rq *rq, struct task_struct *prev) | 1560 | static void pre_schedule_rt(struct rq *rq, struct task_struct *prev) |
| 1545 | { | 1561 | { |
| 1546 | /* Try to pull RT tasks here if we lower this rq's prio */ | 1562 | /* Try to pull RT tasks here if we lower this rq's prio */ |
| 1547 | if (unlikely(rt_task(prev)) && rq->rt.highest_prio.curr > prev->prio) | 1563 | if (rq->rt.highest_prio.curr > prev->prio) |
| 1548 | pull_rt_task(rq); | 1564 | pull_rt_task(rq); |
| 1549 | } | 1565 | } |
| 1550 | 1566 | ||
diff --git a/kernel/signal.c b/kernel/signal.c index 86c32b884f8e..291c9700be75 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
| @@ -87,7 +87,7 @@ static int sig_ignored(struct task_struct *t, int sig, int from_ancestor_ns) | |||
| 87 | /* | 87 | /* |
| 88 | * Tracers may want to know about even ignored signals. | 88 | * Tracers may want to know about even ignored signals. |
| 89 | */ | 89 | */ |
| 90 | return !tracehook_consider_ignored_signal(t, sig); | 90 | return !t->ptrace; |
| 91 | } | 91 | } |
| 92 | 92 | ||
| 93 | /* | 93 | /* |
| @@ -124,7 +124,7 @@ static inline int has_pending_signals(sigset_t *signal, sigset_t *blocked) | |||
| 124 | 124 | ||
| 125 | static int recalc_sigpending_tsk(struct task_struct *t) | 125 | static int recalc_sigpending_tsk(struct task_struct *t) |
| 126 | { | 126 | { |
| 127 | if ((t->group_stop & GROUP_STOP_PENDING) || | 127 | if ((t->jobctl & JOBCTL_PENDING_MASK) || |
| 128 | PENDING(&t->pending, &t->blocked) || | 128 | PENDING(&t->pending, &t->blocked) || |
| 129 | PENDING(&t->signal->shared_pending, &t->blocked)) { | 129 | PENDING(&t->signal->shared_pending, &t->blocked)) { |
| 130 | set_tsk_thread_flag(t, TIF_SIGPENDING); | 130 | set_tsk_thread_flag(t, TIF_SIGPENDING); |
| @@ -150,9 +150,7 @@ void recalc_sigpending_and_wake(struct task_struct *t) | |||
| 150 | 150 | ||
| 151 | void recalc_sigpending(void) | 151 | void recalc_sigpending(void) |
| 152 | { | 152 | { |
| 153 | if (unlikely(tracehook_force_sigpending())) | 153 | if (!recalc_sigpending_tsk(current) && !freezing(current)) |
| 154 | set_thread_flag(TIF_SIGPENDING); | ||
| 155 | else if (!recalc_sigpending_tsk(current) && !freezing(current)) | ||
| 156 | clear_thread_flag(TIF_SIGPENDING); | 154 | clear_thread_flag(TIF_SIGPENDING); |
| 157 | 155 | ||
| 158 | } | 156 | } |
| @@ -224,47 +222,93 @@ static inline void print_dropped_signal(int sig) | |||
| 224 | } | 222 | } |
| 225 | 223 | ||
| 226 | /** | 224 | /** |
| 227 | * task_clear_group_stop_trapping - clear group stop trapping bit | 225 | * task_set_jobctl_pending - set jobctl pending bits |
| 228 | * @task: target task | 226 | * @task: target task |
| 227 | * @mask: pending bits to set | ||
| 229 | * | 228 | * |
| 230 | * If GROUP_STOP_TRAPPING is set, a ptracer is waiting for us. Clear it | 229 | * Clear @mask from @task->jobctl. @mask must be subset of |
| 231 | * and wake up the ptracer. Note that we don't need any further locking. | 230 | * %JOBCTL_PENDING_MASK | %JOBCTL_STOP_CONSUME | %JOBCTL_STOP_SIGMASK | |
| 232 | * @task->siglock guarantees that @task->parent points to the ptracer. | 231 | * %JOBCTL_TRAPPING. If stop signo is being set, the existing signo is |
| 232 | * cleared. If @task is already being killed or exiting, this function | ||
| 233 | * becomes noop. | ||
| 233 | * | 234 | * |
| 234 | * CONTEXT: | 235 | * CONTEXT: |
| 235 | * Must be called with @task->sighand->siglock held. | 236 | * Must be called with @task->sighand->siglock held. |
| 237 | * | ||
| 238 | * RETURNS: | ||
| 239 | * %true if @mask is set, %false if made noop because @task was dying. | ||
| 236 | */ | 240 | */ |
| 237 | static void task_clear_group_stop_trapping(struct task_struct *task) | 241 | bool task_set_jobctl_pending(struct task_struct *task, unsigned int mask) |
| 238 | { | 242 | { |
| 239 | if (unlikely(task->group_stop & GROUP_STOP_TRAPPING)) { | 243 | BUG_ON(mask & ~(JOBCTL_PENDING_MASK | JOBCTL_STOP_CONSUME | |
| 240 | task->group_stop &= ~GROUP_STOP_TRAPPING; | 244 | JOBCTL_STOP_SIGMASK | JOBCTL_TRAPPING)); |
| 241 | __wake_up_sync_key(&task->parent->signal->wait_chldexit, | 245 | BUG_ON((mask & JOBCTL_TRAPPING) && !(mask & JOBCTL_PENDING_MASK)); |
| 242 | TASK_UNINTERRUPTIBLE, 1, task); | 246 | |
| 247 | if (unlikely(fatal_signal_pending(task) || (task->flags & PF_EXITING))) | ||
| 248 | return false; | ||
| 249 | |||
| 250 | if (mask & JOBCTL_STOP_SIGMASK) | ||
| 251 | task->jobctl &= ~JOBCTL_STOP_SIGMASK; | ||
| 252 | |||
| 253 | task->jobctl |= mask; | ||
| 254 | return true; | ||
| 255 | } | ||
| 256 | |||
| 257 | /** | ||
| 258 | * task_clear_jobctl_trapping - clear jobctl trapping bit | ||
| 259 | * @task: target task | ||
| 260 | * | ||
| 261 | * If JOBCTL_TRAPPING is set, a ptracer is waiting for us to enter TRACED. | ||
| 262 | * Clear it and wake up the ptracer. Note that we don't need any further | ||
| 263 | * locking. @task->siglock guarantees that @task->parent points to the | ||
| 264 | * ptracer. | ||
| 265 | * | ||
| 266 | * CONTEXT: | ||
| 267 | * Must be called with @task->sighand->siglock held. | ||
| 268 | */ | ||
| 269 | void task_clear_jobctl_trapping(struct task_struct *task) | ||
| 270 | { | ||
| 271 | if (unlikely(task->jobctl & JOBCTL_TRAPPING)) { | ||
| 272 | task->jobctl &= ~JOBCTL_TRAPPING; | ||
| 273 | wake_up_bit(&task->jobctl, JOBCTL_TRAPPING_BIT); | ||
| 243 | } | 274 | } |
| 244 | } | 275 | } |
| 245 | 276 | ||
| 246 | /** | 277 | /** |
| 247 | * task_clear_group_stop_pending - clear pending group stop | 278 | * task_clear_jobctl_pending - clear jobctl pending bits |
| 248 | * @task: target task | 279 | * @task: target task |
| 280 | * @mask: pending bits to clear | ||
| 281 | * | ||
| 282 | * Clear @mask from @task->jobctl. @mask must be subset of | ||
| 283 | * %JOBCTL_PENDING_MASK. If %JOBCTL_STOP_PENDING is being cleared, other | ||
| 284 | * STOP bits are cleared together. | ||
| 249 | * | 285 | * |
| 250 | * Clear group stop states for @task. | 286 | * If clearing of @mask leaves no stop or trap pending, this function calls |
| 287 | * task_clear_jobctl_trapping(). | ||
| 251 | * | 288 | * |
| 252 | * CONTEXT: | 289 | * CONTEXT: |
| 253 | * Must be called with @task->sighand->siglock held. | 290 | * Must be called with @task->sighand->siglock held. |
| 254 | */ | 291 | */ |
| 255 | void task_clear_group_stop_pending(struct task_struct *task) | 292 | void task_clear_jobctl_pending(struct task_struct *task, unsigned int mask) |
| 256 | { | 293 | { |
| 257 | task->group_stop &= ~(GROUP_STOP_PENDING | GROUP_STOP_CONSUME | | 294 | BUG_ON(mask & ~JOBCTL_PENDING_MASK); |
| 258 | GROUP_STOP_DEQUEUED); | 295 | |
| 296 | if (mask & JOBCTL_STOP_PENDING) | ||
| 297 | mask |= JOBCTL_STOP_CONSUME | JOBCTL_STOP_DEQUEUED; | ||
| 298 | |||
| 299 | task->jobctl &= ~mask; | ||
| 300 | |||
| 301 | if (!(task->jobctl & JOBCTL_PENDING_MASK)) | ||
| 302 | task_clear_jobctl_trapping(task); | ||
| 259 | } | 303 | } |
| 260 | 304 | ||
| 261 | /** | 305 | /** |
| 262 | * task_participate_group_stop - participate in a group stop | 306 | * task_participate_group_stop - participate in a group stop |
| 263 | * @task: task participating in a group stop | 307 | * @task: task participating in a group stop |
| 264 | * | 308 | * |
| 265 | * @task has GROUP_STOP_PENDING set and is participating in a group stop. | 309 | * @task has %JOBCTL_STOP_PENDING set and is participating in a group stop. |
| 266 | * Group stop states are cleared and the group stop count is consumed if | 310 | * Group stop states are cleared and the group stop count is consumed if |
| 267 | * %GROUP_STOP_CONSUME was set. If the consumption completes the group | 311 | * %JOBCTL_STOP_CONSUME was set. If the consumption completes the group |
| 268 | * stop, the appropriate %SIGNAL_* flags are set. | 312 | * stop, the appropriate %SIGNAL_* flags are set. |
| 269 | * | 313 | * |
| 270 | * CONTEXT: | 314 | * CONTEXT: |
| @@ -277,11 +321,11 @@ void task_clear_group_stop_pending(struct task_struct *task) | |||
| 277 | static bool task_participate_group_stop(struct task_struct *task) | 321 | static bool task_participate_group_stop(struct task_struct *task) |
| 278 | { | 322 | { |
| 279 | struct signal_struct *sig = task->signal; | 323 | struct signal_struct *sig = task->signal; |
| 280 | bool consume = task->group_stop & GROUP_STOP_CONSUME; | 324 | bool consume = task->jobctl & JOBCTL_STOP_CONSUME; |
| 281 | 325 | ||
| 282 | WARN_ON_ONCE(!(task->group_stop & GROUP_STOP_PENDING)); | 326 | WARN_ON_ONCE(!(task->jobctl & JOBCTL_STOP_PENDING)); |
| 283 | 327 | ||
| 284 | task_clear_group_stop_pending(task); | 328 | task_clear_jobctl_pending(task, JOBCTL_STOP_PENDING); |
| 285 | 329 | ||
| 286 | if (!consume) | 330 | if (!consume) |
| 287 | return false; | 331 | return false; |
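The two helpers above replace ad-hoc GROUP_STOP_* twiddling with a small contract: setting pending bits is refused for a dying task, a new stop signal displaces the old one in the low bits, clearing JOBCTL_STOP_PENDING drags the other STOP bits with it, and TRAPPING is dropped as soon as nothing is pending. A compact stand-alone model of those rules; the bit values are illustrative, not the ones defined in the accompanying include/linux/sched.h change:

    #include <stdbool.h>

    /* Illustrative bit layout only. */
    #define STOP_SIGMASK   0x00ffU  /* stop signal number lives in the low bits */
    #define STOP_DEQUEUED  0x0100U
    #define STOP_PENDING   0x0200U
    #define STOP_CONSUME   0x0400U
    #define TRAP_STOP      0x0800U
    #define TRAP_NOTIFY    0x1000U
    #define TRAPPING       0x2000U
    #define PENDING_MASK   (STOP_PENDING | TRAP_STOP | TRAP_NOTIFY)

    struct task { unsigned int jobctl; bool dying; };

    /* Models task_set_jobctl_pending(). */
    static bool set_pending(struct task *t, unsigned int mask)
    {
        if (t->dying)
            return false;               /* noop on killed/exiting tasks */
        if (mask & STOP_SIGMASK)
            t->jobctl &= ~STOP_SIGMASK; /* new stop signo replaces the old */
        t->jobctl |= mask;
        return true;
    }

    /* Models task_clear_jobctl_pending(). */
    static void clear_pending(struct task *t, unsigned int mask)
    {
        if (mask & STOP_PENDING)
            mask |= STOP_CONSUME | STOP_DEQUEUED;
        t->jobctl &= ~mask;
        if (!(t->jobctl & PENDING_MASK))
            t->jobctl &= ~TRAPPING;     /* task_clear_jobctl_trapping() */
    }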
| @@ -449,7 +493,8 @@ int unhandled_signal(struct task_struct *tsk, int sig) | |||
| 449 | return 1; | 493 | return 1; |
| 450 | if (handler != SIG_IGN && handler != SIG_DFL) | 494 | if (handler != SIG_IGN && handler != SIG_DFL) |
| 451 | return 0; | 495 | return 0; |
| 452 | return !tracehook_consider_fatal_signal(tsk, sig); | 496 | /* if ptraced, let the tracer determine */ |
| 497 | return !tsk->ptrace; | ||
| 453 | } | 498 | } |
| 454 | 499 | ||
| 455 | /* | 500 | /* |
| @@ -604,7 +649,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) | |||
| 604 | * is to alert stop-signal processing code when another | 649 | * is to alert stop-signal processing code when another |
| 605 | * processor has come along and cleared the flag. | 650 | * processor has come along and cleared the flag. |
| 606 | */ | 651 | */ |
| 607 | current->group_stop |= GROUP_STOP_DEQUEUED; | 652 | current->jobctl |= JOBCTL_STOP_DEQUEUED; |
| 608 | } | 653 | } |
| 609 | if ((info->si_code & __SI_MASK) == __SI_TIMER && info->si_sys_private) { | 654 | if ((info->si_code & __SI_MASK) == __SI_TIMER && info->si_sys_private) { |
| 610 | /* | 655 | /* |
| @@ -773,6 +818,32 @@ static int check_kill_permission(int sig, struct siginfo *info, | |||
| 773 | return security_task_kill(t, info, sig, 0); | 818 | return security_task_kill(t, info, sig, 0); |
| 774 | } | 819 | } |
| 775 | 820 | ||
| 821 | /** | ||
| 822 | * ptrace_trap_notify - schedule trap to notify ptracer | ||
| 823 | * @t: tracee wanting to notify tracer | ||
| 824 | * | ||
| 825 | * This function schedules sticky ptrace trap which is cleared on the next | ||
| 826 | * TRAP_STOP to notify ptracer of an event. @t must have been seized by | ||
| 827 | * ptracer. | ||
| 828 | * | ||
| 829 | * If @t is running, STOP trap will be taken. If trapped for STOP and | ||
| 830 | * ptracer is listening for events, tracee is woken up so that it can | ||
| 831 | * re-trap for the new event. If trapped otherwise, STOP trap will be | ||
| 832 | * eventually taken without returning to userland after the existing traps | ||
| 833 | * are finished by PTRACE_CONT. | ||
| 834 | * | ||
| 835 | * CONTEXT: | ||
| 836 | * Must be called with @task->sighand->siglock held. | ||
| 837 | */ | ||
| 838 | static void ptrace_trap_notify(struct task_struct *t) | ||
| 839 | { | ||
| 840 | WARN_ON_ONCE(!(t->ptrace & PT_SEIZED)); | ||
| 841 | assert_spin_locked(&t->sighand->siglock); | ||
| 842 | |||
| 843 | task_set_jobctl_pending(t, JOBCTL_TRAP_NOTIFY); | ||
| 844 | signal_wake_up(t, t->jobctl & JOBCTL_LISTENING); | ||
| 845 | } | ||
| 846 | |||
| 776 | /* | 847 | /* |
| 777 | * Handle magic process-wide effects of stop/continue signals. Unlike | 848 | * Handle magic process-wide effects of stop/continue signals. Unlike |
| 778 | * the signal actions, these happen immediately at signal-generation | 849 | * the signal actions, these happen immediately at signal-generation |
| @@ -809,9 +880,12 @@ static int prepare_signal(int sig, struct task_struct *p, int from_ancestor_ns) | |||
| 809 | rm_from_queue(SIG_KERNEL_STOP_MASK, &signal->shared_pending); | 880 | rm_from_queue(SIG_KERNEL_STOP_MASK, &signal->shared_pending); |
| 810 | t = p; | 881 | t = p; |
| 811 | do { | 882 | do { |
| 812 | task_clear_group_stop_pending(t); | 883 | task_clear_jobctl_pending(t, JOBCTL_STOP_PENDING); |
| 813 | rm_from_queue(SIG_KERNEL_STOP_MASK, &t->pending); | 884 | rm_from_queue(SIG_KERNEL_STOP_MASK, &t->pending); |
| 814 | wake_up_state(t, __TASK_STOPPED); | 885 | if (likely(!(t->ptrace & PT_SEIZED))) |
| 886 | wake_up_state(t, __TASK_STOPPED); | ||
| 887 | else | ||
| 888 | ptrace_trap_notify(t); | ||
| 815 | } while_each_thread(p, t); | 889 | } while_each_thread(p, t); |
| 816 | 890 | ||
| 817 | /* | 891 | /* |
| @@ -908,8 +982,7 @@ static void complete_signal(int sig, struct task_struct *p, int group) | |||
| 908 | if (sig_fatal(p, sig) && | 982 | if (sig_fatal(p, sig) && |
| 909 | !(signal->flags & (SIGNAL_UNKILLABLE | SIGNAL_GROUP_EXIT)) && | 983 | !(signal->flags & (SIGNAL_UNKILLABLE | SIGNAL_GROUP_EXIT)) && |
| 910 | !sigismember(&t->real_blocked, sig) && | 984 | !sigismember(&t->real_blocked, sig) && |
| 911 | (sig == SIGKILL || | 985 | (sig == SIGKILL || !t->ptrace)) { |
| 912 | !tracehook_consider_fatal_signal(t, sig))) { | ||
| 913 | /* | 986 | /* |
| 914 | * This signal will be fatal to the whole group. | 987 | * This signal will be fatal to the whole group. |
| 915 | */ | 988 | */ |
| @@ -925,7 +998,7 @@ static void complete_signal(int sig, struct task_struct *p, int group) | |||
| 925 | signal->group_stop_count = 0; | 998 | signal->group_stop_count = 0; |
| 926 | t = p; | 999 | t = p; |
| 927 | do { | 1000 | do { |
| 928 | task_clear_group_stop_pending(t); | 1001 | task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); |
| 929 | sigaddset(&t->pending.signal, SIGKILL); | 1002 | sigaddset(&t->pending.signal, SIGKILL); |
| 930 | signal_wake_up(t, 1); | 1003 | signal_wake_up(t, 1); |
| 931 | } while_each_thread(p, t); | 1004 | } while_each_thread(p, t); |
| @@ -1160,7 +1233,7 @@ int zap_other_threads(struct task_struct *p) | |||
| 1160 | p->signal->group_stop_count = 0; | 1233 | p->signal->group_stop_count = 0; |
| 1161 | 1234 | ||
| 1162 | while_each_thread(p, t) { | 1235 | while_each_thread(p, t) { |
| 1163 | task_clear_group_stop_pending(t); | 1236 | task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); |
| 1164 | count++; | 1237 | count++; |
| 1165 | 1238 | ||
| 1166 | /* Don't bother with already dead threads */ | 1239 | /* Don't bother with already dead threads */ |
| @@ -1178,18 +1251,25 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, | |||
| 1178 | { | 1251 | { |
| 1179 | struct sighand_struct *sighand; | 1252 | struct sighand_struct *sighand; |
| 1180 | 1253 | ||
| 1181 | rcu_read_lock(); | ||
| 1182 | for (;;) { | 1254 | for (;;) { |
| 1255 | local_irq_save(*flags); | ||
| 1256 | rcu_read_lock(); | ||
| 1183 | sighand = rcu_dereference(tsk->sighand); | 1257 | sighand = rcu_dereference(tsk->sighand); |
| 1184 | if (unlikely(sighand == NULL)) | 1258 | if (unlikely(sighand == NULL)) { |
| 1259 | rcu_read_unlock(); | ||
| 1260 | local_irq_restore(*flags); | ||
| 1185 | break; | 1261 | break; |
| 1262 | } | ||
| 1186 | 1263 | ||
| 1187 | spin_lock_irqsave(&sighand->siglock, *flags); | 1264 | spin_lock(&sighand->siglock); |
| 1188 | if (likely(sighand == tsk->sighand)) | 1265 | if (likely(sighand == tsk->sighand)) { |
| 1266 | rcu_read_unlock(); | ||
| 1189 | break; | 1267 | break; |
| 1190 | spin_unlock_irqrestore(&sighand->siglock, *flags); | 1268 | } |
| 1269 | spin_unlock(&sighand->siglock); | ||
| 1270 | rcu_read_unlock(); | ||
| 1271 | local_irq_restore(*flags); | ||
| 1191 | } | 1272 | } |
| 1192 | rcu_read_unlock(); | ||
| 1193 | 1273 | ||
| 1194 | return sighand; | 1274 | return sighand; |
| 1195 | } | 1275 | } |
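The reworked __lock_task_sighand() is the usual "lock through an unstable pointer" dance, now with the irq-disable hoisted out of spin_lock_irqsave(): disable interrupts, fetch the pointer under RCU, take the lock, then re-check that tsk->sighand still points at the locked object and retry if it raced with exec or exit. A stripped-down user-space sketch of just the re-validation loop; it leaves out RCU and irq flags and silently assumes the old object is never freed under us, which is what RCU guarantees the kernel version:

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stddef.h>

    struct obj   { pthread_mutex_t lock; };
    struct owner { _Atomic(struct obj *) cur; };

    /* Lock whatever object @o currently points at; NULL if it went away. */
    static struct obj *lock_current(struct owner *o)
    {
        for (;;) {
            struct obj *p = atomic_load(&o->cur);

            if (!p)
                return NULL;
            pthread_mutex_lock(&p->lock);
            if (p == atomic_load(&o->cur))
                return p;                    /* still the live object */
            pthread_mutex_unlock(&p->lock);  /* raced with a switch; retry */
        }
    }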
| @@ -1504,22 +1584,22 @@ ret: | |||
| 1504 | * Let a parent know about the death of a child. | 1584 | * Let a parent know about the death of a child. |
| 1505 | * For a stopped/continued status change, use do_notify_parent_cldstop instead. | 1585 | * For a stopped/continued status change, use do_notify_parent_cldstop instead. |
| 1506 | * | 1586 | * |
| 1507 | * Returns -1 if our parent ignored us and so we've switched to | 1587 | * Returns true if our parent ignored us and so we've switched to |
| 1508 | * self-reaping, or else @sig. | 1588 | * self-reaping. |
| 1509 | */ | 1589 | */ |
| 1510 | int do_notify_parent(struct task_struct *tsk, int sig) | 1590 | bool do_notify_parent(struct task_struct *tsk, int sig) |
| 1511 | { | 1591 | { |
| 1512 | struct siginfo info; | 1592 | struct siginfo info; |
| 1513 | unsigned long flags; | 1593 | unsigned long flags; |
| 1514 | struct sighand_struct *psig; | 1594 | struct sighand_struct *psig; |
| 1515 | int ret = sig; | 1595 | bool autoreap = false; |
| 1516 | 1596 | ||
| 1517 | BUG_ON(sig == -1); | 1597 | BUG_ON(sig == -1); |
| 1518 | 1598 | ||
| 1519 | /* do_notify_parent_cldstop should have been called instead. */ | 1599 | /* do_notify_parent_cldstop should have been called instead. */ |
| 1520 | BUG_ON(task_is_stopped_or_traced(tsk)); | 1600 | BUG_ON(task_is_stopped_or_traced(tsk)); |
| 1521 | 1601 | ||
| 1522 | BUG_ON(!task_ptrace(tsk) && | 1602 | BUG_ON(!tsk->ptrace && |
| 1523 | (tsk->group_leader != tsk || !thread_group_empty(tsk))); | 1603 | (tsk->group_leader != tsk || !thread_group_empty(tsk))); |
| 1524 | 1604 | ||
| 1525 | info.si_signo = sig; | 1605 | info.si_signo = sig; |
| @@ -1558,7 +1638,7 @@ int do_notify_parent(struct task_struct *tsk, int sig) | |||
| 1558 | 1638 | ||
| 1559 | psig = tsk->parent->sighand; | 1639 | psig = tsk->parent->sighand; |
| 1560 | spin_lock_irqsave(&psig->siglock, flags); | 1640 | spin_lock_irqsave(&psig->siglock, flags); |
| 1561 | if (!task_ptrace(tsk) && sig == SIGCHLD && | 1641 | if (!tsk->ptrace && sig == SIGCHLD && |
| 1562 | (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN || | 1642 | (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN || |
| 1563 | (psig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT))) { | 1643 | (psig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT))) { |
| 1564 | /* | 1644 | /* |
| @@ -1576,16 +1656,16 @@ int do_notify_parent(struct task_struct *tsk, int sig) | |||
| 1576 | * is implementation-defined: we do (if you don't want | 1656 | * is implementation-defined: we do (if you don't want |
| 1577 | * it, just use SIG_IGN instead). | 1657 | * it, just use SIG_IGN instead). |
| 1578 | */ | 1658 | */ |
| 1579 | ret = tsk->exit_signal = -1; | 1659 | autoreap = true; |
| 1580 | if (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN) | 1660 | if (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN) |
| 1581 | sig = -1; | 1661 | sig = 0; |
| 1582 | } | 1662 | } |
| 1583 | if (valid_signal(sig) && sig > 0) | 1663 | if (valid_signal(sig) && sig) |
| 1584 | __group_send_sig_info(sig, &info, tsk->parent); | 1664 | __group_send_sig_info(sig, &info, tsk->parent); |
| 1585 | __wake_up_parent(tsk, tsk->parent); | 1665 | __wake_up_parent(tsk, tsk->parent); |
| 1586 | spin_unlock_irqrestore(&psig->siglock, flags); | 1666 | spin_unlock_irqrestore(&psig->siglock, flags); |
| 1587 | 1667 | ||
| 1588 | return ret; | 1668 | return autoreap; |
| 1589 | } | 1669 | } |
| 1590 | 1670 | ||
| 1591 | /** | 1671 | /** |
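do_notify_parent() now reports "the parent ignored us, switch to self-reaping" as a plain boolean instead of smuggling it through a -1 return and a mangled exit_signal. Its consumer (exit_notify() in kernel/exit.c) is outside this excerpt, so the following is only a stand-alone model of how the decision is meant to be used; the stub names are not the kernel's:

    #include <stdbool.h>
    #include <stdio.h>

    enum exit_state { EXIT_ZOMBIE, EXIT_DEAD };

    /* Stub standing in for do_notify_parent(): true means the parent ignores
     * SIGCHLD or uses SA_NOCLDWAIT, so the child should self-reap. */
    static bool notify_parent(bool parent_ignores_sigchld)
    {
        return parent_ignores_sigchld;
    }

    int main(void)
    {
        bool autoreap = notify_parent(true);

        /* Assumed shape of the exit path after this change: the state is
         * picked from the boolean rather than from exit_signal == -1. */
        enum exit_state state = autoreap ? EXIT_DEAD : EXIT_ZOMBIE;

        printf("%s\n", state == EXIT_DEAD ? "self-reaping" : "zombie until wait()");
        return 0;
    }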
| @@ -1658,7 +1738,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, | |||
| 1658 | 1738 | ||
| 1659 | static inline int may_ptrace_stop(void) | 1739 | static inline int may_ptrace_stop(void) |
| 1660 | { | 1740 | { |
| 1661 | if (!likely(task_ptrace(current))) | 1741 | if (!likely(current->ptrace)) |
| 1662 | return 0; | 1742 | return 0; |
| 1663 | /* | 1743 | /* |
| 1664 | * Are we in the middle of do_coredump? | 1744 | * Are we in the middle of do_coredump? |
| @@ -1687,15 +1767,6 @@ static int sigkill_pending(struct task_struct *tsk) | |||
| 1687 | } | 1767 | } |
| 1688 | 1768 | ||
| 1689 | /* | 1769 | /* |
| 1690 | * Test whether the target task of the usual cldstop notification - the | ||
| 1691 | * real_parent of @child - is in the same group as the ptracer. | ||
| 1692 | */ | ||
| 1693 | static bool real_parent_is_ptracer(struct task_struct *child) | ||
| 1694 | { | ||
| 1695 | return same_thread_group(child->parent, child->real_parent); | ||
| 1696 | } | ||
| 1697 | |||
| 1698 | /* | ||
| 1699 | * This must be called with current->sighand->siglock held. | 1770 | * This must be called with current->sighand->siglock held. |
| 1700 | * | 1771 | * |
| 1701 | * This should be the path for all ptrace stops. | 1772 | * This should be the path for all ptrace stops. |
| @@ -1732,31 +1803,34 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info) | |||
| 1732 | } | 1803 | } |
| 1733 | 1804 | ||
| 1734 | /* | 1805 | /* |
| 1735 | * If @why is CLD_STOPPED, we're trapping to participate in a group | 1806 | * We're committing to trapping. TRACED should be visible before |
| 1736 | * stop. Do the bookkeeping. Note that if SIGCONT was delievered | 1807 | * TRAPPING is cleared; otherwise, the tracer might fail do_wait(). |
| 1737 | * while siglock was released for the arch hook, PENDING could be | 1808 | * Also, transition to TRACED and updates to ->jobctl should be |
| 1738 | * clear now. We act as if SIGCONT is received after TASK_TRACED | 1809 | * atomic with respect to siglock and should be done after the arch |
| 1739 | * is entered - ignore it. | 1810 | * hook as siglock is released and regrabbed across it. |
| 1740 | */ | 1811 | */ |
| 1741 | if (why == CLD_STOPPED && (current->group_stop & GROUP_STOP_PENDING)) | 1812 | set_current_state(TASK_TRACED); |
| 1742 | gstop_done = task_participate_group_stop(current); | ||
| 1743 | 1813 | ||
| 1744 | current->last_siginfo = info; | 1814 | current->last_siginfo = info; |
| 1745 | current->exit_code = exit_code; | 1815 | current->exit_code = exit_code; |
| 1746 | 1816 | ||
| 1747 | /* | 1817 | /* |
| 1748 | * TRACED should be visible before TRAPPING is cleared; otherwise, | 1818 | * If @why is CLD_STOPPED, we're trapping to participate in a group |
| 1749 | * the tracer might fail do_wait(). | 1819 | * stop. Do the bookkeeping. Note that if SIGCONT was delievered |
| 1820 | * across siglock relocks since INTERRUPT was scheduled, PENDING | ||
| 1821 | * could be clear now. We act as if SIGCONT is received after | ||
| 1822 | * TASK_TRACED is entered - ignore it. | ||
| 1750 | */ | 1823 | */ |
| 1751 | set_current_state(TASK_TRACED); | 1824 | if (why == CLD_STOPPED && (current->jobctl & JOBCTL_STOP_PENDING)) |
| 1825 | gstop_done = task_participate_group_stop(current); | ||
| 1752 | 1826 | ||
| 1753 | /* | 1827 | /* any trap clears pending STOP trap, STOP trap clears NOTIFY */ |
| 1754 | * We're committing to trapping. Clearing GROUP_STOP_TRAPPING and | 1828 | task_clear_jobctl_pending(current, JOBCTL_TRAP_STOP); |
| 1755 | * transition to TASK_TRACED should be atomic with respect to | 1829 | if (info && info->si_code >> 8 == PTRACE_EVENT_STOP) |
| 1756 | * siglock. This hsould be done after the arch hook as siglock is | 1830 | task_clear_jobctl_pending(current, JOBCTL_TRAP_NOTIFY); |
| 1757 | * released and regrabbed across it. | 1831 | |
| 1758 | */ | 1832 | /* entering a trap, clear TRAPPING */ |
| 1759 | task_clear_group_stop_trapping(current); | 1833 | task_clear_jobctl_trapping(current); |
| 1760 | 1834 | ||
| 1761 | spin_unlock_irq(¤t->sighand->siglock); | 1835 | spin_unlock_irq(¤t->sighand->siglock); |
| 1762 | read_lock(&tasklist_lock); | 1836 | read_lock(&tasklist_lock); |
| @@ -1772,7 +1846,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info) | |||
| 1772 | * separately unless they're gonna be duplicates. | 1846 | * separately unless they're gonna be duplicates. |
| 1773 | */ | 1847 | */ |
| 1774 | do_notify_parent_cldstop(current, true, why); | 1848 | do_notify_parent_cldstop(current, true, why); |
| 1775 | if (gstop_done && !real_parent_is_ptracer(current)) | 1849 | if (gstop_done && ptrace_reparented(current)) |
| 1776 | do_notify_parent_cldstop(current, false, why); | 1850 | do_notify_parent_cldstop(current, false, why); |
| 1777 | 1851 | ||
| 1778 | /* | 1852 | /* |
| @@ -1792,9 +1866,9 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info) | |||
| 1792 | * | 1866 | * |
| 1793 | * If @gstop_done, the ptracer went away between group stop | 1867 | * If @gstop_done, the ptracer went away between group stop |
| 1794 | * completion and here. During detach, it would have set | 1868 | * completion and here. During detach, it would have set |
| 1795 | * GROUP_STOP_PENDING on us and we'll re-enter TASK_STOPPED | 1869 | * JOBCTL_STOP_PENDING on us and we'll re-enter |
| 1796 | * in do_signal_stop() on return, so notifying the real | 1870 | * TASK_STOPPED in do_signal_stop() on return, so notifying |
| 1797 | * parent of the group stop completion is enough. | 1871 | * the real parent of the group stop completion is enough. |
| 1798 | */ | 1872 | */ |
| 1799 | if (gstop_done) | 1873 | if (gstop_done) |
| 1800 | do_notify_parent_cldstop(current, false, why); | 1874 | do_notify_parent_cldstop(current, false, why); |
| @@ -1820,6 +1894,9 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info) | |||
| 1820 | spin_lock_irq(¤t->sighand->siglock); | 1894 | spin_lock_irq(¤t->sighand->siglock); |
| 1821 | current->last_siginfo = NULL; | 1895 | current->last_siginfo = NULL; |
| 1822 | 1896 | ||
| 1897 | /* LISTENING can be set only during STOP traps, clear it */ | ||
| 1898 | current->jobctl &= ~JOBCTL_LISTENING; | ||
| 1899 | |||
| 1823 | /* | 1900 | /* |
| 1824 | * Queued signals ignored us while we were stopped for tracing. | 1901 | * Queued signals ignored us while we were stopped for tracing. |
| 1825 | * So check for any that we should take before resuming user mode. | 1902 | * So check for any that we should take before resuming user mode. |
| @@ -1828,44 +1905,66 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info) | |||
| 1828 | recalc_sigpending_tsk(current); | 1905 | recalc_sigpending_tsk(current); |
| 1829 | } | 1906 | } |
| 1830 | 1907 | ||
| 1831 | void ptrace_notify(int exit_code) | 1908 | static void ptrace_do_notify(int signr, int exit_code, int why) |
| 1832 | { | 1909 | { |
| 1833 | siginfo_t info; | 1910 | siginfo_t info; |
| 1834 | 1911 | ||
| 1835 | BUG_ON((exit_code & (0x7f | ~0xffff)) != SIGTRAP); | ||
| 1836 | |||
| 1837 | memset(&info, 0, sizeof info); | 1912 | memset(&info, 0, sizeof info); |
| 1838 | info.si_signo = SIGTRAP; | 1913 | info.si_signo = signr; |
| 1839 | info.si_code = exit_code; | 1914 | info.si_code = exit_code; |
| 1840 | info.si_pid = task_pid_vnr(current); | 1915 | info.si_pid = task_pid_vnr(current); |
| 1841 | info.si_uid = current_uid(); | 1916 | info.si_uid = current_uid(); |
| 1842 | 1917 | ||
| 1843 | /* Let the debugger run. */ | 1918 | /* Let the debugger run. */ |
| 1919 | ptrace_stop(exit_code, why, 1, &info); | ||
| 1920 | } | ||
| 1921 | |||
| 1922 | void ptrace_notify(int exit_code) | ||
| 1923 | { | ||
| 1924 | BUG_ON((exit_code & (0x7f | ~0xffff)) != SIGTRAP); | ||
| 1925 | |||
| 1844 | spin_lock_irq(¤t->sighand->siglock); | 1926 | spin_lock_irq(¤t->sighand->siglock); |
| 1845 | ptrace_stop(exit_code, CLD_TRAPPED, 1, &info); | 1927 | ptrace_do_notify(SIGTRAP, exit_code, CLD_TRAPPED); |
| 1846 | spin_unlock_irq(¤t->sighand->siglock); | 1928 | spin_unlock_irq(¤t->sighand->siglock); |
| 1847 | } | 1929 | } |
| 1848 | 1930 | ||
| 1849 | /* | 1931 | /** |
| 1850 | * This performs the stopping for SIGSTOP and other stop signals. | 1932 | * do_signal_stop - handle group stop for SIGSTOP and other stop signals |
| 1851 | * We have to stop all threads in the thread group. | 1933 | * @signr: signr causing group stop if initiating |
| 1852 | * Returns non-zero if we've actually stopped and released the siglock. | 1934 | * |
| 1853 | * Returns zero if we didn't stop and still hold the siglock. | 1935 | * If %JOBCTL_STOP_PENDING is not set yet, initiate group stop with @signr |
| 1936 | * and participate in it. If already set, participate in the existing | ||
| 1937 | * group stop. If participated in a group stop (and thus slept), %true is | ||
| 1938 | * returned with siglock released. | ||
| 1939 | * | ||
| 1940 | * If ptraced, this function doesn't handle stop itself. Instead, | ||
| 1941 | * %JOBCTL_TRAP_STOP is scheduled and %false is returned with siglock | ||
| 1942 | * untouched. The caller must ensure that INTERRUPT trap handling takes | ||
| 1943 | * places afterwards. | ||
| 1944 | * | ||
| 1945 | * CONTEXT: | ||
| 1946 | * Must be called with @current->sighand->siglock held, which is released | ||
| 1947 | * on %true return. | ||
| 1948 | * | ||
| 1949 | * RETURNS: | ||
| 1950 | * %false if group stop is already cancelled or ptrace trap is scheduled. | ||
| 1951 | * %true if participated in group stop. | ||
| 1854 | */ | 1952 | */ |
| 1855 | static int do_signal_stop(int signr) | 1953 | static bool do_signal_stop(int signr) |
| 1954 | __releases(¤t->sighand->siglock) | ||
| 1856 | { | 1955 | { |
| 1857 | struct signal_struct *sig = current->signal; | 1956 | struct signal_struct *sig = current->signal; |
| 1858 | 1957 | ||
| 1859 | if (!(current->group_stop & GROUP_STOP_PENDING)) { | 1958 | if (!(current->jobctl & JOBCTL_STOP_PENDING)) { |
| 1860 | unsigned int gstop = GROUP_STOP_PENDING | GROUP_STOP_CONSUME; | 1959 | unsigned int gstop = JOBCTL_STOP_PENDING | JOBCTL_STOP_CONSUME; |
| 1861 | struct task_struct *t; | 1960 | struct task_struct *t; |
| 1862 | 1961 | ||
| 1863 | /* signr will be recorded in task->group_stop for retries */ | 1962 | /* signr will be recorded in task->jobctl for retries */ |
| 1864 | WARN_ON_ONCE(signr & ~GROUP_STOP_SIGMASK); | 1963 | WARN_ON_ONCE(signr & ~JOBCTL_STOP_SIGMASK); |
| 1865 | 1964 | ||
| 1866 | if (!likely(current->group_stop & GROUP_STOP_DEQUEUED) || | 1965 | if (!likely(current->jobctl & JOBCTL_STOP_DEQUEUED) || |
| 1867 | unlikely(signal_group_exit(sig))) | 1966 | unlikely(signal_group_exit(sig))) |
| 1868 | return 0; | 1967 | return false; |
| 1869 | /* | 1968 | /* |
| 1870 | * There is no group stop already in progress. We must | 1969 | * There is no group stop already in progress. We must |
| 1871 | * initiate one now. | 1970 | * initiate one now. |
| @@ -1888,28 +1987,32 @@ static int do_signal_stop(int signr) | |||
| 1888 | if (!(sig->flags & SIGNAL_STOP_STOPPED)) | 1987 | if (!(sig->flags & SIGNAL_STOP_STOPPED)) |
| 1889 | sig->group_exit_code = signr; | 1988 | sig->group_exit_code = signr; |
| 1890 | else | 1989 | else |
| 1891 | WARN_ON_ONCE(!task_ptrace(current)); | 1990 | WARN_ON_ONCE(!current->ptrace); |
| 1991 | |||
| 1992 | sig->group_stop_count = 0; | ||
| 1993 | |||
| 1994 | if (task_set_jobctl_pending(current, signr | gstop)) | ||
| 1995 | sig->group_stop_count++; | ||
| 1892 | 1996 | ||
| 1893 | current->group_stop &= ~GROUP_STOP_SIGMASK; | ||
| 1894 | current->group_stop |= signr | gstop; | ||
| 1895 | sig->group_stop_count = 1; | ||
| 1896 | for (t = next_thread(current); t != current; | 1997 | for (t = next_thread(current); t != current; |
| 1897 | t = next_thread(t)) { | 1998 | t = next_thread(t)) { |
| 1898 | t->group_stop &= ~GROUP_STOP_SIGMASK; | ||
| 1899 | /* | 1999 | /* |
| 1900 | * Setting state to TASK_STOPPED for a group | 2000 | * Setting state to TASK_STOPPED for a group |
| 1901 | * stop is always done with the siglock held, | 2001 | * stop is always done with the siglock held, |
| 1902 | * so this check has no races. | 2002 | * so this check has no races. |
| 1903 | */ | 2003 | */ |
| 1904 | if (!(t->flags & PF_EXITING) && !task_is_stopped(t)) { | 2004 | if (!task_is_stopped(t) && |
| 1905 | t->group_stop |= signr | gstop; | 2005 | task_set_jobctl_pending(t, signr | gstop)) { |
| 1906 | sig->group_stop_count++; | 2006 | sig->group_stop_count++; |
| 1907 | signal_wake_up(t, 0); | 2007 | if (likely(!(t->ptrace & PT_SEIZED))) |
| 2008 | signal_wake_up(t, 0); | ||
| 2009 | else | ||
| 2010 | ptrace_trap_notify(t); | ||
| 1908 | } | 2011 | } |
| 1909 | } | 2012 | } |
| 1910 | } | 2013 | } |
| 1911 | retry: | 2014 | |
| 1912 | if (likely(!task_ptrace(current))) { | 2015 | if (likely(!current->ptrace)) { |
| 1913 | int notify = 0; | 2016 | int notify = 0; |
| 1914 | 2017 | ||
| 1915 | /* | 2018 | /* |
| @@ -1940,43 +2043,65 @@ retry: | |||
| 1940 | 2043 | ||
| 1941 | /* Now we don't run again until woken by SIGCONT or SIGKILL */ | 2044 | /* Now we don't run again until woken by SIGCONT or SIGKILL */ |
| 1942 | schedule(); | 2045 | schedule(); |
| 1943 | 2046 | return true; | |
| 1944 | spin_lock_irq(¤t->sighand->siglock); | ||
| 1945 | } else { | 2047 | } else { |
| 1946 | ptrace_stop(current->group_stop & GROUP_STOP_SIGMASK, | 2048 | /* |
| 1947 | CLD_STOPPED, 0, NULL); | 2049 | * While ptraced, group stop is handled by STOP trap. |
| 1948 | current->exit_code = 0; | 2050 | * Schedule it and let the caller deal with it. |
| 2051 | */ | ||
| 2052 | task_set_jobctl_pending(current, JOBCTL_TRAP_STOP); | ||
| 2053 | return false; | ||
| 1949 | } | 2054 | } |
| 2055 | } | ||
| 1950 | 2056 | ||
| 1951 | /* | 2057 | /** |
| 1952 | * GROUP_STOP_PENDING could be set if another group stop has | 2058 | * do_jobctl_trap - take care of ptrace jobctl traps |
| 1953 | * started since being woken up or ptrace wants us to transit | 2059 | * |
| 1954 | * between TASK_STOPPED and TRACED. Retry group stop. | 2060 | * When PT_SEIZED, it's used for both group stop and explicit |
| 1955 | */ | 2061 | * SEIZE/INTERRUPT traps. Both generate PTRACE_EVENT_STOP trap with |
| 1956 | if (current->group_stop & GROUP_STOP_PENDING) { | 2062 | * accompanying siginfo. If stopped, lower eight bits of exit_code contain |
| 1957 | WARN_ON_ONCE(!(current->group_stop & GROUP_STOP_SIGMASK)); | 2063 | * the stop signal; otherwise, %SIGTRAP. |
| 1958 | goto retry; | 2064 | * |
| 2065 | * When !PT_SEIZED, it's used only for group stop trap with stop signal | ||
| 2066 | * number as exit_code and no siginfo. | ||
| 2067 | * | ||
| 2068 | * CONTEXT: | ||
| 2069 | * Must be called with @current->sighand->siglock held, which may be | ||
| 2070 | * released and re-acquired before returning with intervening sleep. | ||
| 2071 | */ | ||
| 2072 | static void do_jobctl_trap(void) | ||
| 2073 | { | ||
| 2074 | struct signal_struct *signal = current->signal; | ||
| 2075 | int signr = current->jobctl & JOBCTL_STOP_SIGMASK; | ||
| 2076 | |||
| 2077 | if (current->ptrace & PT_SEIZED) { | ||
| 2078 | if (!signal->group_stop_count && | ||
| 2079 | !(signal->flags & SIGNAL_STOP_STOPPED)) | ||
| 2080 | signr = SIGTRAP; | ||
| 2081 | WARN_ON_ONCE(!signr); | ||
| 2082 | ptrace_do_notify(signr, signr | (PTRACE_EVENT_STOP << 8), | ||
| 2083 | CLD_STOPPED); | ||
| 2084 | } else { | ||
| 2085 | WARN_ON_ONCE(!signr); | ||
| 2086 | ptrace_stop(signr, CLD_STOPPED, 0, NULL); | ||
| 2087 | current->exit_code = 0; | ||
| 1959 | } | 2088 | } |
| 1960 | |||
| 1961 | /* PTRACE_ATTACH might have raced with task killing, clear trapping */ | ||
| 1962 | task_clear_group_stop_trapping(current); | ||
| 1963 | |||
| 1964 | spin_unlock_irq(¤t->sighand->siglock); | ||
| 1965 | |||
| 1966 | tracehook_finish_jctl(); | ||
| 1967 | |||
| 1968 | return 1; | ||
| 1969 | } | 2089 | } |
| 1970 | 2090 | ||
| 1971 | static int ptrace_signal(int signr, siginfo_t *info, | 2091 | static int ptrace_signal(int signr, siginfo_t *info, |
| 1972 | struct pt_regs *regs, void *cookie) | 2092 | struct pt_regs *regs, void *cookie) |
| 1973 | { | 2093 | { |
| 1974 | if (!task_ptrace(current)) | ||
| 1975 | return signr; | ||
| 1976 | |||
| 1977 | ptrace_signal_deliver(regs, cookie); | 2094 | ptrace_signal_deliver(regs, cookie); |
| 1978 | 2095 | /* | |
| 1979 | /* Let the debugger run. */ | 2096 | * We do not check sig_kernel_stop(signr) but set this marker |
| 2097 | * unconditionally because we do not know whether debugger will | ||
| 2098 | * change signr. This flag has no meaning unless we are going | ||
| 2099 | * to stop after return from ptrace_stop(). In this case it will | ||
| 2100 | * be checked in do_signal_stop(), we should only stop if it was | ||
| 2101 | * not cleared by SIGCONT while we were sleeping. See also the | ||
| 2102 | * comment in dequeue_signal(). | ||
| 2103 | */ | ||
| 2104 | current->jobctl |= JOBCTL_STOP_DEQUEUED; | ||
| 1980 | ptrace_stop(signr, CLD_TRAPPED, 0, info); | 2105 | ptrace_stop(signr, CLD_TRAPPED, 0, info); |
| 1981 | 2106 | ||
| 1982 | /* We're back. Did the debugger cancel the sig? */ | 2107 | /* We're back. Did the debugger cancel the sig? */ |
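Note: the hunks above replace the old per-task group_stop word with a jobctl word that packs the pending stop signal into the low bits and keeps state flags (stop pending, trap pending, "dequeued" marker) in the bits above it. As a rough stand-alone illustration of that packing scheme — the constants and helper names below are invented for the example, not the kernel's actual definitions — consider:

  #include <assert.h>
  #include <stdio.h>

  /* Illustrative layout: low 16 bits carry a signal number,
   * higher bits are independent state flags. */
  #define STOP_SIGMASK     0xffffUL
  #define STOP_DEQUEUED    (1UL << 16)
  #define STOP_PENDING     (1UL << 17)
  #define TRAP_STOP        (1UL << 18)

  static unsigned long jobctl;            /* would live in the task struct */

  static void set_stop_pending(int signr)
  {
          jobctl &= ~STOP_SIGMASK;        /* replace any old signal number */
          jobctl |= (unsigned long)signr | STOP_PENDING;
  }

  static int pending_stop_signal(void)
  {
          return (jobctl & STOP_PENDING) ? (int)(jobctl & STOP_SIGMASK) : 0;
  }

  int main(void)
  {
          set_stop_pending(20);           /* e.g. SIGTSTP on most arches */
          jobctl |= TRAP_STOP;            /* a tracer asks for a trap too */
          assert(pending_stop_signal() == 20);
          printf("jobctl = %#lx, stop signal %d\n", jobctl, pending_stop_signal());
          return 0;
  }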
| @@ -2032,7 +2157,6 @@ relock: | |||
| 2032 | * the CLD_ si_code into SIGNAL_CLD_MASK bits. | 2157 | * the CLD_ si_code into SIGNAL_CLD_MASK bits. |
| 2033 | */ | 2158 | */ |
| 2034 | if (unlikely(signal->flags & SIGNAL_CLD_MASK)) { | 2159 | if (unlikely(signal->flags & SIGNAL_CLD_MASK)) { |
| 2035 | struct task_struct *leader; | ||
| 2036 | int why; | 2160 | int why; |
| 2037 | 2161 | ||
| 2038 | if (signal->flags & SIGNAL_CLD_CONTINUED) | 2162 | if (signal->flags & SIGNAL_CLD_CONTINUED) |
| @@ -2053,13 +2177,11 @@ relock: | |||
| 2053 | * a duplicate. | 2177 | * a duplicate. |
| 2054 | */ | 2178 | */ |
| 2055 | read_lock(&tasklist_lock); | 2179 | read_lock(&tasklist_lock); |
| 2056 | |||
| 2057 | do_notify_parent_cldstop(current, false, why); | 2180 | do_notify_parent_cldstop(current, false, why); |
| 2058 | 2181 | ||
| 2059 | leader = current->group_leader; | 2182 | if (ptrace_reparented(current->group_leader)) |
| 2060 | if (task_ptrace(leader) && !real_parent_is_ptracer(leader)) | 2183 | do_notify_parent_cldstop(current->group_leader, |
| 2061 | do_notify_parent_cldstop(leader, true, why); | 2184 | true, why); |
| 2062 | |||
| 2063 | read_unlock(&tasklist_lock); | 2185 | read_unlock(&tasklist_lock); |
| 2064 | 2186 | ||
| 2065 | goto relock; | 2187 | goto relock; |
| @@ -2067,37 +2189,31 @@ relock: | |||
| 2067 | 2189 | ||
| 2068 | for (;;) { | 2190 | for (;;) { |
| 2069 | struct k_sigaction *ka; | 2191 | struct k_sigaction *ka; |
| 2070 | /* | 2192 | |
| 2071 | * Tracing can induce an artificial signal and choose sigaction. | 2193 | if (unlikely(current->jobctl & JOBCTL_STOP_PENDING) && |
| 2072 | * The return value in @signr determines the default action, | 2194 | do_signal_stop(0)) |
| 2073 | * but @info->si_signo is the signal number we will report. | ||
| 2074 | */ | ||
| 2075 | signr = tracehook_get_signal(current, regs, info, return_ka); | ||
| 2076 | if (unlikely(signr < 0)) | ||
| 2077 | goto relock; | 2195 | goto relock; |
| 2078 | if (unlikely(signr != 0)) | ||
| 2079 | ka = return_ka; | ||
| 2080 | else { | ||
| 2081 | if (unlikely(current->group_stop & | ||
| 2082 | GROUP_STOP_PENDING) && do_signal_stop(0)) | ||
| 2083 | goto relock; | ||
| 2084 | 2196 | ||
| 2085 | signr = dequeue_signal(current, ¤t->blocked, | 2197 | if (unlikely(current->jobctl & JOBCTL_TRAP_MASK)) { |
| 2086 | info); | 2198 | do_jobctl_trap(); |
| 2199 | spin_unlock_irq(&sighand->siglock); | ||
| 2200 | goto relock; | ||
| 2201 | } | ||
| 2087 | 2202 | ||
| 2088 | if (!signr) | 2203 | signr = dequeue_signal(current, ¤t->blocked, info); |
| 2089 | break; /* will return 0 */ | ||
| 2090 | 2204 | ||
| 2091 | if (signr != SIGKILL) { | 2205 | if (!signr) |
| 2092 | signr = ptrace_signal(signr, info, | 2206 | break; /* will return 0 */ |
| 2093 | regs, cookie); | ||
| 2094 | if (!signr) | ||
| 2095 | continue; | ||
| 2096 | } | ||
| 2097 | 2207 | ||
| 2098 | ka = &sighand->action[signr-1]; | 2208 | if (unlikely(current->ptrace) && signr != SIGKILL) { |
| 2209 | signr = ptrace_signal(signr, info, | ||
| 2210 | regs, cookie); | ||
| 2211 | if (!signr) | ||
| 2212 | continue; | ||
| 2099 | } | 2213 | } |
| 2100 | 2214 | ||
| 2215 | ka = &sighand->action[signr-1]; | ||
| 2216 | |||
| 2101 | /* Trace actually delivered signals. */ | 2217 | /* Trace actually delivered signals. */ |
| 2102 | trace_signal_deliver(signr, info, ka); | 2218 | trace_signal_deliver(signr, info, ka); |
| 2103 | 2219 | ||
| @@ -2253,7 +2369,7 @@ void exit_signals(struct task_struct *tsk) | |||
| 2253 | signotset(&unblocked); | 2369 | signotset(&unblocked); |
| 2254 | retarget_shared_pending(tsk, &unblocked); | 2370 | retarget_shared_pending(tsk, &unblocked); |
| 2255 | 2371 | ||
| 2256 | if (unlikely(tsk->group_stop & GROUP_STOP_PENDING) && | 2372 | if (unlikely(tsk->jobctl & JOBCTL_STOP_PENDING) && |
| 2257 | task_participate_group_stop(tsk)) | 2373 | task_participate_group_stop(tsk)) |
| 2258 | group_stop = CLD_STOPPED; | 2374 | group_stop = CLD_STOPPED; |
| 2259 | out: | 2375 | out: |
| @@ -2365,7 +2481,7 @@ int sigprocmask(int how, sigset_t *set, sigset_t *oldset) | |||
| 2365 | /** | 2481 | /** |
| 2366 | * sys_rt_sigprocmask - change the list of currently blocked signals | 2482 | * sys_rt_sigprocmask - change the list of currently blocked signals |
| 2367 | * @how: whether to add, remove, or set signals | 2483 | * @how: whether to add, remove, or set signals |
| 2368 | * @set: stores pending signals | 2484 | * @nset: stores pending signals |
| 2369 | * @oset: previous value of signal mask if non-null | 2485 | * @oset: previous value of signal mask if non-null |
| 2370 | * @sigsetsize: size of sigset_t type | 2486 | * @sigsetsize: size of sigset_t type |
| 2371 | */ | 2487 | */ |
| @@ -2986,15 +3102,11 @@ SYSCALL_DEFINE0(sgetmask) | |||
| 2986 | 3102 | ||
| 2987 | SYSCALL_DEFINE1(ssetmask, int, newmask) | 3103 | SYSCALL_DEFINE1(ssetmask, int, newmask) |
| 2988 | { | 3104 | { |
| 2989 | int old; | 3105 | int old = current->blocked.sig[0]; |
| 2990 | 3106 | sigset_t newset; | |
| 2991 | spin_lock_irq(¤t->sighand->siglock); | ||
| 2992 | old = current->blocked.sig[0]; | ||
| 2993 | 3107 | ||
| 2994 | siginitset(¤t->blocked, newmask & ~(sigmask(SIGKILL)| | 3108 | siginitset(&newset, newmask & ~(sigmask(SIGKILL) | sigmask(SIGSTOP))); |
| 2995 | sigmask(SIGSTOP))); | 3109 | set_current_blocked(&newset); |
| 2996 | recalc_sigpending(); | ||
| 2997 | spin_unlock_irq(¤t->sighand->siglock); | ||
| 2998 | 3110 | ||
| 2999 | return old; | 3111 | return old; |
| 3000 | } | 3112 | } |
| @@ -3051,11 +3163,8 @@ SYSCALL_DEFINE2(rt_sigsuspend, sigset_t __user *, unewset, size_t, sigsetsize) | |||
| 3051 | return -EFAULT; | 3163 | return -EFAULT; |
| 3052 | sigdelsetmask(&newset, sigmask(SIGKILL)|sigmask(SIGSTOP)); | 3164 | sigdelsetmask(&newset, sigmask(SIGKILL)|sigmask(SIGSTOP)); |
| 3053 | 3165 | ||
| 3054 | spin_lock_irq(¤t->sighand->siglock); | ||
| 3055 | current->saved_sigmask = current->blocked; | 3166 | current->saved_sigmask = current->blocked; |
| 3056 | current->blocked = newset; | 3167 | set_current_blocked(&newset); |
| 3057 | recalc_sigpending(); | ||
| 3058 | spin_unlock_irq(¤t->sighand->siglock); | ||
| 3059 | 3168 | ||
| 3060 | current->state = TASK_INTERRUPTIBLE; | 3169 | current->state = TASK_INTERRUPTIBLE; |
| 3061 | schedule(); | 3170 | schedule(); |
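Note: the last two hunks route mask updates through set_current_blocked() instead of open-coding the siglock/recalc_sigpending dance. The user-visible contract is unchanged; the classic consumer of this machinery is the "block, check, sigsuspend" pattern, sketched here in plain POSIX C (nothing kernel-internal assumed):

  #include <signal.h>
  #include <stdio.h>
  #include <unistd.h>

  static volatile sig_atomic_t got_usr1;

  static void on_usr1(int sig) { (void)sig; got_usr1 = 1; }

  int main(void)
  {
          struct sigaction sa = { .sa_handler = on_usr1 };
          sigset_t block, old;

          sigemptyset(&sa.sa_mask);
          sigaction(SIGUSR1, &sa, NULL);

          /* Block SIGUSR1 so it cannot fire between the test and the wait. */
          sigemptyset(&block);
          sigaddset(&block, SIGUSR1);
          sigprocmask(SIG_BLOCK, &block, &old);

          printf("kill -USR1 %d to continue\n", getpid());
          while (!got_usr1)
                  sigsuspend(&old);       /* atomically unblock and sleep */

          sigprocmask(SIG_SETMASK, &old, NULL);
          puts("got SIGUSR1");
          return 0;
  }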
diff --git a/kernel/smp.c b/kernel/smp.c index 73a195193558..fb67dfa8394e 100644 --- a/kernel/smp.c +++ b/kernel/smp.c | |||
| @@ -74,7 +74,7 @@ static struct notifier_block __cpuinitdata hotplug_cfd_notifier = { | |||
| 74 | .notifier_call = hotplug_cfd, | 74 | .notifier_call = hotplug_cfd, |
| 75 | }; | 75 | }; |
| 76 | 76 | ||
| 77 | static int __cpuinit init_call_single_data(void) | 77 | void __init call_function_init(void) |
| 78 | { | 78 | { |
| 79 | void *cpu = (void *)(long)smp_processor_id(); | 79 | void *cpu = (void *)(long)smp_processor_id(); |
| 80 | int i; | 80 | int i; |
| @@ -88,10 +88,7 @@ static int __cpuinit init_call_single_data(void) | |||
| 88 | 88 | ||
| 89 | hotplug_cfd(&hotplug_cfd_notifier, CPU_UP_PREPARE, cpu); | 89 | hotplug_cfd(&hotplug_cfd_notifier, CPU_UP_PREPARE, cpu); |
| 90 | register_cpu_notifier(&hotplug_cfd_notifier); | 90 | register_cpu_notifier(&hotplug_cfd_notifier); |
| 91 | |||
| 92 | return 0; | ||
| 93 | } | 91 | } |
| 94 | early_initcall(init_call_single_data); | ||
| 95 | 92 | ||
| 96 | /* | 93 | /* |
| 97 | * csd_lock/csd_unlock used to serialize access to per-cpu csd resources | 94 | * csd_lock/csd_unlock used to serialize access to per-cpu csd resources |
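Note: this hunk drops the early_initcall() registration and exposes call_function_init() as a plain __init function, presumably so boot code can call it at an exact point rather than "sometime" during the early-initcall stage. A toy model of the two styles (names invented for the example):

  #include <stdio.h>

  static void call_function_init(void) { puts("call_function_init"); }
  static void other_early_init(void)   { puts("other_early_init"); }

  /* Initcall-style: collected in a table and run together at one stage. */
  static void (*const early_initcalls[])(void) = { other_early_init };

  int main(void)
  {
          /* Direct-call style (what the patch switches to): the caller
           * decides exactly when this runs relative to everything else. */
          call_function_init();

          for (unsigned i = 0; i < sizeof(early_initcalls) / sizeof(early_initcalls[0]); i++)
                  early_initcalls[i]();
          return 0;
  }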
diff --git a/kernel/softirq.c b/kernel/softirq.c index 13960170cad4..fca82c32042b 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
| @@ -58,7 +58,7 @@ DEFINE_PER_CPU(struct task_struct *, ksoftirqd); | |||
| 58 | 58 | ||
| 59 | char *softirq_to_name[NR_SOFTIRQS] = { | 59 | char *softirq_to_name[NR_SOFTIRQS] = { |
| 60 | "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL", | 60 | "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL", |
| 61 | "TASKLET", "SCHED", "HRTIMER" | 61 | "TASKLET", "SCHED", "HRTIMER", "RCU" |
| 62 | }; | 62 | }; |
| 63 | 63 | ||
| 64 | /* | 64 | /* |
| @@ -315,16 +315,24 @@ static inline void invoke_softirq(void) | |||
| 315 | { | 315 | { |
| 316 | if (!force_irqthreads) | 316 | if (!force_irqthreads) |
| 317 | __do_softirq(); | 317 | __do_softirq(); |
| 318 | else | 318 | else { |
| 319 | __local_bh_disable((unsigned long)__builtin_return_address(0), | ||
| 320 | SOFTIRQ_OFFSET); | ||
| 319 | wakeup_softirqd(); | 321 | wakeup_softirqd(); |
| 322 | __local_bh_enable(SOFTIRQ_OFFSET); | ||
| 323 | } | ||
| 320 | } | 324 | } |
| 321 | #else | 325 | #else |
| 322 | static inline void invoke_softirq(void) | 326 | static inline void invoke_softirq(void) |
| 323 | { | 327 | { |
| 324 | if (!force_irqthreads) | 328 | if (!force_irqthreads) |
| 325 | do_softirq(); | 329 | do_softirq(); |
| 326 | else | 330 | else { |
| 331 | __local_bh_disable((unsigned long)__builtin_return_address(0), | ||
| 332 | SOFTIRQ_OFFSET); | ||
| 327 | wakeup_softirqd(); | 333 | wakeup_softirqd(); |
| 334 | __local_bh_enable(SOFTIRQ_OFFSET); | ||
| 335 | } | ||
| 328 | } | 336 | } |
| 329 | #endif | 337 | #endif |
| 330 | 338 | ||
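Note: two things happen in softirq.c: the name table gains the "RCU" entry so it matches the number of softirqs again, and the forced-irqthreads path brackets wakeup_softirqd() with __local_bh_disable()/__local_bh_enable() so the pending work is not run inline in that context. The core softirq idea — a pending bitmask consumed by a loop — can be modeled in ordinary C (single-threaded toy; only the names array is taken from the diff above):

  #include <stdio.h>

  enum { HI, TIMER, NET_TX, NET_RX, BLOCK, BLOCK_IOPOLL,
         TASKLET, SCHED, HRTIMER, RCU, NR_SOFTIRQS };

  static const char *softirq_to_name[NR_SOFTIRQS] = {
          "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
          "TASKLET", "SCHED", "HRTIMER", "RCU"
  };

  static unsigned int pending;                    /* one bit per softirq */

  static void raise_softirq(int nr)  { pending |= 1u << nr; }

  static void do_softirq(void)
  {
          unsigned int p = pending;
          pending = 0;                            /* later raises start a new round */
          for (int nr = 0; nr < NR_SOFTIRQS; nr++)
                  if (p & (1u << nr))
                          printf("running softirq %s\n", softirq_to_name[nr]);
  }

  int main(void)
  {
          raise_softirq(TIMER);
          raise_softirq(RCU);
          do_softirq();
          return 0;
  }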
diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c index eb212f8f8bc8..d20c6983aad9 100644 --- a/kernel/stacktrace.c +++ b/kernel/stacktrace.c | |||
| @@ -26,12 +26,18 @@ void print_stack_trace(struct stack_trace *trace, int spaces) | |||
| 26 | EXPORT_SYMBOL_GPL(print_stack_trace); | 26 | EXPORT_SYMBOL_GPL(print_stack_trace); |
| 27 | 27 | ||
| 28 | /* | 28 | /* |
| 29 | * Architectures that do not implement save_stack_trace_tsk get this | 29 | * Architectures that do not implement save_stack_trace_tsk or |
| 30 | * weak alias and a once-per-bootup warning (whenever this facility | 30 | * save_stack_trace_regs get this weak alias and a once-per-bootup warning |
| 31 | * is utilized - for example by procfs): | 31 | * (whenever this facility is utilized - for example by procfs): |
| 32 | */ | 32 | */ |
| 33 | __weak void | 33 | __weak void |
| 34 | save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) | 34 | save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) |
| 35 | { | 35 | { |
| 36 | WARN_ONCE(1, KERN_INFO "save_stack_trace_tsk() not implemented yet.\n"); | 36 | WARN_ONCE(1, KERN_INFO "save_stack_trace_tsk() not implemented yet.\n"); |
| 37 | } | 37 | } |
| 38 | |||
| 39 | __weak void | ||
| 40 | save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) | ||
| 41 | { | ||
| 42 | WARN_ONCE(1, KERN_INFO "save_stack_trace_regs() not implemented yet.\n"); | ||
| 43 | } | ||
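Note: the new save_stack_trace_regs() stub follows the same pattern as the existing save_stack_trace_tsk() one: a __weak default that warns once, overridden by any architecture that supplies a real implementation. The underlying mechanism is ordinary GCC weak symbols; a minimal stand-alone demo (the function name here is invented):

  #include <stdio.h>

  /* Weak default: used only if no other object file defines save_trace(). */
  __attribute__((weak)) void save_trace(void)
  {
          puts("save_trace() not implemented yet");
  }

  int main(void)
  {
          /* Link another .o containing a non-weak save_trace() and that
           * definition silently replaces this fallback. */
          save_trace();
          return 0;
  }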
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index e3516b29076c..ba5070ce5765 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c | |||
| @@ -19,7 +19,7 @@ | |||
| 19 | #include <linux/interrupt.h> | 19 | #include <linux/interrupt.h> |
| 20 | #include <linux/kallsyms.h> | 20 | #include <linux/kallsyms.h> |
| 21 | 21 | ||
| 22 | #include <asm/atomic.h> | 22 | #include <linux/atomic.h> |
| 23 | 23 | ||
| 24 | /* | 24 | /* |
| 25 | * Structure to determine completion condition and record errors. May | 25 | * Structure to determine completion condition and record errors. May |
| @@ -136,10 +136,11 @@ void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg, | |||
| 136 | static DEFINE_MUTEX(stop_cpus_mutex); | 136 | static DEFINE_MUTEX(stop_cpus_mutex); |
| 137 | static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work); | 137 | static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work); |
| 138 | 138 | ||
| 139 | int __stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg) | 139 | static void queue_stop_cpus_work(const struct cpumask *cpumask, |
| 140 | cpu_stop_fn_t fn, void *arg, | ||
| 141 | struct cpu_stop_done *done) | ||
| 140 | { | 142 | { |
| 141 | struct cpu_stop_work *work; | 143 | struct cpu_stop_work *work; |
| 142 | struct cpu_stop_done done; | ||
| 143 | unsigned int cpu; | 144 | unsigned int cpu; |
| 144 | 145 | ||
| 145 | /* initialize works and done */ | 146 | /* initialize works and done */ |
| @@ -147,9 +148,8 @@ int __stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg) | |||
| 147 | work = &per_cpu(stop_cpus_work, cpu); | 148 | work = &per_cpu(stop_cpus_work, cpu); |
| 148 | work->fn = fn; | 149 | work->fn = fn; |
| 149 | work->arg = arg; | 150 | work->arg = arg; |
| 150 | work->done = &done; | 151 | work->done = done; |
| 151 | } | 152 | } |
| 152 | cpu_stop_init_done(&done, cpumask_weight(cpumask)); | ||
| 153 | 153 | ||
| 154 | /* | 154 | /* |
| 155 | * Disable preemption while queueing to avoid getting | 155 | * Disable preemption while queueing to avoid getting |
| @@ -161,7 +161,15 @@ int __stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg) | |||
| 161 | cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), | 161 | cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), |
| 162 | &per_cpu(stop_cpus_work, cpu)); | 162 | &per_cpu(stop_cpus_work, cpu)); |
| 163 | preempt_enable(); | 163 | preempt_enable(); |
| 164 | } | ||
| 164 | 165 | ||
| 166 | static int __stop_cpus(const struct cpumask *cpumask, | ||
| 167 | cpu_stop_fn_t fn, void *arg) | ||
| 168 | { | ||
| 169 | struct cpu_stop_done done; | ||
| 170 | |||
| 171 | cpu_stop_init_done(&done, cpumask_weight(cpumask)); | ||
| 172 | queue_stop_cpus_work(cpumask, fn, arg, &done); | ||
| 165 | wait_for_completion(&done.completion); | 173 | wait_for_completion(&done.completion); |
| 166 | return done.executed ? done.ret : -ENOENT; | 174 | return done.executed ? done.ret : -ENOENT; |
| 167 | } | 175 | } |
| @@ -431,8 +439,15 @@ static int stop_machine_cpu_stop(void *data) | |||
| 431 | struct stop_machine_data *smdata = data; | 439 | struct stop_machine_data *smdata = data; |
| 432 | enum stopmachine_state curstate = STOPMACHINE_NONE; | 440 | enum stopmachine_state curstate = STOPMACHINE_NONE; |
| 433 | int cpu = smp_processor_id(), err = 0; | 441 | int cpu = smp_processor_id(), err = 0; |
| 442 | unsigned long flags; | ||
| 434 | bool is_active; | 443 | bool is_active; |
| 435 | 444 | ||
| 445 | /* | ||
| 446 | * When called from stop_machine_from_inactive_cpu(), irq might | ||
| 447 | * already be disabled. Save the state and restore it on exit. | ||
| 448 | */ | ||
| 449 | local_save_flags(flags); | ||
| 450 | |||
| 436 | if (!smdata->active_cpus) | 451 | if (!smdata->active_cpus) |
| 437 | is_active = cpu == cpumask_first(cpu_online_mask); | 452 | is_active = cpu == cpumask_first(cpu_online_mask); |
| 438 | else | 453 | else |
| @@ -460,7 +475,7 @@ static int stop_machine_cpu_stop(void *data) | |||
| 460 | } | 475 | } |
| 461 | } while (curstate != STOPMACHINE_EXIT); | 476 | } while (curstate != STOPMACHINE_EXIT); |
| 462 | 477 | ||
| 463 | local_irq_enable(); | 478 | local_irq_restore(flags); |
| 464 | return err; | 479 | return err; |
| 465 | } | 480 | } |
| 466 | 481 | ||
| @@ -487,4 +502,57 @@ int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus) | |||
| 487 | } | 502 | } |
| 488 | EXPORT_SYMBOL_GPL(stop_machine); | 503 | EXPORT_SYMBOL_GPL(stop_machine); |
| 489 | 504 | ||
| 505 | /** | ||
| 506 | * stop_machine_from_inactive_cpu - stop_machine() from inactive CPU | ||
| 507 | * @fn: the function to run | ||
| 508 | * @data: the data ptr for the @fn() | ||
| 509 | * @cpus: the cpus to run the @fn() on (NULL = any online cpu) | ||
| 510 | * | ||
| 511 | * This is identical to stop_machine() but can be called from a CPU which | ||
| 512 | * is not active. The local CPU is in the process of hotplug (so no other | ||
| 513 | * CPU hotplug can start) and not marked active and doesn't have enough | ||
| 514 | * context to sleep. | ||
| 515 | * | ||
| 516 | * This function provides stop_machine() functionality for such state by | ||
| 517 | * using busy-wait for synchronization and executing @fn directly for local | ||
| 518 | * CPU. | ||
| 519 | * | ||
| 520 | * CONTEXT: | ||
| 521 | * Local CPU is inactive. Temporarily stops all active CPUs. | ||
| 522 | * | ||
| 523 | * RETURNS: | ||
| 524 | * 0 if all executions of @fn returned 0, any non zero return value if any | ||
| 525 | * returned non zero. | ||
| 526 | */ | ||
| 527 | int stop_machine_from_inactive_cpu(int (*fn)(void *), void *data, | ||
| 528 | const struct cpumask *cpus) | ||
| 529 | { | ||
| 530 | struct stop_machine_data smdata = { .fn = fn, .data = data, | ||
| 531 | .active_cpus = cpus }; | ||
| 532 | struct cpu_stop_done done; | ||
| 533 | int ret; | ||
| 534 | |||
| 535 | /* Local CPU must be inactive and CPU hotplug in progress. */ | ||
| 536 | BUG_ON(cpu_active(raw_smp_processor_id())); | ||
| 537 | smdata.num_threads = num_active_cpus() + 1; /* +1 for local */ | ||
| 538 | |||
| 539 | /* No proper task established and can't sleep - busy wait for lock. */ | ||
| 540 | while (!mutex_trylock(&stop_cpus_mutex)) | ||
| 541 | cpu_relax(); | ||
| 542 | |||
| 543 | /* Schedule work on other CPUs and execute directly for local CPU */ | ||
| 544 | set_state(&smdata, STOPMACHINE_PREPARE); | ||
| 545 | cpu_stop_init_done(&done, num_active_cpus()); | ||
| 546 | queue_stop_cpus_work(cpu_active_mask, stop_machine_cpu_stop, &smdata, | ||
| 547 | &done); | ||
| 548 | ret = stop_machine_cpu_stop(&smdata); | ||
| 549 | |||
| 550 | /* Busy wait for completion. */ | ||
| 551 | while (!completion_done(&done.completion)) | ||
| 552 | cpu_relax(); | ||
| 553 | |||
| 554 | mutex_unlock(&stop_cpus_mutex); | ||
| 555 | return ret ?: done.ret; | ||
| 556 | } | ||
| 557 | |||
| 490 | #endif /* CONFIG_STOP_MACHINE */ | 558 | #endif /* CONFIG_STOP_MACHINE */ |
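Note: stop_machine_from_inactive_cpu() cannot sleep, so it spins on mutex_trylock() for stop_cpus_mutex, queues the per-CPU work, runs the callback itself, and then busy-waits on completion_done() instead of blocking in wait_for_completion(). A rough user-space model of that "trylock + poll for completion" shape using pthreads and C11 atomics (structure only, not the kernel API; compile with -pthread):

  #include <pthread.h>
  #include <sched.h>
  #include <stdatomic.h>
  #include <stdio.h>

  static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
  static atomic_int remaining;            /* crude stand-in for a completion */

  static void *worker(void *arg)
  {
          (void)arg;
          puts("worker: doing the queued work");
          atomic_fetch_sub(&remaining, 1); /* complete() */
          return NULL;
  }

  int main(void)
  {
          pthread_t t;

          /* Can't sleep on the lock in the modelled context: spin on trylock. */
          while (pthread_mutex_trylock(&lock) != 0)
                  sched_yield();          /* cpu_relax() stand-in */

          atomic_store(&remaining, 1);
          pthread_create(&t, NULL, worker, NULL);

          puts("local: doing my share of the work directly");

          /* Busy-wait for completion instead of blocking. */
          while (atomic_load(&remaining) != 0)
                  sched_yield();

          pthread_mutex_unlock(&lock);
          pthread_join(t, NULL);
          return 0;
  }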
diff --git a/kernel/sys.c b/kernel/sys.c index e4128b278f23..a101ba36c444 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
| @@ -8,7 +8,6 @@ | |||
| 8 | #include <linux/mm.h> | 8 | #include <linux/mm.h> |
| 9 | #include <linux/utsname.h> | 9 | #include <linux/utsname.h> |
| 10 | #include <linux/mman.h> | 10 | #include <linux/mman.h> |
| 11 | #include <linux/notifier.h> | ||
| 12 | #include <linux/reboot.h> | 11 | #include <linux/reboot.h> |
| 13 | #include <linux/prctl.h> | 12 | #include <linux/prctl.h> |
| 14 | #include <linux/highuid.h> | 13 | #include <linux/highuid.h> |
| @@ -320,6 +319,37 @@ void kernel_restart_prepare(char *cmd) | |||
| 320 | } | 319 | } |
| 321 | 320 | ||
| 322 | /** | 321 | /** |
| 322 | * register_reboot_notifier - Register function to be called at reboot time | ||
| 323 | * @nb: Info about notifier function to be called | ||
| 324 | * | ||
| 325 | * Registers a function with the list of functions | ||
| 326 | * to be called at reboot time. | ||
| 327 | * | ||
| 328 | * Currently always returns zero, as blocking_notifier_chain_register() | ||
| 329 | * always returns zero. | ||
| 330 | */ | ||
| 331 | int register_reboot_notifier(struct notifier_block *nb) | ||
| 332 | { | ||
| 333 | return blocking_notifier_chain_register(&reboot_notifier_list, nb); | ||
| 334 | } | ||
| 335 | EXPORT_SYMBOL(register_reboot_notifier); | ||
| 336 | |||
| 337 | /** | ||
| 338 | * unregister_reboot_notifier - Unregister previously registered reboot notifier | ||
| 339 | * @nb: Hook to be unregistered | ||
| 340 | * | ||
| 341 | * Unregisters a previously registered reboot | ||
| 342 | * notifier function. | ||
| 343 | * | ||
| 344 | * Returns zero on success, or %-ENOENT on failure. | ||
| 345 | */ | ||
| 346 | int unregister_reboot_notifier(struct notifier_block *nb) | ||
| 347 | { | ||
| 348 | return blocking_notifier_chain_unregister(&reboot_notifier_list, nb); | ||
| 349 | } | ||
| 350 | EXPORT_SYMBOL(unregister_reboot_notifier); | ||
| 351 | |||
| 352 | /** | ||
| 323 | * kernel_restart - reboot the system | 353 | * kernel_restart - reboot the system |
| 324 | * @cmd: pointer to buffer containing command to execute for restart | 354 | * @cmd: pointer to buffer containing command to execute for restart |
| 325 | * or %NULL | 355 | * or %NULL |
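Note: register_reboot_notifier()/unregister_reboot_notifier() become thin, documented wrappers around a blocking notifier chain in kernel/sys.c. A notifier chain is essentially an ordered list of callbacks walked when an event fires; a minimal single-threaded model (no locking or priorities, and the names merely mirror the shape of the kernel API):

  #include <stdio.h>

  struct notifier_block {
          int (*notifier_call)(struct notifier_block *nb, unsigned long action, void *data);
          struct notifier_block *next;
  };

  static struct notifier_block *reboot_chain;

  static int register_reboot_notifier(struct notifier_block *nb)
  {
          nb->next = reboot_chain;        /* real code inserts by priority */
          reboot_chain = nb;
          return 0;
  }

  static void call_reboot_chain(unsigned long action, void *data)
  {
          for (struct notifier_block *nb = reboot_chain; nb; nb = nb->next)
                  nb->notifier_call(nb, action, data);
  }

  static int say_goodbye(struct notifier_block *nb, unsigned long action, void *data)
  {
          (void)nb; (void)data;
          printf("reboot notifier called, action=%lu\n", action);
          return 0;
  }

  int main(void)
  {
          struct notifier_block nb = { .notifier_call = say_goodbye };

          register_reboot_notifier(&nb);
          call_reboot_chain(1, "now");    /* action code is opaque to this toy */
          return 0;
  }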
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 4fc92445a29c..11d65b531e50 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
| @@ -938,6 +938,12 @@ static struct ctl_table kern_table[] = { | |||
| 938 | }, | 938 | }, |
| 939 | #endif | 939 | #endif |
| 940 | #ifdef CONFIG_PERF_EVENTS | 940 | #ifdef CONFIG_PERF_EVENTS |
| 941 | /* | ||
| 942 | * User-space scripts rely on the existence of this file | ||
| 943 | * as a feature check for perf_events being enabled. | ||
| 944 | * | ||
| 945 | * So it's an ABI, do not remove! | ||
| 946 | */ | ||
| 941 | { | 947 | { |
| 942 | .procname = "perf_event_paranoid", | 948 | .procname = "perf_event_paranoid", |
| 943 | .data = &sysctl_perf_event_paranoid, | 949 | .data = &sysctl_perf_event_paranoid, |
| @@ -1584,16 +1590,11 @@ void sysctl_head_get(struct ctl_table_header *head) | |||
| 1584 | spin_unlock(&sysctl_lock); | 1590 | spin_unlock(&sysctl_lock); |
| 1585 | } | 1591 | } |
| 1586 | 1592 | ||
| 1587 | static void free_head(struct rcu_head *rcu) | ||
| 1588 | { | ||
| 1589 | kfree(container_of(rcu, struct ctl_table_header, rcu)); | ||
| 1590 | } | ||
| 1591 | |||
| 1592 | void sysctl_head_put(struct ctl_table_header *head) | 1593 | void sysctl_head_put(struct ctl_table_header *head) |
| 1593 | { | 1594 | { |
| 1594 | spin_lock(&sysctl_lock); | 1595 | spin_lock(&sysctl_lock); |
| 1595 | if (!--head->count) | 1596 | if (!--head->count) |
| 1596 | call_rcu(&head->rcu, free_head); | 1597 | kfree_rcu(head, rcu); |
| 1597 | spin_unlock(&sysctl_lock); | 1598 | spin_unlock(&sysctl_lock); |
| 1598 | } | 1599 | } |
| 1599 | 1600 | ||
| @@ -1965,10 +1966,10 @@ void unregister_sysctl_table(struct ctl_table_header * header) | |||
| 1965 | start_unregistering(header); | 1966 | start_unregistering(header); |
| 1966 | if (!--header->parent->count) { | 1967 | if (!--header->parent->count) { |
| 1967 | WARN_ON(1); | 1968 | WARN_ON(1); |
| 1968 | call_rcu(&header->parent->rcu, free_head); | 1969 | kfree_rcu(header->parent, rcu); |
| 1969 | } | 1970 | } |
| 1970 | if (!--header->count) | 1971 | if (!--header->count) |
| 1971 | call_rcu(&header->rcu, free_head); | 1972 | kfree_rcu(header, rcu); |
| 1972 | spin_unlock(&sysctl_lock); | 1973 | spin_unlock(&sysctl_lock); |
| 1973 | } | 1974 | } |
| 1974 | 1975 | ||
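Note: replacing the free_head() callback with kfree_rcu(head, rcu) works because a generic helper only needs the offset of the rcu_head inside the enclosing object to recover the pointer to free; a hand-written callback that merely does container_of() + kfree() adds nothing. The offset trick itself is plain C; a stand-alone sketch of "recover the enclosing object from a pointer to an embedded member" (no RCU grace-period semantics here, just the pointer arithmetic):

  #include <stddef.h>
  #include <stdio.h>
  #include <stdlib.h>

  struct cb_head { struct cb_head *next; };        /* stand-in for rcu_head */

  struct table_header {
          int count;
          struct cb_head rcu;                      /* embedded callback head */
  };

  #define container_of(ptr, type, member) \
          ((type *)((char *)(ptr) - offsetof(type, member)))

  /* What a hand-written callback would do: recover the object, free it. */
  static void free_head(struct cb_head *h)
  {
          free(container_of(h, struct table_header, rcu));
  }

  int main(void)
  {
          struct table_header *head = malloc(sizeof(*head));

          head->count = 0;
          /* A generic helper only needs &head->rcu plus the member offset,
           * which is why no per-type callback is required. */
          free_head(&head->rcu);
          puts("freed via embedded head");
          return 0;
  }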
diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 9ffea360a778..e19ce1454ee1 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c | |||
| @@ -28,7 +28,7 @@ | |||
| 28 | #include <linux/fs.h> | 28 | #include <linux/fs.h> |
| 29 | #include <linux/file.h> | 29 | #include <linux/file.h> |
| 30 | #include <net/genetlink.h> | 30 | #include <net/genetlink.h> |
| 31 | #include <asm/atomic.h> | 31 | #include <linux/atomic.h> |
| 32 | 32 | ||
| 33 | /* | 33 | /* |
| 34 | * Maximum length of a cpumask that can be specified in | 34 | * Maximum length of a cpumask that can be specified in |
| @@ -285,7 +285,7 @@ ret: | |||
| 285 | static int add_del_listener(pid_t pid, const struct cpumask *mask, int isadd) | 285 | static int add_del_listener(pid_t pid, const struct cpumask *mask, int isadd) |
| 286 | { | 286 | { |
| 287 | struct listener_list *listeners; | 287 | struct listener_list *listeners; |
| 288 | struct listener *s, *tmp; | 288 | struct listener *s, *tmp, *s2; |
| 289 | unsigned int cpu; | 289 | unsigned int cpu; |
| 290 | 290 | ||
| 291 | if (!cpumask_subset(mask, cpu_possible_mask)) | 291 | if (!cpumask_subset(mask, cpu_possible_mask)) |
| @@ -293,18 +293,25 @@ static int add_del_listener(pid_t pid, const struct cpumask *mask, int isadd) | |||
| 293 | 293 | ||
| 294 | if (isadd == REGISTER) { | 294 | if (isadd == REGISTER) { |
| 295 | for_each_cpu(cpu, mask) { | 295 | for_each_cpu(cpu, mask) { |
| 296 | s = kmalloc_node(sizeof(struct listener), GFP_KERNEL, | 296 | s = kmalloc_node(sizeof(struct listener), |
| 297 | cpu_to_node(cpu)); | 297 | GFP_KERNEL, cpu_to_node(cpu)); |
| 298 | if (!s) | 298 | if (!s) |
| 299 | goto cleanup; | 299 | goto cleanup; |
| 300 | |||
| 300 | s->pid = pid; | 301 | s->pid = pid; |
| 301 | INIT_LIST_HEAD(&s->list); | ||
| 302 | s->valid = 1; | 302 | s->valid = 1; |
| 303 | 303 | ||
| 304 | listeners = &per_cpu(listener_array, cpu); | 304 | listeners = &per_cpu(listener_array, cpu); |
| 305 | down_write(&listeners->sem); | 305 | down_write(&listeners->sem); |
| 306 | list_for_each_entry(s2, &listeners->list, list) { | ||
| 307 | if (s2->pid == pid && s2->valid) | ||
| 308 | goto exists; | ||
| 309 | } | ||
| 306 | list_add(&s->list, &listeners->list); | 310 | list_add(&s->list, &listeners->list); |
| 311 | s = NULL; | ||
| 312 | exists: | ||
| 307 | up_write(&listeners->sem); | 313 | up_write(&listeners->sem); |
| 314 | kfree(s); /* nop if NULL */ | ||
| 308 | } | 315 | } |
| 309 | return 0; | 316 | return 0; |
| 310 | } | 317 | } |
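Note: the taskstats fix allocates the listener node outside the per-CPU semaphore, scans the list under the write lock, and only links the node if no live entry with the same pid already exists; otherwise the fresh allocation is freed. The shape of that "allocate, lock, check, insert-or-discard" pattern in plain C with a pthread rwlock (invented types, no per-CPU machinery; compile with -pthread):

  #include <pthread.h>
  #include <stdio.h>
  #include <stdlib.h>

  struct listener { int pid; struct listener *next; };

  static struct listener *listeners;
  static pthread_rwlock_t sem = PTHREAD_RWLOCK_INITIALIZER;

  static int add_listener(int pid)
  {
          struct listener *s = malloc(sizeof(*s));   /* allocate outside the lock */

          if (!s)
                  return -1;
          s->pid = pid;

          pthread_rwlock_wrlock(&sem);
          for (struct listener *s2 = listeners; s2; s2 = s2->next) {
                  if (s2->pid == pid) {              /* already registered */
                          pthread_rwlock_unlock(&sem);
                          free(s);                   /* drop the duplicate */
                          return 0;
                  }
          }
          s->next = listeners;
          listeners = s;
          pthread_rwlock_unlock(&sem);
          return 0;
  }

  int main(void)
  {
          add_listener(42);
          add_listener(42);                          /* second call is a no-op */
          printf("head pid=%d, next=%p\n", listeners->pid, (void *)listeners->next);
          return 0;
  }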
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 2d966244ea60..59f369f98a04 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c | |||
| @@ -42,15 +42,75 @@ static struct alarm_base { | |||
| 42 | clockid_t base_clockid; | 42 | clockid_t base_clockid; |
| 43 | } alarm_bases[ALARM_NUMTYPE]; | 43 | } alarm_bases[ALARM_NUMTYPE]; |
| 44 | 44 | ||
| 45 | /* freezer delta & lock used to handle clock_nanosleep triggered wakeups */ | ||
| 46 | static ktime_t freezer_delta; | ||
| 47 | static DEFINE_SPINLOCK(freezer_delta_lock); | ||
| 48 | |||
| 45 | #ifdef CONFIG_RTC_CLASS | 49 | #ifdef CONFIG_RTC_CLASS |
| 46 | /* rtc timer and device for setting alarm wakeups at suspend */ | 50 | /* rtc timer and device for setting alarm wakeups at suspend */ |
| 47 | static struct rtc_timer rtctimer; | 51 | static struct rtc_timer rtctimer; |
| 48 | static struct rtc_device *rtcdev; | 52 | static struct rtc_device *rtcdev; |
| 49 | #endif | 53 | static DEFINE_SPINLOCK(rtcdev_lock); |
| 50 | 54 | ||
| 51 | /* freezer delta & lock used to handle clock_nanosleep triggered wakeups */ | 55 | /** |
| 52 | static ktime_t freezer_delta; | 56 | * has_wakealarm - check rtc device has wakealarm ability |
| 53 | static DEFINE_SPINLOCK(freezer_delta_lock); | 57 | * @dev: current device |
| 58 | * @name_ptr: name to be returned | ||
| 59 | * | ||
| 60 | * This helper function checks to see if the rtc device can wake | ||
| 61 | * from suspend. | ||
| 62 | */ | ||
| 63 | static int has_wakealarm(struct device *dev, void *name_ptr) | ||
| 64 | { | ||
| 65 | struct rtc_device *candidate = to_rtc_device(dev); | ||
| 66 | |||
| 67 | if (!candidate->ops->set_alarm) | ||
| 68 | return 0; | ||
| 69 | if (!device_may_wakeup(candidate->dev.parent)) | ||
| 70 | return 0; | ||
| 71 | |||
| 72 | *(const char **)name_ptr = dev_name(dev); | ||
| 73 | return 1; | ||
| 74 | } | ||
| 75 | |||
| 76 | /** | ||
| 77 | * alarmtimer_get_rtcdev - Return selected rtcdevice | ||
| 78 | * | ||
| 79 | * This function returns the rtc device to use for wakealarms. | ||
| 80 | * If one has not already been chosen, it checks to see if a | ||
| 81 | * functional rtc device is available. | ||
| 82 | */ | ||
| 83 | static struct rtc_device *alarmtimer_get_rtcdev(void) | ||
| 84 | { | ||
| 85 | struct device *dev; | ||
| 86 | char *str; | ||
| 87 | unsigned long flags; | ||
| 88 | struct rtc_device *ret; | ||
| 89 | |||
| 90 | spin_lock_irqsave(&rtcdev_lock, flags); | ||
| 91 | if (!rtcdev) { | ||
| 92 | /* Find an rtc device and init the rtc_timer */ | ||
| 93 | dev = class_find_device(rtc_class, NULL, &str, has_wakealarm); | ||
| 94 | /* If we have a device then str is valid. See has_wakealarm() */ | ||
| 95 | if (dev) { | ||
| 96 | rtcdev = rtc_class_open(str); | ||
| 97 | /* | ||
| 98 | * Drop the reference we got in class_find_device, | ||
| 99 | * rtc_open takes its own. | ||
| 100 | */ | ||
| 101 | put_device(dev); | ||
| 102 | rtc_timer_init(&rtctimer, NULL, NULL); | ||
| 103 | } | ||
| 104 | } | ||
| 105 | ret = rtcdev; | ||
| 106 | spin_unlock_irqrestore(&rtcdev_lock, flags); | ||
| 107 | |||
| 108 | return ret; | ||
| 109 | } | ||
| 110 | #else | ||
| 111 | #define alarmtimer_get_rtcdev() (0) | ||
| 112 | #define rtcdev (0) | ||
| 113 | #endif | ||
| 54 | 114 | ||
| 55 | 115 | ||
| 56 | /** | 116 | /** |
| @@ -166,6 +226,7 @@ static int alarmtimer_suspend(struct device *dev) | |||
| 166 | struct rtc_time tm; | 226 | struct rtc_time tm; |
| 167 | ktime_t min, now; | 227 | ktime_t min, now; |
| 168 | unsigned long flags; | 228 | unsigned long flags; |
| 229 | struct rtc_device *rtc; | ||
| 169 | int i; | 230 | int i; |
| 170 | 231 | ||
| 171 | spin_lock_irqsave(&freezer_delta_lock, flags); | 232 | spin_lock_irqsave(&freezer_delta_lock, flags); |
| @@ -173,8 +234,9 @@ static int alarmtimer_suspend(struct device *dev) | |||
| 173 | freezer_delta = ktime_set(0, 0); | 234 | freezer_delta = ktime_set(0, 0); |
| 174 | spin_unlock_irqrestore(&freezer_delta_lock, flags); | 235 | spin_unlock_irqrestore(&freezer_delta_lock, flags); |
| 175 | 236 | ||
| 237 | rtc = rtcdev; | ||
| 176 | /* If we have no rtcdev, just return */ | 238 | /* If we have no rtcdev, just return */ |
| 177 | if (!rtcdev) | 239 | if (!rtc) |
| 178 | return 0; | 240 | return 0; |
| 179 | 241 | ||
| 180 | /* Find the soonest timer to expire*/ | 242 | /* Find the soonest timer to expire*/ |
| @@ -199,12 +261,12 @@ static int alarmtimer_suspend(struct device *dev) | |||
| 199 | WARN_ON(min.tv64 < NSEC_PER_SEC); | 261 | WARN_ON(min.tv64 < NSEC_PER_SEC); |
| 200 | 262 | ||
| 201 | /* Setup an rtc timer to fire that far in the future */ | 263 | /* Setup an rtc timer to fire that far in the future */ |
| 202 | rtc_timer_cancel(rtcdev, &rtctimer); | 264 | rtc_timer_cancel(rtc, &rtctimer); |
| 203 | rtc_read_time(rtcdev, &tm); | 265 | rtc_read_time(rtc, &tm); |
| 204 | now = rtc_tm_to_ktime(tm); | 266 | now = rtc_tm_to_ktime(tm); |
| 205 | now = ktime_add(now, min); | 267 | now = ktime_add(now, min); |
| 206 | 268 | ||
| 207 | rtc_timer_start(rtcdev, &rtctimer, now, ktime_set(0, 0)); | 269 | rtc_timer_start(rtc, &rtctimer, now, ktime_set(0, 0)); |
| 208 | 270 | ||
| 209 | return 0; | 271 | return 0; |
| 210 | } | 272 | } |
| @@ -322,6 +384,9 @@ static int alarm_clock_getres(const clockid_t which_clock, struct timespec *tp) | |||
| 322 | { | 384 | { |
| 323 | clockid_t baseid = alarm_bases[clock2alarm(which_clock)].base_clockid; | 385 | clockid_t baseid = alarm_bases[clock2alarm(which_clock)].base_clockid; |
| 324 | 386 | ||
| 387 | if (!alarmtimer_get_rtcdev()) | ||
| 388 | return -ENOTSUPP; | ||
| 389 | |||
| 325 | return hrtimer_get_res(baseid, tp); | 390 | return hrtimer_get_res(baseid, tp); |
| 326 | } | 391 | } |
| 327 | 392 | ||
| @@ -336,6 +401,9 @@ static int alarm_clock_get(clockid_t which_clock, struct timespec *tp) | |||
| 336 | { | 401 | { |
| 337 | struct alarm_base *base = &alarm_bases[clock2alarm(which_clock)]; | 402 | struct alarm_base *base = &alarm_bases[clock2alarm(which_clock)]; |
| 338 | 403 | ||
| 404 | if (!alarmtimer_get_rtcdev()) | ||
| 405 | return -ENOTSUPP; | ||
| 406 | |||
| 339 | *tp = ktime_to_timespec(base->gettime()); | 407 | *tp = ktime_to_timespec(base->gettime()); |
| 340 | return 0; | 408 | return 0; |
| 341 | } | 409 | } |
| @@ -351,6 +419,9 @@ static int alarm_timer_create(struct k_itimer *new_timer) | |||
| 351 | enum alarmtimer_type type; | 419 | enum alarmtimer_type type; |
| 352 | struct alarm_base *base; | 420 | struct alarm_base *base; |
| 353 | 421 | ||
| 422 | if (!alarmtimer_get_rtcdev()) | ||
| 423 | return -ENOTSUPP; | ||
| 424 | |||
| 354 | if (!capable(CAP_WAKE_ALARM)) | 425 | if (!capable(CAP_WAKE_ALARM)) |
| 355 | return -EPERM; | 426 | return -EPERM; |
| 356 | 427 | ||
| @@ -385,6 +456,9 @@ static void alarm_timer_get(struct k_itimer *timr, | |||
| 385 | */ | 456 | */ |
| 386 | static int alarm_timer_del(struct k_itimer *timr) | 457 | static int alarm_timer_del(struct k_itimer *timr) |
| 387 | { | 458 | { |
| 459 | if (!rtcdev) | ||
| 460 | return -ENOTSUPP; | ||
| 461 | |||
| 388 | alarm_cancel(&timr->it.alarmtimer); | 462 | alarm_cancel(&timr->it.alarmtimer); |
| 389 | return 0; | 463 | return 0; |
| 390 | } | 464 | } |
| @@ -402,6 +476,9 @@ static int alarm_timer_set(struct k_itimer *timr, int flags, | |||
| 402 | struct itimerspec *new_setting, | 476 | struct itimerspec *new_setting, |
| 403 | struct itimerspec *old_setting) | 477 | struct itimerspec *old_setting) |
| 404 | { | 478 | { |
| 479 | if (!rtcdev) | ||
| 480 | return -ENOTSUPP; | ||
| 481 | |||
| 405 | /* Save old values */ | 482 | /* Save old values */ |
| 406 | old_setting->it_interval = | 483 | old_setting->it_interval = |
| 407 | ktime_to_timespec(timr->it.alarmtimer.period); | 484 | ktime_to_timespec(timr->it.alarmtimer.period); |
| @@ -541,6 +618,9 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags, | |||
| 541 | int ret = 0; | 618 | int ret = 0; |
| 542 | struct restart_block *restart; | 619 | struct restart_block *restart; |
| 543 | 620 | ||
| 621 | if (!alarmtimer_get_rtcdev()) | ||
| 622 | return -ENOTSUPP; | ||
| 623 | |||
| 544 | if (!capable(CAP_WAKE_ALARM)) | 624 | if (!capable(CAP_WAKE_ALARM)) |
| 545 | return -EPERM; | 625 | return -EPERM; |
| 546 | 626 | ||
| @@ -638,65 +718,3 @@ static int __init alarmtimer_init(void) | |||
| 638 | } | 718 | } |
| 639 | device_initcall(alarmtimer_init); | 719 | device_initcall(alarmtimer_init); |
| 640 | 720 | ||
| 641 | #ifdef CONFIG_RTC_CLASS | ||
| 642 | /** | ||
| 643 | * has_wakealarm - check rtc device has wakealarm ability | ||
| 644 | * @dev: current device | ||
| 645 | * @name_ptr: name to be returned | ||
| 646 | * | ||
| 647 | * This helper function checks to see if the rtc device can wake | ||
| 648 | * from suspend. | ||
| 649 | */ | ||
| 650 | static int __init has_wakealarm(struct device *dev, void *name_ptr) | ||
| 651 | { | ||
| 652 | struct rtc_device *candidate = to_rtc_device(dev); | ||
| 653 | |||
| 654 | if (!candidate->ops->set_alarm) | ||
| 655 | return 0; | ||
| 656 | if (!device_may_wakeup(candidate->dev.parent)) | ||
| 657 | return 0; | ||
| 658 | |||
| 659 | *(const char **)name_ptr = dev_name(dev); | ||
| 660 | return 1; | ||
| 661 | } | ||
| 662 | |||
| 663 | /** | ||
| 664 | * alarmtimer_init_late - Late initializing of alarmtimer code | ||
| 665 | * | ||
| 666 | * This function locates a rtc device to use for wakealarms. | ||
| 667 | * Run as late_initcall to make sure rtc devices have been | ||
| 668 | * registered. | ||
| 669 | */ | ||
| 670 | static int __init alarmtimer_init_late(void) | ||
| 671 | { | ||
| 672 | struct device *dev; | ||
| 673 | char *str; | ||
| 674 | |||
| 675 | /* Find an rtc device and init the rtc_timer */ | ||
| 676 | dev = class_find_device(rtc_class, NULL, &str, has_wakealarm); | ||
| 677 | /* If we have a device then str is valid. See has_wakealarm() */ | ||
| 678 | if (dev) { | ||
| 679 | rtcdev = rtc_class_open(str); | ||
| 680 | /* | ||
| 681 | * Drop the reference we got in class_find_device, | ||
| 682 | * rtc_open takes its own. | ||
| 683 | */ | ||
| 684 | put_device(dev); | ||
| 685 | } | ||
| 686 | if (!rtcdev) { | ||
| 687 | printk(KERN_WARNING "No RTC device found, ALARM timers will" | ||
| 688 | " not wake from suspend"); | ||
| 689 | } | ||
| 690 | rtc_timer_init(&rtctimer, NULL, NULL); | ||
| 691 | |||
| 692 | return 0; | ||
| 693 | } | ||
| 694 | #else | ||
| 695 | static int __init alarmtimer_init_late(void) | ||
| 696 | { | ||
| 697 | printk(KERN_WARNING "Kernel not built with RTC support, ALARM timers" | ||
| 698 | " will not wake from suspend"); | ||
| 699 | return 0; | ||
| 700 | } | ||
| 701 | #endif | ||
| 702 | late_initcall(alarmtimer_init_late); | ||
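Note: the RTC lookup moves from a late_initcall into alarmtimer_get_rtcdev(), which finds and opens a wake-capable RTC the first time anyone asks, under rtcdev_lock; the timer entry points then refuse to operate when no such device exists. The lazy-initialization shape, reduced to portable C with a mutex (the "device" and error value below are stand-ins, not the kernel's; compile with -pthread):

  #include <pthread.h>
  #include <stdio.h>

  struct rtc_dev { const char *name; };

  static struct rtc_dev *rtcdev;                     /* NULL until first lookup */
  static pthread_mutex_t rtcdev_lock = PTHREAD_MUTEX_INITIALIZER;

  static struct rtc_dev *probe_wake_capable_rtc(void)
  {
          static struct rtc_dev rtc0 = { "rtc0" };   /* pretend we found one */
          return &rtc0;                              /* or NULL if none exists */
  }

  static struct rtc_dev *get_rtcdev(void)
  {
          struct rtc_dev *ret;

          pthread_mutex_lock(&rtcdev_lock);
          if (!rtcdev)
                  rtcdev = probe_wake_capable_rtc(); /* first caller pays the cost */
          ret = rtcdev;
          pthread_mutex_unlock(&rtcdev_lock);
          return ret;
  }

  static int alarm_timer_create(void)
  {
          if (!get_rtcdev())
                  return -1;                         /* "not supported" in the real code */
          puts("alarm timer created");
          return 0;
  }

  int main(void)
  {
          return alarm_timer_create();
  }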
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index c027d4f602f1..e4c699dfa4e8 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c | |||
| @@ -182,7 +182,10 @@ void clockevents_register_device(struct clock_event_device *dev) | |||
| 182 | unsigned long flags; | 182 | unsigned long flags; |
| 183 | 183 | ||
| 184 | BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); | 184 | BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); |
| 185 | BUG_ON(!dev->cpumask); | 185 | if (!dev->cpumask) { |
| 186 | WARN_ON(num_possible_cpus() > 1); | ||
| 187 | dev->cpumask = cpumask_of(smp_processor_id()); | ||
| 188 | } | ||
| 186 | 189 | ||
| 187 | raw_spin_lock_irqsave(&clockevents_lock, flags); | 190 | raw_spin_lock_irqsave(&clockevents_lock, flags); |
| 188 | 191 | ||
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 1c95fd677328..e0980f0d9a0a 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c | |||
| @@ -185,7 +185,6 @@ static struct clocksource *watchdog; | |||
| 185 | static struct timer_list watchdog_timer; | 185 | static struct timer_list watchdog_timer; |
| 186 | static DECLARE_WORK(watchdog_work, clocksource_watchdog_work); | 186 | static DECLARE_WORK(watchdog_work, clocksource_watchdog_work); |
| 187 | static DEFINE_SPINLOCK(watchdog_lock); | 187 | static DEFINE_SPINLOCK(watchdog_lock); |
| 188 | static cycle_t watchdog_last; | ||
| 189 | static int watchdog_running; | 188 | static int watchdog_running; |
| 190 | 189 | ||
| 191 | static int clocksource_watchdog_kthread(void *data); | 190 | static int clocksource_watchdog_kthread(void *data); |
| @@ -254,11 +253,6 @@ static void clocksource_watchdog(unsigned long data) | |||
| 254 | if (!watchdog_running) | 253 | if (!watchdog_running) |
| 255 | goto out; | 254 | goto out; |
| 256 | 255 | ||
| 257 | wdnow = watchdog->read(watchdog); | ||
| 258 | wd_nsec = clocksource_cyc2ns((wdnow - watchdog_last) & watchdog->mask, | ||
| 259 | watchdog->mult, watchdog->shift); | ||
| 260 | watchdog_last = wdnow; | ||
| 261 | |||
| 262 | list_for_each_entry(cs, &watchdog_list, wd_list) { | 256 | list_for_each_entry(cs, &watchdog_list, wd_list) { |
| 263 | 257 | ||
| 264 | /* Clocksource already marked unstable? */ | 258 | /* Clocksource already marked unstable? */ |
| @@ -268,19 +262,28 @@ static void clocksource_watchdog(unsigned long data) | |||
| 268 | continue; | 262 | continue; |
| 269 | } | 263 | } |
| 270 | 264 | ||
| 265 | local_irq_disable(); | ||
| 271 | csnow = cs->read(cs); | 266 | csnow = cs->read(cs); |
| 267 | wdnow = watchdog->read(watchdog); | ||
| 268 | local_irq_enable(); | ||
| 272 | 269 | ||
| 273 | /* Clocksource initialized ? */ | 270 | /* Clocksource initialized ? */ |
| 274 | if (!(cs->flags & CLOCK_SOURCE_WATCHDOG)) { | 271 | if (!(cs->flags & CLOCK_SOURCE_WATCHDOG)) { |
| 275 | cs->flags |= CLOCK_SOURCE_WATCHDOG; | 272 | cs->flags |= CLOCK_SOURCE_WATCHDOG; |
| 276 | cs->wd_last = csnow; | 273 | cs->wd_last = wdnow; |
| 274 | cs->cs_last = csnow; | ||
| 277 | continue; | 275 | continue; |
| 278 | } | 276 | } |
| 279 | 277 | ||
| 280 | /* Check the deviation from the watchdog clocksource. */ | 278 | wd_nsec = clocksource_cyc2ns((wdnow - cs->wd_last) & watchdog->mask, |
| 281 | cs_nsec = clocksource_cyc2ns((csnow - cs->wd_last) & | 279 | watchdog->mult, watchdog->shift); |
| 280 | |||
| 281 | cs_nsec = clocksource_cyc2ns((csnow - cs->cs_last) & | ||
| 282 | cs->mask, cs->mult, cs->shift); | 282 | cs->mask, cs->mult, cs->shift); |
| 283 | cs->wd_last = csnow; | 283 | cs->cs_last = csnow; |
| 284 | cs->wd_last = wdnow; | ||
| 285 | |||
| 286 | /* Check the deviation from the watchdog clocksource. */ | ||
| 284 | if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) { | 287 | if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) { |
| 285 | clocksource_unstable(cs, cs_nsec - wd_nsec); | 288 | clocksource_unstable(cs, cs_nsec - wd_nsec); |
| 286 | continue; | 289 | continue; |
| @@ -318,7 +321,6 @@ static inline void clocksource_start_watchdog(void) | |||
| 318 | return; | 321 | return; |
| 319 | init_timer(&watchdog_timer); | 322 | init_timer(&watchdog_timer); |
| 320 | watchdog_timer.function = clocksource_watchdog; | 323 | watchdog_timer.function = clocksource_watchdog; |
| 321 | watchdog_last = watchdog->read(watchdog); | ||
| 322 | watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; | 324 | watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; |
| 323 | add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask)); | 325 | add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask)); |
| 324 | watchdog_running = 1; | 326 | watchdog_running = 1; |
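Note: the watchdog now keeps a last value per monitored clocksource (cs_last alongside wd_last) and reads both clocks back-to-back with interrupts disabled, so each comparison covers the same measurement window. The comparison itself — convert both deltas to nanoseconds and flag the clock if they diverge past a threshold — can be tried from user space with two clock IDs (Linux-specific CLOCK_MONOTONIC_RAW assumed; the threshold is arbitrary):

  #include <stdio.h>
  #include <stdlib.h>
  #include <time.h>
  #include <unistd.h>

  static long long ns(struct timespec a, struct timespec b)   /* b - a in ns */
  {
          return (b.tv_sec - a.tv_sec) * 1000000000LL + (b.tv_nsec - a.tv_nsec);
  }

  int main(void)
  {
          struct timespec cs0, wd0, cs1, wd1;
          const long long threshold_ns = 5 * 1000 * 1000;     /* 5 ms, arbitrary */

          /* Read both clocks as close together as user space allows. */
          clock_gettime(CLOCK_MONOTONIC, &cs0);
          clock_gettime(CLOCK_MONOTONIC_RAW, &wd0);

          sleep(1);

          clock_gettime(CLOCK_MONOTONIC, &cs1);
          clock_gettime(CLOCK_MONOTONIC_RAW, &wd1);

          long long cs_nsec = ns(cs0, cs1);
          long long wd_nsec = ns(wd0, wd1);

          printf("clock delta %lld ns, watchdog delta %lld ns\n", cs_nsec, wd_nsec);
          if (llabs(cs_nsec - wd_nsec) > threshold_ns)
                  puts("clocks diverge: would mark the clocksource unstable");
          else
                  puts("clocks agree within the threshold");
          return 0;
  }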
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 342408cf68dd..2b021b0e8507 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
| @@ -604,6 +604,12 @@ static struct timespec timekeeping_suspend_time; | |||
| 604 | */ | 604 | */ |
| 605 | static void __timekeeping_inject_sleeptime(struct timespec *delta) | 605 | static void __timekeeping_inject_sleeptime(struct timespec *delta) |
| 606 | { | 606 | { |
| 607 | if (!timespec_valid(delta)) { | ||
| 608 | printk(KERN_WARNING "__timekeeping_inject_sleeptime: Invalid " | ||
| 609 | "sleep delta value!\n"); | ||
| 610 | return; | ||
| 611 | } | ||
| 612 | |||
| 607 | xtime = timespec_add(xtime, *delta); | 613 | xtime = timespec_add(xtime, *delta); |
| 608 | wall_to_monotonic = timespec_sub(wall_to_monotonic, *delta); | 614 | wall_to_monotonic = timespec_sub(wall_to_monotonic, *delta); |
| 609 | total_sleep_time = timespec_add(total_sleep_time, *delta); | 615 | total_sleep_time = timespec_add(total_sleep_time, *delta); |
| @@ -686,12 +692,34 @@ static void timekeeping_resume(void) | |||
| 686 | static int timekeeping_suspend(void) | 692 | static int timekeeping_suspend(void) |
| 687 | { | 693 | { |
| 688 | unsigned long flags; | 694 | unsigned long flags; |
| 695 | struct timespec delta, delta_delta; | ||
| 696 | static struct timespec old_delta; | ||
| 689 | 697 | ||
| 690 | read_persistent_clock(&timekeeping_suspend_time); | 698 | read_persistent_clock(&timekeeping_suspend_time); |
| 691 | 699 | ||
| 692 | write_seqlock_irqsave(&xtime_lock, flags); | 700 | write_seqlock_irqsave(&xtime_lock, flags); |
| 693 | timekeeping_forward_now(); | 701 | timekeeping_forward_now(); |
| 694 | timekeeping_suspended = 1; | 702 | timekeeping_suspended = 1; |
| 703 | |||
| 704 | /* | ||
| 705 | * To avoid drift caused by repeated suspend/resumes, | ||
| 706 | * which each can add ~1 second drift error, | ||
| 707 | * try to compensate so the difference in system time | ||
| 708 | * and persistent_clock time stays close to constant. | ||
| 709 | */ | ||
| 710 | delta = timespec_sub(xtime, timekeeping_suspend_time); | ||
| 711 | delta_delta = timespec_sub(delta, old_delta); | ||
| 712 | if (abs(delta_delta.tv_sec) >= 2) { | ||
| 713 | /* | ||
| 714 | * if delta_delta is too large, assume time correction | ||
| 715 | * has occurred and set old_delta to the current delta. | ||
| 716 | */ | ||
| 717 | old_delta = delta; | ||
| 718 | } else { | ||
| 719 | /* Otherwise try to adjust old_system to compensate */ | ||
| 720 | timekeeping_suspend_time = | ||
| 721 | timespec_add(timekeeping_suspend_time, delta_delta); | ||
| 722 | } | ||
| 695 | write_sequnlock_irqrestore(&xtime_lock, flags); | 723 | write_sequnlock_irqrestore(&xtime_lock, flags); |
| 696 | 724 | ||
| 697 | clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); | 725 | clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); |
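Note: the suspend path now remembers the previous difference between xtime and the persistent clock (old_delta); if that difference moved by less than about two seconds since the last suspend, the change is folded back into timekeeping_suspend_time so repeated suspend/resume cycles don't each contribute up to a second of drift. The arithmetic on a couple of sample values, using plain struct timespec helpers (nothing here is kernel code):

  #include <stdio.h>
  #include <time.h>

  static struct timespec ts_sub(struct timespec a, struct timespec b)   /* a - b */
  {
          struct timespec r = { a.tv_sec - b.tv_sec, a.tv_nsec - b.tv_nsec };
          if (r.tv_nsec < 0) { r.tv_sec--; r.tv_nsec += 1000000000L; }
          return r;
  }

  static struct timespec ts_add(struct timespec a, struct timespec b)
  {
          struct timespec r = { a.tv_sec + b.tv_sec, a.tv_nsec + b.tv_nsec };
          if (r.tv_nsec >= 1000000000L) { r.tv_sec++; r.tv_nsec -= 1000000000L; }
          return r;
  }

  int main(void)
  {
          /* Pretend system time runs ~0.4s ahead of the persistent clock now,
           * and was ~0.1s ahead at the previous suspend. */
          struct timespec xtime        = { 1000, 400000000L };
          struct timespec suspend_time = { 1000, 0 };
          struct timespec old_delta    = { 0,    100000000L };

          struct timespec delta       = ts_sub(xtime, suspend_time);
          struct timespec delta_delta = ts_sub(delta, old_delta);

          if (delta_delta.tv_sec >= 2 || delta_delta.tv_sec <= -2) {
                  old_delta = delta;     /* looks like a real clock correction */
          } else {
                  /* Small drift: compensate so it doesn't accumulate. */
                  suspend_time = ts_add(suspend_time, delta_delta);
          }

          printf("compensated suspend time: %ld.%09ld\n",
                 (long)suspend_time.tv_sec, suspend_time.tv_nsec);
          return 0;
  }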
diff --git a/kernel/timer.c b/kernel/timer.c index fd6198692b57..8cff36119e4d 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
| @@ -749,16 +749,15 @@ unsigned long apply_slack(struct timer_list *timer, unsigned long expires) | |||
| 749 | unsigned long expires_limit, mask; | 749 | unsigned long expires_limit, mask; |
| 750 | int bit; | 750 | int bit; |
| 751 | 751 | ||
| 752 | expires_limit = expires; | ||
| 753 | |||
| 754 | if (timer->slack >= 0) { | 752 | if (timer->slack >= 0) { |
| 755 | expires_limit = expires + timer->slack; | 753 | expires_limit = expires + timer->slack; |
| 756 | } else { | 754 | } else { |
| 757 | unsigned long now = jiffies; | 755 | long delta = expires - jiffies; |
| 756 | |||
| 757 | if (delta < 256) | ||
| 758 | return expires; | ||
| 758 | 759 | ||
| 759 | /* No slack, if already expired else auto slack 0.4% */ | 760 | expires_limit = expires + delta / 256; |
| 760 | if (time_after(expires, now)) | ||
| 761 | expires_limit = expires + (expires - now)/256; | ||
| 762 | } | 761 | } |
| 763 | mask = expires ^ expires_limit; | 762 | mask = expires ^ expires_limit; |
| 764 | if (mask == 0) | 763 | if (mask == 0) |
| @@ -795,6 +794,8 @@ unsigned long apply_slack(struct timer_list *timer, unsigned long expires) | |||
| 795 | */ | 794 | */ |
| 796 | int mod_timer(struct timer_list *timer, unsigned long expires) | 795 | int mod_timer(struct timer_list *timer, unsigned long expires) |
| 797 | { | 796 | { |
| 797 | expires = apply_slack(timer, expires); | ||
| 798 | |||
| 798 | /* | 799 | /* |
| 799 | * This is a common optimization triggered by the | 800 | * This is a common optimization triggered by the |
| 800 | * networking code - if the timer is re-modified | 801 | * networking code - if the timer is re-modified |
| @@ -803,8 +804,6 @@ int mod_timer(struct timer_list *timer, unsigned long expires) | |||
| 803 | if (timer_pending(timer) && timer->expires == expires) | 804 | if (timer_pending(timer) && timer->expires == expires) |
| 804 | return 1; | 805 | return 1; |
| 805 | 806 | ||
| 806 | expires = apply_slack(timer, expires); | ||
| 807 | |||
| 808 | return __mod_timer(timer, expires, false, TIMER_NOT_PINNED); | 807 | return __mod_timer(timer, expires, false, TIMER_NOT_PINNED); |
| 809 | } | 808 | } |
| 810 | EXPORT_SYMBOL(mod_timer); | 809 | EXPORT_SYMBOL(mod_timer); |
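Note: with the default negative slack, the rewritten code skips rounding when the timer is less than 256 jiffies away and otherwise allows 1/256th (~0.4%) of the remaining delta as slack, then rounds down to a power-of-two boundary so nearby timers land in the same expiry bucket; moving apply_slack() ahead of the "already pending at the same expiry" check in mod_timer() makes that early return compare rounded values. A stand-alone version of the rounding step — the find-last-bit tail is reconstructed from memory, so treat it as an approximation, and it assumes a GCC/Clang builtin:

  #include <stdio.h>

  /* Round 'expires' (in jiffies) within the slack window
   * [expires, expires + delta/256] down to a coarse boundary. */
  static unsigned long apply_auto_slack(unsigned long expires, unsigned long now)
  {
          long delta = (long)(expires - now);
          unsigned long expires_limit, mask;
          int bit;

          if (delta < 256)
                  return expires;                 /* too close: no rounding */

          expires_limit = expires + delta / 256;
          mask = expires ^ expires_limit;
          if (mask == 0)
                  return expires;

          /* Highest bit in which expires and expires_limit differ. */
          bit = (int)(sizeof(long) * 8 - 1) - __builtin_clzl(mask);
          mask = (1UL << bit) - 1;
          return expires_limit & ~mask;           /* round down to that boundary */
  }

  int main(void)
  {
          unsigned long now = 1000000;

          for (unsigned long ahead = 100; ahead <= 100000; ahead *= 10)
                  printf("expires now+%-6lu -> now+%lu\n", ahead,
                         apply_auto_slack(now + ahead, now) - now);
          return 0;
  }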
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 1ee417fcbfa5..c3e4575e7829 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
| @@ -32,7 +32,6 @@ | |||
| 32 | 32 | ||
| 33 | #include <trace/events/sched.h> | 33 | #include <trace/events/sched.h> |
| 34 | 34 | ||
| 35 | #include <asm/ftrace.h> | ||
| 36 | #include <asm/setup.h> | 35 | #include <asm/setup.h> |
| 37 | 36 | ||
| 38 | #include "trace_output.h" | 37 | #include "trace_output.h" |
| @@ -82,14 +81,14 @@ static int ftrace_disabled __read_mostly; | |||
| 82 | 81 | ||
| 83 | static DEFINE_MUTEX(ftrace_lock); | 82 | static DEFINE_MUTEX(ftrace_lock); |
| 84 | 83 | ||
| 85 | static struct ftrace_ops ftrace_list_end __read_mostly = | 84 | static struct ftrace_ops ftrace_list_end __read_mostly = { |
| 86 | { | ||
| 87 | .func = ftrace_stub, | 85 | .func = ftrace_stub, |
| 88 | }; | 86 | }; |
| 89 | 87 | ||
| 90 | static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end; | 88 | static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end; |
| 91 | static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end; | 89 | static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end; |
| 92 | ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; | 90 | ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; |
| 91 | static ftrace_func_t __ftrace_trace_function_delay __read_mostly = ftrace_stub; | ||
| 93 | ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub; | 92 | ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub; |
| 94 | ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; | 93 | ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; |
| 95 | static struct ftrace_ops global_ops; | 94 | static struct ftrace_ops global_ops; |
| @@ -148,9 +147,11 @@ void clear_ftrace_function(void) | |||
| 148 | { | 147 | { |
| 149 | ftrace_trace_function = ftrace_stub; | 148 | ftrace_trace_function = ftrace_stub; |
| 150 | __ftrace_trace_function = ftrace_stub; | 149 | __ftrace_trace_function = ftrace_stub; |
| 150 | __ftrace_trace_function_delay = ftrace_stub; | ||
| 151 | ftrace_pid_function = ftrace_stub; | 151 | ftrace_pid_function = ftrace_stub; |
| 152 | } | 152 | } |
| 153 | 153 | ||
| 154 | #undef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST | ||
| 154 | #ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST | 155 | #ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST |
| 155 | /* | 156 | /* |
| 156 | * For those archs that do not test ftrace_trace_stop in their | 157 | * For those archs that do not test ftrace_trace_stop in their |
| @@ -210,7 +211,12 @@ static void update_ftrace_function(void) | |||
| 210 | #ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST | 211 | #ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST |
| 211 | ftrace_trace_function = func; | 212 | ftrace_trace_function = func; |
| 212 | #else | 213 | #else |
| 214 | #ifdef CONFIG_DYNAMIC_FTRACE | ||
| 215 | /* do not update till all functions have been modified */ | ||
| 216 | __ftrace_trace_function_delay = func; | ||
| 217 | #else | ||
| 213 | __ftrace_trace_function = func; | 218 | __ftrace_trace_function = func; |
| 219 | #endif | ||
| 214 | ftrace_trace_function = ftrace_test_stop_func; | 220 | ftrace_trace_function = ftrace_test_stop_func; |
| 215 | #endif | 221 | #endif |
| 216 | } | 222 | } |
| @@ -785,8 +791,7 @@ static void unregister_ftrace_profiler(void) | |||
| 785 | unregister_ftrace_graph(); | 791 | unregister_ftrace_graph(); |
| 786 | } | 792 | } |
| 787 | #else | 793 | #else |
| 788 | static struct ftrace_ops ftrace_profile_ops __read_mostly = | 794 | static struct ftrace_ops ftrace_profile_ops __read_mostly = { |
| 789 | { | ||
| 790 | .func = function_profile_call, | 795 | .func = function_profile_call, |
| 791 | }; | 796 | }; |
| 792 | 797 | ||
| @@ -806,19 +811,10 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf, | |||
| 806 | size_t cnt, loff_t *ppos) | 811 | size_t cnt, loff_t *ppos) |
| 807 | { | 812 | { |
| 808 | unsigned long val; | 813 | unsigned long val; |
| 809 | char buf[64]; /* big enough to hold a number */ | ||
| 810 | int ret; | 814 | int ret; |
| 811 | 815 | ||
| 812 | if (cnt >= sizeof(buf)) | 816 | ret = kstrtoul_from_user(ubuf, cnt, 10, &val); |
| 813 | return -EINVAL; | 817 | if (ret) |
| 814 | |||
| 815 | if (copy_from_user(&buf, ubuf, cnt)) | ||
| 816 | return -EFAULT; | ||
| 817 | |||
| 818 | buf[cnt] = 0; | ||
| 819 | |||
| 820 | ret = strict_strtoul(buf, 10, &val); | ||
| 821 | if (ret < 0) | ||
| 822 | return ret; | 818 | return ret; |
| 823 | 819 | ||
| 824 | val = !!val; | 820 | val = !!val; |
| @@ -1182,8 +1178,14 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash) | |||
| 1182 | return NULL; | 1178 | return NULL; |
| 1183 | } | 1179 | } |
| 1184 | 1180 | ||
| 1181 | static void | ||
| 1182 | ftrace_hash_rec_disable(struct ftrace_ops *ops, int filter_hash); | ||
| 1183 | static void | ||
| 1184 | ftrace_hash_rec_enable(struct ftrace_ops *ops, int filter_hash); | ||
| 1185 | |||
| 1185 | static int | 1186 | static int |
| 1186 | ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src) | 1187 | ftrace_hash_move(struct ftrace_ops *ops, int enable, |
| 1188 | struct ftrace_hash **dst, struct ftrace_hash *src) | ||
| 1187 | { | 1189 | { |
| 1188 | struct ftrace_func_entry *entry; | 1190 | struct ftrace_func_entry *entry; |
| 1189 | struct hlist_node *tp, *tn; | 1191 | struct hlist_node *tp, *tn; |
| @@ -1193,9 +1195,16 @@ ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src) | |||
| 1193 | unsigned long key; | 1195 | unsigned long key; |
| 1194 | int size = src->count; | 1196 | int size = src->count; |
| 1195 | int bits = 0; | 1197 | int bits = 0; |
| 1198 | int ret; | ||
| 1196 | int i; | 1199 | int i; |
| 1197 | 1200 | ||
| 1198 | /* | 1201 | /* |
| 1202 | * Remove the current set, update the hash and add | ||
| 1203 | * them back. | ||
| 1204 | */ | ||
| 1205 | ftrace_hash_rec_disable(ops, enable); | ||
| 1206 | |||
| 1207 | /* | ||
| 1199 | * If the new source is empty, just free dst and assign it | 1208 | * If the new source is empty, just free dst and assign it |
| 1200 | * the empty_hash. | 1209 | * the empty_hash. |
| 1201 | */ | 1210 | */ |
| @@ -1215,9 +1224,10 @@ ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src) | |||
| 1215 | if (bits > FTRACE_HASH_MAX_BITS) | 1224 | if (bits > FTRACE_HASH_MAX_BITS) |
| 1216 | bits = FTRACE_HASH_MAX_BITS; | 1225 | bits = FTRACE_HASH_MAX_BITS; |
| 1217 | 1226 | ||
| 1227 | ret = -ENOMEM; | ||
| 1218 | new_hash = alloc_ftrace_hash(bits); | 1228 | new_hash = alloc_ftrace_hash(bits); |
| 1219 | if (!new_hash) | 1229 | if (!new_hash) |
| 1220 | return -ENOMEM; | 1230 | goto out; |
| 1221 | 1231 | ||
| 1222 | size = 1 << src->size_bits; | 1232 | size = 1 << src->size_bits; |
| 1223 | for (i = 0; i < size; i++) { | 1233 | for (i = 0; i < size; i++) { |
| @@ -1236,7 +1246,16 @@ ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src) | |||
| 1236 | rcu_assign_pointer(*dst, new_hash); | 1246 | rcu_assign_pointer(*dst, new_hash); |
| 1237 | free_ftrace_hash_rcu(old_hash); | 1247 | free_ftrace_hash_rcu(old_hash); |
| 1238 | 1248 | ||
| 1239 | return 0; | 1249 | ret = 0; |
| 1250 | out: | ||
| 1251 | /* | ||
| 1252 | * Enable regardless of ret: | ||
| 1253 | * On success, we enable the new hash. | ||
| 1254 | * On failure, we re-enable the original hash. | ||
| 1255 | */ | ||
| 1256 | ftrace_hash_rec_enable(ops, enable); | ||
| 1257 | |||
| 1258 | return ret; | ||
| 1240 | } | 1259 | } |
| 1241 | 1260 | ||
| 1242 | /* | 1261 | /* |
| @@ -1596,6 +1615,12 @@ static int __ftrace_modify_code(void *data) | |||
| 1596 | { | 1615 | { |
| 1597 | int *command = data; | 1616 | int *command = data; |
| 1598 | 1617 | ||
| 1618 | /* | ||
| 1619 | * Do not call function tracer while we update the code. | ||
| 1620 | * We are in stop machine, no worrying about races. | ||
| 1621 | */ | ||
| 1622 | function_trace_stop++; | ||
| 1623 | |||
| 1599 | if (*command & FTRACE_ENABLE_CALLS) | 1624 | if (*command & FTRACE_ENABLE_CALLS) |
| 1600 | ftrace_replace_code(1); | 1625 | ftrace_replace_code(1); |
| 1601 | else if (*command & FTRACE_DISABLE_CALLS) | 1626 | else if (*command & FTRACE_DISABLE_CALLS) |
| @@ -1609,6 +1634,18 @@ static int __ftrace_modify_code(void *data) | |||
| 1609 | else if (*command & FTRACE_STOP_FUNC_RET) | 1634 | else if (*command & FTRACE_STOP_FUNC_RET) |
| 1610 | ftrace_disable_ftrace_graph_caller(); | 1635 | ftrace_disable_ftrace_graph_caller(); |
| 1611 | 1636 | ||
| 1637 | #ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST | ||
| 1638 | /* | ||
| 1639 | * For archs that call ftrace_test_stop_func(), we must | ||
| 1640 | * wait till after we update all the function callers | ||
| 1641 | * before we update the callback. This keeps different | ||
| 1642 | * ops that record different functions from corrupting | ||
| 1643 | * each other. | ||
| 1644 | */ | ||
| 1645 | __ftrace_trace_function = __ftrace_trace_function_delay; | ||
| 1646 | #endif | ||
| 1647 | function_trace_stop--; | ||
| 1648 | |||
| 1612 | return 0; | 1649 | return 0; |
| 1613 | } | 1650 | } |
| 1614 | 1651 | ||
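Note: for architectures without the mcount-test hook, __ftrace_modify_code() now raises function_trace_stop while it rewrites call sites, parks the new callback in __ftrace_trace_function_delay, and only publishes it to __ftrace_trace_function after every site has been updated, so ops that filter different functions cannot see each other's half-updated state. The general shape — gate callers with a stop flag, patch, then flip the pointer — modeled in C11 (a sketch of the idea, not the arch trampoline; compile with -std=c11):

  #include <stdatomic.h>
  #include <stdio.h>

  static void old_cb(void) { puts("old callback"); }
  static void new_cb(void) { puts("new callback"); }

  static _Atomic(void (*)(void)) trace_fn = old_cb;
  static atomic_int trace_stop;                   /* >0: tracing suppressed */

  static void call_site(void)                     /* stand-in for a patched call */
  {
          if (atomic_load(&trace_stop))
                  return;                         /* like ftrace_test_stop_func() */
          atomic_load(&trace_fn)();
  }

  int main(void)
  {
          call_site();                            /* old callback runs */

          atomic_fetch_add(&trace_stop, 1);       /* stop callers */
          /* ... rewrite all call sites here ... */
          atomic_store(&trace_fn, new_cb);        /* publish only after patching */
          atomic_fetch_sub(&trace_stop, 1);

          call_site();                            /* new callback runs */
          return 0;
  }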
| @@ -1744,10 +1781,36 @@ static cycle_t ftrace_update_time; | |||
| 1744 | static unsigned long ftrace_update_cnt; | 1781 | static unsigned long ftrace_update_cnt; |
| 1745 | unsigned long ftrace_update_tot_cnt; | 1782 | unsigned long ftrace_update_tot_cnt; |
| 1746 | 1783 | ||
| 1784 | static int ops_traces_mod(struct ftrace_ops *ops) | ||
| 1785 | { | ||
| 1786 | struct ftrace_hash *hash; | ||
| 1787 | |||
| 1788 | hash = ops->filter_hash; | ||
| 1789 | return !!(!hash || !hash->count); | ||
| 1790 | } | ||
| 1791 | |||
| 1747 | static int ftrace_update_code(struct module *mod) | 1792 | static int ftrace_update_code(struct module *mod) |
| 1748 | { | 1793 | { |
| 1749 | struct dyn_ftrace *p; | 1794 | struct dyn_ftrace *p; |
| 1750 | cycle_t start, stop; | 1795 | cycle_t start, stop; |
| 1796 | unsigned long ref = 0; | ||
| 1797 | |||
| 1798 | /* | ||
| 1799 | * When adding a module, we need to check if tracers are | ||
| 1800 | * currently enabled and if they are set to trace all functions. | ||
| 1801 | * If they are, we need to enable the module functions as well | ||
| 1802 | * as update the reference counts for those function records. | ||
| 1803 | */ | ||
| 1804 | if (mod) { | ||
| 1805 | struct ftrace_ops *ops; | ||
| 1806 | |||
| 1807 | for (ops = ftrace_ops_list; | ||
| 1808 | ops != &ftrace_list_end; ops = ops->next) { | ||
| 1809 | if (ops->flags & FTRACE_OPS_FL_ENABLED && | ||
| 1810 | ops_traces_mod(ops)) | ||
| 1811 | ref++; | ||
| 1812 | } | ||
| 1813 | } | ||
| 1751 | 1814 | ||
| 1752 | start = ftrace_now(raw_smp_processor_id()); | 1815 | start = ftrace_now(raw_smp_processor_id()); |
| 1753 | ftrace_update_cnt = 0; | 1816 | ftrace_update_cnt = 0; |
| @@ -1760,7 +1823,7 @@ static int ftrace_update_code(struct module *mod) | |||
| 1760 | 1823 | ||
| 1761 | p = ftrace_new_addrs; | 1824 | p = ftrace_new_addrs; |
| 1762 | ftrace_new_addrs = p->newlist; | 1825 | ftrace_new_addrs = p->newlist; |
| 1763 | p->flags = 0L; | 1826 | p->flags = ref; |
| 1764 | 1827 | ||
| 1765 | /* | 1828 | /* |
| 1766 | * Do the initial record conversion from mcount jump | 1829 | * Do the initial record conversion from mcount jump |
| @@ -1783,7 +1846,7 @@ static int ftrace_update_code(struct module *mod) | |||
| 1783 | * conversion puts the module to the correct state, thus | 1846 | * conversion puts the module to the correct state, thus |
| 1784 | * passing the ftrace_make_call check. | 1847 | * passing the ftrace_make_call check. |
| 1785 | */ | 1848 | */ |
| 1786 | if (ftrace_start_up) { | 1849 | if (ftrace_start_up && ref) { |
| 1787 | int failed = __ftrace_replace_code(p, 1); | 1850 | int failed = __ftrace_replace_code(p, 1); |
| 1788 | if (failed) { | 1851 | if (failed) { |
| 1789 | ftrace_bug(failed, p->ip); | 1852 | ftrace_bug(failed, p->ip); |
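Editor's note: ftrace_update_code() now seeds each new record's flags with the number of currently enabled ops that trace every function (an empty or missing filter hash, as ops_traces_mod() checks). A rough user-space model of that reference count, under the assumption that struct ops and the list below merely stand in for the kernel structures:

    #include <stdio.h>

    struct ops {
            int enabled;
            int filter_count;       /* 0 == "trace everything", like an empty filter hash */
            struct ops *next;
    };

    /* Mirrors ops_traces_mod(): an op traces a new module iff it filters nothing. */
    static int traces_all(const struct ops *op)
    {
            return op->filter_count == 0;
    }

    static unsigned long count_refs(const struct ops *list)
    {
            unsigned long ref = 0;

            for (const struct ops *op = list; op; op = op->next)
                    if (op->enabled && traces_all(op))
                            ref++;
            return ref;
    }

    int main(void)
    {
            struct ops c = { 1, 3, NULL };  /* enabled but filtered: no ref */
            struct ops b = { 1, 0, &c };    /* enabled, traces everything: counts */
            struct ops a = { 0, 0, &b };    /* disabled: no ref */

            printf("new records start with %lu reference(s)\n", count_refs(&a));
            return 0;
    }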
| @@ -2407,10 +2470,9 @@ ftrace_match_module_records(struct ftrace_hash *hash, char *buff, char *mod) | |||
| 2407 | */ | 2470 | */ |
| 2408 | 2471 | ||
| 2409 | static int | 2472 | static int |
| 2410 | ftrace_mod_callback(char *func, char *cmd, char *param, int enable) | 2473 | ftrace_mod_callback(struct ftrace_hash *hash, |
| 2474 | char *func, char *cmd, char *param, int enable) | ||
| 2411 | { | 2475 | { |
| 2412 | struct ftrace_ops *ops = &global_ops; | ||
| 2413 | struct ftrace_hash *hash; | ||
| 2414 | char *mod; | 2476 | char *mod; |
| 2415 | int ret = -EINVAL; | 2477 | int ret = -EINVAL; |
| 2416 | 2478 | ||
| @@ -2430,11 +2492,6 @@ ftrace_mod_callback(char *func, char *cmd, char *param, int enable) | |||
| 2430 | if (!strlen(mod)) | 2492 | if (!strlen(mod)) |
| 2431 | return ret; | 2493 | return ret; |
| 2432 | 2494 | ||
| 2433 | if (enable) | ||
| 2434 | hash = ops->filter_hash; | ||
| 2435 | else | ||
| 2436 | hash = ops->notrace_hash; | ||
| 2437 | |||
| 2438 | ret = ftrace_match_module_records(hash, func, mod); | 2495 | ret = ftrace_match_module_records(hash, func, mod); |
| 2439 | if (!ret) | 2496 | if (!ret) |
| 2440 | ret = -EINVAL; | 2497 | ret = -EINVAL; |
| @@ -2740,7 +2797,7 @@ static int ftrace_process_regex(struct ftrace_hash *hash, | |||
| 2740 | { | 2797 | { |
| 2741 | char *func, *command, *next = buff; | 2798 | char *func, *command, *next = buff; |
| 2742 | struct ftrace_func_command *p; | 2799 | struct ftrace_func_command *p; |
| 2743 | int ret; | 2800 | int ret = -EINVAL; |
| 2744 | 2801 | ||
| 2745 | func = strsep(&next, ":"); | 2802 | func = strsep(&next, ":"); |
| 2746 | 2803 | ||
| @@ -2760,7 +2817,7 @@ static int ftrace_process_regex(struct ftrace_hash *hash, | |||
| 2760 | mutex_lock(&ftrace_cmd_mutex); | 2817 | mutex_lock(&ftrace_cmd_mutex); |
| 2761 | list_for_each_entry(p, &ftrace_commands, list) { | 2818 | list_for_each_entry(p, &ftrace_commands, list) { |
| 2762 | if (strcmp(p->name, command) == 0) { | 2819 | if (strcmp(p->name, command) == 0) { |
| 2763 | ret = p->func(func, command, next, enable); | 2820 | ret = p->func(hash, func, command, next, enable); |
| 2764 | goto out_unlock; | 2821 | goto out_unlock; |
| 2765 | } | 2822 | } |
| 2766 | } | 2823 | } |
| @@ -2857,7 +2914,11 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, | |||
| 2857 | ftrace_match_records(hash, buf, len); | 2914 | ftrace_match_records(hash, buf, len); |
| 2858 | 2915 | ||
| 2859 | mutex_lock(&ftrace_lock); | 2916 | mutex_lock(&ftrace_lock); |
| 2860 | ret = ftrace_hash_move(orig_hash, hash); | 2917 | ret = ftrace_hash_move(ops, enable, orig_hash, hash); |
| 2918 | if (!ret && ops->flags & FTRACE_OPS_FL_ENABLED | ||
| 2919 | && ftrace_enabled) | ||
| 2920 | ftrace_run_update_code(FTRACE_ENABLE_CALLS); | ||
| 2921 | |||
| 2861 | mutex_unlock(&ftrace_lock); | 2922 | mutex_unlock(&ftrace_lock); |
| 2862 | 2923 | ||
| 2863 | mutex_unlock(&ftrace_regex_lock); | 2924 | mutex_unlock(&ftrace_regex_lock); |
| @@ -3040,18 +3101,12 @@ ftrace_regex_release(struct inode *inode, struct file *file) | |||
| 3040 | orig_hash = &iter->ops->notrace_hash; | 3101 | orig_hash = &iter->ops->notrace_hash; |
| 3041 | 3102 | ||
| 3042 | mutex_lock(&ftrace_lock); | 3103 | mutex_lock(&ftrace_lock); |
| 3043 | /* | 3104 | ret = ftrace_hash_move(iter->ops, filter_hash, |
| 3044 | * Remove the current set, update the hash and add | 3105 | orig_hash, iter->hash); |
| 3045 | * them back. | 3106 | if (!ret && (iter->ops->flags & FTRACE_OPS_FL_ENABLED) |
| 3046 | */ | 3107 | && ftrace_enabled) |
| 3047 | ftrace_hash_rec_disable(iter->ops, filter_hash); | 3108 | ftrace_run_update_code(FTRACE_ENABLE_CALLS); |
| 3048 | ret = ftrace_hash_move(orig_hash, iter->hash); | 3109 | |
| 3049 | if (!ret) { | ||
| 3050 | ftrace_hash_rec_enable(iter->ops, filter_hash); | ||
| 3051 | if (iter->ops->flags & FTRACE_OPS_FL_ENABLED | ||
| 3052 | && ftrace_enabled) | ||
| 3053 | ftrace_run_update_code(FTRACE_ENABLE_CALLS); | ||
| 3054 | } | ||
| 3055 | mutex_unlock(&ftrace_lock); | 3110 | mutex_unlock(&ftrace_lock); |
| 3056 | } | 3111 | } |
| 3057 | free_ftrace_hash(iter->hash); | 3112 | free_ftrace_hash(iter->hash); |
| @@ -3330,6 +3385,7 @@ static int ftrace_process_locs(struct module *mod, | |||
| 3330 | { | 3385 | { |
| 3331 | unsigned long *p; | 3386 | unsigned long *p; |
| 3332 | unsigned long addr; | 3387 | unsigned long addr; |
| 3388 | unsigned long flags = 0; /* Shut up gcc */ | ||
| 3333 | 3389 | ||
| 3334 | mutex_lock(&ftrace_lock); | 3390 | mutex_lock(&ftrace_lock); |
| 3335 | p = start; | 3391 | p = start; |
| @@ -3346,7 +3402,19 @@ static int ftrace_process_locs(struct module *mod, | |||
| 3346 | ftrace_record_ip(addr); | 3402 | ftrace_record_ip(addr); |
| 3347 | } | 3403 | } |
| 3348 | 3404 | ||
| 3405 | /* | ||
| 3406 | * We only need to disable interrupts on start up | ||
| 3407 | * because we are modifying code that an interrupt | ||
| 3408 | * may execute, and the modification is not atomic. | ||
| 3409 | * But for modules, nothing runs the code we modify | ||
| 3410 | * until we are finished with it, and there's no | ||
| 3411 | * reason to cause large interrupt latencies while we do it. | ||
| 3412 | */ | ||
| 3413 | if (!mod) | ||
| 3414 | local_irq_save(flags); | ||
| 3349 | ftrace_update_code(mod); | 3415 | ftrace_update_code(mod); |
| 3416 | if (!mod) | ||
| 3417 | local_irq_restore(flags); | ||
| 3350 | mutex_unlock(&ftrace_lock); | 3418 | mutex_unlock(&ftrace_lock); |
| 3351 | 3419 | ||
| 3352 | return 0; | 3420 | return 0; |
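Editor's note: the ftrace_process_locs() hunk only disables interrupts for the built-in (boot-time) pass, since module text cannot run before the conversion finishes; the flags = 0 initializer merely quiets a may-be-uninitialized warning. The pattern, as a kernel-style fragment rather than a buildable unit (do_update is an invented stand-in for ftrace_update_code()):

    unsigned long flags = 0;        /* only written when !mod; init quiets gcc */

    if (!mod)                       /* boot: patched code may run from an irq */
            local_irq_save(flags);
    do_update(mod);                 /* stand-in for ftrace_update_code(mod) */
    if (!mod)
            local_irq_restore(flags);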
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index b0c7aa407943..731201bf4acc 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
| @@ -997,15 +997,21 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, | |||
| 997 | unsigned nr_pages) | 997 | unsigned nr_pages) |
| 998 | { | 998 | { |
| 999 | struct buffer_page *bpage, *tmp; | 999 | struct buffer_page *bpage, *tmp; |
| 1000 | unsigned long addr; | ||
| 1001 | LIST_HEAD(pages); | 1000 | LIST_HEAD(pages); |
| 1002 | unsigned i; | 1001 | unsigned i; |
| 1003 | 1002 | ||
| 1004 | WARN_ON(!nr_pages); | 1003 | WARN_ON(!nr_pages); |
| 1005 | 1004 | ||
| 1006 | for (i = 0; i < nr_pages; i++) { | 1005 | for (i = 0; i < nr_pages; i++) { |
| 1006 | struct page *page; | ||
| 1007 | /* | ||
| 1008 | * The __GFP_NORETRY flag makes sure that the allocation fails | ||
| 1009 | * gracefully without invoking the oom-killer, so the system is | ||
| 1010 | * not destabilized. | ||
| 1011 | */ | ||
| 1007 | bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), | 1012 | bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), |
| 1008 | GFP_KERNEL, cpu_to_node(cpu_buffer->cpu)); | 1013 | GFP_KERNEL | __GFP_NORETRY, |
| 1014 | cpu_to_node(cpu_buffer->cpu)); | ||
| 1009 | if (!bpage) | 1015 | if (!bpage) |
| 1010 | goto free_pages; | 1016 | goto free_pages; |
| 1011 | 1017 | ||
| @@ -1013,10 +1019,11 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, | |||
| 1013 | 1019 | ||
| 1014 | list_add(&bpage->list, &pages); | 1020 | list_add(&bpage->list, &pages); |
| 1015 | 1021 | ||
| 1016 | addr = __get_free_page(GFP_KERNEL); | 1022 | page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu), |
| 1017 | if (!addr) | 1023 | GFP_KERNEL | __GFP_NORETRY, 0); |
| 1024 | if (!page) | ||
| 1018 | goto free_pages; | 1025 | goto free_pages; |
| 1019 | bpage->page = (void *)addr; | 1026 | bpage->page = page_address(page); |
| 1020 | rb_init_page(bpage->page); | 1027 | rb_init_page(bpage->page); |
| 1021 | } | 1028 | } |
| 1022 | 1029 | ||
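Editor's note: rb_allocate_pages() builds the page list with GFP_KERNEL | __GFP_NORETRY so a failed allocation returns NULL instead of waking the OOM killer, and any partially built list is torn down through the free_pages label. A user-space analogue of that allocate-or-unwind loop (struct list_node and alloc_pages_list are invented for the example):

    #include <stdio.h>
    #include <stdlib.h>

    struct list_node {
            void *page;
            struct list_node *next;
    };

    /* Allocate nr pages; on any failure free what was built and return NULL. */
    static struct list_node *alloc_pages_list(unsigned nr, size_t page_size)
    {
            struct list_node *head = NULL;

            for (unsigned i = 0; i < nr; i++) {
                    struct list_node *node = calloc(1, sizeof(*node));
                    if (!node)
                            goto free_pages;
                    node->page = malloc(page_size);
                    if (!node->page) {
                            free(node);
                            goto free_pages;
                    }
                    node->next = head;
                    head = node;
            }
            return head;

    free_pages:
            while (head) {
                    struct list_node *tmp = head->next;
                    free(head->page);
                    free(head);
                    head = tmp;
            }
            return NULL;
    }

    int main(void)
    {
            struct list_node *pages = alloc_pages_list(4, 4096);

            printf("%s\n", pages ? "allocated 4 pages" : "allocation failed, fully unwound");
            while (pages) {
                    struct list_node *tmp = pages->next;
                    free(pages->page);
                    free(pages);
                    pages = tmp;
            }
            return 0;
    }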
| @@ -1045,7 +1052,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) | |||
| 1045 | { | 1052 | { |
| 1046 | struct ring_buffer_per_cpu *cpu_buffer; | 1053 | struct ring_buffer_per_cpu *cpu_buffer; |
| 1047 | struct buffer_page *bpage; | 1054 | struct buffer_page *bpage; |
| 1048 | unsigned long addr; | 1055 | struct page *page; |
| 1049 | int ret; | 1056 | int ret; |
| 1050 | 1057 | ||
| 1051 | cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()), | 1058 | cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()), |
| @@ -1067,10 +1074,10 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) | |||
| 1067 | rb_check_bpage(cpu_buffer, bpage); | 1074 | rb_check_bpage(cpu_buffer, bpage); |
| 1068 | 1075 | ||
| 1069 | cpu_buffer->reader_page = bpage; | 1076 | cpu_buffer->reader_page = bpage; |
| 1070 | addr = __get_free_page(GFP_KERNEL); | 1077 | page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, 0); |
| 1071 | if (!addr) | 1078 | if (!page) |
| 1072 | goto fail_free_reader; | 1079 | goto fail_free_reader; |
| 1073 | bpage->page = (void *)addr; | 1080 | bpage->page = page_address(page); |
| 1074 | rb_init_page(bpage->page); | 1081 | rb_init_page(bpage->page); |
| 1075 | 1082 | ||
| 1076 | INIT_LIST_HEAD(&cpu_buffer->reader_page->list); | 1083 | INIT_LIST_HEAD(&cpu_buffer->reader_page->list); |
| @@ -1314,7 +1321,6 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) | |||
| 1314 | unsigned nr_pages, rm_pages, new_pages; | 1321 | unsigned nr_pages, rm_pages, new_pages; |
| 1315 | struct buffer_page *bpage, *tmp; | 1322 | struct buffer_page *bpage, *tmp; |
| 1316 | unsigned long buffer_size; | 1323 | unsigned long buffer_size; |
| 1317 | unsigned long addr; | ||
| 1318 | LIST_HEAD(pages); | 1324 | LIST_HEAD(pages); |
| 1319 | int i, cpu; | 1325 | int i, cpu; |
| 1320 | 1326 | ||
| @@ -1375,16 +1381,24 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) | |||
| 1375 | 1381 | ||
| 1376 | for_each_buffer_cpu(buffer, cpu) { | 1382 | for_each_buffer_cpu(buffer, cpu) { |
| 1377 | for (i = 0; i < new_pages; i++) { | 1383 | for (i = 0; i < new_pages; i++) { |
| 1384 | struct page *page; | ||
| 1385 | /* | ||
| 1386 | * The __GFP_NORETRY flag makes sure that the allocation | ||
| 1387 | * fails gracefully without invoking the oom-killer, so | ||
| 1388 | * the system is not destabilized. | ||
| 1389 | */ | ||
| 1378 | bpage = kzalloc_node(ALIGN(sizeof(*bpage), | 1390 | bpage = kzalloc_node(ALIGN(sizeof(*bpage), |
| 1379 | cache_line_size()), | 1391 | cache_line_size()), |
| 1380 | GFP_KERNEL, cpu_to_node(cpu)); | 1392 | GFP_KERNEL | __GFP_NORETRY, |
| 1393 | cpu_to_node(cpu)); | ||
| 1381 | if (!bpage) | 1394 | if (!bpage) |
| 1382 | goto free_pages; | 1395 | goto free_pages; |
| 1383 | list_add(&bpage->list, &pages); | 1396 | list_add(&bpage->list, &pages); |
| 1384 | addr = __get_free_page(GFP_KERNEL); | 1397 | page = alloc_pages_node(cpu_to_node(cpu), |
| 1385 | if (!addr) | 1398 | GFP_KERNEL | __GFP_NORETRY, 0); |
| 1399 | if (!page) | ||
| 1386 | goto free_pages; | 1400 | goto free_pages; |
| 1387 | bpage->page = (void *)addr; | 1401 | bpage->page = page_address(page); |
| 1388 | rb_init_page(bpage->page); | 1402 | rb_init_page(bpage->page); |
| 1389 | } | 1403 | } |
| 1390 | } | 1404 | } |
| @@ -3730,16 +3744,17 @@ EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); | |||
| 3730 | * Returns: | 3744 | * Returns: |
| 3731 | * The page allocated, or NULL on error. | 3745 | * The page allocated, or NULL on error. |
| 3732 | */ | 3746 | */ |
| 3733 | void *ring_buffer_alloc_read_page(struct ring_buffer *buffer) | 3747 | void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu) |
| 3734 | { | 3748 | { |
| 3735 | struct buffer_data_page *bpage; | 3749 | struct buffer_data_page *bpage; |
| 3736 | unsigned long addr; | 3750 | struct page *page; |
| 3737 | 3751 | ||
| 3738 | addr = __get_free_page(GFP_KERNEL); | 3752 | page = alloc_pages_node(cpu_to_node(cpu), |
| 3739 | if (!addr) | 3753 | GFP_KERNEL | __GFP_NORETRY, 0); |
| 3754 | if (!page) | ||
| 3740 | return NULL; | 3755 | return NULL; |
| 3741 | 3756 | ||
| 3742 | bpage = (void *)addr; | 3757 | bpage = page_address(page); |
| 3743 | 3758 | ||
| 3744 | rb_init_page(bpage); | 3759 | rb_init_page(bpage); |
| 3745 | 3760 | ||
| @@ -3978,20 +3993,11 @@ rb_simple_write(struct file *filp, const char __user *ubuf, | |||
| 3978 | size_t cnt, loff_t *ppos) | 3993 | size_t cnt, loff_t *ppos) |
| 3979 | { | 3994 | { |
| 3980 | unsigned long *p = filp->private_data; | 3995 | unsigned long *p = filp->private_data; |
| 3981 | char buf[64]; | ||
| 3982 | unsigned long val; | 3996 | unsigned long val; |
| 3983 | int ret; | 3997 | int ret; |
| 3984 | 3998 | ||
| 3985 | if (cnt >= sizeof(buf)) | 3999 | ret = kstrtoul_from_user(ubuf, cnt, 10, &val); |
| 3986 | return -EINVAL; | 4000 | if (ret) |
| 3987 | |||
| 3988 | if (copy_from_user(&buf, ubuf, cnt)) | ||
| 3989 | return -EFAULT; | ||
| 3990 | |||
| 3991 | buf[cnt] = 0; | ||
| 3992 | |||
| 3993 | ret = strict_strtoul(buf, 10, &val); | ||
| 3994 | if (ret < 0) | ||
| 3995 | return ret; | 4001 | return ret; |
| 3996 | 4002 | ||
| 3997 | if (val) | 4003 | if (val) |
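Editor's note: rb_simple_write() and several writers later in this series drop the open-coded copy_from_user()/strict_strtoul() sequence in favour of kstrtoul_from_user(), which does the bounds check, NUL termination and conversion in one call (and is stricter about trailing characters). A user-space sketch of what the helper subsumes, with parse_ulong_from_buf as a made-up name:

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Bounded copy + NUL-terminate + strtoul, like the old open-coded sequence. */
    static int parse_ulong_from_buf(const char *ubuf, size_t cnt, unsigned long *val)
    {
            char buf[64];
            char *end;

            if (cnt >= sizeof(buf))
                    return -EINVAL;
            memcpy(buf, ubuf, cnt);
            buf[cnt] = '\0';

            errno = 0;
            *val = strtoul(buf, &end, 10);
            if (errno || end == buf)
                    return -EINVAL;
            return 0;
    }

    int main(void)
    {
            unsigned long val;
            const char *input = "4096\n";

            if (!parse_ulong_from_buf(input, strlen(input), &val))
                    printf("parsed %lu\n", val);
            return 0;
    }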
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index 302f8a614635..a5457d577b98 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c | |||
| @@ -106,7 +106,7 @@ static enum event_status read_page(int cpu) | |||
| 106 | int inc; | 106 | int inc; |
| 107 | int i; | 107 | int i; |
| 108 | 108 | ||
| 109 | bpage = ring_buffer_alloc_read_page(buffer); | 109 | bpage = ring_buffer_alloc_read_page(buffer, cpu); |
| 110 | if (!bpage) | 110 | if (!bpage) |
| 111 | return EVENT_DROPPED; | 111 | return EVENT_DROPPED; |
| 112 | 112 | ||
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ee9c921d7f21..e5df02c69b1d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
| @@ -343,26 +343,27 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | | |||
| 343 | static int trace_stop_count; | 343 | static int trace_stop_count; |
| 344 | static DEFINE_SPINLOCK(tracing_start_lock); | 344 | static DEFINE_SPINLOCK(tracing_start_lock); |
| 345 | 345 | ||
| 346 | static void wakeup_work_handler(struct work_struct *work) | ||
| 347 | { | ||
| 348 | wake_up(&trace_wait); | ||
| 349 | } | ||
| 350 | |||
| 351 | static DECLARE_DELAYED_WORK(wakeup_work, wakeup_work_handler); | ||
| 352 | |||
| 346 | /** | 353 | /** |
| 347 | * trace_wake_up - wake up tasks waiting for trace input | 354 | * trace_wake_up - wake up tasks waiting for trace input |
| 348 | * | 355 | * |
| 349 | * Simply wakes up any task that is blocked on the trace_wait | 356 | * Schedules a delayed work to wake up any task that is blocked on the |
| 350 | * queue. This is used with trace_poll for tasks polling the | 357 | * trace_wait queue. This is used with trace_poll for tasks polling the
| 358 | * trace. | ||
| 351 | */ | 359 | */ |
| 352 | void trace_wake_up(void) | 360 | void trace_wake_up(void) |
| 353 | { | 361 | { |
| 354 | int cpu; | 362 | const unsigned long delay = msecs_to_jiffies(2); |
| 355 | 363 | ||
| 356 | if (trace_flags & TRACE_ITER_BLOCK) | 364 | if (trace_flags & TRACE_ITER_BLOCK) |
| 357 | return; | 365 | return; |
| 358 | /* | 366 | schedule_delayed_work(&wakeup_work, delay); |
| 359 | * The runqueue_is_locked() can fail, but this is the best we | ||
| 360 | * have for now: | ||
| 361 | */ | ||
| 362 | cpu = get_cpu(); | ||
| 363 | if (!runqueue_is_locked(cpu)) | ||
| 364 | wake_up(&trace_wait); | ||
| 365 | put_cpu(); | ||
| 366 | } | 367 | } |
| 367 | 368 | ||
| 368 | static int __init set_buf_size(char *str) | 369 | static int __init set_buf_size(char *str) |
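Editor's note: trace_wake_up() no longer peeks at the runqueue lock; it defers the wake-up to a delayed work item so it can be called from contexts where waking a task directly would be unsafe. The general shape of that idiom, as a kernel-style fragment rather than a buildable module (the my_* names are invented):

    static DECLARE_WAIT_QUEUE_HEAD(my_wait);

    static void my_wakeup_fn(struct work_struct *work)
    {
            wake_up(&my_wait);              /* runs later, from process context */
    }
    static DECLARE_DELAYED_WORK(my_wakeup_work, my_wakeup_fn);

    static void my_poke_waiters(void)
    {
            /* Safe from almost any context: just queue the work. */
            schedule_delayed_work(&my_wakeup_work, msecs_to_jiffies(2));
    }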
| @@ -424,6 +425,7 @@ static const char *trace_options[] = { | |||
| 424 | "graph-time", | 425 | "graph-time", |
| 425 | "record-cmd", | 426 | "record-cmd", |
| 426 | "overwrite", | 427 | "overwrite", |
| 428 | "disable_on_free", | ||
| 427 | NULL | 429 | NULL |
| 428 | }; | 430 | }; |
| 429 | 431 | ||
| @@ -1191,6 +1193,18 @@ void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer, | |||
| 1191 | } | 1193 | } |
| 1192 | EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit); | 1194 | EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit); |
| 1193 | 1195 | ||
| 1196 | void trace_nowake_buffer_unlock_commit_regs(struct ring_buffer *buffer, | ||
| 1197 | struct ring_buffer_event *event, | ||
| 1198 | unsigned long flags, int pc, | ||
| 1199 | struct pt_regs *regs) | ||
| 1200 | { | ||
| 1201 | ring_buffer_unlock_commit(buffer, event); | ||
| 1202 | |||
| 1203 | ftrace_trace_stack_regs(buffer, flags, 0, pc, regs); | ||
| 1204 | ftrace_trace_userstack(buffer, flags, pc); | ||
| 1205 | } | ||
| 1206 | EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit_regs); | ||
| 1207 | |||
| 1194 | void trace_current_buffer_discard_commit(struct ring_buffer *buffer, | 1208 | void trace_current_buffer_discard_commit(struct ring_buffer *buffer, |
| 1195 | struct ring_buffer_event *event) | 1209 | struct ring_buffer_event *event) |
| 1196 | { | 1210 | { |
| @@ -1234,30 +1248,103 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data, | |||
| 1234 | } | 1248 | } |
| 1235 | 1249 | ||
| 1236 | #ifdef CONFIG_STACKTRACE | 1250 | #ifdef CONFIG_STACKTRACE |
| 1251 | |||
| 1252 | #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long)) | ||
| 1253 | struct ftrace_stack { | ||
| 1254 | unsigned long calls[FTRACE_STACK_MAX_ENTRIES]; | ||
| 1255 | }; | ||
| 1256 | |||
| 1257 | static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack); | ||
| 1258 | static DEFINE_PER_CPU(int, ftrace_stack_reserve); | ||
| 1259 | |||
| 1237 | static void __ftrace_trace_stack(struct ring_buffer *buffer, | 1260 | static void __ftrace_trace_stack(struct ring_buffer *buffer, |
| 1238 | unsigned long flags, | 1261 | unsigned long flags, |
| 1239 | int skip, int pc) | 1262 | int skip, int pc, struct pt_regs *regs) |
| 1240 | { | 1263 | { |
| 1241 | struct ftrace_event_call *call = &event_kernel_stack; | 1264 | struct ftrace_event_call *call = &event_kernel_stack; |
| 1242 | struct ring_buffer_event *event; | 1265 | struct ring_buffer_event *event; |
| 1243 | struct stack_entry *entry; | 1266 | struct stack_entry *entry; |
| 1244 | struct stack_trace trace; | 1267 | struct stack_trace trace; |
| 1268 | int use_stack; | ||
| 1269 | int size = FTRACE_STACK_ENTRIES; | ||
| 1270 | |||
| 1271 | trace.nr_entries = 0; | ||
| 1272 | trace.skip = skip; | ||
| 1273 | |||
| 1274 | /* | ||
| 1275 | * Since events can happen in NMIs, there's no safe way to | ||
| 1276 | * use the per cpu ftrace_stacks. We reserve it and if an interrupt | ||
| 1277 | * or NMI comes in, it will just have to use the default | ||
| 1278 | * FTRACE_STACK_SIZE. | ||
| 1279 | */ | ||
| 1280 | preempt_disable_notrace(); | ||
| 1281 | |||
| 1282 | use_stack = ++__get_cpu_var(ftrace_stack_reserve); | ||
| 1283 | /* | ||
| 1284 | * We don't need any atomic variables, just a barrier. | ||
| 1285 | * If an interrupt comes in, we don't care, because it would | ||
| 1286 | * have exited and put the counter back to what we want. | ||
| 1287 | * We just need a barrier to keep gcc from moving things | ||
| 1288 | * around. | ||
| 1289 | */ | ||
| 1290 | barrier(); | ||
| 1291 | if (use_stack == 1) { | ||
| 1292 | trace.entries = &__get_cpu_var(ftrace_stack).calls[0]; | ||
| 1293 | trace.max_entries = FTRACE_STACK_MAX_ENTRIES; | ||
| 1294 | |||
| 1295 | if (regs) | ||
| 1296 | save_stack_trace_regs(regs, &trace); | ||
| 1297 | else | ||
| 1298 | save_stack_trace(&trace); | ||
| 1299 | |||
| 1300 | if (trace.nr_entries > size) | ||
| 1301 | size = trace.nr_entries; | ||
| 1302 | } else | ||
| 1303 | /* From now on, use_stack is a boolean */ | ||
| 1304 | use_stack = 0; | ||
| 1305 | |||
| 1306 | size *= sizeof(unsigned long); | ||
| 1245 | 1307 | ||
| 1246 | event = trace_buffer_lock_reserve(buffer, TRACE_STACK, | 1308 | event = trace_buffer_lock_reserve(buffer, TRACE_STACK, |
| 1247 | sizeof(*entry), flags, pc); | 1309 | sizeof(*entry) + size, flags, pc); |
| 1248 | if (!event) | 1310 | if (!event) |
| 1249 | return; | 1311 | goto out; |
| 1250 | entry = ring_buffer_event_data(event); | 1312 | entry = ring_buffer_event_data(event); |
| 1251 | memset(&entry->caller, 0, sizeof(entry->caller)); | ||
| 1252 | 1313 | ||
| 1253 | trace.nr_entries = 0; | 1314 | memset(&entry->caller, 0, size); |
| 1254 | trace.max_entries = FTRACE_STACK_ENTRIES; | 1315 | |
| 1255 | trace.skip = skip; | 1316 | if (use_stack) |
| 1256 | trace.entries = entry->caller; | 1317 | memcpy(&entry->caller, trace.entries, |
| 1318 | trace.nr_entries * sizeof(unsigned long)); | ||
| 1319 | else { | ||
| 1320 | trace.max_entries = FTRACE_STACK_ENTRIES; | ||
| 1321 | trace.entries = entry->caller; | ||
| 1322 | if (regs) | ||
| 1323 | save_stack_trace_regs(regs, &trace); | ||
| 1324 | else | ||
| 1325 | save_stack_trace(&trace); | ||
| 1326 | } | ||
| 1327 | |||
| 1328 | entry->size = trace.nr_entries; | ||
| 1257 | 1329 | ||
| 1258 | save_stack_trace(&trace); | ||
| 1259 | if (!filter_check_discard(call, entry, buffer, event)) | 1330 | if (!filter_check_discard(call, entry, buffer, event)) |
| 1260 | ring_buffer_unlock_commit(buffer, event); | 1331 | ring_buffer_unlock_commit(buffer, event); |
| 1332 | |||
| 1333 | out: | ||
| 1334 | /* Again, don't let gcc optimize things here */ | ||
| 1335 | barrier(); | ||
| 1336 | __get_cpu_var(ftrace_stack_reserve)--; | ||
| 1337 | preempt_enable_notrace(); | ||
| 1338 | |||
| 1339 | } | ||
| 1340 | |||
| 1341 | void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags, | ||
| 1342 | int skip, int pc, struct pt_regs *regs) | ||
| 1343 | { | ||
| 1344 | if (!(trace_flags & TRACE_ITER_STACKTRACE)) | ||
| 1345 | return; | ||
| 1346 | |||
| 1347 | __ftrace_trace_stack(buffer, flags, skip, pc, regs); | ||
| 1261 | } | 1348 | } |
| 1262 | 1349 | ||
| 1263 | void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags, | 1350 | void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags, |
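Editor's note: __ftrace_trace_stack() claims the large per-CPU scratch buffer with a simple reservation counter: the first user on a CPU gets the big buffer, nested users (irq/NMI) fall back to the small inline array, and a compiler barrier keeps the increment from being reordered around the buffer use. A simplified single-threaded analogue, assuming nothing beyond standard C plus a GCC-style asm memory clobber for barrier(); the buffer sizes are arbitrary:

    #include <stdio.h>
    #include <string.h>

    #define barrier() asm volatile("" ::: "memory")

    #define BIG_ENTRIES   512
    #define SMALL_ENTRIES 8

    static unsigned long big_stack[BIG_ENTRIES];
    static int stack_reserve;

    static int capture(unsigned long *small, int depth)
    {
            int use_big;
            int n;

            use_big = ++stack_reserve;      /* nesting level on this "CPU" */
            barrier();

            if (use_big == 1) {
                    /* first user: fill the large shared buffer */
                    n = depth < BIG_ENTRIES ? depth : BIG_ENTRIES;
                    memset(big_stack, 0, n * sizeof(unsigned long));
            } else {
                    /* nested (think irq/NMI): fall back to the small buffer */
                    n = depth < SMALL_ENTRIES ? depth : SMALL_ENTRIES;
                    memset(small, 0, n * sizeof(unsigned long));
            }

            barrier();
            stack_reserve--;
            return n;
    }

    int main(void)
    {
            unsigned long small[SMALL_ENTRIES];

            printf("outermost call stored %d entries\n", capture(small, 100));
            return 0;
    }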
| @@ -1266,13 +1353,13 @@ void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags, | |||
| 1266 | if (!(trace_flags & TRACE_ITER_STACKTRACE)) | 1353 | if (!(trace_flags & TRACE_ITER_STACKTRACE)) |
| 1267 | return; | 1354 | return; |
| 1268 | 1355 | ||
| 1269 | __ftrace_trace_stack(buffer, flags, skip, pc); | 1356 | __ftrace_trace_stack(buffer, flags, skip, pc, NULL); |
| 1270 | } | 1357 | } |
| 1271 | 1358 | ||
| 1272 | void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, | 1359 | void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, |
| 1273 | int pc) | 1360 | int pc) |
| 1274 | { | 1361 | { |
| 1275 | __ftrace_trace_stack(tr->buffer, flags, skip, pc); | 1362 | __ftrace_trace_stack(tr->buffer, flags, skip, pc, NULL); |
| 1276 | } | 1363 | } |
| 1277 | 1364 | ||
| 1278 | /** | 1365 | /** |
| @@ -1288,7 +1375,7 @@ void trace_dump_stack(void) | |||
| 1288 | local_save_flags(flags); | 1375 | local_save_flags(flags); |
| 1289 | 1376 | ||
| 1290 | /* skipping 3 traces, seems to get us at the caller of this function */ | 1377 | /* skipping 3 traces, seems to get us at the caller of this function */ |
| 1291 | __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count()); | 1378 | __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count(), NULL); |
| 1292 | } | 1379 | } |
| 1293 | 1380 | ||
| 1294 | static DEFINE_PER_CPU(int, user_stack_count); | 1381 | static DEFINE_PER_CPU(int, user_stack_count); |
| @@ -1536,7 +1623,12 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts, | |||
| 1536 | 1623 | ||
| 1537 | ftrace_enable_cpu(); | 1624 | ftrace_enable_cpu(); |
| 1538 | 1625 | ||
| 1539 | return event ? ring_buffer_event_data(event) : NULL; | 1626 | if (event) { |
| 1627 | iter->ent_size = ring_buffer_event_length(event); | ||
| 1628 | return ring_buffer_event_data(event); | ||
| 1629 | } | ||
| 1630 | iter->ent_size = 0; | ||
| 1631 | return NULL; | ||
| 1540 | } | 1632 | } |
| 1541 | 1633 | ||
| 1542 | static struct trace_entry * | 1634 | static struct trace_entry * |
| @@ -2051,6 +2143,9 @@ void trace_default_header(struct seq_file *m) | |||
| 2051 | { | 2143 | { |
| 2052 | struct trace_iterator *iter = m->private; | 2144 | struct trace_iterator *iter = m->private; |
| 2053 | 2145 | ||
| 2146 | if (!(trace_flags & TRACE_ITER_CONTEXT_INFO)) | ||
| 2147 | return; | ||
| 2148 | |||
| 2054 | if (iter->iter_flags & TRACE_FILE_LAT_FMT) { | 2149 | if (iter->iter_flags & TRACE_FILE_LAT_FMT) { |
| 2055 | /* print nothing if the buffers are empty */ | 2150 | /* print nothing if the buffers are empty */ |
| 2056 | if (trace_empty(iter)) | 2151 | if (trace_empty(iter)) |
| @@ -2701,20 +2796,11 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf, | |||
| 2701 | size_t cnt, loff_t *ppos) | 2796 | size_t cnt, loff_t *ppos) |
| 2702 | { | 2797 | { |
| 2703 | struct trace_array *tr = filp->private_data; | 2798 | struct trace_array *tr = filp->private_data; |
| 2704 | char buf[64]; | ||
| 2705 | unsigned long val; | 2799 | unsigned long val; |
| 2706 | int ret; | 2800 | int ret; |
| 2707 | 2801 | ||
| 2708 | if (cnt >= sizeof(buf)) | 2802 | ret = kstrtoul_from_user(ubuf, cnt, 10, &val); |
| 2709 | return -EINVAL; | 2803 | if (ret) |
| 2710 | |||
| 2711 | if (copy_from_user(&buf, ubuf, cnt)) | ||
| 2712 | return -EFAULT; | ||
| 2713 | |||
| 2714 | buf[cnt] = 0; | ||
| 2715 | |||
| 2716 | ret = strict_strtoul(buf, 10, &val); | ||
| 2717 | if (ret < 0) | ||
| 2718 | return ret; | 2804 | return ret; |
| 2719 | 2805 | ||
| 2720 | val = !!val; | 2806 | val = !!val; |
| @@ -2767,7 +2853,7 @@ int tracer_init(struct tracer *t, struct trace_array *tr) | |||
| 2767 | return t->init(tr); | 2853 | return t->init(tr); |
| 2768 | } | 2854 | } |
| 2769 | 2855 | ||
| 2770 | static int tracing_resize_ring_buffer(unsigned long size) | 2856 | static int __tracing_resize_ring_buffer(unsigned long size) |
| 2771 | { | 2857 | { |
| 2772 | int ret; | 2858 | int ret; |
| 2773 | 2859 | ||
| @@ -2819,6 +2905,41 @@ static int tracing_resize_ring_buffer(unsigned long size) | |||
| 2819 | return ret; | 2905 | return ret; |
| 2820 | } | 2906 | } |
| 2821 | 2907 | ||
| 2908 | static ssize_t tracing_resize_ring_buffer(unsigned long size) | ||
| 2909 | { | ||
| 2910 | int cpu, ret = size; | ||
| 2911 | |||
| 2912 | mutex_lock(&trace_types_lock); | ||
| 2913 | |||
| 2914 | tracing_stop(); | ||
| 2915 | |||
| 2916 | /* disable all cpu buffers */ | ||
| 2917 | for_each_tracing_cpu(cpu) { | ||
| 2918 | if (global_trace.data[cpu]) | ||
| 2919 | atomic_inc(&global_trace.data[cpu]->disabled); | ||
| 2920 | if (max_tr.data[cpu]) | ||
| 2921 | atomic_inc(&max_tr.data[cpu]->disabled); | ||
| 2922 | } | ||
| 2923 | |||
| 2924 | if (size != global_trace.entries) | ||
| 2925 | ret = __tracing_resize_ring_buffer(size); | ||
| 2926 | |||
| 2927 | if (ret < 0) | ||
| 2928 | ret = -ENOMEM; | ||
| 2929 | |||
| 2930 | for_each_tracing_cpu(cpu) { | ||
| 2931 | if (global_trace.data[cpu]) | ||
| 2932 | atomic_dec(&global_trace.data[cpu]->disabled); | ||
| 2933 | if (max_tr.data[cpu]) | ||
| 2934 | atomic_dec(&max_tr.data[cpu]->disabled); | ||
| 2935 | } | ||
| 2936 | |||
| 2937 | tracing_start(); | ||
| 2938 | mutex_unlock(&trace_types_lock); | ||
| 2939 | |||
| 2940 | return ret; | ||
| 2941 | } | ||
| 2942 | |||
| 2822 | 2943 | ||
| 2823 | /** | 2944 | /** |
| 2824 | * tracing_update_buffers - used by tracing facility to expand ring buffers | 2945 | * tracing_update_buffers - used by tracing facility to expand ring buffers |
| @@ -2836,7 +2957,7 @@ int tracing_update_buffers(void) | |||
| 2836 | 2957 | ||
| 2837 | mutex_lock(&trace_types_lock); | 2958 | mutex_lock(&trace_types_lock); |
| 2838 | if (!ring_buffer_expanded) | 2959 | if (!ring_buffer_expanded) |
| 2839 | ret = tracing_resize_ring_buffer(trace_buf_size); | 2960 | ret = __tracing_resize_ring_buffer(trace_buf_size); |
| 2840 | mutex_unlock(&trace_types_lock); | 2961 | mutex_unlock(&trace_types_lock); |
| 2841 | 2962 | ||
| 2842 | return ret; | 2963 | return ret; |
| @@ -2860,7 +2981,7 @@ static int tracing_set_tracer(const char *buf) | |||
| 2860 | mutex_lock(&trace_types_lock); | 2981 | mutex_lock(&trace_types_lock); |
| 2861 | 2982 | ||
| 2862 | if (!ring_buffer_expanded) { | 2983 | if (!ring_buffer_expanded) { |
| 2863 | ret = tracing_resize_ring_buffer(trace_buf_size); | 2984 | ret = __tracing_resize_ring_buffer(trace_buf_size); |
| 2864 | if (ret < 0) | 2985 | if (ret < 0) |
| 2865 | goto out; | 2986 | goto out; |
| 2866 | ret = 0; | 2987 | ret = 0; |
| @@ -2966,20 +3087,11 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf, | |||
| 2966 | size_t cnt, loff_t *ppos) | 3087 | size_t cnt, loff_t *ppos) |
| 2967 | { | 3088 | { |
| 2968 | unsigned long *ptr = filp->private_data; | 3089 | unsigned long *ptr = filp->private_data; |
| 2969 | char buf[64]; | ||
| 2970 | unsigned long val; | 3090 | unsigned long val; |
| 2971 | int ret; | 3091 | int ret; |
| 2972 | 3092 | ||
| 2973 | if (cnt >= sizeof(buf)) | 3093 | ret = kstrtoul_from_user(ubuf, cnt, 10, &val); |
| 2974 | return -EINVAL; | 3094 | if (ret) |
| 2975 | |||
| 2976 | if (copy_from_user(&buf, ubuf, cnt)) | ||
| 2977 | return -EFAULT; | ||
| 2978 | |||
| 2979 | buf[cnt] = 0; | ||
| 2980 | |||
| 2981 | ret = strict_strtoul(buf, 10, &val); | ||
| 2982 | if (ret < 0) | ||
| 2983 | return ret; | 3095 | return ret; |
| 2984 | 3096 | ||
| 2985 | *ptr = val * 1000; | 3097 | *ptr = val * 1000; |
| @@ -3434,67 +3546,54 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, | |||
| 3434 | size_t cnt, loff_t *ppos) | 3546 | size_t cnt, loff_t *ppos) |
| 3435 | { | 3547 | { |
| 3436 | unsigned long val; | 3548 | unsigned long val; |
| 3437 | char buf[64]; | 3549 | int ret; |
| 3438 | int ret, cpu; | ||
| 3439 | |||
| 3440 | if (cnt >= sizeof(buf)) | ||
| 3441 | return -EINVAL; | ||
| 3442 | |||
| 3443 | if (copy_from_user(&buf, ubuf, cnt)) | ||
| 3444 | return -EFAULT; | ||
| 3445 | |||
| 3446 | buf[cnt] = 0; | ||
| 3447 | 3550 | ||
| 3448 | ret = strict_strtoul(buf, 10, &val); | 3551 | ret = kstrtoul_from_user(ubuf, cnt, 10, &val); |
| 3449 | if (ret < 0) | 3552 | if (ret) |
| 3450 | return ret; | 3553 | return ret; |
| 3451 | 3554 | ||
| 3452 | /* must have at least 1 entry */ | 3555 | /* must have at least 1 entry */ |
| 3453 | if (!val) | 3556 | if (!val) |
| 3454 | return -EINVAL; | 3557 | return -EINVAL; |
| 3455 | 3558 | ||
| 3456 | mutex_lock(&trace_types_lock); | ||
| 3457 | |||
| 3458 | tracing_stop(); | ||
| 3459 | |||
| 3460 | /* disable all cpu buffers */ | ||
| 3461 | for_each_tracing_cpu(cpu) { | ||
| 3462 | if (global_trace.data[cpu]) | ||
| 3463 | atomic_inc(&global_trace.data[cpu]->disabled); | ||
| 3464 | if (max_tr.data[cpu]) | ||
| 3465 | atomic_inc(&max_tr.data[cpu]->disabled); | ||
| 3466 | } | ||
| 3467 | |||
| 3468 | /* value is in KB */ | 3559 | /* value is in KB */ |
| 3469 | val <<= 10; | 3560 | val <<= 10; |
| 3470 | 3561 | ||
| 3471 | if (val != global_trace.entries) { | 3562 | ret = tracing_resize_ring_buffer(val); |
| 3472 | ret = tracing_resize_ring_buffer(val); | 3563 | if (ret < 0) |
| 3473 | if (ret < 0) { | 3564 | return ret; |
| 3474 | cnt = ret; | ||
| 3475 | goto out; | ||
| 3476 | } | ||
| 3477 | } | ||
| 3478 | 3565 | ||
| 3479 | *ppos += cnt; | 3566 | *ppos += cnt; |
| 3480 | 3567 | ||
| 3481 | /* If check pages failed, return ENOMEM */ | 3568 | return cnt; |
| 3482 | if (tracing_disabled) | 3569 | } |
| 3483 | cnt = -ENOMEM; | ||
| 3484 | out: | ||
| 3485 | for_each_tracing_cpu(cpu) { | ||
| 3486 | if (global_trace.data[cpu]) | ||
| 3487 | atomic_dec(&global_trace.data[cpu]->disabled); | ||
| 3488 | if (max_tr.data[cpu]) | ||
| 3489 | atomic_dec(&max_tr.data[cpu]->disabled); | ||
| 3490 | } | ||
| 3491 | 3570 | ||
| 3492 | tracing_start(); | 3571 | static ssize_t |
| 3493 | mutex_unlock(&trace_types_lock); | 3572 | tracing_free_buffer_write(struct file *filp, const char __user *ubuf, |
| 3573 | size_t cnt, loff_t *ppos) | ||
| 3574 | { | ||
| 3575 | /* | ||
| 3576 | * There is no need to read what the user has written; this function | ||
| 3577 | * is just to make sure that there is no error when "echo" is used | ||
| 3578 | */ | ||
| 3579 | |||
| 3580 | *ppos += cnt; | ||
| 3494 | 3581 | ||
| 3495 | return cnt; | 3582 | return cnt; |
| 3496 | } | 3583 | } |
| 3497 | 3584 | ||
| 3585 | static int | ||
| 3586 | tracing_free_buffer_release(struct inode *inode, struct file *filp) | ||
| 3587 | { | ||
| 3588 | /* disable tracing ? */ | ||
| 3589 | if (trace_flags & TRACE_ITER_STOP_ON_FREE) | ||
| 3590 | tracing_off(); | ||
| 3591 | /* resize the ring buffer to 0 */ | ||
| 3592 | tracing_resize_ring_buffer(0); | ||
| 3593 | |||
| 3594 | return 0; | ||
| 3595 | } | ||
| 3596 | |||
| 3498 | static int mark_printk(const char *fmt, ...) | 3597 | static int mark_printk(const char *fmt, ...) |
| 3499 | { | 3598 | { |
| 3500 | int ret; | 3599 | int ret; |
| @@ -3640,6 +3739,11 @@ static const struct file_operations tracing_entries_fops = { | |||
| 3640 | .llseek = generic_file_llseek, | 3739 | .llseek = generic_file_llseek, |
| 3641 | }; | 3740 | }; |
| 3642 | 3741 | ||
| 3742 | static const struct file_operations tracing_free_buffer_fops = { | ||
| 3743 | .write = tracing_free_buffer_write, | ||
| 3744 | .release = tracing_free_buffer_release, | ||
| 3745 | }; | ||
| 3746 | |||
| 3643 | static const struct file_operations tracing_mark_fops = { | 3747 | static const struct file_operations tracing_mark_fops = { |
| 3644 | .open = tracing_open_generic, | 3748 | .open = tracing_open_generic, |
| 3645 | .write = tracing_mark_write, | 3749 | .write = tracing_mark_write, |
| @@ -3696,7 +3800,7 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, | |||
| 3696 | return 0; | 3800 | return 0; |
| 3697 | 3801 | ||
| 3698 | if (!info->spare) | 3802 | if (!info->spare) |
| 3699 | info->spare = ring_buffer_alloc_read_page(info->tr->buffer); | 3803 | info->spare = ring_buffer_alloc_read_page(info->tr->buffer, info->cpu); |
| 3700 | if (!info->spare) | 3804 | if (!info->spare) |
| 3701 | return -ENOMEM; | 3805 | return -ENOMEM; |
| 3702 | 3806 | ||
| @@ -3853,7 +3957,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, | |||
| 3853 | 3957 | ||
| 3854 | ref->ref = 1; | 3958 | ref->ref = 1; |
| 3855 | ref->buffer = info->tr->buffer; | 3959 | ref->buffer = info->tr->buffer; |
| 3856 | ref->page = ring_buffer_alloc_read_page(ref->buffer); | 3960 | ref->page = ring_buffer_alloc_read_page(ref->buffer, info->cpu); |
| 3857 | if (!ref->page) { | 3961 | if (!ref->page) { |
| 3858 | kfree(ref); | 3962 | kfree(ref); |
| 3859 | break; | 3963 | break; |
| @@ -3862,8 +3966,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, | |||
| 3862 | r = ring_buffer_read_page(ref->buffer, &ref->page, | 3966 | r = ring_buffer_read_page(ref->buffer, &ref->page, |
| 3863 | len, info->cpu, 1); | 3967 | len, info->cpu, 1); |
| 3864 | if (r < 0) { | 3968 | if (r < 0) { |
| 3865 | ring_buffer_free_read_page(ref->buffer, | 3969 | ring_buffer_free_read_page(ref->buffer, ref->page); |
| 3866 | ref->page); | ||
| 3867 | kfree(ref); | 3970 | kfree(ref); |
| 3868 | break; | 3971 | break; |
| 3869 | } | 3972 | } |
| @@ -4099,19 +4202,10 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt, | |||
| 4099 | { | 4202 | { |
| 4100 | struct trace_option_dentry *topt = filp->private_data; | 4203 | struct trace_option_dentry *topt = filp->private_data; |
| 4101 | unsigned long val; | 4204 | unsigned long val; |
| 4102 | char buf[64]; | ||
| 4103 | int ret; | 4205 | int ret; |
| 4104 | 4206 | ||
| 4105 | if (cnt >= sizeof(buf)) | 4207 | ret = kstrtoul_from_user(ubuf, cnt, 10, &val); |
| 4106 | return -EINVAL; | 4208 | if (ret) |
| 4107 | |||
| 4108 | if (copy_from_user(&buf, ubuf, cnt)) | ||
| 4109 | return -EFAULT; | ||
| 4110 | |||
| 4111 | buf[cnt] = 0; | ||
| 4112 | |||
| 4113 | ret = strict_strtoul(buf, 10, &val); | ||
| 4114 | if (ret < 0) | ||
| 4115 | return ret; | 4209 | return ret; |
| 4116 | 4210 | ||
| 4117 | if (val != 0 && val != 1) | 4211 | if (val != 0 && val != 1) |
| @@ -4159,20 +4253,11 @@ trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt, | |||
| 4159 | loff_t *ppos) | 4253 | loff_t *ppos) |
| 4160 | { | 4254 | { |
| 4161 | long index = (long)filp->private_data; | 4255 | long index = (long)filp->private_data; |
| 4162 | char buf[64]; | ||
| 4163 | unsigned long val; | 4256 | unsigned long val; |
| 4164 | int ret; | 4257 | int ret; |
| 4165 | 4258 | ||
| 4166 | if (cnt >= sizeof(buf)) | 4259 | ret = kstrtoul_from_user(ubuf, cnt, 10, &val); |
| 4167 | return -EINVAL; | 4260 | if (ret) |
| 4168 | |||
| 4169 | if (copy_from_user(&buf, ubuf, cnt)) | ||
| 4170 | return -EFAULT; | ||
| 4171 | |||
| 4172 | buf[cnt] = 0; | ||
| 4173 | |||
| 4174 | ret = strict_strtoul(buf, 10, &val); | ||
| 4175 | if (ret < 0) | ||
| 4176 | return ret; | 4261 | return ret; |
| 4177 | 4262 | ||
| 4178 | if (val != 0 && val != 1) | 4263 | if (val != 0 && val != 1) |
| @@ -4365,6 +4450,9 @@ static __init int tracer_init_debugfs(void) | |||
| 4365 | trace_create_file("buffer_size_kb", 0644, d_tracer, | 4450 | trace_create_file("buffer_size_kb", 0644, d_tracer, |
| 4366 | &global_trace, &tracing_entries_fops); | 4451 | &global_trace, &tracing_entries_fops); |
| 4367 | 4452 | ||
| 4453 | trace_create_file("free_buffer", 0644, d_tracer, | ||
| 4454 | &global_trace, &tracing_free_buffer_fops); | ||
| 4455 | |||
| 4368 | trace_create_file("trace_marker", 0220, d_tracer, | 4456 | trace_create_file("trace_marker", 0220, d_tracer, |
| 4369 | NULL, &tracing_mark_fops); | 4457 | NULL, &tracing_mark_fops); |
| 4370 | 4458 | ||
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 229f8591f61d..616846bcfee5 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h | |||
| @@ -2,7 +2,7 @@ | |||
| 2 | #define _LINUX_KERNEL_TRACE_H | 2 | #define _LINUX_KERNEL_TRACE_H |
| 3 | 3 | ||
| 4 | #include <linux/fs.h> | 4 | #include <linux/fs.h> |
| 5 | #include <asm/atomic.h> | 5 | #include <linux/atomic.h> |
| 6 | #include <linux/sched.h> | 6 | #include <linux/sched.h> |
| 7 | #include <linux/clocksource.h> | 7 | #include <linux/clocksource.h> |
| 8 | #include <linux/ring_buffer.h> | 8 | #include <linux/ring_buffer.h> |
| @@ -278,6 +278,29 @@ struct tracer { | |||
| 278 | }; | 278 | }; |
| 279 | 279 | ||
| 280 | 280 | ||
| 281 | /* Only current can touch trace_recursion */ | ||
| 282 | #define trace_recursion_inc() do { (current)->trace_recursion++; } while (0) | ||
| 283 | #define trace_recursion_dec() do { (current)->trace_recursion--; } while (0) | ||
| 284 | |||
| 285 | /* Ring buffer has the 10 LSB bits to count */ | ||
| 286 | #define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff) | ||
| 287 | |||
| 288 | /* for function tracing recursion */ | ||
| 289 | #define TRACE_INTERNAL_BIT (1<<11) | ||
| 290 | #define TRACE_GLOBAL_BIT (1<<12) | ||
| 291 | /* | ||
| 292 | * Abuse of the trace_recursion. | ||
| 293 | * We need a way to maintain state when we are tracing the function | ||
| 294 | * graph in irq context, because we want to trace a particular function | ||
| 295 | * that was called in irq while irq tracing is off. Since this | ||
| 296 | * can only be modified by current, we can reuse trace_recursion. | ||
| 297 | */ | ||
| 298 | #define TRACE_IRQ_BIT (1<<13) | ||
| 299 | |||
| 300 | #define trace_recursion_set(bit) do { (current)->trace_recursion |= (bit); } while (0) | ||
| 301 | #define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(bit); } while (0) | ||
| 302 | #define trace_recursion_test(bit) ((current)->trace_recursion & (bit)) | ||
| 303 | |||
| 281 | #define TRACE_PIPE_ALL_CPU -1 | 304 | #define TRACE_PIPE_ALL_CPU -1 |
| 282 | 305 | ||
| 283 | int tracer_init(struct tracer *t, struct trace_array *tr); | 306 | int tracer_init(struct tracer *t, struct trace_array *tr); |
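Editor's note: the trace_recursion helpers move into trace.h alongside the new TRACE_IRQ_BIT; all of them are simple bit operations on a per-task word that only current may touch. A standalone demo of the same set/clear/test macros operating on a local flags word instead of current->trace_recursion:

    #include <stdio.h>

    #define TRACE_INTERNAL_BIT      (1 << 11)
    #define TRACE_GLOBAL_BIT        (1 << 12)
    #define TRACE_IRQ_BIT           (1 << 13)

    #define recursion_set(word, bit)        do { (word) |= (bit); } while (0)
    #define recursion_clear(word, bit)      do { (word) &= ~(bit); } while (0)
    #define recursion_test(word, bit)       ((word) & (bit))

    int main(void)
    {
            unsigned long trace_recursion = 0;

            recursion_set(trace_recursion, TRACE_IRQ_BIT);
            printf("irq bit set: %s\n",
                   recursion_test(trace_recursion, TRACE_IRQ_BIT) ? "yes" : "no");

            recursion_clear(trace_recursion, TRACE_IRQ_BIT);
            printf("irq bit set: %s\n",
                   recursion_test(trace_recursion, TRACE_IRQ_BIT) ? "yes" : "no");
            return 0;
    }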
| @@ -389,6 +412,9 @@ void update_max_tr_single(struct trace_array *tr, | |||
| 389 | void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags, | 412 | void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags, |
| 390 | int skip, int pc); | 413 | int skip, int pc); |
| 391 | 414 | ||
| 415 | void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags, | ||
| 416 | int skip, int pc, struct pt_regs *regs); | ||
| 417 | |||
| 392 | void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, | 418 | void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, |
| 393 | int pc); | 419 | int pc); |
| 394 | 420 | ||
| @@ -400,6 +426,12 @@ static inline void ftrace_trace_stack(struct ring_buffer *buffer, | |||
| 400 | { | 426 | { |
| 401 | } | 427 | } |
| 402 | 428 | ||
| 429 | static inline void ftrace_trace_stack_regs(struct ring_buffer *buffer, | ||
| 430 | unsigned long flags, int skip, | ||
| 431 | int pc, struct pt_regs *regs) | ||
| 432 | { | ||
| 433 | } | ||
| 434 | |||
| 403 | static inline void ftrace_trace_userstack(struct ring_buffer *buffer, | 435 | static inline void ftrace_trace_userstack(struct ring_buffer *buffer, |
| 404 | unsigned long flags, int pc) | 436 | unsigned long flags, int pc) |
| 405 | { | 437 | { |
| @@ -507,8 +539,18 @@ static inline int ftrace_graph_addr(unsigned long addr) | |||
| 507 | return 1; | 539 | return 1; |
| 508 | 540 | ||
| 509 | for (i = 0; i < ftrace_graph_count; i++) { | 541 | for (i = 0; i < ftrace_graph_count; i++) { |
| 510 | if (addr == ftrace_graph_funcs[i]) | 542 | if (addr == ftrace_graph_funcs[i]) { |
| 543 | /* | ||
| 544 | * If no irqs are to be traced, but a set_graph_function | ||
| 545 | * is set, and called by an interrupt handler, we still | ||
| 546 | * want to trace it. | ||
| 547 | */ | ||
| 548 | if (in_irq()) | ||
| 549 | trace_recursion_set(TRACE_IRQ_BIT); | ||
| 550 | else | ||
| 551 | trace_recursion_clear(TRACE_IRQ_BIT); | ||
| 511 | return 1; | 552 | return 1; |
| 553 | } | ||
| 512 | } | 554 | } |
| 513 | 555 | ||
| 514 | return 0; | 556 | return 0; |
| @@ -609,6 +651,7 @@ enum trace_iterator_flags { | |||
| 609 | TRACE_ITER_GRAPH_TIME = 0x80000, | 651 | TRACE_ITER_GRAPH_TIME = 0x80000, |
| 610 | TRACE_ITER_RECORD_CMD = 0x100000, | 652 | TRACE_ITER_RECORD_CMD = 0x100000, |
| 611 | TRACE_ITER_OVERWRITE = 0x200000, | 653 | TRACE_ITER_OVERWRITE = 0x200000, |
| 654 | TRACE_ITER_STOP_ON_FREE = 0x400000, | ||
| 612 | }; | 655 | }; |
| 613 | 656 | ||
| 614 | /* | 657 | /* |
| @@ -677,6 +720,7 @@ struct event_subsystem { | |||
| 677 | struct dentry *entry; | 720 | struct dentry *entry; |
| 678 | struct event_filter *filter; | 721 | struct event_filter *filter; |
| 679 | int nr_events; | 722 | int nr_events; |
| 723 | int ref_count; | ||
| 680 | }; | 724 | }; |
| 681 | 725 | ||
| 682 | #define FILTER_PRED_INVALID ((unsigned short)-1) | 726 | #define FILTER_PRED_INVALID ((unsigned short)-1) |
| @@ -784,19 +828,4 @@ extern const char *__stop___trace_bprintk_fmt[]; | |||
| 784 | FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) | 828 | FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) |
| 785 | #include "trace_entries.h" | 829 | #include "trace_entries.h" |
| 786 | 830 | ||
| 787 | /* Only current can touch trace_recursion */ | ||
| 788 | #define trace_recursion_inc() do { (current)->trace_recursion++; } while (0) | ||
| 789 | #define trace_recursion_dec() do { (current)->trace_recursion--; } while (0) | ||
| 790 | |||
| 791 | /* Ring buffer has the 10 LSB bits to count */ | ||
| 792 | #define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff) | ||
| 793 | |||
| 794 | /* for function tracing recursion */ | ||
| 795 | #define TRACE_INTERNAL_BIT (1<<11) | ||
| 796 | #define TRACE_GLOBAL_BIT (1<<12) | ||
| 797 | |||
| 798 | #define trace_recursion_set(bit) do { (current)->trace_recursion |= (bit); } while (0) | ||
| 799 | #define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(bit); } while (0) | ||
| 800 | #define trace_recursion_test(bit) ((current)->trace_recursion & (bit)) | ||
| 801 | |||
| 802 | #endif /* _LINUX_KERNEL_TRACE_H */ | 831 | #endif /* _LINUX_KERNEL_TRACE_H */ |
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index e32744c84d94..93365907f219 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h | |||
| @@ -161,7 +161,8 @@ FTRACE_ENTRY(kernel_stack, stack_entry, | |||
| 161 | TRACE_STACK, | 161 | TRACE_STACK, |
| 162 | 162 | ||
| 163 | F_STRUCT( | 163 | F_STRUCT( |
| 164 | __array( unsigned long, caller, FTRACE_STACK_ENTRIES ) | 164 | __field( int, size ) |
| 165 | __dynamic_array(unsigned long, caller ) | ||
| 165 | ), | 166 | ), |
| 166 | 167 | ||
| 167 | F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" | 168 | F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" |
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 686ec399f2a8..581876f9f387 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c | |||
| @@ -244,6 +244,35 @@ static void ftrace_clear_events(void) | |||
| 244 | mutex_unlock(&event_mutex); | 244 | mutex_unlock(&event_mutex); |
| 245 | } | 245 | } |
| 246 | 246 | ||
| 247 | static void __put_system(struct event_subsystem *system) | ||
| 248 | { | ||
| 249 | struct event_filter *filter = system->filter; | ||
| 250 | |||
| 251 | WARN_ON_ONCE(system->ref_count == 0); | ||
| 252 | if (--system->ref_count) | ||
| 253 | return; | ||
| 254 | |||
| 255 | if (filter) { | ||
| 256 | kfree(filter->filter_string); | ||
| 257 | kfree(filter); | ||
| 258 | } | ||
| 259 | kfree(system->name); | ||
| 260 | kfree(system); | ||
| 261 | } | ||
| 262 | |||
| 263 | static void __get_system(struct event_subsystem *system) | ||
| 264 | { | ||
| 265 | WARN_ON_ONCE(system->ref_count == 0); | ||
| 266 | system->ref_count++; | ||
| 267 | } | ||
| 268 | |||
| 269 | static void put_system(struct event_subsystem *system) | ||
| 270 | { | ||
| 271 | mutex_lock(&event_mutex); | ||
| 272 | __put_system(system); | ||
| 273 | mutex_unlock(&event_mutex); | ||
| 274 | } | ||
| 275 | |||
| 247 | /* | 276 | /* |
| 248 | * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events. | 277 | * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events. |
| 249 | */ | 278 | */ |
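Editor's note: __get_system()/__put_system() turn the event subsystem into a reference-counted object: the debugfs directory holds one reference, every open "enable"/"filter" file holds another, and the final put frees the name and filter. A user-space sketch of the same get/put discipline (struct subsystem and its fields here are illustrative, not the kernel definition):

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static pthread_mutex_t event_mutex = PTHREAD_MUTEX_INITIALIZER;

    struct subsystem {
            char *name;
            int ref_count;
    };

    static void __get_system(struct subsystem *s)
    {
            s->ref_count++;                 /* caller holds event_mutex */
    }

    static void __put_system(struct subsystem *s)
    {
            if (--s->ref_count)
                    return;
            free(s->name);                  /* last reference: tear it down */
            free(s);
    }

    static void put_system(struct subsystem *s)
    {
            pthread_mutex_lock(&event_mutex);
            __put_system(s);
            pthread_mutex_unlock(&event_mutex);
    }

    int main(void)
    {
            struct subsystem *s = calloc(1, sizeof(*s));

            s->name = strdup("sched");
            s->ref_count = 1;               /* reference held by the directory */

            pthread_mutex_lock(&event_mutex);
            __get_system(s);                /* an open file takes its own reference */
            pthread_mutex_unlock(&event_mutex);

            put_system(s);                  /* file closed */
            put_system(s);                  /* directory removed: frees the object */
            printf("done\n");
            return 0;
    }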
| @@ -486,20 +515,11 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, | |||
| 486 | loff_t *ppos) | 515 | loff_t *ppos) |
| 487 | { | 516 | { |
| 488 | struct ftrace_event_call *call = filp->private_data; | 517 | struct ftrace_event_call *call = filp->private_data; |
| 489 | char buf[64]; | ||
| 490 | unsigned long val; | 518 | unsigned long val; |
| 491 | int ret; | 519 | int ret; |
| 492 | 520 | ||
| 493 | if (cnt >= sizeof(buf)) | 521 | ret = kstrtoul_from_user(ubuf, cnt, 10, &val); |
| 494 | return -EINVAL; | 522 | if (ret) |
| 495 | |||
| 496 | if (copy_from_user(&buf, ubuf, cnt)) | ||
| 497 | return -EFAULT; | ||
| 498 | |||
| 499 | buf[cnt] = 0; | ||
| 500 | |||
| 501 | ret = strict_strtoul(buf, 10, &val); | ||
| 502 | if (ret < 0) | ||
| 503 | return ret; | 523 | return ret; |
| 504 | 524 | ||
| 505 | ret = tracing_update_buffers(); | 525 | ret = tracing_update_buffers(); |
| @@ -528,7 +548,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, | |||
| 528 | loff_t *ppos) | 548 | loff_t *ppos) |
| 529 | { | 549 | { |
| 530 | const char set_to_char[4] = { '?', '0', '1', 'X' }; | 550 | const char set_to_char[4] = { '?', '0', '1', 'X' }; |
| 531 | const char *system = filp->private_data; | 551 | struct event_subsystem *system = filp->private_data; |
| 532 | struct ftrace_event_call *call; | 552 | struct ftrace_event_call *call; |
| 533 | char buf[2]; | 553 | char buf[2]; |
| 534 | int set = 0; | 554 | int set = 0; |
| @@ -539,7 +559,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, | |||
| 539 | if (!call->name || !call->class || !call->class->reg) | 559 | if (!call->name || !call->class || !call->class->reg) |
| 540 | continue; | 560 | continue; |
| 541 | 561 | ||
| 542 | if (system && strcmp(call->class->system, system) != 0) | 562 | if (system && strcmp(call->class->system, system->name) != 0) |
| 543 | continue; | 563 | continue; |
| 544 | 564 | ||
| 545 | /* | 565 | /* |
| @@ -569,21 +589,13 @@ static ssize_t | |||
| 569 | system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, | 589 | system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, |
| 570 | loff_t *ppos) | 590 | loff_t *ppos) |
| 571 | { | 591 | { |
| 572 | const char *system = filp->private_data; | 592 | struct event_subsystem *system = filp->private_data; |
| 593 | const char *name = NULL; | ||
| 573 | unsigned long val; | 594 | unsigned long val; |
| 574 | char buf[64]; | ||
| 575 | ssize_t ret; | 595 | ssize_t ret; |
| 576 | 596 | ||
| 577 | if (cnt >= sizeof(buf)) | 597 | ret = kstrtoul_from_user(ubuf, cnt, 10, &val); |
| 578 | return -EINVAL; | 598 | if (ret) |
| 579 | |||
| 580 | if (copy_from_user(&buf, ubuf, cnt)) | ||
| 581 | return -EFAULT; | ||
| 582 | |||
| 583 | buf[cnt] = 0; | ||
| 584 | |||
| 585 | ret = strict_strtoul(buf, 10, &val); | ||
| 586 | if (ret < 0) | ||
| 587 | return ret; | 599 | return ret; |
| 588 | 600 | ||
| 589 | ret = tracing_update_buffers(); | 601 | ret = tracing_update_buffers(); |
| @@ -593,7 +605,14 @@ system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, | |||
| 593 | if (val != 0 && val != 1) | 605 | if (val != 0 && val != 1) |
| 594 | return -EINVAL; | 606 | return -EINVAL; |
| 595 | 607 | ||
| 596 | ret = __ftrace_set_clr_event(NULL, system, NULL, val); | 608 | /* |
| 609 | * Opening of "enable" adds a ref count to system, | ||
| 610 | * so the name is safe to use. | ||
| 611 | */ | ||
| 612 | if (system) | ||
| 613 | name = system->name; | ||
| 614 | |||
| 615 | ret = __ftrace_set_clr_event(NULL, name, NULL, val); | ||
| 597 | if (ret) | 616 | if (ret) |
| 598 | goto out; | 617 | goto out; |
| 599 | 618 | ||
| @@ -826,6 +845,52 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, | |||
| 826 | return cnt; | 845 | return cnt; |
| 827 | } | 846 | } |
| 828 | 847 | ||
| 848 | static LIST_HEAD(event_subsystems); | ||
| 849 | |||
| 850 | static int subsystem_open(struct inode *inode, struct file *filp) | ||
| 851 | { | ||
| 852 | struct event_subsystem *system = NULL; | ||
| 853 | int ret; | ||
| 854 | |||
| 855 | if (!inode->i_private) | ||
| 856 | goto skip_search; | ||
| 857 | |||
| 858 | /* Make sure the system still exists */ | ||
| 859 | mutex_lock(&event_mutex); | ||
| 860 | list_for_each_entry(system, &event_subsystems, list) { | ||
| 861 | if (system == inode->i_private) { | ||
| 862 | /* Don't open systems with no events */ | ||
| 863 | if (!system->nr_events) { | ||
| 864 | system = NULL; | ||
| 865 | break; | ||
| 866 | } | ||
| 867 | __get_system(system); | ||
| 868 | break; | ||
| 869 | } | ||
| 870 | } | ||
| 871 | mutex_unlock(&event_mutex); | ||
| 872 | |||
| 873 | if (system != inode->i_private) | ||
| 874 | return -ENODEV; | ||
| 875 | |||
| 876 | skip_search: | ||
| 877 | ret = tracing_open_generic(inode, filp); | ||
| 878 | if (ret < 0 && system) | ||
| 879 | put_system(system); | ||
| 880 | |||
| 881 | return ret; | ||
| 882 | } | ||
| 883 | |||
| 884 | static int subsystem_release(struct inode *inode, struct file *file) | ||
| 885 | { | ||
| 886 | struct event_subsystem *system = inode->i_private; | ||
| 887 | |||
| 888 | if (system) | ||
| 889 | put_system(system); | ||
| 890 | |||
| 891 | return 0; | ||
| 892 | } | ||
| 893 | |||
| 829 | static ssize_t | 894 | static ssize_t |
| 830 | subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt, | 895 | subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt, |
| 831 | loff_t *ppos) | 896 | loff_t *ppos) |
| @@ -963,17 +1028,19 @@ static const struct file_operations ftrace_event_filter_fops = { | |||
| 963 | }; | 1028 | }; |
| 964 | 1029 | ||
| 965 | static const struct file_operations ftrace_subsystem_filter_fops = { | 1030 | static const struct file_operations ftrace_subsystem_filter_fops = { |
| 966 | .open = tracing_open_generic, | 1031 | .open = subsystem_open, |
| 967 | .read = subsystem_filter_read, | 1032 | .read = subsystem_filter_read, |
| 968 | .write = subsystem_filter_write, | 1033 | .write = subsystem_filter_write, |
| 969 | .llseek = default_llseek, | 1034 | .llseek = default_llseek, |
| 1035 | .release = subsystem_release, | ||
| 970 | }; | 1036 | }; |
| 971 | 1037 | ||
| 972 | static const struct file_operations ftrace_system_enable_fops = { | 1038 | static const struct file_operations ftrace_system_enable_fops = { |
| 973 | .open = tracing_open_generic, | 1039 | .open = subsystem_open, |
| 974 | .read = system_enable_read, | 1040 | .read = system_enable_read, |
| 975 | .write = system_enable_write, | 1041 | .write = system_enable_write, |
| 976 | .llseek = default_llseek, | 1042 | .llseek = default_llseek, |
| 1043 | .release = subsystem_release, | ||
| 977 | }; | 1044 | }; |
| 978 | 1045 | ||
| 979 | static const struct file_operations ftrace_show_header_fops = { | 1046 | static const struct file_operations ftrace_show_header_fops = { |
| @@ -1002,8 +1069,6 @@ static struct dentry *event_trace_events_dir(void) | |||
| 1002 | return d_events; | 1069 | return d_events; |
| 1003 | } | 1070 | } |
| 1004 | 1071 | ||
| 1005 | static LIST_HEAD(event_subsystems); | ||
| 1006 | |||
| 1007 | static struct dentry * | 1072 | static struct dentry * |
| 1008 | event_subsystem_dir(const char *name, struct dentry *d_events) | 1073 | event_subsystem_dir(const char *name, struct dentry *d_events) |
| 1009 | { | 1074 | { |
| @@ -1013,6 +1078,7 @@ event_subsystem_dir(const char *name, struct dentry *d_events) | |||
| 1013 | /* First see if we did not already create this dir */ | 1078 | /* First see if we did not already create this dir */ |
| 1014 | list_for_each_entry(system, &event_subsystems, list) { | 1079 | list_for_each_entry(system, &event_subsystems, list) { |
| 1015 | if (strcmp(system->name, name) == 0) { | 1080 | if (strcmp(system->name, name) == 0) { |
| 1081 | __get_system(system); | ||
| 1016 | system->nr_events++; | 1082 | system->nr_events++; |
| 1017 | return system->entry; | 1083 | return system->entry; |
| 1018 | } | 1084 | } |
| @@ -1035,6 +1101,7 @@ event_subsystem_dir(const char *name, struct dentry *d_events) | |||
| 1035 | } | 1101 | } |
| 1036 | 1102 | ||
| 1037 | system->nr_events = 1; | 1103 | system->nr_events = 1; |
| 1104 | system->ref_count = 1; | ||
| 1038 | system->name = kstrdup(name, GFP_KERNEL); | 1105 | system->name = kstrdup(name, GFP_KERNEL); |
| 1039 | if (!system->name) { | 1106 | if (!system->name) { |
| 1040 | debugfs_remove(system->entry); | 1107 | debugfs_remove(system->entry); |
| @@ -1062,8 +1129,7 @@ event_subsystem_dir(const char *name, struct dentry *d_events) | |||
| 1062 | "'%s/filter' entry\n", name); | 1129 | "'%s/filter' entry\n", name); |
| 1063 | } | 1130 | } |
| 1064 | 1131 | ||
| 1065 | trace_create_file("enable", 0644, system->entry, | 1132 | trace_create_file("enable", 0644, system->entry, system, |
| 1066 | (void *)system->name, | ||
| 1067 | &ftrace_system_enable_fops); | 1133 | &ftrace_system_enable_fops); |
| 1068 | 1134 | ||
| 1069 | return system->entry; | 1135 | return system->entry; |
| @@ -1184,16 +1250,9 @@ static void remove_subsystem_dir(const char *name) | |||
| 1184 | list_for_each_entry(system, &event_subsystems, list) { | 1250 | list_for_each_entry(system, &event_subsystems, list) { |
| 1185 | if (strcmp(system->name, name) == 0) { | 1251 | if (strcmp(system->name, name) == 0) { |
| 1186 | if (!--system->nr_events) { | 1252 | if (!--system->nr_events) { |
| 1187 | struct event_filter *filter = system->filter; | ||
| 1188 | |||
| 1189 | debugfs_remove_recursive(system->entry); | 1253 | debugfs_remove_recursive(system->entry); |
| 1190 | list_del(&system->list); | 1254 | list_del(&system->list); |
| 1191 | if (filter) { | 1255 | __put_system(system); |
| 1192 | kfree(filter->filter_string); | ||
| 1193 | kfree(filter); | ||
| 1194 | } | ||
| 1195 | kfree(system->name); | ||
| 1196 | kfree(system); | ||
| 1197 | } | 1256 | } |
| 1198 | break; | 1257 | break; |
| 1199 | } | 1258 | } |
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 8008ddcfbf20..256764ecccd6 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c | |||
| @@ -1886,6 +1886,12 @@ int apply_subsystem_event_filter(struct event_subsystem *system, | |||
| 1886 | 1886 | ||
| 1887 | mutex_lock(&event_mutex); | 1887 | mutex_lock(&event_mutex); |
| 1888 | 1888 | ||
| 1889 | /* Make sure the system still has events */ | ||
| 1890 | if (!system->nr_events) { | ||
| 1891 | err = -ENODEV; | ||
| 1892 | goto out_unlock; | ||
| 1893 | } | ||
| 1894 | |||
| 1889 | if (!strcmp(strstrip(filter_string), "0")) { | 1895 | if (!strcmp(strstrip(filter_string), "0")) { |
| 1890 | filter_free_subsystem_preds(system); | 1896 | filter_free_subsystem_preds(system); |
| 1891 | remove_filter_string(system->filter); | 1897 | remove_filter_string(system->filter); |
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 8d0e1cc4e974..c7b0c6a7db09 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c | |||
| @@ -324,7 +324,8 @@ ftrace_trace_onoff_unreg(char *glob, char *cmd, char *param) | |||
| 324 | } | 324 | } |
| 325 | 325 | ||
| 326 | static int | 326 | static int |
| 327 | ftrace_trace_onoff_callback(char *glob, char *cmd, char *param, int enable) | 327 | ftrace_trace_onoff_callback(struct ftrace_hash *hash, |
| 328 | char *glob, char *cmd, char *param, int enable) | ||
| 328 | { | 329 | { |
| 329 | struct ftrace_probe_ops *ops; | 330 | struct ftrace_probe_ops *ops; |
| 330 | void *count = (void *)-1; | 331 | void *count = (void *)-1; |
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 962cdb24ed81..a7d2a4c653d8 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c | |||
| @@ -74,6 +74,20 @@ static struct tracer_flags tracer_flags = { | |||
| 74 | 74 | ||
| 75 | static struct trace_array *graph_array; | 75 | static struct trace_array *graph_array; |
| 76 | 76 | ||
| 77 | /* | ||
| 78 | * The DURATION column is also used to display IRQ signs; the | ||
| 79 | * following values are used by print_graph_irq and others | ||
| 80 | * to fill in space in the DURATION column. | ||
| 81 | */ | ||
| 82 | enum { | ||
| 83 | DURATION_FILL_FULL = -1, | ||
| 84 | DURATION_FILL_START = -2, | ||
| 85 | DURATION_FILL_END = -3, | ||
| 86 | }; | ||
| 87 | |||
| 88 | static enum print_line_t | ||
| 89 | print_graph_duration(unsigned long long duration, struct trace_seq *s, | ||
| 90 | u32 flags); | ||
| 77 | 91 | ||
| 78 | /* Add a function return address to the trace stack on thread info.*/ | 92 | /* Add a function return address to the trace stack on thread info.*/ |
| 79 | int | 93 | int |
| @@ -213,7 +227,7 @@ int __trace_graph_entry(struct trace_array *tr, | |||
| 213 | 227 | ||
| 214 | static inline int ftrace_graph_ignore_irqs(void) | 228 | static inline int ftrace_graph_ignore_irqs(void) |
| 215 | { | 229 | { |
| 216 | if (!ftrace_graph_skip_irqs) | 230 | if (!ftrace_graph_skip_irqs || trace_recursion_test(TRACE_IRQ_BIT)) |
| 217 | return 0; | 231 | return 0; |
| 218 | 232 | ||
| 219 | return in_irq(); | 233 | return in_irq(); |
| @@ -577,32 +591,6 @@ get_return_for_leaf(struct trace_iterator *iter, | |||
| 577 | return next; | 591 | return next; |
| 578 | } | 592 | } |
| 579 | 593 | ||
| 580 | /* Signal a overhead of time execution to the output */ | ||
| 581 | static int | ||
| 582 | print_graph_overhead(unsigned long long duration, struct trace_seq *s, | ||
| 583 | u32 flags) | ||
| 584 | { | ||
| 585 | /* If duration disappear, we don't need anything */ | ||
| 586 | if (!(flags & TRACE_GRAPH_PRINT_DURATION)) | ||
| 587 | return 1; | ||
| 588 | |||
| 589 | /* Non nested entry or return */ | ||
| 590 | if (duration == -1) | ||
| 591 | return trace_seq_printf(s, " "); | ||
| 592 | |||
| 593 | if (flags & TRACE_GRAPH_PRINT_OVERHEAD) { | ||
| 594 | /* Duration exceeded 100 msecs */ | ||
| 595 | if (duration > 100000ULL) | ||
| 596 | return trace_seq_printf(s, "! "); | ||
| 597 | |||
| 598 | /* Duration exceeded 10 msecs */ | ||
| 599 | if (duration > 10000ULL) | ||
| 600 | return trace_seq_printf(s, "+ "); | ||
| 601 | } | ||
| 602 | |||
| 603 | return trace_seq_printf(s, " "); | ||
| 604 | } | ||
| 605 | |||
| 606 | static int print_graph_abs_time(u64 t, struct trace_seq *s) | 594 | static int print_graph_abs_time(u64 t, struct trace_seq *s) |
| 607 | { | 595 | { |
| 608 | unsigned long usecs_rem; | 596 | unsigned long usecs_rem; |
| @@ -625,34 +613,36 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr, | |||
| 625 | addr >= (unsigned long)__irqentry_text_end) | 613 | addr >= (unsigned long)__irqentry_text_end) |
| 626 | return TRACE_TYPE_UNHANDLED; | 614 | return TRACE_TYPE_UNHANDLED; |
| 627 | 615 | ||
| 628 | /* Absolute time */ | 616 | if (trace_flags & TRACE_ITER_CONTEXT_INFO) { |
| 629 | if (flags & TRACE_GRAPH_PRINT_ABS_TIME) { | 617 | /* Absolute time */ |
| 630 | ret = print_graph_abs_time(iter->ts, s); | 618 | if (flags & TRACE_GRAPH_PRINT_ABS_TIME) { |
| 631 | if (!ret) | 619 | ret = print_graph_abs_time(iter->ts, s); |
| 632 | return TRACE_TYPE_PARTIAL_LINE; | 620 | if (!ret) |
| 633 | } | 621 | return TRACE_TYPE_PARTIAL_LINE; |
| 622 | } | ||
| 634 | 623 | ||
| 635 | /* Cpu */ | 624 | /* Cpu */ |
| 636 | if (flags & TRACE_GRAPH_PRINT_CPU) { | 625 | if (flags & TRACE_GRAPH_PRINT_CPU) { |
| 637 | ret = print_graph_cpu(s, cpu); | 626 | ret = print_graph_cpu(s, cpu); |
| 638 | if (ret == TRACE_TYPE_PARTIAL_LINE) | 627 | if (ret == TRACE_TYPE_PARTIAL_LINE) |
| 639 | return TRACE_TYPE_PARTIAL_LINE; | 628 | return TRACE_TYPE_PARTIAL_LINE; |
| 640 | } | 629 | } |
| 641 | 630 | ||
| 642 | /* Proc */ | 631 | /* Proc */ |
| 643 | if (flags & TRACE_GRAPH_PRINT_PROC) { | 632 | if (flags & TRACE_GRAPH_PRINT_PROC) { |
| 644 | ret = print_graph_proc(s, pid); | 633 | ret = print_graph_proc(s, pid); |
| 645 | if (ret == TRACE_TYPE_PARTIAL_LINE) | 634 | if (ret == TRACE_TYPE_PARTIAL_LINE) |
| 646 | return TRACE_TYPE_PARTIAL_LINE; | 635 | return TRACE_TYPE_PARTIAL_LINE; |
| 647 | ret = trace_seq_printf(s, " | "); | 636 | ret = trace_seq_printf(s, " | "); |
| 648 | if (!ret) | 637 | if (!ret) |
| 649 | return TRACE_TYPE_PARTIAL_LINE; | 638 | return TRACE_TYPE_PARTIAL_LINE; |
| 639 | } | ||
| 650 | } | 640 | } |
| 651 | 641 | ||
| 652 | /* No overhead */ | 642 | /* No overhead */ |
| 653 | ret = print_graph_overhead(-1, s, flags); | 643 | ret = print_graph_duration(DURATION_FILL_START, s, flags); |
| 654 | if (!ret) | 644 | if (ret != TRACE_TYPE_HANDLED) |
| 655 | return TRACE_TYPE_PARTIAL_LINE; | 645 | return ret; |
| 656 | 646 | ||
| 657 | if (type == TRACE_GRAPH_ENT) | 647 | if (type == TRACE_GRAPH_ENT) |
| 658 | ret = trace_seq_printf(s, "==========>"); | 648 | ret = trace_seq_printf(s, "==========>"); |
| @@ -662,9 +652,10 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr, | |||
| 662 | if (!ret) | 652 | if (!ret) |
| 663 | return TRACE_TYPE_PARTIAL_LINE; | 653 | return TRACE_TYPE_PARTIAL_LINE; |
| 664 | 654 | ||
| 665 | /* Don't close the duration column if haven't one */ | 655 | ret = print_graph_duration(DURATION_FILL_END, s, flags); |
| 666 | if (flags & TRACE_GRAPH_PRINT_DURATION) | 656 | if (ret != TRACE_TYPE_HANDLED) |
| 667 | trace_seq_printf(s, " |"); | 657 | return ret; |
| 658 | |||
| 668 | ret = trace_seq_printf(s, "\n"); | 659 | ret = trace_seq_printf(s, "\n"); |
| 669 | 660 | ||
| 670 | if (!ret) | 661 | if (!ret) |
| @@ -716,9 +707,49 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s) | |||
| 716 | } | 707 | } |
| 717 | 708 | ||
| 718 | static enum print_line_t | 709 | static enum print_line_t |
| 719 | print_graph_duration(unsigned long long duration, struct trace_seq *s) | 710 | print_graph_duration(unsigned long long duration, struct trace_seq *s, |
| 711 | u32 flags) | ||
| 720 | { | 712 | { |
| 721 | int ret; | 713 | int ret = -1; |
| 714 | |||
| 715 | if (!(flags & TRACE_GRAPH_PRINT_DURATION) || | ||
| 716 | !(trace_flags & TRACE_ITER_CONTEXT_INFO)) | ||
| 717 | return TRACE_TYPE_HANDLED; | ||
| 718 | |||
| 719 | /* No real data, just fill the column with spaces */ | ||
| 720 | switch (duration) { | ||
| 721 | case DURATION_FILL_FULL: | ||
| 722 | ret = trace_seq_printf(s, " | "); | ||
| 723 | return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; | ||
| 724 | case DURATION_FILL_START: | ||
| 725 | ret = trace_seq_printf(s, " "); | ||
| 726 | return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; | ||
| 727 | case DURATION_FILL_END: | ||
| 728 | ret = trace_seq_printf(s, " |"); | ||
| 729 | return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; | ||
| 730 | } | ||
| 731 | |||
| 732 | /* Signal an execution-time overhead to the output */ | ||
| 733 | if (flags & TRACE_GRAPH_PRINT_OVERHEAD) { | ||
| 734 | /* Duration exceeded 100 msecs */ | ||
| 735 | if (duration > 100000ULL) | ||
| 736 | ret = trace_seq_printf(s, "! "); | ||
| 737 | /* Duration exceeded 10 msecs */ | ||
| 738 | else if (duration > 10000ULL) | ||
| 739 | ret = trace_seq_printf(s, "+ "); | ||
| 740 | } | ||
| 741 | |||
| 742 | /* | ||
| 743 | * The -1 means we either did not exceed the duration thresholds | ||
| 744 | * or we don't want to print the overhead. Either way we need | ||
| 745 | * to fill out the space. | ||
| 746 | */ | ||
| 747 | if (ret == -1) | ||
| 748 | ret = trace_seq_printf(s, " "); | ||
| 749 | |||
| 750 | /* Catch any failure that happened above */ | ||
| 751 | if (!ret) | ||
| 752 | return TRACE_TYPE_PARTIAL_LINE; | ||
| 722 | 753 | ||
| 723 | ret = trace_print_graph_duration(duration, s); | 754 | ret = trace_print_graph_duration(duration, s); |
| 724 | if (ret != TRACE_TYPE_HANDLED) | 755 | if (ret != TRACE_TYPE_HANDLED) |
| @@ -767,18 +798,11 @@ print_graph_entry_leaf(struct trace_iterator *iter, | |||
| 767 | cpu_data->enter_funcs[call->depth] = 0; | 798 | cpu_data->enter_funcs[call->depth] = 0; |
| 768 | } | 799 | } |
| 769 | 800 | ||
| 770 | /* Overhead */ | 801 | /* Overhead and duration */ |
| 771 | ret = print_graph_overhead(duration, s, flags); | 802 | ret = print_graph_duration(duration, s, flags); |
| 772 | if (!ret) | 803 | if (ret == TRACE_TYPE_PARTIAL_LINE) |
| 773 | return TRACE_TYPE_PARTIAL_LINE; | 804 | return TRACE_TYPE_PARTIAL_LINE; |
| 774 | 805 | ||
| 775 | /* Duration */ | ||
| 776 | if (flags & TRACE_GRAPH_PRINT_DURATION) { | ||
| 777 | ret = print_graph_duration(duration, s); | ||
| 778 | if (ret == TRACE_TYPE_PARTIAL_LINE) | ||
| 779 | return TRACE_TYPE_PARTIAL_LINE; | ||
| 780 | } | ||
| 781 | |||
| 782 | /* Function */ | 806 | /* Function */ |
| 783 | for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { | 807 | for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { |
| 784 | ret = trace_seq_printf(s, " "); | 808 | ret = trace_seq_printf(s, " "); |
| @@ -815,17 +839,10 @@ print_graph_entry_nested(struct trace_iterator *iter, | |||
| 815 | cpu_data->enter_funcs[call->depth] = call->func; | 839 | cpu_data->enter_funcs[call->depth] = call->func; |
| 816 | } | 840 | } |
| 817 | 841 | ||
| 818 | /* No overhead */ | ||
| 819 | ret = print_graph_overhead(-1, s, flags); | ||
| 820 | if (!ret) | ||
| 821 | return TRACE_TYPE_PARTIAL_LINE; | ||
| 822 | |||
| 823 | /* No time */ | 842 | /* No time */ |
| 824 | if (flags & TRACE_GRAPH_PRINT_DURATION) { | 843 | ret = print_graph_duration(DURATION_FILL_FULL, s, flags); |
| 825 | ret = trace_seq_printf(s, " | "); | 844 | if (ret != TRACE_TYPE_HANDLED) |
| 826 | if (!ret) | 845 | return ret; |
| 827 | return TRACE_TYPE_PARTIAL_LINE; | ||
| 828 | } | ||
| 829 | 846 | ||
| 830 | /* Function */ | 847 | /* Function */ |
| 831 | for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { | 848 | for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { |
| @@ -865,6 +882,9 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s, | |||
| 865 | return TRACE_TYPE_PARTIAL_LINE; | 882 | return TRACE_TYPE_PARTIAL_LINE; |
| 866 | } | 883 | } |
| 867 | 884 | ||
| 885 | if (!(trace_flags & TRACE_ITER_CONTEXT_INFO)) | ||
| 886 | return 0; | ||
| 887 | |||
| 868 | /* Absolute time */ | 888 | /* Absolute time */ |
| 869 | if (flags & TRACE_GRAPH_PRINT_ABS_TIME) { | 889 | if (flags & TRACE_GRAPH_PRINT_ABS_TIME) { |
| 870 | ret = print_graph_abs_time(iter->ts, s); | 890 | ret = print_graph_abs_time(iter->ts, s); |
| @@ -1078,18 +1098,11 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, | |||
| 1078 | if (print_graph_prologue(iter, s, 0, 0, flags)) | 1098 | if (print_graph_prologue(iter, s, 0, 0, flags)) |
| 1079 | return TRACE_TYPE_PARTIAL_LINE; | 1099 | return TRACE_TYPE_PARTIAL_LINE; |
| 1080 | 1100 | ||
| 1081 | /* Overhead */ | 1101 | /* Overhead and duration */ |
| 1082 | ret = print_graph_overhead(duration, s, flags); | 1102 | ret = print_graph_duration(duration, s, flags); |
| 1083 | if (!ret) | 1103 | if (ret == TRACE_TYPE_PARTIAL_LINE) |
| 1084 | return TRACE_TYPE_PARTIAL_LINE; | 1104 | return TRACE_TYPE_PARTIAL_LINE; |
| 1085 | 1105 | ||
| 1086 | /* Duration */ | ||
| 1087 | if (flags & TRACE_GRAPH_PRINT_DURATION) { | ||
| 1088 | ret = print_graph_duration(duration, s); | ||
| 1089 | if (ret == TRACE_TYPE_PARTIAL_LINE) | ||
| 1090 | return TRACE_TYPE_PARTIAL_LINE; | ||
| 1091 | } | ||
| 1092 | |||
| 1093 | /* Closing brace */ | 1106 | /* Closing brace */ |
| 1094 | for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) { | 1107 | for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) { |
| 1095 | ret = trace_seq_printf(s, " "); | 1108 | ret = trace_seq_printf(s, " "); |
| @@ -1146,17 +1159,10 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent, | |||
| 1146 | if (print_graph_prologue(iter, s, 0, 0, flags)) | 1159 | if (print_graph_prologue(iter, s, 0, 0, flags)) |
| 1147 | return TRACE_TYPE_PARTIAL_LINE; | 1160 | return TRACE_TYPE_PARTIAL_LINE; |
| 1148 | 1161 | ||
| 1149 | /* No overhead */ | ||
| 1150 | ret = print_graph_overhead(-1, s, flags); | ||
| 1151 | if (!ret) | ||
| 1152 | return TRACE_TYPE_PARTIAL_LINE; | ||
| 1153 | |||
| 1154 | /* No time */ | 1162 | /* No time */ |
| 1155 | if (flags & TRACE_GRAPH_PRINT_DURATION) { | 1163 | ret = print_graph_duration(DURATION_FILL_FULL, s, flags); |
| 1156 | ret = trace_seq_printf(s, " | "); | 1164 | if (ret != TRACE_TYPE_HANDLED) |
| 1157 | if (!ret) | 1165 | return ret; |
| 1158 | return TRACE_TYPE_PARTIAL_LINE; | ||
| 1159 | } | ||
| 1160 | 1166 | ||
| 1161 | /* Indentation */ | 1167 | /* Indentation */ |
| 1162 | if (depth > 0) | 1168 | if (depth > 0) |
| @@ -1207,7 +1213,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent, | |||
| 1207 | 1213 | ||
| 1208 | 1214 | ||
| 1209 | enum print_line_t | 1215 | enum print_line_t |
| 1210 | __print_graph_function_flags(struct trace_iterator *iter, u32 flags) | 1216 | print_graph_function_flags(struct trace_iterator *iter, u32 flags) |
| 1211 | { | 1217 | { |
| 1212 | struct ftrace_graph_ent_entry *field; | 1218 | struct ftrace_graph_ent_entry *field; |
| 1213 | struct fgraph_data *data = iter->private; | 1219 | struct fgraph_data *data = iter->private; |
| @@ -1270,18 +1276,7 @@ __print_graph_function_flags(struct trace_iterator *iter, u32 flags) | |||
| 1270 | static enum print_line_t | 1276 | static enum print_line_t |
| 1271 | print_graph_function(struct trace_iterator *iter) | 1277 | print_graph_function(struct trace_iterator *iter) |
| 1272 | { | 1278 | { |
| 1273 | return __print_graph_function_flags(iter, tracer_flags.val); | 1279 | return print_graph_function_flags(iter, tracer_flags.val); |
| 1274 | } | ||
| 1275 | |||
| 1276 | enum print_line_t print_graph_function_flags(struct trace_iterator *iter, | ||
| 1277 | u32 flags) | ||
| 1278 | { | ||
| 1279 | if (trace_flags & TRACE_ITER_LATENCY_FMT) | ||
| 1280 | flags |= TRACE_GRAPH_PRINT_DURATION; | ||
| 1281 | else | ||
| 1282 | flags |= TRACE_GRAPH_PRINT_ABS_TIME; | ||
| 1283 | |||
| 1284 | return __print_graph_function_flags(iter, flags); | ||
| 1285 | } | 1280 | } |
| 1286 | 1281 | ||
| 1287 | static enum print_line_t | 1282 | static enum print_line_t |
| @@ -1309,8 +1304,7 @@ static void print_lat_header(struct seq_file *s, u32 flags) | |||
| 1309 | seq_printf(s, "#%.*s / _----=> need-resched \n", size, spaces); | 1304 | seq_printf(s, "#%.*s / _----=> need-resched \n", size, spaces); |
| 1310 | seq_printf(s, "#%.*s| / _---=> hardirq/softirq \n", size, spaces); | 1305 | seq_printf(s, "#%.*s| / _---=> hardirq/softirq \n", size, spaces); |
| 1311 | seq_printf(s, "#%.*s|| / _--=> preempt-depth \n", size, spaces); | 1306 | seq_printf(s, "#%.*s|| / _--=> preempt-depth \n", size, spaces); |
| 1312 | seq_printf(s, "#%.*s||| / _-=> lock-depth \n", size, spaces); | 1307 | seq_printf(s, "#%.*s||| / \n", size, spaces); |
| 1313 | seq_printf(s, "#%.*s|||| / \n", size, spaces); | ||
| 1314 | } | 1308 | } |
| 1315 | 1309 | ||
| 1316 | static void __print_graph_headers_flags(struct seq_file *s, u32 flags) | 1310 | static void __print_graph_headers_flags(struct seq_file *s, u32 flags) |
| @@ -1329,7 +1323,7 @@ static void __print_graph_headers_flags(struct seq_file *s, u32 flags) | |||
| 1329 | if (flags & TRACE_GRAPH_PRINT_PROC) | 1323 | if (flags & TRACE_GRAPH_PRINT_PROC) |
| 1330 | seq_printf(s, " TASK/PID "); | 1324 | seq_printf(s, " TASK/PID "); |
| 1331 | if (lat) | 1325 | if (lat) |
| 1332 | seq_printf(s, "|||||"); | 1326 | seq_printf(s, "||||"); |
| 1333 | if (flags & TRACE_GRAPH_PRINT_DURATION) | 1327 | if (flags & TRACE_GRAPH_PRINT_DURATION) |
| 1334 | seq_printf(s, " DURATION "); | 1328 | seq_printf(s, " DURATION "); |
| 1335 | seq_printf(s, " FUNCTION CALLS\n"); | 1329 | seq_printf(s, " FUNCTION CALLS\n"); |
| @@ -1343,7 +1337,7 @@ static void __print_graph_headers_flags(struct seq_file *s, u32 flags) | |||
| 1343 | if (flags & TRACE_GRAPH_PRINT_PROC) | 1337 | if (flags & TRACE_GRAPH_PRINT_PROC) |
| 1344 | seq_printf(s, " | | "); | 1338 | seq_printf(s, " | | "); |
| 1345 | if (lat) | 1339 | if (lat) |
| 1346 | seq_printf(s, "|||||"); | 1340 | seq_printf(s, "||||"); |
| 1347 | if (flags & TRACE_GRAPH_PRINT_DURATION) | 1341 | if (flags & TRACE_GRAPH_PRINT_DURATION) |
| 1348 | seq_printf(s, " | | "); | 1342 | seq_printf(s, " | | "); |
| 1349 | seq_printf(s, " | | | |\n"); | 1343 | seq_printf(s, " | | | |\n"); |
| @@ -1358,15 +1352,16 @@ void print_graph_headers_flags(struct seq_file *s, u32 flags) | |||
| 1358 | { | 1352 | { |
| 1359 | struct trace_iterator *iter = s->private; | 1353 | struct trace_iterator *iter = s->private; |
| 1360 | 1354 | ||
| 1355 | if (!(trace_flags & TRACE_ITER_CONTEXT_INFO)) | ||
| 1356 | return; | ||
| 1357 | |||
| 1361 | if (trace_flags & TRACE_ITER_LATENCY_FMT) { | 1358 | if (trace_flags & TRACE_ITER_LATENCY_FMT) { |
| 1362 | /* print nothing if the buffers are empty */ | 1359 | /* print nothing if the buffers are empty */ |
| 1363 | if (trace_empty(iter)) | 1360 | if (trace_empty(iter)) |
| 1364 | return; | 1361 | return; |
| 1365 | 1362 | ||
| 1366 | print_trace_header(s, iter); | 1363 | print_trace_header(s, iter); |
| 1367 | flags |= TRACE_GRAPH_PRINT_DURATION; | 1364 | } |
| 1368 | } else | ||
| 1369 | flags |= TRACE_GRAPH_PRINT_ABS_TIME; | ||
| 1370 | 1365 | ||
| 1371 | __print_graph_headers_flags(s, flags); | 1366 | __print_graph_headers_flags(s, flags); |
| 1372 | } | 1367 | } |
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index c77424be284d..667aa8cc0cfc 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c | |||
| @@ -226,7 +226,9 @@ static void irqsoff_trace_close(struct trace_iterator *iter) | |||
| 226 | } | 226 | } |
| 227 | 227 | ||
| 228 | #define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_CPU | \ | 228 | #define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_CPU | \ |
| 229 | TRACE_GRAPH_PRINT_PROC) | 229 | TRACE_GRAPH_PRINT_PROC | \ |
| 230 | TRACE_GRAPH_PRINT_ABS_TIME | \ | ||
| 231 | TRACE_GRAPH_PRINT_DURATION) | ||
| 230 | 232 | ||
| 231 | static enum print_line_t irqsoff_print_line(struct trace_iterator *iter) | 233 | static enum print_line_t irqsoff_print_line(struct trace_iterator *iter) |
| 232 | { | 234 | { |
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index f925c45f0afa..5fb3697bf0e5 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c | |||
| @@ -343,6 +343,14 @@ DEFINE_BASIC_FETCH_FUNCS(deref) | |||
| 343 | DEFINE_FETCH_deref(string) | 343 | DEFINE_FETCH_deref(string) |
| 344 | DEFINE_FETCH_deref(string_size) | 344 | DEFINE_FETCH_deref(string_size) |
| 345 | 345 | ||
| 346 | static __kprobes void update_deref_fetch_param(struct deref_fetch_param *data) | ||
| 347 | { | ||
| 348 | if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) | ||
| 349 | update_deref_fetch_param(data->orig.data); | ||
| 350 | else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn)) | ||
| 351 | update_symbol_cache(data->orig.data); | ||
| 352 | } | ||
| 353 | |||
| 346 | static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data) | 354 | static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data) |
| 347 | { | 355 | { |
| 348 | if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) | 356 | if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) |
| @@ -377,6 +385,19 @@ DEFINE_BASIC_FETCH_FUNCS(bitfield) | |||
| 377 | #define fetch_bitfield_string_size NULL | 385 | #define fetch_bitfield_string_size NULL |
| 378 | 386 | ||
| 379 | static __kprobes void | 387 | static __kprobes void |
| 388 | update_bitfield_fetch_param(struct bitfield_fetch_param *data) | ||
| 389 | { | ||
| 390 | /* | ||
| 391 | * Don't check the bitfield itself, because this must be the | ||
| 392 | * last fetch function. | ||
| 393 | */ | ||
| 394 | if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) | ||
| 395 | update_deref_fetch_param(data->orig.data); | ||
| 396 | else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn)) | ||
| 397 | update_symbol_cache(data->orig.data); | ||
| 398 | } | ||
| 399 | |||
| 400 | static __kprobes void | ||
| 380 | free_bitfield_fetch_param(struct bitfield_fetch_param *data) | 401 | free_bitfield_fetch_param(struct bitfield_fetch_param *data) |
| 381 | { | 402 | { |
| 382 | /* | 403 | /* |
| @@ -389,6 +410,7 @@ free_bitfield_fetch_param(struct bitfield_fetch_param *data) | |||
| 389 | free_symbol_cache(data->orig.data); | 410 | free_symbol_cache(data->orig.data); |
| 390 | kfree(data); | 411 | kfree(data); |
| 391 | } | 412 | } |
| 413 | |||
| 392 | /* Default (unsigned long) fetch type */ | 414 | /* Default (unsigned long) fetch type */ |
| 393 | #define __DEFAULT_FETCH_TYPE(t) u##t | 415 | #define __DEFAULT_FETCH_TYPE(t) u##t |
| 394 | #define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t) | 416 | #define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t) |
| @@ -536,6 +558,7 @@ struct probe_arg { | |||
| 536 | /* Flags for trace_probe */ | 558 | /* Flags for trace_probe */ |
| 537 | #define TP_FLAG_TRACE 1 | 559 | #define TP_FLAG_TRACE 1 |
| 538 | #define TP_FLAG_PROFILE 2 | 560 | #define TP_FLAG_PROFILE 2 |
| 561 | #define TP_FLAG_REGISTERED 4 | ||
| 539 | 562 | ||
| 540 | struct trace_probe { | 563 | struct trace_probe { |
| 541 | struct list_head list; | 564 | struct list_head list; |
| @@ -555,16 +578,49 @@ struct trace_probe { | |||
| 555 | (sizeof(struct probe_arg) * (n))) | 578 | (sizeof(struct probe_arg) * (n))) |
| 556 | 579 | ||
| 557 | 580 | ||
| 558 | static __kprobes int probe_is_return(struct trace_probe *tp) | 581 | static __kprobes int trace_probe_is_return(struct trace_probe *tp) |
| 559 | { | 582 | { |
| 560 | return tp->rp.handler != NULL; | 583 | return tp->rp.handler != NULL; |
| 561 | } | 584 | } |
| 562 | 585 | ||
| 563 | static __kprobes const char *probe_symbol(struct trace_probe *tp) | 586 | static __kprobes const char *trace_probe_symbol(struct trace_probe *tp) |
| 564 | { | 587 | { |
| 565 | return tp->symbol ? tp->symbol : "unknown"; | 588 | return tp->symbol ? tp->symbol : "unknown"; |
| 566 | } | 589 | } |
| 567 | 590 | ||
| 591 | static __kprobes unsigned long trace_probe_offset(struct trace_probe *tp) | ||
| 592 | { | ||
| 593 | return tp->rp.kp.offset; | ||
| 594 | } | ||
| 595 | |||
| 596 | static __kprobes bool trace_probe_is_enabled(struct trace_probe *tp) | ||
| 597 | { | ||
| 598 | return !!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE)); | ||
| 599 | } | ||
| 600 | |||
| 601 | static __kprobes bool trace_probe_is_registered(struct trace_probe *tp) | ||
| 602 | { | ||
| 603 | return !!(tp->flags & TP_FLAG_REGISTERED); | ||
| 604 | } | ||
| 605 | |||
| 606 | static __kprobes bool trace_probe_has_gone(struct trace_probe *tp) | ||
| 607 | { | ||
| 608 | return !!(kprobe_gone(&tp->rp.kp)); | ||
| 609 | } | ||
| 610 | |||
| 611 | static __kprobes bool trace_probe_within_module(struct trace_probe *tp, | ||
| 612 | struct module *mod) | ||
| 613 | { | ||
| 614 | int len = strlen(mod->name); | ||
| 615 | const char *name = trace_probe_symbol(tp); | ||
| 616 | return strncmp(mod->name, name, len) == 0 && name[len] == ':'; | ||
| 617 | } | ||
| 618 | |||
| 619 | static __kprobes bool trace_probe_is_on_module(struct trace_probe *tp) | ||
| 620 | { | ||
| 621 | return !!strchr(trace_probe_symbol(tp), ':'); | ||
| 622 | } | ||
| 623 | |||
| 568 | static int register_probe_event(struct trace_probe *tp); | 624 | static int register_probe_event(struct trace_probe *tp); |
| 569 | static void unregister_probe_event(struct trace_probe *tp); | 625 | static void unregister_probe_event(struct trace_probe *tp); |
| 570 | 626 | ||
| @@ -646,6 +702,16 @@ error: | |||
| 646 | return ERR_PTR(ret); | 702 | return ERR_PTR(ret); |
| 647 | } | 703 | } |
| 648 | 704 | ||
| 705 | static void update_probe_arg(struct probe_arg *arg) | ||
| 706 | { | ||
| 707 | if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn)) | ||
| 708 | update_bitfield_fetch_param(arg->fetch.data); | ||
| 709 | else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn)) | ||
| 710 | update_deref_fetch_param(arg->fetch.data); | ||
| 711 | else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn)) | ||
| 712 | update_symbol_cache(arg->fetch.data); | ||
| 713 | } | ||
| 714 | |||
| 649 | static void free_probe_arg(struct probe_arg *arg) | 715 | static void free_probe_arg(struct probe_arg *arg) |
| 650 | { | 716 | { |
| 651 | if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn)) | 717 | if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn)) |
| @@ -671,7 +737,7 @@ static void free_trace_probe(struct trace_probe *tp) | |||
| 671 | kfree(tp); | 737 | kfree(tp); |
| 672 | } | 738 | } |
| 673 | 739 | ||
| 674 | static struct trace_probe *find_probe_event(const char *event, | 740 | static struct trace_probe *find_trace_probe(const char *event, |
| 675 | const char *group) | 741 | const char *group) |
| 676 | { | 742 | { |
| 677 | struct trace_probe *tp; | 743 | struct trace_probe *tp; |
| @@ -683,13 +749,96 @@ static struct trace_probe *find_probe_event(const char *event, | |||
| 683 | return NULL; | 749 | return NULL; |
| 684 | } | 750 | } |
| 685 | 751 | ||
| 752 | /* Enable trace_probe - @flag must be TP_FLAG_TRACE or TP_FLAG_PROFILE */ | ||
| 753 | static int enable_trace_probe(struct trace_probe *tp, int flag) | ||
| 754 | { | ||
| 755 | int ret = 0; | ||
| 756 | |||
| 757 | tp->flags |= flag; | ||
| 758 | if (trace_probe_is_enabled(tp) && trace_probe_is_registered(tp) && | ||
| 759 | !trace_probe_has_gone(tp)) { | ||
| 760 | if (trace_probe_is_return(tp)) | ||
| 761 | ret = enable_kretprobe(&tp->rp); | ||
| 762 | else | ||
| 763 | ret = enable_kprobe(&tp->rp.kp); | ||
| 764 | } | ||
| 765 | |||
| 766 | return ret; | ||
| 767 | } | ||
| 768 | |||
| 769 | /* Disable trace_probe - @flag must be TP_FLAG_TRACE or TP_FLAG_PROFILE */ | ||
| 770 | static void disable_trace_probe(struct trace_probe *tp, int flag) | ||
| 771 | { | ||
| 772 | tp->flags &= ~flag; | ||
| 773 | if (!trace_probe_is_enabled(tp) && trace_probe_is_registered(tp)) { | ||
| 774 | if (trace_probe_is_return(tp)) | ||
| 775 | disable_kretprobe(&tp->rp); | ||
| 776 | else | ||
| 777 | disable_kprobe(&tp->rp.kp); | ||
| 778 | } | ||
| 779 | } | ||
| 780 | |||
| 781 | /* Internal register function - just handle k*probes and flags */ | ||
| 782 | static int __register_trace_probe(struct trace_probe *tp) | ||
| 783 | { | ||
| 784 | int i, ret; | ||
| 785 | |||
| 786 | if (trace_probe_is_registered(tp)) | ||
| 787 | return -EINVAL; | ||
| 788 | |||
| 789 | for (i = 0; i < tp->nr_args; i++) | ||
| 790 | update_probe_arg(&tp->args[i]); | ||
| 791 | |||
| 792 | /* Set/clear disabled flag according to tp->flag */ | ||
| 793 | if (trace_probe_is_enabled(tp)) | ||
| 794 | tp->rp.kp.flags &= ~KPROBE_FLAG_DISABLED; | ||
| 795 | else | ||
| 796 | tp->rp.kp.flags |= KPROBE_FLAG_DISABLED; | ||
| 797 | |||
| 798 | if (trace_probe_is_return(tp)) | ||
| 799 | ret = register_kretprobe(&tp->rp); | ||
| 800 | else | ||
| 801 | ret = register_kprobe(&tp->rp.kp); | ||
| 802 | |||
| 803 | if (ret == 0) | ||
| 804 | tp->flags |= TP_FLAG_REGISTERED; | ||
| 805 | else { | ||
| 806 | pr_warning("Could not insert probe at %s+%lu: %d\n", | ||
| 807 | trace_probe_symbol(tp), trace_probe_offset(tp), ret); | ||
| 808 | if (ret == -ENOENT && trace_probe_is_on_module(tp)) { | ||
| 809 | pr_warning("This probe might be able to register after " | ||
| 810 | "target module is loaded. Continue.\n"); | ||
| 811 | ret = 0; | ||
| 812 | } else if (ret == -EILSEQ) { | ||
| 813 | pr_warning("Probing address(0x%p) is not an " | ||
| 814 | "instruction boundary.\n", | ||
| 815 | tp->rp.kp.addr); | ||
| 816 | ret = -EINVAL; | ||
| 817 | } | ||
| 818 | } | ||
| 819 | |||
| 820 | return ret; | ||
| 821 | } | ||
| 822 | |||
| 823 | /* Internal unregister function - just handle k*probes and flags */ | ||
| 824 | static void __unregister_trace_probe(struct trace_probe *tp) | ||
| 825 | { | ||
| 826 | if (trace_probe_is_registered(tp)) { | ||
| 827 | if (trace_probe_is_return(tp)) | ||
| 828 | unregister_kretprobe(&tp->rp); | ||
| 829 | else | ||
| 830 | unregister_kprobe(&tp->rp.kp); | ||
| 831 | tp->flags &= ~TP_FLAG_REGISTERED; | ||
| 832 | /* Cleanup kprobe for reuse */ | ||
| 833 | if (tp->rp.kp.symbol_name) | ||
| 834 | tp->rp.kp.addr = NULL; | ||
| 835 | } | ||
| 836 | } | ||
| 837 | |||
| 686 | /* Unregister a trace_probe and probe_event: call with locking probe_lock */ | 838 | /* Unregister a trace_probe and probe_event: call with locking probe_lock */ |
| 687 | static void unregister_trace_probe(struct trace_probe *tp) | 839 | static void unregister_trace_probe(struct trace_probe *tp) |
| 688 | { | 840 | { |
| 689 | if (probe_is_return(tp)) | 841 | __unregister_trace_probe(tp); |
| 690 | unregister_kretprobe(&tp->rp); | ||
| 691 | else | ||
| 692 | unregister_kprobe(&tp->rp.kp); | ||
| 693 | list_del(&tp->list); | 842 | list_del(&tp->list); |
| 694 | unregister_probe_event(tp); | 843 | unregister_probe_event(tp); |
| 695 | } | 844 | } |
| @@ -702,41 +851,65 @@ static int register_trace_probe(struct trace_probe *tp) | |||
| 702 | 851 | ||
| 703 | mutex_lock(&probe_lock); | 852 | mutex_lock(&probe_lock); |
| 704 | 853 | ||
| 705 | /* register as an event */ | 854 | /* Delete the old (same name) event if it exists */ |
| 706 | old_tp = find_probe_event(tp->call.name, tp->call.class->system); | 855 | old_tp = find_trace_probe(tp->call.name, tp->call.class->system); |
| 707 | if (old_tp) { | 856 | if (old_tp) { |
| 708 | /* delete old event */ | ||
| 709 | unregister_trace_probe(old_tp); | 857 | unregister_trace_probe(old_tp); |
| 710 | free_trace_probe(old_tp); | 858 | free_trace_probe(old_tp); |
| 711 | } | 859 | } |
| 860 | |||
| 861 | /* Register new event */ | ||
| 712 | ret = register_probe_event(tp); | 862 | ret = register_probe_event(tp); |
| 713 | if (ret) { | 863 | if (ret) { |
| 714 | pr_warning("Failed to register probe event(%d)\n", ret); | 864 | pr_warning("Failed to register probe event(%d)\n", ret); |
| 715 | goto end; | 865 | goto end; |
| 716 | } | 866 | } |
| 717 | 867 | ||
| 718 | tp->rp.kp.flags |= KPROBE_FLAG_DISABLED; | 868 | /* Register k*probe */ |
| 719 | if (probe_is_return(tp)) | 869 | ret = __register_trace_probe(tp); |
| 720 | ret = register_kretprobe(&tp->rp); | 870 | if (ret < 0) |
| 721 | else | ||
| 722 | ret = register_kprobe(&tp->rp.kp); | ||
| 723 | |||
| 724 | if (ret) { | ||
| 725 | pr_warning("Could not insert probe(%d)\n", ret); | ||
| 726 | if (ret == -EILSEQ) { | ||
| 727 | pr_warning("Probing address(0x%p) is not an " | ||
| 728 | "instruction boundary.\n", | ||
| 729 | tp->rp.kp.addr); | ||
| 730 | ret = -EINVAL; | ||
| 731 | } | ||
| 732 | unregister_probe_event(tp); | 871 | unregister_probe_event(tp); |
| 733 | } else | 872 | else |
| 734 | list_add_tail(&tp->list, &probe_list); | 873 | list_add_tail(&tp->list, &probe_list); |
| 874 | |||
| 735 | end: | 875 | end: |
| 736 | mutex_unlock(&probe_lock); | 876 | mutex_unlock(&probe_lock); |
| 737 | return ret; | 877 | return ret; |
| 738 | } | 878 | } |
| 739 | 879 | ||
| 880 | /* Module notifier callback, checking for probe events on the module */ | ||
| 881 | static int trace_probe_module_callback(struct notifier_block *nb, | ||
| 882 | unsigned long val, void *data) | ||
| 883 | { | ||
| 884 | struct module *mod = data; | ||
| 885 | struct trace_probe *tp; | ||
| 886 | int ret; | ||
| 887 | |||
| 888 | if (val != MODULE_STATE_COMING) | ||
| 889 | return NOTIFY_DONE; | ||
| 890 | |||
| 891 | /* Update probes on coming module */ | ||
| 892 | mutex_lock(&probe_lock); | ||
| 893 | list_for_each_entry(tp, &probe_list, list) { | ||
| 894 | if (trace_probe_within_module(tp, mod)) { | ||
| 895 | __unregister_trace_probe(tp); | ||
| 896 | ret = __register_trace_probe(tp); | ||
| 897 | if (ret) | ||
| 898 | pr_warning("Failed to re-register probe %s on " | ||
| 899 | "%s: %d\n", | ||
| 900 | tp->call.name, mod->name, ret); | ||
| 901 | } | ||
| 902 | } | ||
| 903 | mutex_unlock(&probe_lock); | ||
| 904 | |||
| 905 | return NOTIFY_DONE; | ||
| 906 | } | ||
| 907 | |||
| 908 | static struct notifier_block trace_probe_module_nb = { | ||
| 909 | .notifier_call = trace_probe_module_callback, | ||
| 910 | .priority = 1 /* Invoked after kprobe module callback */ | ||
| 911 | }; | ||
| 912 | |||
| 740 | /* Split symbol and offset. */ | 913 | /* Split symbol and offset. */ |
| 741 | static int split_symbol_offset(char *symbol, unsigned long *offset) | 914 | static int split_symbol_offset(char *symbol, unsigned long *offset) |
| 742 | { | 915 | { |
| @@ -962,8 +1135,8 @@ static int create_trace_probe(int argc, char **argv) | |||
| 962 | { | 1135 | { |
| 963 | /* | 1136 | /* |
| 964 | * Argument syntax: | 1137 | * Argument syntax: |
| 965 | * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS] | 1138 | * - Add kprobe: p[:[GRP/]EVENT] [MOD:]KSYM[+OFFS]|KADDR [FETCHARGS] |
| 966 | * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS] | 1139 | * - Add kretprobe: r[:[GRP/]EVENT] [MOD:]KSYM[+0] [FETCHARGS] |
| 967 | * Fetch args: | 1140 | * Fetch args: |
| 968 | * $retval : fetch return value | 1141 | * $retval : fetch return value |
| 969 | * $stack : fetch stack address | 1142 | * $stack : fetch stack address |
| @@ -1025,7 +1198,7 @@ static int create_trace_probe(int argc, char **argv) | |||
| 1025 | return -EINVAL; | 1198 | return -EINVAL; |
| 1026 | } | 1199 | } |
| 1027 | mutex_lock(&probe_lock); | 1200 | mutex_lock(&probe_lock); |
| 1028 | tp = find_probe_event(event, group); | 1201 | tp = find_trace_probe(event, group); |
| 1029 | if (!tp) { | 1202 | if (!tp) { |
| 1030 | mutex_unlock(&probe_lock); | 1203 | mutex_unlock(&probe_lock); |
| 1031 | pr_info("Event %s/%s doesn't exist.\n", group, event); | 1204 | pr_info("Event %s/%s doesn't exist.\n", group, event); |
| @@ -1144,7 +1317,7 @@ error: | |||
| 1144 | return ret; | 1317 | return ret; |
| 1145 | } | 1318 | } |
| 1146 | 1319 | ||
| 1147 | static void cleanup_all_probes(void) | 1320 | static void release_all_trace_probes(void) |
| 1148 | { | 1321 | { |
| 1149 | struct trace_probe *tp; | 1322 | struct trace_probe *tp; |
| 1150 | 1323 | ||
| @@ -1158,7 +1331,6 @@ static void cleanup_all_probes(void) | |||
| 1158 | mutex_unlock(&probe_lock); | 1331 | mutex_unlock(&probe_lock); |
| 1159 | } | 1332 | } |
| 1160 | 1333 | ||
| 1161 | |||
| 1162 | /* Probes listing interfaces */ | 1334 | /* Probes listing interfaces */ |
| 1163 | static void *probes_seq_start(struct seq_file *m, loff_t *pos) | 1335 | static void *probes_seq_start(struct seq_file *m, loff_t *pos) |
| 1164 | { | 1336 | { |
| @@ -1181,15 +1353,16 @@ static int probes_seq_show(struct seq_file *m, void *v) | |||
| 1181 | struct trace_probe *tp = v; | 1353 | struct trace_probe *tp = v; |
| 1182 | int i; | 1354 | int i; |
| 1183 | 1355 | ||
| 1184 | seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p'); | 1356 | seq_printf(m, "%c", trace_probe_is_return(tp) ? 'r' : 'p'); |
| 1185 | seq_printf(m, ":%s/%s", tp->call.class->system, tp->call.name); | 1357 | seq_printf(m, ":%s/%s", tp->call.class->system, tp->call.name); |
| 1186 | 1358 | ||
| 1187 | if (!tp->symbol) | 1359 | if (!tp->symbol) |
| 1188 | seq_printf(m, " 0x%p", tp->rp.kp.addr); | 1360 | seq_printf(m, " 0x%p", tp->rp.kp.addr); |
| 1189 | else if (tp->rp.kp.offset) | 1361 | else if (tp->rp.kp.offset) |
| 1190 | seq_printf(m, " %s+%u", probe_symbol(tp), tp->rp.kp.offset); | 1362 | seq_printf(m, " %s+%u", trace_probe_symbol(tp), |
| 1363 | tp->rp.kp.offset); | ||
| 1191 | else | 1364 | else |
| 1192 | seq_printf(m, " %s", probe_symbol(tp)); | 1365 | seq_printf(m, " %s", trace_probe_symbol(tp)); |
| 1193 | 1366 | ||
| 1194 | for (i = 0; i < tp->nr_args; i++) | 1367 | for (i = 0; i < tp->nr_args; i++) |
| 1195 | seq_printf(m, " %s=%s", tp->args[i].name, tp->args[i].comm); | 1368 | seq_printf(m, " %s=%s", tp->args[i].name, tp->args[i].comm); |
| @@ -1209,7 +1382,7 @@ static int probes_open(struct inode *inode, struct file *file) | |||
| 1209 | { | 1382 | { |
| 1210 | if ((file->f_mode & FMODE_WRITE) && | 1383 | if ((file->f_mode & FMODE_WRITE) && |
| 1211 | (file->f_flags & O_TRUNC)) | 1384 | (file->f_flags & O_TRUNC)) |
| 1212 | cleanup_all_probes(); | 1385 | release_all_trace_probes(); |
| 1213 | 1386 | ||
| 1214 | return seq_open(file, &probes_seq_op); | 1387 | return seq_open(file, &probes_seq_op); |
| 1215 | } | 1388 | } |
| @@ -1397,7 +1570,8 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) | |||
| 1397 | store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); | 1570 | store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); |
| 1398 | 1571 | ||
| 1399 | if (!filter_current_check_discard(buffer, call, entry, event)) | 1572 | if (!filter_current_check_discard(buffer, call, entry, event)) |
| 1400 | trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); | 1573 | trace_nowake_buffer_unlock_commit_regs(buffer, event, |
| 1574 | irq_flags, pc, regs); | ||
| 1401 | } | 1575 | } |
| 1402 | 1576 | ||
| 1403 | /* Kretprobe handler */ | 1577 | /* Kretprobe handler */ |
| @@ -1429,7 +1603,8 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri, | |||
| 1429 | store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); | 1603 | store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); |
| 1430 | 1604 | ||
| 1431 | if (!filter_current_check_discard(buffer, call, entry, event)) | 1605 | if (!filter_current_check_discard(buffer, call, entry, event)) |
| 1432 | trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); | 1606 | trace_nowake_buffer_unlock_commit_regs(buffer, event, |
| 1607 | irq_flags, pc, regs); | ||
| 1433 | } | 1608 | } |
| 1434 | 1609 | ||
| 1435 | /* Event entry printers */ | 1610 | /* Event entry printers */ |
| @@ -1511,30 +1686,6 @@ partial: | |||
| 1511 | return TRACE_TYPE_PARTIAL_LINE; | 1686 | return TRACE_TYPE_PARTIAL_LINE; |
| 1512 | } | 1687 | } |
| 1513 | 1688 | ||
| 1514 | static int probe_event_enable(struct ftrace_event_call *call) | ||
| 1515 | { | ||
| 1516 | struct trace_probe *tp = (struct trace_probe *)call->data; | ||
| 1517 | |||
| 1518 | tp->flags |= TP_FLAG_TRACE; | ||
| 1519 | if (probe_is_return(tp)) | ||
| 1520 | return enable_kretprobe(&tp->rp); | ||
| 1521 | else | ||
| 1522 | return enable_kprobe(&tp->rp.kp); | ||
| 1523 | } | ||
| 1524 | |||
| 1525 | static void probe_event_disable(struct ftrace_event_call *call) | ||
| 1526 | { | ||
| 1527 | struct trace_probe *tp = (struct trace_probe *)call->data; | ||
| 1528 | |||
| 1529 | tp->flags &= ~TP_FLAG_TRACE; | ||
| 1530 | if (!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE))) { | ||
| 1531 | if (probe_is_return(tp)) | ||
| 1532 | disable_kretprobe(&tp->rp); | ||
| 1533 | else | ||
| 1534 | disable_kprobe(&tp->rp.kp); | ||
| 1535 | } | ||
| 1536 | } | ||
| 1537 | |||
| 1538 | #undef DEFINE_FIELD | 1689 | #undef DEFINE_FIELD |
| 1539 | #define DEFINE_FIELD(type, item, name, is_signed) \ | 1690 | #define DEFINE_FIELD(type, item, name, is_signed) \ |
| 1540 | do { \ | 1691 | do { \ |
| @@ -1596,7 +1747,7 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len) | |||
| 1596 | 1747 | ||
| 1597 | const char *fmt, *arg; | 1748 | const char *fmt, *arg; |
| 1598 | 1749 | ||
| 1599 | if (!probe_is_return(tp)) { | 1750 | if (!trace_probe_is_return(tp)) { |
| 1600 | fmt = "(%lx)"; | 1751 | fmt = "(%lx)"; |
| 1601 | arg = "REC->" FIELD_STRING_IP; | 1752 | arg = "REC->" FIELD_STRING_IP; |
| 1602 | } else { | 1753 | } else { |
| @@ -1713,49 +1864,25 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri, | |||
| 1713 | head = this_cpu_ptr(call->perf_events); | 1864 | head = this_cpu_ptr(call->perf_events); |
| 1714 | perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head); | 1865 | perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head); |
| 1715 | } | 1866 | } |
| 1716 | |||
| 1717 | static int probe_perf_enable(struct ftrace_event_call *call) | ||
| 1718 | { | ||
| 1719 | struct trace_probe *tp = (struct trace_probe *)call->data; | ||
| 1720 | |||
| 1721 | tp->flags |= TP_FLAG_PROFILE; | ||
| 1722 | |||
| 1723 | if (probe_is_return(tp)) | ||
| 1724 | return enable_kretprobe(&tp->rp); | ||
| 1725 | else | ||
| 1726 | return enable_kprobe(&tp->rp.kp); | ||
| 1727 | } | ||
| 1728 | |||
| 1729 | static void probe_perf_disable(struct ftrace_event_call *call) | ||
| 1730 | { | ||
| 1731 | struct trace_probe *tp = (struct trace_probe *)call->data; | ||
| 1732 | |||
| 1733 | tp->flags &= ~TP_FLAG_PROFILE; | ||
| 1734 | |||
| 1735 | if (!(tp->flags & TP_FLAG_TRACE)) { | ||
| 1736 | if (probe_is_return(tp)) | ||
| 1737 | disable_kretprobe(&tp->rp); | ||
| 1738 | else | ||
| 1739 | disable_kprobe(&tp->rp.kp); | ||
| 1740 | } | ||
| 1741 | } | ||
| 1742 | #endif /* CONFIG_PERF_EVENTS */ | 1867 | #endif /* CONFIG_PERF_EVENTS */ |
| 1743 | 1868 | ||
| 1744 | static __kprobes | 1869 | static __kprobes |
| 1745 | int kprobe_register(struct ftrace_event_call *event, enum trace_reg type) | 1870 | int kprobe_register(struct ftrace_event_call *event, enum trace_reg type) |
| 1746 | { | 1871 | { |
| 1872 | struct trace_probe *tp = (struct trace_probe *)event->data; | ||
| 1873 | |||
| 1747 | switch (type) { | 1874 | switch (type) { |
| 1748 | case TRACE_REG_REGISTER: | 1875 | case TRACE_REG_REGISTER: |
| 1749 | return probe_event_enable(event); | 1876 | return enable_trace_probe(tp, TP_FLAG_TRACE); |
| 1750 | case TRACE_REG_UNREGISTER: | 1877 | case TRACE_REG_UNREGISTER: |
| 1751 | probe_event_disable(event); | 1878 | disable_trace_probe(tp, TP_FLAG_TRACE); |
| 1752 | return 0; | 1879 | return 0; |
| 1753 | 1880 | ||
| 1754 | #ifdef CONFIG_PERF_EVENTS | 1881 | #ifdef CONFIG_PERF_EVENTS |
| 1755 | case TRACE_REG_PERF_REGISTER: | 1882 | case TRACE_REG_PERF_REGISTER: |
| 1756 | return probe_perf_enable(event); | 1883 | return enable_trace_probe(tp, TP_FLAG_PROFILE); |
| 1757 | case TRACE_REG_PERF_UNREGISTER: | 1884 | case TRACE_REG_PERF_UNREGISTER: |
| 1758 | probe_perf_disable(event); | 1885 | disable_trace_probe(tp, TP_FLAG_PROFILE); |
| 1759 | return 0; | 1886 | return 0; |
| 1760 | #endif | 1887 | #endif |
| 1761 | } | 1888 | } |
| @@ -1805,7 +1932,7 @@ static int register_probe_event(struct trace_probe *tp) | |||
| 1805 | 1932 | ||
| 1806 | /* Initialize ftrace_event_call */ | 1933 | /* Initialize ftrace_event_call */ |
| 1807 | INIT_LIST_HEAD(&call->class->fields); | 1934 | INIT_LIST_HEAD(&call->class->fields); |
| 1808 | if (probe_is_return(tp)) { | 1935 | if (trace_probe_is_return(tp)) { |
| 1809 | call->event.funcs = &kretprobe_funcs; | 1936 | call->event.funcs = &kretprobe_funcs; |
| 1810 | call->class->define_fields = kretprobe_event_define_fields; | 1937 | call->class->define_fields = kretprobe_event_define_fields; |
| 1811 | } else { | 1938 | } else { |
| @@ -1844,6 +1971,9 @@ static __init int init_kprobe_trace(void) | |||
| 1844 | struct dentry *d_tracer; | 1971 | struct dentry *d_tracer; |
| 1845 | struct dentry *entry; | 1972 | struct dentry *entry; |
| 1846 | 1973 | ||
| 1974 | if (register_module_notifier(&trace_probe_module_nb)) | ||
| 1975 | return -EINVAL; | ||
| 1976 | |||
| 1847 | d_tracer = tracing_init_dentry(); | 1977 | d_tracer = tracing_init_dentry(); |
| 1848 | if (!d_tracer) | 1978 | if (!d_tracer) |
| 1849 | return 0; | 1979 | return 0; |
| @@ -1870,8 +2000,12 @@ fs_initcall(init_kprobe_trace); | |||
| 1870 | 2000 | ||
| 1871 | #ifdef CONFIG_FTRACE_STARTUP_TEST | 2001 | #ifdef CONFIG_FTRACE_STARTUP_TEST |
| 1872 | 2002 | ||
| 1873 | static int kprobe_trace_selftest_target(int a1, int a2, int a3, | 2003 | /* |
| 1874 | int a4, int a5, int a6) | 2004 | * The "__used" keeps gcc from removing the function symbol |
| 2005 | * from the kallsyms table. | ||
| 2006 | */ | ||
| 2007 | static __used int kprobe_trace_selftest_target(int a1, int a2, int a3, | ||
| 2008 | int a4, int a5, int a6) | ||
| 1875 | { | 2009 | { |
| 1876 | return a1 + a2 + a3 + a4 + a5 + a6; | 2010 | return a1 + a2 + a3 + a4 + a5 + a6; |
| 1877 | } | 2011 | } |
| @@ -1893,12 +2027,12 @@ static __init int kprobe_trace_self_tests_init(void) | |||
| 1893 | warn++; | 2027 | warn++; |
| 1894 | } else { | 2028 | } else { |
| 1895 | /* Enable trace point */ | 2029 | /* Enable trace point */ |
| 1896 | tp = find_probe_event("testprobe", KPROBE_EVENT_SYSTEM); | 2030 | tp = find_trace_probe("testprobe", KPROBE_EVENT_SYSTEM); |
| 1897 | if (WARN_ON_ONCE(tp == NULL)) { | 2031 | if (WARN_ON_ONCE(tp == NULL)) { |
| 1898 | pr_warning("error on getting new probe.\n"); | 2032 | pr_warning("error on getting new probe.\n"); |
| 1899 | warn++; | 2033 | warn++; |
| 1900 | } else | 2034 | } else |
| 1901 | probe_event_enable(&tp->call); | 2035 | enable_trace_probe(tp, TP_FLAG_TRACE); |
| 1902 | } | 2036 | } |
| 1903 | 2037 | ||
| 1904 | ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target " | 2038 | ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target " |
| @@ -1908,12 +2042,12 @@ static __init int kprobe_trace_self_tests_init(void) | |||
| 1908 | warn++; | 2042 | warn++; |
| 1909 | } else { | 2043 | } else { |
| 1910 | /* Enable trace point */ | 2044 | /* Enable trace point */ |
| 1911 | tp = find_probe_event("testprobe2", KPROBE_EVENT_SYSTEM); | 2045 | tp = find_trace_probe("testprobe2", KPROBE_EVENT_SYSTEM); |
| 1912 | if (WARN_ON_ONCE(tp == NULL)) { | 2046 | if (WARN_ON_ONCE(tp == NULL)) { |
| 1913 | pr_warning("error on getting new probe.\n"); | 2047 | pr_warning("error on getting new probe.\n"); |
| 1914 | warn++; | 2048 | warn++; |
| 1915 | } else | 2049 | } else |
| 1916 | probe_event_enable(&tp->call); | 2050 | enable_trace_probe(tp, TP_FLAG_TRACE); |
| 1917 | } | 2051 | } |
| 1918 | 2052 | ||
| 1919 | if (warn) | 2053 | if (warn) |
| @@ -1934,7 +2068,7 @@ static __init int kprobe_trace_self_tests_init(void) | |||
| 1934 | } | 2068 | } |
| 1935 | 2069 | ||
| 1936 | end: | 2070 | end: |
| 1937 | cleanup_all_probes(); | 2071 | release_all_trace_probes(); |
| 1938 | if (warn) | 2072 | if (warn) |
| 1939 | pr_cont("NG: Some tests are failed. Please check them.\n"); | 2073 | pr_cont("NG: Some tests are failed. Please check them.\n"); |
| 1940 | else | 2074 | else |
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 017fa376505d..fd3c8aae55e5 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c | |||
| @@ -12,7 +12,7 @@ | |||
| 12 | #include <linux/slab.h> | 12 | #include <linux/slab.h> |
| 13 | #include <linux/time.h> | 13 | #include <linux/time.h> |
| 14 | 14 | ||
| 15 | #include <asm/atomic.h> | 15 | #include <linux/atomic.h> |
| 16 | 16 | ||
| 17 | #include "trace.h" | 17 | #include "trace.h" |
| 18 | #include "trace_output.h" | 18 | #include "trace_output.h" |
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index e37de492a9e1..51999309a6cf 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c | |||
| @@ -1107,19 +1107,20 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter, | |||
| 1107 | { | 1107 | { |
| 1108 | struct stack_entry *field; | 1108 | struct stack_entry *field; |
| 1109 | struct trace_seq *s = &iter->seq; | 1109 | struct trace_seq *s = &iter->seq; |
| 1110 | int i; | 1110 | unsigned long *p; |
| 1111 | unsigned long *end; | ||
| 1111 | 1112 | ||
| 1112 | trace_assign_type(field, iter->ent); | 1113 | trace_assign_type(field, iter->ent); |
| 1114 | end = (unsigned long *)((long)iter->ent + iter->ent_size); | ||
| 1113 | 1115 | ||
| 1114 | if (!trace_seq_puts(s, "<stack trace>\n")) | 1116 | if (!trace_seq_puts(s, "<stack trace>\n")) |
| 1115 | goto partial; | 1117 | goto partial; |
| 1116 | for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { | 1118 | |
| 1117 | if (!field->caller[i] || (field->caller[i] == ULONG_MAX)) | 1119 | for (p = field->caller; p && *p != ULONG_MAX && p < end; p++) { |
| 1118 | break; | ||
| 1119 | if (!trace_seq_puts(s, " => ")) | 1120 | if (!trace_seq_puts(s, " => ")) |
| 1120 | goto partial; | 1121 | goto partial; |
| 1121 | 1122 | ||
| 1122 | if (!seq_print_ip_sym(s, field->caller[i], flags)) | 1123 | if (!seq_print_ip_sym(s, *p, flags)) |
| 1123 | goto partial; | 1124 | goto partial; |
| 1124 | if (!trace_seq_puts(s, "\n")) | 1125 | if (!trace_seq_puts(s, "\n")) |
| 1125 | goto partial; | 1126 | goto partial; |
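The rewritten loop bounds the walk by the record size (iter->ent_size) instead of the fixed FTRACE_STACK_ENTRIES, so variable-sized stack records print all of their entries. The same bounded-walk idiom over a flexible trailing array, as a standalone hedged sketch (the struct and names are illustrative):

    #include <stdio.h>
    #include <stdlib.h>

    #define END_MARKER (~0UL)       /* plays the role of the ULONG_MAX terminator */

    struct stack_record {
            int size;                       /* total size of the record in bytes */
            unsigned long caller[];         /* variable-length tail */
    };

    static void print_callers(const struct stack_record *rec)
    {
            /* Bound the walk by the record size, not by a fixed array length. */
            const unsigned long *end =
                    (const unsigned long *)((const char *)rec + rec->size);
            const unsigned long *p;

            for (p = rec->caller; p < end && *p != END_MARKER; p++)
                    printf(" => %#lx\n", *p);
    }

    int main(void)
    {
            size_t bytes = sizeof(struct stack_record) + 4 * sizeof(unsigned long);
            struct stack_record *rec = malloc(bytes);

            rec->size = (int)bytes;
            rec->caller[0] = 0x1000;
            rec->caller[1] = 0x2000;
            rec->caller[2] = 0x3000;
            rec->caller[3] = END_MARKER;
            print_callers(rec);
            free(rec);
            return 0;
    }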
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index dff763b7baf1..1f06468a10d7 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c | |||
| @@ -240,13 +240,10 @@ static const char **find_next(void *v, loff_t *pos) | |||
| 240 | const char **fmt = v; | 240 | const char **fmt = v; |
| 241 | int start_index; | 241 | int start_index; |
| 242 | 242 | ||
| 243 | if (!fmt) | ||
| 244 | fmt = __start___trace_bprintk_fmt + *pos; | ||
| 245 | |||
| 246 | start_index = __stop___trace_bprintk_fmt - __start___trace_bprintk_fmt; | 243 | start_index = __stop___trace_bprintk_fmt - __start___trace_bprintk_fmt; |
| 247 | 244 | ||
| 248 | if (*pos < start_index) | 245 | if (*pos < start_index) |
| 249 | return fmt; | 246 | return __start___trace_bprintk_fmt + *pos; |
| 250 | 247 | ||
| 251 | return find_next_mod_format(start_index, v, fmt, pos); | 248 | return find_next_mod_format(start_index, v, fmt, pos); |
| 252 | } | 249 | } |
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index f029dd4fd2ca..e4a70c0c71b6 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c | |||
| @@ -227,7 +227,9 @@ static void wakeup_trace_close(struct trace_iterator *iter) | |||
| 227 | graph_trace_close(iter); | 227 | graph_trace_close(iter); |
| 228 | } | 228 | } |
| 229 | 229 | ||
| 230 | #define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_PROC) | 230 | #define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_PROC | \ |
| 231 | TRACE_GRAPH_PRINT_ABS_TIME | \ | ||
| 232 | TRACE_GRAPH_PRINT_DURATION) | ||
| 231 | 233 | ||
| 232 | static enum print_line_t wakeup_print_line(struct trace_iterator *iter) | 234 | static enum print_line_t wakeup_print_line(struct trace_iterator *iter) |
| 233 | { | 235 | { |
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index b0b53b8e4c25..77575b386d97 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c | |||
| @@ -156,20 +156,11 @@ stack_max_size_write(struct file *filp, const char __user *ubuf, | |||
| 156 | { | 156 | { |
| 157 | long *ptr = filp->private_data; | 157 | long *ptr = filp->private_data; |
| 158 | unsigned long val, flags; | 158 | unsigned long val, flags; |
| 159 | char buf[64]; | ||
| 160 | int ret; | 159 | int ret; |
| 161 | int cpu; | 160 | int cpu; |
| 162 | 161 | ||
| 163 | if (count >= sizeof(buf)) | 162 | ret = kstrtoul_from_user(ubuf, count, 10, &val); |
| 164 | return -EINVAL; | 163 | if (ret) |
| 165 | |||
| 166 | if (copy_from_user(&buf, ubuf, count)) | ||
| 167 | return -EFAULT; | ||
| 168 | |||
| 169 | buf[count] = 0; | ||
| 170 | |||
| 171 | ret = strict_strtoul(buf, 10, &val); | ||
| 172 | if (ret < 0) | ||
| 173 | return ret; | 164 | return ret; |
| 174 | 165 | ||
| 175 | local_irq_save(flags); | 166 | local_irq_save(flags); |
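kstrtoul_from_user() collapses the copy_from_user() / NUL-terminate / strict_strtoul() sequence above into a single call. A hedged sketch of a write handler in that style (the handler and variable names are illustrative, not the actual stack tracer code):

    #include <linux/fs.h>
    #include <linux/kernel.h>
    #include <linux/uaccess.h>

    static unsigned long demo_threshold;

    static ssize_t demo_threshold_write(struct file *filp, const char __user *ubuf,
                                        size_t count, loff_t *ppos)
    {
            unsigned long val;
            int ret;

            /* Copies from user space and parses base-10 in one step. */
            ret = kstrtoul_from_user(ubuf, count, 10, &val);
            if (ret)
                    return ret;

            demo_threshold = val;
            *ppos += count;
            return count;
    }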
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 3d0c56ad4792..36491cd5b7d4 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
| @@ -200,6 +200,7 @@ static int is_softlockup(unsigned long touch_ts) | |||
| 200 | } | 200 | } |
| 201 | 201 | ||
| 202 | #ifdef CONFIG_HARDLOCKUP_DETECTOR | 202 | #ifdef CONFIG_HARDLOCKUP_DETECTOR |
| 203 | |||
| 203 | static struct perf_event_attr wd_hw_attr = { | 204 | static struct perf_event_attr wd_hw_attr = { |
| 204 | .type = PERF_TYPE_HARDWARE, | 205 | .type = PERF_TYPE_HARDWARE, |
| 205 | .config = PERF_COUNT_HW_CPU_CYCLES, | 206 | .config = PERF_COUNT_HW_CPU_CYCLES, |
| @@ -209,7 +210,7 @@ static struct perf_event_attr wd_hw_attr = { | |||
| 209 | }; | 210 | }; |
| 210 | 211 | ||
| 211 | /* Callback function for perf event subsystem */ | 212 | /* Callback function for perf event subsystem */ |
| 212 | static void watchdog_overflow_callback(struct perf_event *event, int nmi, | 213 | static void watchdog_overflow_callback(struct perf_event *event, |
| 213 | struct perf_sample_data *data, | 214 | struct perf_sample_data *data, |
| 214 | struct pt_regs *regs) | 215 | struct pt_regs *regs) |
| 215 | { | 216 | { |
| @@ -368,10 +369,11 @@ static int watchdog_nmi_enable(int cpu) | |||
| 368 | if (event != NULL) | 369 | if (event != NULL) |
| 369 | goto out_enable; | 370 | goto out_enable; |
| 370 | 371 | ||
| 371 | /* Try to register using hardware perf events */ | ||
| 372 | wd_attr = &wd_hw_attr; | 372 | wd_attr = &wd_hw_attr; |
| 373 | wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh); | 373 | wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh); |
| 374 | event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback); | 374 | |
| 375 | /* Try to register using hardware perf events */ | ||
| 376 | event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL); | ||
| 375 | if (!IS_ERR(event)) { | 377 | if (!IS_ERR(event)) { |
| 376 | printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n"); | 378 | printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n"); |
| 377 | goto out_save; | 379 | goto out_save; |
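Two related API changes show up here: the perf overflow callback loses its int nmi argument, and perf_event_create_kernel_counter() gains a trailing context pointer, which the watchdog passes as NULL. A minimal sketch of the updated calling convention; the example_* names and the sample period are illustrative:

#include <linux/perf_event.h>

/* overflow handler with the new, nmi-less signature */
static void example_overflow(struct perf_event *event,
			     struct perf_sample_data *data,
			     struct pt_regs *regs)
{
	/* react to the counter overflow, e.g. check for a stuck CPU */
}

static struct perf_event *example_create(int cpu)
{
	struct perf_event_attr attr = {
		.type		= PERF_TYPE_HARDWARE,
		.config		= PERF_COUNT_HW_CPU_CYCLES,
		.size		= sizeof(attr),
		.pinned		= 1,
		.disabled	= 1,
		.sample_period	= 1000000,	/* illustrative period */
	};

	/* handler plus context pointer (NULL here) as the last two args */
	return perf_event_create_kernel_counter(&attr, cpu, NULL,
						example_overflow, NULL);
}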
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 0400553f0d04..25fb1b0e53fa 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
| @@ -221,7 +221,7 @@ typedef unsigned long mayday_mask_t; | |||
| 221 | * per-CPU workqueues: | 221 | * per-CPU workqueues: |
| 222 | */ | 222 | */ |
| 223 | struct workqueue_struct { | 223 | struct workqueue_struct { |
| 224 | unsigned int flags; /* I: WQ_* flags */ | 224 | unsigned int flags; /* W: WQ_* flags */ |
| 225 | union { | 225 | union { |
| 226 | struct cpu_workqueue_struct __percpu *pcpu; | 226 | struct cpu_workqueue_struct __percpu *pcpu; |
| 227 | struct cpu_workqueue_struct *single; | 227 | struct cpu_workqueue_struct *single; |
| @@ -240,6 +240,7 @@ struct workqueue_struct { | |||
| 240 | mayday_mask_t mayday_mask; /* cpus requesting rescue */ | 240 | mayday_mask_t mayday_mask; /* cpus requesting rescue */ |
| 241 | struct worker *rescuer; /* I: rescue worker */ | 241 | struct worker *rescuer; /* I: rescue worker */ |
| 242 | 242 | ||
| 243 | int nr_drainers; /* W: drain in progress */ | ||
| 243 | int saved_max_active; /* W: saved cwq max_active */ | 244 | int saved_max_active; /* W: saved cwq max_active */ |
| 244 | const char *name; /* I: workqueue name */ | 245 | const char *name; /* I: workqueue name */ |
| 245 | #ifdef CONFIG_LOCKDEP | 246 | #ifdef CONFIG_LOCKDEP |
| @@ -990,7 +991,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, | |||
| 990 | debug_work_activate(work); | 991 | debug_work_activate(work); |
| 991 | 992 | ||
| 992 | /* if dying, only works from the same workqueue are allowed */ | 993 | /* if dying, only works from the same workqueue are allowed */ |
| 993 | if (unlikely(wq->flags & WQ_DYING) && | 994 | if (unlikely(wq->flags & WQ_DRAINING) && |
| 994 | WARN_ON_ONCE(!is_chained_work(wq))) | 995 | WARN_ON_ONCE(!is_chained_work(wq))) |
| 995 | return; | 996 | return; |
| 996 | 997 | ||
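WQ_DYING becomes WQ_DRAINING, but the rule __queue_work() enforces is unchanged: once the flag is set, only "chained" queueing is tolerated, i.e. queueing done from a work item already running on the same workqueue; anything else trips the WARN_ON_ONCE and is dropped. A minimal sketch of a chained (self-requeueing) work item, using illustrative names and a hypothetical driver-state flag:

#include <linux/types.h>
#include <linux/workqueue.h>

static struct workqueue_struct *example_wq;	/* illustrative */
static bool example_more_to_do;			/* hypothetical driver state */

static void example_fn(struct work_struct *work);
static DECLARE_WORK(example_work, example_fn);

static void example_fn(struct work_struct *work)
{
	/* queueing from a work item already running on example_wq is
	 * "chained" queueing and remains legal while WQ_DRAINING is set;
	 * queue_work() from anywhere else would be warned about and dropped */
	if (example_more_to_do)
		queue_work(example_wq, &example_work);
}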
| @@ -2381,6 +2382,54 @@ out_unlock: | |||
| 2381 | } | 2382 | } |
| 2382 | EXPORT_SYMBOL_GPL(flush_workqueue); | 2383 | EXPORT_SYMBOL_GPL(flush_workqueue); |
| 2383 | 2384 | ||
| 2385 | /** | ||
| 2386 | * drain_workqueue - drain a workqueue | ||
| 2387 | * @wq: workqueue to drain | ||
| 2388 | * | ||
| 2389 | * Wait until the workqueue becomes empty. While draining is in progress, | ||
| 2390 | * only chain queueing is allowed. IOW, only currently pending or running | ||
| 2391 | * work items on @wq can queue further work items on it. @wq is flushed | ||
| 2392 | * repeatedly until it becomes empty. The number of flushes is determined | ||
| 2393 | * by the depth of chaining and should be relatively short. Whine if it | ||
| 2394 | * takes too long. | ||
| 2395 | */ | ||
| 2396 | void drain_workqueue(struct workqueue_struct *wq) | ||
| 2397 | { | ||
| 2398 | unsigned int flush_cnt = 0; | ||
| 2399 | unsigned int cpu; | ||
| 2400 | |||
| 2401 | /* | ||
| 2402 | * __queue_work() needs to test whether there are drainers, is much | ||
| 2403 | * hotter than drain_workqueue() and already looks at @wq->flags. | ||
| 2404 | * Use WQ_DRAINING so that queue doesn't have to check nr_drainers. | ||
| 2405 | */ | ||
| 2406 | spin_lock(&workqueue_lock); | ||
| 2407 | if (!wq->nr_drainers++) | ||
| 2408 | wq->flags |= WQ_DRAINING; | ||
| 2409 | spin_unlock(&workqueue_lock); | ||
| 2410 | reflush: | ||
| 2411 | flush_workqueue(wq); | ||
| 2412 | |||
| 2413 | for_each_cwq_cpu(cpu, wq) { | ||
| 2414 | struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); | ||
| 2415 | |||
| 2416 | if (!cwq->nr_active && list_empty(&cwq->delayed_works)) | ||
| 2417 | continue; | ||
| 2418 | |||
| 2419 | if (++flush_cnt == 10 || | ||
| 2420 | (flush_cnt % 100 == 0 && flush_cnt <= 1000)) | ||
| 2421 | pr_warning("workqueue %s: flush on destruction isn't complete after %u tries\n", | ||
| 2422 | wq->name, flush_cnt); | ||
| 2423 | goto reflush; | ||
| 2424 | } | ||
| 2425 | |||
| 2426 | spin_lock(&workqueue_lock); | ||
| 2427 | if (!--wq->nr_drainers) | ||
| 2428 | wq->flags &= ~WQ_DRAINING; | ||
| 2429 | spin_unlock(&workqueue_lock); | ||
| 2430 | } | ||
| 2431 | EXPORT_SYMBOL_GPL(drain_workqueue); | ||
| 2432 | |||
| 2384 | static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr, | 2433 | static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr, |
| 2385 | bool wait_executing) | 2434 | bool wait_executing) |
| 2386 | { | 2435 | { |
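drain_workqueue() is essentially flush_workqueue() in a loop: it keeps re-flushing until every cpu_workqueue has neither active nor delayed work, and complains if chained work keeps it busy for too long. A caller that only wants the queue quiesced, not destroyed, can now call it directly; this sketch uses illustrative names:

#include <linux/workqueue.h>

static void example_quiesce(struct workqueue_struct *example_wq)
{
	/* returns only once nothing is pending or running on example_wq;
	 * meanwhile, work queued from outside the chain is rejected */
	drain_workqueue(example_wq);
}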
| @@ -3009,34 +3058,10 @@ EXPORT_SYMBOL_GPL(__alloc_workqueue_key); | |||
| 3009 | */ | 3058 | */ |
| 3010 | void destroy_workqueue(struct workqueue_struct *wq) | 3059 | void destroy_workqueue(struct workqueue_struct *wq) |
| 3011 | { | 3060 | { |
| 3012 | unsigned int flush_cnt = 0; | ||
| 3013 | unsigned int cpu; | 3061 | unsigned int cpu; |
| 3014 | 3062 | ||
| 3015 | /* | 3063 | /* drain it before proceeding with destruction */ |
| 3016 | * Mark @wq dying and drain all pending works. Once WQ_DYING is | 3064 | drain_workqueue(wq); |
| 3017 | * set, only chain queueing is allowed. IOW, only currently | ||
| 3018 | * pending or running work items on @wq can queue further work | ||
| 3019 | * items on it. @wq is flushed repeatedly until it becomes empty. | ||
| 3020 | * The number of flushes is determined by the depth of chaining and | ||
| 3021 | * should be relatively short. Whine if it takes too long. | ||
| 3022 | */ | ||
| 3023 | wq->flags |= WQ_DYING; | ||
| 3024 | reflush: | ||
| 3025 | flush_workqueue(wq); | ||
| 3026 | |||
| 3027 | for_each_cwq_cpu(cpu, wq) { | ||
| 3028 | struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); | ||
| 3029 | |||
| 3030 | if (!cwq->nr_active && list_empty(&cwq->delayed_works)) | ||
| 3031 | continue; | ||
| 3032 | |||
| 3033 | if (++flush_cnt == 10 || | ||
| 3034 | (flush_cnt % 100 == 0 && flush_cnt <= 1000)) | ||
| 3035 | printk(KERN_WARNING "workqueue %s: flush on " | ||
| 3036 | "destruction isn't complete after %u tries\n", | ||
| 3037 | wq->name, flush_cnt); | ||
| 3038 | goto reflush; | ||
| 3039 | } | ||
| 3040 | 3065 | ||
| 3041 | /* | 3066 | /* |
| 3042 | * wq list is used to freeze wq, remove from list after | 3067 | * wq list is used to freeze wq, remove from list after |
