author     Herbert Xu <herbert@gondor.apana.org.au>   2013-09-06 22:53:35 -0400
committer  Herbert Xu <herbert@gondor.apana.org.au>   2013-09-06 22:53:35 -0400
commit     eeca9fad52fc4bfdf42c38bfcf383e932eb3e9d6 (patch)
tree       cc51c880459d41c0e8d7576405bef4c987bc7aa0 /kernel
parent     ff6f83fc9d44db09997937c3475d525a6866fbb4 (diff)
parent     b48a97be8e6c2afdba2f3b61fd88c3c7743fbd73 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux
Merge upstream tree in order to reinstate crct10dif.
Diffstat (limited to 'kernel')
69 files changed, 2886 insertions, 1908 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 271fd3119af9..470839d1a30e 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -9,7 +9,7 @@ obj-y = fork.o exec_domain.o panic.o printk.o \ | |||
9 | rcupdate.o extable.o params.o posix-timers.o \ | 9 | rcupdate.o extable.o params.o posix-timers.o \ |
10 | kthread.o wait.o sys_ni.o posix-cpu-timers.o mutex.o \ | 10 | kthread.o wait.o sys_ni.o posix-cpu-timers.o mutex.o \ |
11 | hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ | 11 | hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ |
12 | notifier.o ksysfs.o cred.o \ | 12 | notifier.o ksysfs.o cred.o reboot.o \ |
13 | async.o range.o groups.o lglock.o smpboot.o | 13 | async.o range.o groups.o lglock.o smpboot.o |
14 | 14 | ||
15 | ifdef CONFIG_FUNCTION_TRACER | 15 | ifdef CONFIG_FUNCTION_TRACER |
diff --git a/kernel/audit.h b/kernel/audit.h
index 1c95131ef760..123c9b7c3979 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -85,6 +85,7 @@ struct audit_names { | |||
85 | 85 | ||
86 | struct filename *name; | 86 | struct filename *name; |
87 | int name_len; /* number of chars to log */ | 87 | int name_len; /* number of chars to log */ |
88 | bool hidden; /* don't log this record */ | ||
88 | bool name_put; /* call __putname()? */ | 89 | bool name_put; /* call __putname()? */ |
89 | 90 | ||
90 | unsigned long ino; | 91 | unsigned long ino; |
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 6bd4a90d1991..f7aee8be7fb2 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -423,7 +423,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, | |||
423 | f->lsm_rule = NULL; | 423 | f->lsm_rule = NULL; |
424 | 424 | ||
425 | /* Support legacy tests for a valid loginuid */ | 425 | /* Support legacy tests for a valid loginuid */ |
426 | if ((f->type == AUDIT_LOGINUID) && (f->val == 4294967295)) { | 426 | if ((f->type == AUDIT_LOGINUID) && (f->val == ~0U)) { |
427 | f->type = AUDIT_LOGINUID_SET; | 427 | f->type = AUDIT_LOGINUID_SET; |
428 | f->val = 0; | 428 | f->val = 0; |
429 | } | 429 | } |
@@ -865,6 +865,12 @@ static inline int audit_add_rule(struct audit_entry *entry) | |||
865 | err = audit_add_watch(&entry->rule, &list); | 865 | err = audit_add_watch(&entry->rule, &list); |
866 | if (err) { | 866 | if (err) { |
867 | mutex_unlock(&audit_filter_mutex); | 867 | mutex_unlock(&audit_filter_mutex); |
868 | /* | ||
869 | * normally audit_add_tree_rule() will free it | ||
870 | * on failure | ||
871 | */ | ||
872 | if (tree) | ||
873 | audit_put_tree(tree); | ||
868 | goto error; | 874 | goto error; |
869 | } | 875 | } |
870 | } | 876 | } |
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 3c8a601324a2..9845cb32b60a 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1399,8 +1399,11 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts | |||
1399 | } | 1399 | } |
1400 | 1400 | ||
1401 | i = 0; | 1401 | i = 0; |
1402 | list_for_each_entry(n, &context->names_list, list) | 1402 | list_for_each_entry(n, &context->names_list, list) { |
1403 | if (n->hidden) | ||
1404 | continue; | ||
1403 | audit_log_name(context, n, NULL, i++, &call_panic); | 1405 | audit_log_name(context, n, NULL, i++, &call_panic); |
1406 | } | ||
1404 | 1407 | ||
1405 | /* Send end of event record to help user space know we are finished */ | 1408 | /* Send end of event record to help user space know we are finished */ |
1406 | ab = audit_log_start(context, GFP_KERNEL, AUDIT_EOE); | 1409 | ab = audit_log_start(context, GFP_KERNEL, AUDIT_EOE); |
@@ -1769,14 +1772,15 @@ void audit_putname(struct filename *name) | |||
1769 | * __audit_inode - store the inode and device from a lookup | 1772 | * __audit_inode - store the inode and device from a lookup |
1770 | * @name: name being audited | 1773 | * @name: name being audited |
1771 | * @dentry: dentry being audited | 1774 | * @dentry: dentry being audited |
1772 | * @parent: does this dentry represent the parent? | 1775 | * @flags: attributes for this particular entry |
1773 | */ | 1776 | */ |
1774 | void __audit_inode(struct filename *name, const struct dentry *dentry, | 1777 | void __audit_inode(struct filename *name, const struct dentry *dentry, |
1775 | unsigned int parent) | 1778 | unsigned int flags) |
1776 | { | 1779 | { |
1777 | struct audit_context *context = current->audit_context; | 1780 | struct audit_context *context = current->audit_context; |
1778 | const struct inode *inode = dentry->d_inode; | 1781 | const struct inode *inode = dentry->d_inode; |
1779 | struct audit_names *n; | 1782 | struct audit_names *n; |
1783 | bool parent = flags & AUDIT_INODE_PARENT; | ||
1780 | 1784 | ||
1781 | if (!context->in_syscall) | 1785 | if (!context->in_syscall) |
1782 | return; | 1786 | return; |
@@ -1831,6 +1835,8 @@ out: | |||
1831 | if (parent) { | 1835 | if (parent) { |
1832 | n->name_len = n->name ? parent_len(n->name->name) : AUDIT_NAME_FULL; | 1836 | n->name_len = n->name ? parent_len(n->name->name) : AUDIT_NAME_FULL; |
1833 | n->type = AUDIT_TYPE_PARENT; | 1837 | n->type = AUDIT_TYPE_PARENT; |
1838 | if (flags & AUDIT_INODE_HIDDEN) | ||
1839 | n->hidden = true; | ||
1834 | } else { | 1840 | } else { |
1835 | n->name_len = AUDIT_NAME_FULL; | 1841 | n->name_len = AUDIT_NAME_FULL; |
1836 | n->type = AUDIT_TYPE_NORMAL; | 1842 | n->type = AUDIT_TYPE_NORMAL; |
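The flag handling above lets a caller record a parent entry that audit_log_exit() will skip. A minimal illustrative call site, assuming the AUDIT_INODE_PARENT and AUDIT_INODE_HIDDEN flags are visible through <linux/audit.h>; the helper name is made up:

```c
#include <linux/audit.h>
#include <linux/dcache.h>

/* Hypothetical helper, not part of this patch. */
static void audit_parent_quietly(struct filename *name, struct dentry *dentry)
{
	/* record the parent, but mark it hidden so no PATH record is emitted */
	__audit_inode(name, dentry->d_parent,
		      AUDIT_INODE_PARENT | AUDIT_INODE_HIDDEN);
}
```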
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index e5583d10a325..789ec4683db3 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -802,7 +802,6 @@ static struct cgroup *task_cgroup_from_root(struct task_struct *task, | |||
802 | */ | 802 | */ |
803 | 803 | ||
804 | static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); | 804 | static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); |
805 | static struct dentry *cgroup_lookup(struct inode *, struct dentry *, unsigned int); | ||
806 | static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry); | 805 | static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry); |
807 | static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files, | 806 | static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files, |
808 | unsigned long subsys_mask); | 807 | unsigned long subsys_mask); |
@@ -1846,36 +1845,43 @@ out: | |||
1846 | EXPORT_SYMBOL_GPL(cgroup_path); | 1845 | EXPORT_SYMBOL_GPL(cgroup_path); |
1847 | 1846 | ||
1848 | /** | 1847 | /** |
1849 | * task_cgroup_path_from_hierarchy - cgroup path of a task on a hierarchy | 1848 | * task_cgroup_path - cgroup path of a task in the first cgroup hierarchy |
1850 | * @task: target task | 1849 | * @task: target task |
1851 | * @hierarchy_id: the hierarchy to look up @task's cgroup from | ||
1852 | * @buf: the buffer to write the path into | 1850 | * @buf: the buffer to write the path into |
1853 | * @buflen: the length of the buffer | 1851 | * @buflen: the length of the buffer |
1854 | * | 1852 | * |
1855 | * Determine @task's cgroup on the hierarchy specified by @hierarchy_id and | 1853 | * Determine @task's cgroup on the first (the one with the lowest non-zero |
1856 | * copy its path into @buf. This function grabs cgroup_mutex and shouldn't | 1854 | * hierarchy_id) cgroup hierarchy and copy its path into @buf. This |
1857 | * be used inside locks used by cgroup controller callbacks. | 1855 | * function grabs cgroup_mutex and shouldn't be used inside locks used by |
1856 | * cgroup controller callbacks. | ||
1857 | * | ||
1858 | * Returns 0 on success, fails with -%ENAMETOOLONG if @buflen is too short. | ||
1858 | */ | 1859 | */ |
1859 | int task_cgroup_path_from_hierarchy(struct task_struct *task, int hierarchy_id, | 1860 | int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen) |
1860 | char *buf, size_t buflen) | ||
1861 | { | 1861 | { |
1862 | struct cgroupfs_root *root; | 1862 | struct cgroupfs_root *root; |
1863 | struct cgroup *cgrp = NULL; | 1863 | struct cgroup *cgrp; |
1864 | int ret = -ENOENT; | 1864 | int hierarchy_id = 1, ret = 0; |
1865 | |||
1866 | if (buflen < 2) | ||
1867 | return -ENAMETOOLONG; | ||
1865 | 1868 | ||
1866 | mutex_lock(&cgroup_mutex); | 1869 | mutex_lock(&cgroup_mutex); |
1867 | 1870 | ||
1868 | root = idr_find(&cgroup_hierarchy_idr, hierarchy_id); | 1871 | root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id); |
1872 | |||
1869 | if (root) { | 1873 | if (root) { |
1870 | cgrp = task_cgroup_from_root(task, root); | 1874 | cgrp = task_cgroup_from_root(task, root); |
1871 | ret = cgroup_path(cgrp, buf, buflen); | 1875 | ret = cgroup_path(cgrp, buf, buflen); |
1876 | } else { | ||
1877 | /* if no hierarchy exists, everyone is in "/" */ | ||
1878 | memcpy(buf, "/", 2); | ||
1872 | } | 1879 | } |
1873 | 1880 | ||
1874 | mutex_unlock(&cgroup_mutex); | 1881 | mutex_unlock(&cgroup_mutex); |
1875 | |||
1876 | return ret; | 1882 | return ret; |
1877 | } | 1883 | } |
1878 | EXPORT_SYMBOL_GPL(task_cgroup_path_from_hierarchy); | 1884 | EXPORT_SYMBOL_GPL(task_cgroup_path); |
1879 | 1885 | ||
1880 | /* | 1886 | /* |
1881 | * Control Group taskset | 1887 | * Control Group taskset |
@@ -2642,7 +2648,7 @@ static const struct inode_operations cgroup_file_inode_operations = { | |||
2642 | }; | 2648 | }; |
2643 | 2649 | ||
2644 | static const struct inode_operations cgroup_dir_inode_operations = { | 2650 | static const struct inode_operations cgroup_dir_inode_operations = { |
2645 | .lookup = cgroup_lookup, | 2651 | .lookup = simple_lookup, |
2646 | .mkdir = cgroup_mkdir, | 2652 | .mkdir = cgroup_mkdir, |
2647 | .rmdir = cgroup_rmdir, | 2653 | .rmdir = cgroup_rmdir, |
2648 | .rename = cgroup_rename, | 2654 | .rename = cgroup_rename, |
@@ -2652,14 +2658,6 @@ static const struct inode_operations cgroup_dir_inode_operations = { | |||
2652 | .removexattr = cgroup_removexattr, | 2658 | .removexattr = cgroup_removexattr, |
2653 | }; | 2659 | }; |
2654 | 2660 | ||
2655 | static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) | ||
2656 | { | ||
2657 | if (dentry->d_name.len > NAME_MAX) | ||
2658 | return ERR_PTR(-ENAMETOOLONG); | ||
2659 | d_add(dentry, NULL); | ||
2660 | return NULL; | ||
2661 | } | ||
2662 | |||
2663 | /* | 2661 | /* |
2664 | * Check if a file is a control file | 2662 | * Check if a file is a control file |
2665 | */ | 2663 | */ |
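The renamed task_cgroup_path() keeps the cgroup_path() error convention described in the kernel-doc above. A hypothetical caller, not part of this merge, might use it as follows (buffer size and messages are illustrative):

```c
#include <linux/cgroup.h>
#include <linux/printk.h>
#include <linux/sched.h>

static void report_task_cgroup(struct task_struct *tsk)
{
	char buf[128];

	/* takes cgroup_mutex: do not call under cgroup controller locks */
	if (task_cgroup_path(tsk, buf, sizeof(buf)) == 0)
		pr_info("%s(%d) is in cgroup %s\n", tsk->comm, tsk->pid, buf);
	else	/* -ENAMETOOLONG: buf is too small for the path */
		pr_warn("cgroup path of %s does not fit\n", tsk->comm);
}
```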
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 198a38883e64..b2b227b82123 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -366,7 +366,7 @@ EXPORT_SYMBOL(cpu_down); | |||
366 | #endif /*CONFIG_HOTPLUG_CPU*/ | 366 | #endif /*CONFIG_HOTPLUG_CPU*/ |
367 | 367 | ||
368 | /* Requires cpu_add_remove_lock to be held */ | 368 | /* Requires cpu_add_remove_lock to be held */ |
369 | static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen) | 369 | static int _cpu_up(unsigned int cpu, int tasks_frozen) |
370 | { | 370 | { |
371 | int ret, nr_calls = 0; | 371 | int ret, nr_calls = 0; |
372 | void *hcpu = (void *)(long)cpu; | 372 | void *hcpu = (void *)(long)cpu; |
@@ -419,7 +419,7 @@ out: | |||
419 | return ret; | 419 | return ret; |
420 | } | 420 | } |
421 | 421 | ||
422 | int __cpuinit cpu_up(unsigned int cpu) | 422 | int cpu_up(unsigned int cpu) |
423 | { | 423 | { |
424 | int err = 0; | 424 | int err = 0; |
425 | 425 | ||
@@ -618,7 +618,7 @@ core_initcall(cpu_hotplug_pm_sync_init); | |||
618 | * It must be called by the arch code on the new cpu, before the new cpu | 618 | * It must be called by the arch code on the new cpu, before the new cpu |
619 | * enables interrupts and before the "boot" cpu returns from __cpu_up(). | 619 | * enables interrupts and before the "boot" cpu returns from __cpu_up(). |
620 | */ | 620 | */ |
621 | void __cpuinit notify_cpu_starting(unsigned int cpu) | 621 | void notify_cpu_starting(unsigned int cpu) |
622 | { | 622 | { |
623 | unsigned long val = CPU_STARTING; | 623 | unsigned long val = CPU_STARTING; |
624 | 624 | ||
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1db3af933704..f86599e8c123 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -182,7 +182,7 @@ void update_perf_cpu_limits(void) | |||
182 | u64 tmp = perf_sample_period_ns; | 182 | u64 tmp = perf_sample_period_ns; |
183 | 183 | ||
184 | tmp *= sysctl_perf_cpu_time_max_percent; | 184 | tmp *= sysctl_perf_cpu_time_max_percent; |
185 | tmp = do_div(tmp, 100); | 185 | do_div(tmp, 100); |
186 | atomic_set(&perf_sample_allowed_ns, tmp); | 186 | atomic_set(&perf_sample_allowed_ns, tmp); |
187 | } | 187 | } |
188 | 188 | ||
@@ -232,7 +232,7 @@ DEFINE_PER_CPU(u64, running_sample_length); | |||
232 | void perf_sample_event_took(u64 sample_len_ns) | 232 | void perf_sample_event_took(u64 sample_len_ns) |
233 | { | 233 | { |
234 | u64 avg_local_sample_len; | 234 | u64 avg_local_sample_len; |
235 | u64 local_samples_len = __get_cpu_var(running_sample_length); | 235 | u64 local_samples_len; |
236 | 236 | ||
237 | if (atomic_read(&perf_sample_allowed_ns) == 0) | 237 | if (atomic_read(&perf_sample_allowed_ns) == 0) |
238 | return; | 238 | return; |
@@ -947,8 +947,18 @@ perf_lock_task_context(struct task_struct *task, int ctxn, unsigned long *flags) | |||
947 | { | 947 | { |
948 | struct perf_event_context *ctx; | 948 | struct perf_event_context *ctx; |
949 | 949 | ||
950 | rcu_read_lock(); | ||
951 | retry: | 950 | retry: |
951 | /* | ||
952 | * One of the few rules of preemptible RCU is that one cannot do | ||
953 | * rcu_read_unlock() while holding a scheduler (or nested) lock when | ||
954 | * part of the read side critical section was preemptible -- see | ||
955 | * rcu_read_unlock_special(). | ||
956 | * | ||
957 | * Since ctx->lock nests under rq->lock we must ensure the entire read | ||
958 | * side critical section is non-preemptible. | ||
959 | */ | ||
960 | preempt_disable(); | ||
961 | rcu_read_lock(); | ||
952 | ctx = rcu_dereference(task->perf_event_ctxp[ctxn]); | 962 | ctx = rcu_dereference(task->perf_event_ctxp[ctxn]); |
953 | if (ctx) { | 963 | if (ctx) { |
954 | /* | 964 | /* |
@@ -964,6 +974,8 @@ retry: | |||
964 | raw_spin_lock_irqsave(&ctx->lock, *flags); | 974 | raw_spin_lock_irqsave(&ctx->lock, *flags); |
965 | if (ctx != rcu_dereference(task->perf_event_ctxp[ctxn])) { | 975 | if (ctx != rcu_dereference(task->perf_event_ctxp[ctxn])) { |
966 | raw_spin_unlock_irqrestore(&ctx->lock, *flags); | 976 | raw_spin_unlock_irqrestore(&ctx->lock, *flags); |
977 | rcu_read_unlock(); | ||
978 | preempt_enable(); | ||
967 | goto retry; | 979 | goto retry; |
968 | } | 980 | } |
969 | 981 | ||
@@ -973,6 +985,7 @@ retry: | |||
973 | } | 985 | } |
974 | } | 986 | } |
975 | rcu_read_unlock(); | 987 | rcu_read_unlock(); |
988 | preempt_enable(); | ||
976 | return ctx; | 989 | return ctx; |
977 | } | 990 | } |
978 | 991 | ||
@@ -1950,7 +1963,16 @@ static int __perf_event_enable(void *info) | |||
1950 | struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); | 1963 | struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); |
1951 | int err; | 1964 | int err; |
1952 | 1965 | ||
1953 | if (WARN_ON_ONCE(!ctx->is_active)) | 1966 | /* |
1967 | * There's a time window between 'ctx->is_active' check | ||
1968 | * in perf_event_enable function and this place having: | ||
1969 | * - IRQs on | ||
1970 | * - ctx->lock unlocked | ||
1971 | * | ||
1972 | * where the task could be killed and 'ctx' deactivated | ||
1973 | * by perf_event_exit_task. | ||
1974 | */ | ||
1975 | if (!ctx->is_active) | ||
1954 | return -EINVAL; | 1976 | return -EINVAL; |
1955 | 1977 | ||
1956 | raw_spin_lock(&ctx->lock); | 1978 | raw_spin_lock(&ctx->lock); |
@@ -6212,8 +6234,6 @@ perf_event_mux_interval_ms_store(struct device *dev, | |||
6212 | return count; | 6234 | return count; |
6213 | } | 6235 | } |
6214 | 6236 | ||
6215 | #define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store) | ||
6216 | |||
6217 | static struct device_attribute pmu_dev_attrs[] = { | 6237 | static struct device_attribute pmu_dev_attrs[] = { |
6218 | __ATTR_RO(type), | 6238 | __ATTR_RO(type), |
6219 | __ATTR_RW(perf_event_mux_interval_ms), | 6239 | __ATTR_RW(perf_event_mux_interval_ms), |
@@ -7465,7 +7485,7 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent, | |||
7465 | * child. | 7485 | * child. |
7466 | */ | 7486 | */ |
7467 | 7487 | ||
7468 | child_ctx = alloc_perf_context(event->pmu, child); | 7488 | child_ctx = alloc_perf_context(parent_ctx->pmu, child); |
7469 | if (!child_ctx) | 7489 | if (!child_ctx) |
7470 | return -ENOMEM; | 7490 | return -ENOMEM; |
7471 | 7491 | ||
@@ -7608,7 +7628,7 @@ static void __init perf_event_init_all_cpus(void) | |||
7608 | } | 7628 | } |
7609 | } | 7629 | } |
7610 | 7630 | ||
7611 | static void __cpuinit perf_event_init_cpu(int cpu) | 7631 | static void perf_event_init_cpu(int cpu) |
7612 | { | 7632 | { |
7613 | struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); | 7633 | struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); |
7614 | 7634 | ||
@@ -7697,7 +7717,7 @@ static struct notifier_block perf_reboot_notifier = { | |||
7697 | .priority = INT_MIN, | 7717 | .priority = INT_MIN, |
7698 | }; | 7718 | }; |
7699 | 7719 | ||
7700 | static int __cpuinit | 7720 | static int |
7701 | perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) | 7721 | perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) |
7702 | { | 7722 | { |
7703 | unsigned int cpu = (long)hcpu; | 7723 | unsigned int cpu = (long)hcpu; |
diff --git a/kernel/exit.c b/kernel/exit.c
index fafe75d9e6f6..a949819055d5 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -808,7 +808,7 @@ void do_exit(long code) | |||
808 | /* | 808 | /* |
809 | * FIXME: do that only when needed, using sched_exit tracepoint | 809 | * FIXME: do that only when needed, using sched_exit tracepoint |
810 | */ | 810 | */ |
811 | ptrace_put_breakpoints(tsk); | 811 | flush_ptrace_hw_breakpoint(tsk); |
812 | 812 | ||
813 | exit_notify(tsk, group_dead); | 813 | exit_notify(tsk, group_dead); |
814 | #ifdef CONFIG_NUMA | 814 | #ifdef CONFIG_NUMA |
diff --git a/kernel/fork.c b/kernel/fork.c
index 6e6a1c11b3e5..403d2bb8a968 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -365,8 +365,6 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) | |||
365 | mm->locked_vm = 0; | 365 | mm->locked_vm = 0; |
366 | mm->mmap = NULL; | 366 | mm->mmap = NULL; |
367 | mm->mmap_cache = NULL; | 367 | mm->mmap_cache = NULL; |
368 | mm->free_area_cache = oldmm->mmap_base; | ||
369 | mm->cached_hole_size = ~0UL; | ||
370 | mm->map_count = 0; | 368 | mm->map_count = 0; |
371 | cpumask_clear(mm_cpumask(mm)); | 369 | cpumask_clear(mm_cpumask(mm)); |
372 | mm->mm_rb = RB_ROOT; | 370 | mm->mm_rb = RB_ROOT; |
@@ -540,8 +538,6 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) | |||
540 | mm->nr_ptes = 0; | 538 | mm->nr_ptes = 0; |
541 | memset(&mm->rss_stat, 0, sizeof(mm->rss_stat)); | 539 | memset(&mm->rss_stat, 0, sizeof(mm->rss_stat)); |
542 | spin_lock_init(&mm->page_table_lock); | 540 | spin_lock_init(&mm->page_table_lock); |
543 | mm->free_area_cache = TASK_UNMAPPED_BASE; | ||
544 | mm->cached_hole_size = ~0UL; | ||
545 | mm_init_aio(mm); | 541 | mm_init_aio(mm); |
546 | mm_init_owner(mm, p); | 542 | mm_init_owner(mm, p); |
547 | 543 | ||
@@ -1550,7 +1546,7 @@ static inline void init_idle_pids(struct pid_link *links) | |||
1550 | } | 1546 | } |
1551 | } | 1547 | } |
1552 | 1548 | ||
1553 | struct task_struct * __cpuinit fork_idle(int cpu) | 1549 | struct task_struct *fork_idle(int cpu) |
1554 | { | 1550 | { |
1555 | struct task_struct *task; | 1551 | struct task_struct *task; |
1556 | task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0); | 1552 | task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0); |
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 3ee4d06c6fc2..383319bae3f7 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -722,17 +722,20 @@ static int hrtimer_switch_to_hres(void) | |||
722 | return 1; | 722 | return 1; |
723 | } | 723 | } |
724 | 724 | ||
725 | static void clock_was_set_work(struct work_struct *work) | ||
726 | { | ||
727 | clock_was_set(); | ||
728 | } | ||
729 | |||
730 | static DECLARE_WORK(hrtimer_work, clock_was_set_work); | ||
731 | |||
725 | /* | 732 | /* |
726 | * Called from timekeeping code to reprogramm the hrtimer interrupt | 733 | * Called from timekeeping and resume code to reprogramm the hrtimer |
727 | * device. If called from the timer interrupt context we defer it to | 734 | * interrupt device on all cpus. |
728 | * softirq context. | ||
729 | */ | 735 | */ |
730 | void clock_was_set_delayed(void) | 736 | void clock_was_set_delayed(void) |
731 | { | 737 | { |
732 | struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); | 738 | schedule_work(&hrtimer_work); |
733 | |||
734 | cpu_base->clock_was_set = 1; | ||
735 | __raise_softirq_irqoff(HRTIMER_SOFTIRQ); | ||
736 | } | 739 | } |
737 | 740 | ||
738 | #else | 741 | #else |
@@ -774,15 +777,19 @@ void clock_was_set(void) | |||
774 | 777 | ||
775 | /* | 778 | /* |
776 | * During resume we might have to reprogram the high resolution timer | 779 | * During resume we might have to reprogram the high resolution timer |
777 | * interrupt (on the local CPU): | 780 | * interrupt on all online CPUs. However, all other CPUs will be |
781 | * stopped with IRQs interrupts disabled so the clock_was_set() call | ||
782 | * must be deferred. | ||
778 | */ | 783 | */ |
779 | void hrtimers_resume(void) | 784 | void hrtimers_resume(void) |
780 | { | 785 | { |
781 | WARN_ONCE(!irqs_disabled(), | 786 | WARN_ONCE(!irqs_disabled(), |
782 | KERN_INFO "hrtimers_resume() called with IRQs enabled!"); | 787 | KERN_INFO "hrtimers_resume() called with IRQs enabled!"); |
783 | 788 | ||
789 | /* Retrigger on the local CPU */ | ||
784 | retrigger_next_event(NULL); | 790 | retrigger_next_event(NULL); |
785 | timerfd_clock_was_set(); | 791 | /* And schedule a retrigger for all others */ |
792 | clock_was_set_delayed(); | ||
786 | } | 793 | } |
787 | 794 | ||
788 | static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) | 795 | static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) |
@@ -1433,13 +1440,6 @@ void hrtimer_peek_ahead_timers(void) | |||
1433 | 1440 | ||
1434 | static void run_hrtimer_softirq(struct softirq_action *h) | 1441 | static void run_hrtimer_softirq(struct softirq_action *h) |
1435 | { | 1442 | { |
1436 | struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); | ||
1437 | |||
1438 | if (cpu_base->clock_was_set) { | ||
1439 | cpu_base->clock_was_set = 0; | ||
1440 | clock_was_set(); | ||
1441 | } | ||
1442 | |||
1443 | hrtimer_peek_ahead_timers(); | 1443 | hrtimer_peek_ahead_timers(); |
1444 | } | 1444 | } |
1445 | 1445 | ||
@@ -1659,7 +1659,7 @@ SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp, | |||
1659 | /* | 1659 | /* |
1660 | * Functions related to boot-time initialization: | 1660 | * Functions related to boot-time initialization: |
1661 | */ | 1661 | */ |
1662 | static void __cpuinit init_hrtimers_cpu(int cpu) | 1662 | static void init_hrtimers_cpu(int cpu) |
1663 | { | 1663 | { |
1664 | struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu); | 1664 | struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu); |
1665 | int i; | 1665 | int i; |
@@ -1740,7 +1740,7 @@ static void migrate_hrtimers(int scpu) | |||
1740 | 1740 | ||
1741 | #endif /* CONFIG_HOTPLUG_CPU */ | 1741 | #endif /* CONFIG_HOTPLUG_CPU */ |
1742 | 1742 | ||
1743 | static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self, | 1743 | static int hrtimer_cpu_notify(struct notifier_block *self, |
1744 | unsigned long action, void *hcpu) | 1744 | unsigned long action, void *hcpu) |
1745 | { | 1745 | { |
1746 | int scpu = (long)hcpu; | 1746 | int scpu = (long)hcpu; |
@@ -1773,7 +1773,7 @@ static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self, | |||
1773 | return NOTIFY_OK; | 1773 | return NOTIFY_OK; |
1774 | } | 1774 | } |
1775 | 1775 | ||
1776 | static struct notifier_block __cpuinitdata hrtimers_nb = { | 1776 | static struct notifier_block hrtimers_nb = { |
1777 | .notifier_call = hrtimer_cpu_notify, | 1777 | .notifier_call = hrtimer_cpu_notify, |
1778 | }; | 1778 | }; |
1779 | 1779 | ||
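The clock_was_set_delayed() rework above is the usual build-time workqueue deferral: work that cannot run in the current (IRQ-disabled) context is queued and executed later in process context. A self-contained sketch of that pattern, with invented names:

```c
#include <linux/workqueue.h>

static void my_deferred_fn(struct work_struct *work)
{
	/* runs later in process context via a kworker thread */
}

static DECLARE_WORK(my_deferred_work, my_deferred_fn);

static void trigger_from_atomic_context(void)
{
	/* safe from IRQ or atomic context; only queues the work item */
	schedule_work(&my_deferred_work);
}
```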
diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c
index e3544c19bdd2..452d6f2ba21d 100644
--- a/kernel/irq/generic-chip.c
+++ b/kernel/irq/generic-chip.c
@@ -275,10 +275,7 @@ int irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip, | |||
275 | if (d->gc) | 275 | if (d->gc) |
276 | return -EBUSY; | 276 | return -EBUSY; |
277 | 277 | ||
278 | if (d->revmap_type != IRQ_DOMAIN_MAP_LINEAR) | 278 | numchips = DIV_ROUND_UP(d->revmap_size, irqs_per_chip); |
279 | return -EINVAL; | ||
280 | |||
281 | numchips = d->revmap_data.linear.size / irqs_per_chip; | ||
282 | if (!numchips) | 279 | if (!numchips) |
283 | return -EINVAL; | 280 | return -EINVAL; |
284 | 281 | ||
@@ -310,6 +307,7 @@ int irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip, | |||
310 | /* Calc pointer to the next generic chip */ | 307 | /* Calc pointer to the next generic chip */ |
311 | tmp += sizeof(*gc) + num_ct * sizeof(struct irq_chip_type); | 308 | tmp += sizeof(*gc) + num_ct * sizeof(struct irq_chip_type); |
312 | } | 309 | } |
310 | d->name = name; | ||
313 | return 0; | 311 | return 0; |
314 | } | 312 | } |
315 | EXPORT_SYMBOL_GPL(irq_alloc_domain_generic_chips); | 313 | EXPORT_SYMBOL_GPL(irq_alloc_domain_generic_chips); |
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 1ed8dff17eb9..706724e9835d 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -23,9 +23,11 @@ static DEFINE_MUTEX(revmap_trees_mutex); | |||
23 | static struct irq_domain *irq_default_domain; | 23 | static struct irq_domain *irq_default_domain; |
24 | 24 | ||
25 | /** | 25 | /** |
26 | * irq_domain_alloc() - Allocate a new irq_domain data structure | 26 | * __irq_domain_add() - Allocate a new irq_domain data structure |
27 | * @of_node: optional device-tree node of the interrupt controller | 27 | * @of_node: optional device-tree node of the interrupt controller |
28 | * @revmap_type: type of reverse mapping to use | 28 | * @size: Size of linear map; 0 for radix mapping only |
29 | * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no | ||
30 | * direct mapping | ||
29 | * @ops: map/unmap domain callbacks | 31 | * @ops: map/unmap domain callbacks |
30 | * @host_data: Controller private data pointer | 32 | * @host_data: Controller private data pointer |
31 | * | 33 | * |
@@ -33,41 +35,35 @@ static struct irq_domain *irq_default_domain; | |||
33 | * register allocated irq_domain with irq_domain_register(). Returns pointer | 35 | * register allocated irq_domain with irq_domain_register(). Returns pointer |
34 | * to IRQ domain, or NULL on failure. | 36 | * to IRQ domain, or NULL on failure. |
35 | */ | 37 | */ |
36 | static struct irq_domain *irq_domain_alloc(struct device_node *of_node, | 38 | struct irq_domain *__irq_domain_add(struct device_node *of_node, int size, |
37 | unsigned int revmap_type, | 39 | irq_hw_number_t hwirq_max, int direct_max, |
38 | const struct irq_domain_ops *ops, | 40 | const struct irq_domain_ops *ops, |
39 | void *host_data) | 41 | void *host_data) |
40 | { | 42 | { |
41 | struct irq_domain *domain; | 43 | struct irq_domain *domain; |
42 | 44 | ||
43 | domain = kzalloc_node(sizeof(*domain), GFP_KERNEL, | 45 | domain = kzalloc_node(sizeof(*domain) + (sizeof(unsigned int) * size), |
44 | of_node_to_nid(of_node)); | 46 | GFP_KERNEL, of_node_to_nid(of_node)); |
45 | if (WARN_ON(!domain)) | 47 | if (WARN_ON(!domain)) |
46 | return NULL; | 48 | return NULL; |
47 | 49 | ||
48 | /* Fill structure */ | 50 | /* Fill structure */ |
49 | domain->revmap_type = revmap_type; | 51 | INIT_RADIX_TREE(&domain->revmap_tree, GFP_KERNEL); |
50 | domain->ops = ops; | 52 | domain->ops = ops; |
51 | domain->host_data = host_data; | 53 | domain->host_data = host_data; |
52 | domain->of_node = of_node_get(of_node); | 54 | domain->of_node = of_node_get(of_node); |
55 | domain->hwirq_max = hwirq_max; | ||
56 | domain->revmap_size = size; | ||
57 | domain->revmap_direct_max_irq = direct_max; | ||
53 | 58 | ||
54 | return domain; | ||
55 | } | ||
56 | |||
57 | static void irq_domain_free(struct irq_domain *domain) | ||
58 | { | ||
59 | of_node_put(domain->of_node); | ||
60 | kfree(domain); | ||
61 | } | ||
62 | |||
63 | static void irq_domain_add(struct irq_domain *domain) | ||
64 | { | ||
65 | mutex_lock(&irq_domain_mutex); | 59 | mutex_lock(&irq_domain_mutex); |
66 | list_add(&domain->link, &irq_domain_list); | 60 | list_add(&domain->link, &irq_domain_list); |
67 | mutex_unlock(&irq_domain_mutex); | 61 | mutex_unlock(&irq_domain_mutex); |
68 | pr_debug("Allocated domain of type %d @0x%p\n", | 62 | |
69 | domain->revmap_type, domain); | 63 | pr_debug("Added domain %s\n", domain->name); |
64 | return domain; | ||
70 | } | 65 | } |
66 | EXPORT_SYMBOL_GPL(__irq_domain_add); | ||
71 | 67 | ||
72 | /** | 68 | /** |
73 | * irq_domain_remove() - Remove an irq domain. | 69 | * irq_domain_remove() - Remove an irq domain. |
@@ -81,29 +77,12 @@ void irq_domain_remove(struct irq_domain *domain) | |||
81 | { | 77 | { |
82 | mutex_lock(&irq_domain_mutex); | 78 | mutex_lock(&irq_domain_mutex); |
83 | 79 | ||
84 | switch (domain->revmap_type) { | 80 | /* |
85 | case IRQ_DOMAIN_MAP_LEGACY: | 81 | * radix_tree_delete() takes care of destroying the root |
86 | /* | 82 | * node when all entries are removed. Shout if there are |
87 | * Legacy domains don't manage their own irq_desc | 83 | * any mappings left. |
88 | * allocations, we expect the caller to handle irq_desc | 84 | */ |
89 | * freeing on their own. | 85 | WARN_ON(domain->revmap_tree.height); |
90 | */ | ||
91 | break; | ||
92 | case IRQ_DOMAIN_MAP_TREE: | ||
93 | /* | ||
94 | * radix_tree_delete() takes care of destroying the root | ||
95 | * node when all entries are removed. Shout if there are | ||
96 | * any mappings left. | ||
97 | */ | ||
98 | WARN_ON(domain->revmap_data.tree.height); | ||
99 | break; | ||
100 | case IRQ_DOMAIN_MAP_LINEAR: | ||
101 | kfree(domain->revmap_data.linear.revmap); | ||
102 | domain->revmap_data.linear.size = 0; | ||
103 | break; | ||
104 | case IRQ_DOMAIN_MAP_NOMAP: | ||
105 | break; | ||
106 | } | ||
107 | 86 | ||
108 | list_del(&domain->link); | 87 | list_del(&domain->link); |
109 | 88 | ||
@@ -115,44 +94,30 @@ void irq_domain_remove(struct irq_domain *domain) | |||
115 | 94 | ||
116 | mutex_unlock(&irq_domain_mutex); | 95 | mutex_unlock(&irq_domain_mutex); |
117 | 96 | ||
118 | pr_debug("Removed domain of type %d @0x%p\n", | 97 | pr_debug("Removed domain %s\n", domain->name); |
119 | domain->revmap_type, domain); | ||
120 | 98 | ||
121 | irq_domain_free(domain); | 99 | of_node_put(domain->of_node); |
100 | kfree(domain); | ||
122 | } | 101 | } |
123 | EXPORT_SYMBOL_GPL(irq_domain_remove); | 102 | EXPORT_SYMBOL_GPL(irq_domain_remove); |
124 | 103 | ||
125 | static unsigned int irq_domain_legacy_revmap(struct irq_domain *domain, | ||
126 | irq_hw_number_t hwirq) | ||
127 | { | ||
128 | irq_hw_number_t first_hwirq = domain->revmap_data.legacy.first_hwirq; | ||
129 | int size = domain->revmap_data.legacy.size; | ||
130 | |||
131 | if (WARN_ON(hwirq < first_hwirq || hwirq >= first_hwirq + size)) | ||
132 | return 0; | ||
133 | return hwirq - first_hwirq + domain->revmap_data.legacy.first_irq; | ||
134 | } | ||
135 | |||
136 | /** | 104 | /** |
137 | * irq_domain_add_simple() - Allocate and register a simple irq_domain. | 105 | * irq_domain_add_simple() - Register an irq_domain and optionally map a range of irqs |
138 | * @of_node: pointer to interrupt controller's device tree node. | 106 | * @of_node: pointer to interrupt controller's device tree node. |
139 | * @size: total number of irqs in mapping | 107 | * @size: total number of irqs in mapping |
140 | * @first_irq: first number of irq block assigned to the domain, | 108 | * @first_irq: first number of irq block assigned to the domain, |
141 | * pass zero to assign irqs on-the-fly. This will result in a | 109 | * pass zero to assign irqs on-the-fly. If first_irq is non-zero, then |
142 | * linear IRQ domain so it is important to use irq_create_mapping() | 110 | * pre-map all of the irqs in the domain to virqs starting at first_irq. |
143 | * for each used IRQ, especially when SPARSE_IRQ is enabled. | ||
144 | * @ops: map/unmap domain callbacks | 111 | * @ops: map/unmap domain callbacks |
145 | * @host_data: Controller private data pointer | 112 | * @host_data: Controller private data pointer |
146 | * | 113 | * |
147 | * Allocates a legacy irq_domain if irq_base is positive or a linear | 114 | * Allocates an irq_domain, and optionally if first_irq is positive then also |
148 | * domain otherwise. For the legacy domain, IRQ descriptors will also | 115 | * allocate irq_descs and map all of the hwirqs to virqs starting at first_irq. |
149 | * be allocated. | ||
150 | * | 116 | * |
151 | * This is intended to implement the expected behaviour for most | 117 | * This is intended to implement the expected behaviour for most |
152 | * interrupt controllers which is that a linear mapping should | 118 | * interrupt controllers. If device tree is used, then first_irq will be 0 and |
153 | * normally be used unless the system requires a legacy mapping in | 119 | * irqs get mapped dynamically on the fly. However, if the controller requires |
154 | * order to support supplying interrupt numbers during non-DT | 120 | * static virq assignments (non-DT boot) then it will set that up correctly. |
155 | * registration of devices. | ||
156 | */ | 121 | */ |
157 | struct irq_domain *irq_domain_add_simple(struct device_node *of_node, | 122 | struct irq_domain *irq_domain_add_simple(struct device_node *of_node, |
158 | unsigned int size, | 123 | unsigned int size, |
@@ -160,33 +125,25 @@ struct irq_domain *irq_domain_add_simple(struct device_node *of_node, | |||
160 | const struct irq_domain_ops *ops, | 125 | const struct irq_domain_ops *ops, |
161 | void *host_data) | 126 | void *host_data) |
162 | { | 127 | { |
163 | if (first_irq > 0) { | 128 | struct irq_domain *domain; |
164 | int irq_base; | 129 | |
130 | domain = __irq_domain_add(of_node, size, size, 0, ops, host_data); | ||
131 | if (!domain) | ||
132 | return NULL; | ||
165 | 133 | ||
134 | if (first_irq > 0) { | ||
166 | if (IS_ENABLED(CONFIG_SPARSE_IRQ)) { | 135 | if (IS_ENABLED(CONFIG_SPARSE_IRQ)) { |
167 | /* | 136 | /* attempt to allocated irq_descs */ |
168 | * Set the descriptor allocator to search for a | 137 | int rc = irq_alloc_descs(first_irq, first_irq, size, |
169 | * 1-to-1 mapping, such as irq_alloc_desc_at(). | 138 | of_node_to_nid(of_node)); |
170 | * Use of_node_to_nid() which is defined to | 139 | if (rc < 0) |
171 | * numa_node_id() on platforms that have no custom | ||
172 | * implementation. | ||
173 | */ | ||
174 | irq_base = irq_alloc_descs(first_irq, first_irq, size, | ||
175 | of_node_to_nid(of_node)); | ||
176 | if (irq_base < 0) { | ||
177 | pr_info("Cannot allocate irq_descs @ IRQ%d, assuming pre-allocated\n", | 140 | pr_info("Cannot allocate irq_descs @ IRQ%d, assuming pre-allocated\n", |
178 | first_irq); | 141 | first_irq); |
179 | irq_base = first_irq; | 142 | } |
180 | } | 143 | irq_domain_associate_many(domain, first_irq, 0, size); |
181 | } else | ||
182 | irq_base = first_irq; | ||
183 | |||
184 | return irq_domain_add_legacy(of_node, size, irq_base, 0, | ||
185 | ops, host_data); | ||
186 | } | 144 | } |
187 | 145 | ||
188 | /* A linear domain is the default */ | 146 | return domain; |
189 | return irq_domain_add_linear(of_node, size, ops, host_data); | ||
190 | } | 147 | } |
191 | EXPORT_SYMBOL_GPL(irq_domain_add_simple); | 148 | EXPORT_SYMBOL_GPL(irq_domain_add_simple); |
192 | 149 | ||
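With the legacy/linear split removed, irq_domain_add_simple() covers both registration styles. A driver-side sketch of the behaviour described in the kernel-doc above; the foo_* names are invented, and first_irq == 0 asks for on-the-fly virq allocation:

```c
#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/of.h>

static int foo_irq_map(struct irq_domain *d, unsigned int virq,
		       irq_hw_number_t hwirq)
{
	/* per-irq setup; dummy_irq_chip stands in for a real chip */
	irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_simple_irq);
	return 0;
}

static const struct irq_domain_ops foo_irq_ops = {
	.map   = foo_irq_map,
	.xlate = irq_domain_xlate_onecell,
};

static int foo_init(struct device_node *np)
{
	/* 32 hwirqs; mappings are created on demand via irq_create_mapping() */
	struct irq_domain *d = irq_domain_add_simple(np, 32, 0,
						     &foo_irq_ops, NULL);
	return d ? 0 : -ENOMEM;
}
```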
@@ -213,131 +170,19 @@ struct irq_domain *irq_domain_add_legacy(struct device_node *of_node, | |||
213 | void *host_data) | 170 | void *host_data) |
214 | { | 171 | { |
215 | struct irq_domain *domain; | 172 | struct irq_domain *domain; |
216 | unsigned int i; | ||
217 | 173 | ||
218 | domain = irq_domain_alloc(of_node, IRQ_DOMAIN_MAP_LEGACY, ops, host_data); | 174 | domain = __irq_domain_add(of_node, first_hwirq + size, |
175 | first_hwirq + size, 0, ops, host_data); | ||
219 | if (!domain) | 176 | if (!domain) |
220 | return NULL; | 177 | return NULL; |
221 | 178 | ||
222 | domain->revmap_data.legacy.first_irq = first_irq; | 179 | irq_domain_associate_many(domain, first_irq, first_hwirq, size); |
223 | domain->revmap_data.legacy.first_hwirq = first_hwirq; | ||
224 | domain->revmap_data.legacy.size = size; | ||
225 | |||
226 | mutex_lock(&irq_domain_mutex); | ||
227 | /* Verify that all the irqs are available */ | ||
228 | for (i = 0; i < size; i++) { | ||
229 | int irq = first_irq + i; | ||
230 | struct irq_data *irq_data = irq_get_irq_data(irq); | ||
231 | |||
232 | if (WARN_ON(!irq_data || irq_data->domain)) { | ||
233 | mutex_unlock(&irq_domain_mutex); | ||
234 | irq_domain_free(domain); | ||
235 | return NULL; | ||
236 | } | ||
237 | } | ||
238 | |||
239 | /* Claim all of the irqs before registering a legacy domain */ | ||
240 | for (i = 0; i < size; i++) { | ||
241 | struct irq_data *irq_data = irq_get_irq_data(first_irq + i); | ||
242 | irq_data->hwirq = first_hwirq + i; | ||
243 | irq_data->domain = domain; | ||
244 | } | ||
245 | mutex_unlock(&irq_domain_mutex); | ||
246 | |||
247 | for (i = 0; i < size; i++) { | ||
248 | int irq = first_irq + i; | ||
249 | int hwirq = first_hwirq + i; | ||
250 | |||
251 | /* IRQ0 gets ignored */ | ||
252 | if (!irq) | ||
253 | continue; | ||
254 | |||
255 | /* Legacy flags are left to default at this point, | ||
256 | * one can then use irq_create_mapping() to | ||
257 | * explicitly change them | ||
258 | */ | ||
259 | if (ops->map) | ||
260 | ops->map(domain, irq, hwirq); | ||
261 | |||
262 | /* Clear norequest flags */ | ||
263 | irq_clear_status_flags(irq, IRQ_NOREQUEST); | ||
264 | } | ||
265 | 180 | ||
266 | irq_domain_add(domain); | ||
267 | return domain; | 181 | return domain; |
268 | } | 182 | } |
269 | EXPORT_SYMBOL_GPL(irq_domain_add_legacy); | 183 | EXPORT_SYMBOL_GPL(irq_domain_add_legacy); |
270 | 184 | ||
271 | /** | 185 | /** |
272 | * irq_domain_add_linear() - Allocate and register a linear revmap irq_domain. | ||
273 | * @of_node: pointer to interrupt controller's device tree node. | ||
274 | * @size: Number of interrupts in the domain. | ||
275 | * @ops: map/unmap domain callbacks | ||
276 | * @host_data: Controller private data pointer | ||
277 | */ | ||
278 | struct irq_domain *irq_domain_add_linear(struct device_node *of_node, | ||
279 | unsigned int size, | ||
280 | const struct irq_domain_ops *ops, | ||
281 | void *host_data) | ||
282 | { | ||
283 | struct irq_domain *domain; | ||
284 | unsigned int *revmap; | ||
285 | |||
286 | revmap = kzalloc_node(sizeof(*revmap) * size, GFP_KERNEL, | ||
287 | of_node_to_nid(of_node)); | ||
288 | if (WARN_ON(!revmap)) | ||
289 | return NULL; | ||
290 | |||
291 | domain = irq_domain_alloc(of_node, IRQ_DOMAIN_MAP_LINEAR, ops, host_data); | ||
292 | if (!domain) { | ||
293 | kfree(revmap); | ||
294 | return NULL; | ||
295 | } | ||
296 | domain->revmap_data.linear.size = size; | ||
297 | domain->revmap_data.linear.revmap = revmap; | ||
298 | irq_domain_add(domain); | ||
299 | return domain; | ||
300 | } | ||
301 | EXPORT_SYMBOL_GPL(irq_domain_add_linear); | ||
302 | |||
303 | struct irq_domain *irq_domain_add_nomap(struct device_node *of_node, | ||
304 | unsigned int max_irq, | ||
305 | const struct irq_domain_ops *ops, | ||
306 | void *host_data) | ||
307 | { | ||
308 | struct irq_domain *domain = irq_domain_alloc(of_node, | ||
309 | IRQ_DOMAIN_MAP_NOMAP, ops, host_data); | ||
310 | if (domain) { | ||
311 | domain->revmap_data.nomap.max_irq = max_irq ? max_irq : ~0; | ||
312 | irq_domain_add(domain); | ||
313 | } | ||
314 | return domain; | ||
315 | } | ||
316 | EXPORT_SYMBOL_GPL(irq_domain_add_nomap); | ||
317 | |||
318 | /** | ||
319 | * irq_domain_add_tree() | ||
320 | * @of_node: pointer to interrupt controller's device tree node. | ||
321 | * @ops: map/unmap domain callbacks | ||
322 | * | ||
323 | * Note: The radix tree will be allocated later during boot automatically | ||
324 | * (the reverse mapping will use the slow path until that happens). | ||
325 | */ | ||
326 | struct irq_domain *irq_domain_add_tree(struct device_node *of_node, | ||
327 | const struct irq_domain_ops *ops, | ||
328 | void *host_data) | ||
329 | { | ||
330 | struct irq_domain *domain = irq_domain_alloc(of_node, | ||
331 | IRQ_DOMAIN_MAP_TREE, ops, host_data); | ||
332 | if (domain) { | ||
333 | INIT_RADIX_TREE(&domain->revmap_data.tree, GFP_KERNEL); | ||
334 | irq_domain_add(domain); | ||
335 | } | ||
336 | return domain; | ||
337 | } | ||
338 | EXPORT_SYMBOL_GPL(irq_domain_add_tree); | ||
339 | |||
340 | /** | ||
341 | * irq_find_host() - Locates a domain for a given device node | 186 | * irq_find_host() - Locates a domain for a given device node |
342 | * @node: device-tree node of the interrupt controller | 187 | * @node: device-tree node of the interrupt controller |
343 | */ | 188 | */ |
@@ -385,125 +230,108 @@ void irq_set_default_host(struct irq_domain *domain) | |||
385 | } | 230 | } |
386 | EXPORT_SYMBOL_GPL(irq_set_default_host); | 231 | EXPORT_SYMBOL_GPL(irq_set_default_host); |
387 | 232 | ||
388 | static void irq_domain_disassociate_many(struct irq_domain *domain, | 233 | static void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq) |
389 | unsigned int irq_base, int count) | ||
390 | { | 234 | { |
391 | /* | 235 | struct irq_data *irq_data = irq_get_irq_data(irq); |
392 | * disassociate in reverse order; | 236 | irq_hw_number_t hwirq; |
393 | * not strictly necessary, but nice for unwinding | ||
394 | */ | ||
395 | while (count--) { | ||
396 | int irq = irq_base + count; | ||
397 | struct irq_data *irq_data = irq_get_irq_data(irq); | ||
398 | irq_hw_number_t hwirq; | ||
399 | 237 | ||
400 | if (WARN_ON(!irq_data || irq_data->domain != domain)) | 238 | if (WARN(!irq_data || irq_data->domain != domain, |
401 | continue; | 239 | "virq%i doesn't exist; cannot disassociate\n", irq)) |
240 | return; | ||
402 | 241 | ||
403 | hwirq = irq_data->hwirq; | 242 | hwirq = irq_data->hwirq; |
404 | irq_set_status_flags(irq, IRQ_NOREQUEST); | 243 | irq_set_status_flags(irq, IRQ_NOREQUEST); |
405 | 244 | ||
406 | /* remove chip and handler */ | 245 | /* remove chip and handler */ |
407 | irq_set_chip_and_handler(irq, NULL, NULL); | 246 | irq_set_chip_and_handler(irq, NULL, NULL); |
408 | 247 | ||
409 | /* Make sure it's completed */ | 248 | /* Make sure it's completed */ |
410 | synchronize_irq(irq); | 249 | synchronize_irq(irq); |
411 | 250 | ||
412 | /* Tell the PIC about it */ | 251 | /* Tell the PIC about it */ |
413 | if (domain->ops->unmap) | 252 | if (domain->ops->unmap) |
414 | domain->ops->unmap(domain, irq); | 253 | domain->ops->unmap(domain, irq); |
415 | smp_mb(); | 254 | smp_mb(); |
416 | 255 | ||
417 | irq_data->domain = NULL; | 256 | irq_data->domain = NULL; |
418 | irq_data->hwirq = 0; | 257 | irq_data->hwirq = 0; |
419 | 258 | ||
420 | /* Clear reverse map */ | 259 | /* Clear reverse map for this hwirq */ |
421 | switch(domain->revmap_type) { | 260 | if (hwirq < domain->revmap_size) { |
422 | case IRQ_DOMAIN_MAP_LINEAR: | 261 | domain->linear_revmap[hwirq] = 0; |
423 | if (hwirq < domain->revmap_data.linear.size) | 262 | } else { |
424 | domain->revmap_data.linear.revmap[hwirq] = 0; | 263 | mutex_lock(&revmap_trees_mutex); |
425 | break; | 264 | radix_tree_delete(&domain->revmap_tree, hwirq); |
426 | case IRQ_DOMAIN_MAP_TREE: | 265 | mutex_unlock(&revmap_trees_mutex); |
427 | mutex_lock(&revmap_trees_mutex); | ||
428 | radix_tree_delete(&domain->revmap_data.tree, hwirq); | ||
429 | mutex_unlock(&revmap_trees_mutex); | ||
430 | break; | ||
431 | } | ||
432 | } | 266 | } |
433 | } | 267 | } |
434 | 268 | ||
435 | int irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base, | 269 | int irq_domain_associate(struct irq_domain *domain, unsigned int virq, |
436 | irq_hw_number_t hwirq_base, int count) | 270 | irq_hw_number_t hwirq) |
437 | { | 271 | { |
438 | unsigned int virq = irq_base; | 272 | struct irq_data *irq_data = irq_get_irq_data(virq); |
439 | irq_hw_number_t hwirq = hwirq_base; | 273 | int ret; |
440 | int i, ret; | ||
441 | 274 | ||
442 | pr_debug("%s(%s, irqbase=%i, hwbase=%i, count=%i)\n", __func__, | 275 | if (WARN(hwirq >= domain->hwirq_max, |
443 | of_node_full_name(domain->of_node), irq_base, (int)hwirq_base, count); | 276 | "error: hwirq 0x%x is too large for %s\n", (int)hwirq, domain->name)) |
277 | return -EINVAL; | ||
278 | if (WARN(!irq_data, "error: virq%i is not allocated", virq)) | ||
279 | return -EINVAL; | ||
280 | if (WARN(irq_data->domain, "error: virq%i is already associated", virq)) | ||
281 | return -EINVAL; | ||
444 | 282 | ||
445 | for (i = 0; i < count; i++) { | 283 | mutex_lock(&irq_domain_mutex); |
446 | struct irq_data *irq_data = irq_get_irq_data(virq + i); | 284 | irq_data->hwirq = hwirq; |
447 | 285 | irq_data->domain = domain; | |
448 | if (WARN(!irq_data, "error: irq_desc not allocated; " | 286 | if (domain->ops->map) { |
449 | "irq=%i hwirq=0x%x\n", virq + i, (int)hwirq + i)) | 287 | ret = domain->ops->map(domain, virq, hwirq); |
450 | return -EINVAL; | 288 | if (ret != 0) { |
451 | if (WARN(irq_data->domain, "error: irq_desc already associated; " | 289 | /* |
452 | "irq=%i hwirq=0x%x\n", virq + i, (int)hwirq + i)) | 290 | * If map() returns -EPERM, this interrupt is protected |
453 | return -EINVAL; | 291 | * by the firmware or some other service and shall not |
454 | }; | 292 | * be mapped. Don't bother telling the user about it. |
455 | 293 | */ | |
456 | for (i = 0; i < count; i++, virq++, hwirq++) { | 294 | if (ret != -EPERM) { |
457 | struct irq_data *irq_data = irq_get_irq_data(virq); | 295 | pr_info("%s didn't like hwirq-0x%lx to VIRQ%i mapping (rc=%d)\n", |
458 | 296 | domain->name, hwirq, virq, ret); | |
459 | irq_data->hwirq = hwirq; | ||
460 | irq_data->domain = domain; | ||
461 | if (domain->ops->map) { | ||
462 | ret = domain->ops->map(domain, virq, hwirq); | ||
463 | if (ret != 0) { | ||
464 | /* | ||
465 | * If map() returns -EPERM, this interrupt is protected | ||
466 | * by the firmware or some other service and shall not | ||
467 | * be mapped. | ||
468 | * | ||
469 | * Since on some platforms we blindly try to map everything | ||
470 | * we end up with a log full of backtraces. | ||
471 | * | ||
472 | * So instead, we silently fail on -EPERM, it is the | ||
473 | * responsibility of the PIC driver to display a relevant | ||
474 | * message if needed. | ||
475 | */ | ||
476 | if (ret != -EPERM) { | ||
477 | pr_err("irq-%i==>hwirq-0x%lx mapping failed: %d\n", | ||
478 | virq, hwirq, ret); | ||
479 | WARN_ON(1); | ||
480 | } | ||
481 | irq_data->domain = NULL; | ||
482 | irq_data->hwirq = 0; | ||
483 | goto err_unmap; | ||
484 | } | 297 | } |
298 | irq_data->domain = NULL; | ||
299 | irq_data->hwirq = 0; | ||
300 | mutex_unlock(&irq_domain_mutex); | ||
301 | return ret; | ||
485 | } | 302 | } |
486 | 303 | ||
487 | switch (domain->revmap_type) { | 304 | /* If not already assigned, give the domain the chip's name */ |
488 | case IRQ_DOMAIN_MAP_LINEAR: | 305 | if (!domain->name && irq_data->chip) |
489 | if (hwirq < domain->revmap_data.linear.size) | 306 | domain->name = irq_data->chip->name; |
490 | domain->revmap_data.linear.revmap[hwirq] = virq; | 307 | } |
491 | break; | ||
492 | case IRQ_DOMAIN_MAP_TREE: | ||
493 | mutex_lock(&revmap_trees_mutex); | ||
494 | radix_tree_insert(&domain->revmap_data.tree, hwirq, irq_data); | ||
495 | mutex_unlock(&revmap_trees_mutex); | ||
496 | break; | ||
497 | } | ||
498 | 308 | ||
499 | irq_clear_status_flags(virq, IRQ_NOREQUEST); | 309 | if (hwirq < domain->revmap_size) { |
310 | domain->linear_revmap[hwirq] = virq; | ||
311 | } else { | ||
312 | mutex_lock(&revmap_trees_mutex); | ||
313 | radix_tree_insert(&domain->revmap_tree, hwirq, irq_data); | ||
314 | mutex_unlock(&revmap_trees_mutex); | ||
500 | } | 315 | } |
316 | mutex_unlock(&irq_domain_mutex); | ||
317 | |||
318 | irq_clear_status_flags(virq, IRQ_NOREQUEST); | ||
501 | 319 | ||
502 | return 0; | 320 | return 0; |
321 | } | ||
322 | EXPORT_SYMBOL_GPL(irq_domain_associate); | ||
503 | 323 | ||
504 | err_unmap: | 324 | void irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base, |
505 | irq_domain_disassociate_many(domain, irq_base, i); | 325 | irq_hw_number_t hwirq_base, int count) |
506 | return -EINVAL; | 326 | { |
327 | int i; | ||
328 | |||
329 | pr_debug("%s(%s, irqbase=%i, hwbase=%i, count=%i)\n", __func__, | ||
330 | of_node_full_name(domain->of_node), irq_base, (int)hwirq_base, count); | ||
331 | |||
332 | for (i = 0; i < count; i++) { | ||
333 | irq_domain_associate(domain, irq_base + i, hwirq_base + i); | ||
334 | } | ||
507 | } | 335 | } |
508 | EXPORT_SYMBOL_GPL(irq_domain_associate_many); | 336 | EXPORT_SYMBOL_GPL(irq_domain_associate_many); |
509 | 337 | ||
@@ -513,7 +341,9 @@ EXPORT_SYMBOL_GPL(irq_domain_associate_many); | |||
513 | * | 341 | * |
514 | * This routine is used for irq controllers which can choose the hardware | 342 | * This routine is used for irq controllers which can choose the hardware |
515 | * interrupt numbers they generate. In such a case it's simplest to use | 343 | * interrupt numbers they generate. In such a case it's simplest to use |
516 | * the linux irq as the hardware interrupt number. | 344 | * the linux irq as the hardware interrupt number. It still uses the linear |
345 | * or radix tree to store the mapping, but the irq controller can optimize | ||
346 | * the revmap path by using the hwirq directly. | ||
517 | */ | 347 | */ |
518 | unsigned int irq_create_direct_mapping(struct irq_domain *domain) | 348 | unsigned int irq_create_direct_mapping(struct irq_domain *domain) |
519 | { | 349 | { |
@@ -522,17 +352,14 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain) | |||
522 | if (domain == NULL) | 352 | if (domain == NULL) |
523 | domain = irq_default_domain; | 353 | domain = irq_default_domain; |
524 | 354 | ||
525 | if (WARN_ON(!domain || domain->revmap_type != IRQ_DOMAIN_MAP_NOMAP)) | ||
526 | return 0; | ||
527 | |||
528 | virq = irq_alloc_desc_from(1, of_node_to_nid(domain->of_node)); | 355 | virq = irq_alloc_desc_from(1, of_node_to_nid(domain->of_node)); |
529 | if (!virq) { | 356 | if (!virq) { |
530 | pr_debug("create_direct virq allocation failed\n"); | 357 | pr_debug("create_direct virq allocation failed\n"); |
531 | return 0; | 358 | return 0; |
532 | } | 359 | } |
533 | if (virq >= domain->revmap_data.nomap.max_irq) { | 360 | if (virq >= domain->revmap_direct_max_irq) { |
534 | pr_err("ERROR: no free irqs available below %i maximum\n", | 361 | pr_err("ERROR: no free irqs available below %i maximum\n", |
535 | domain->revmap_data.nomap.max_irq); | 362 | domain->revmap_direct_max_irq); |
536 | irq_free_desc(virq); | 363 | irq_free_desc(virq); |
537 | return 0; | 364 | return 0; |
538 | } | 365 | } |
@@ -569,9 +396,7 @@ unsigned int irq_create_mapping(struct irq_domain *domain, | |||
569 | if (domain == NULL) | 396 | if (domain == NULL) |
570 | domain = irq_default_domain; | 397 | domain = irq_default_domain; |
571 | if (domain == NULL) { | 398 | if (domain == NULL) { |
572 | pr_warning("irq_create_mapping called for" | 399 | WARN(1, "%s(, %lx) called with NULL domain\n", __func__, hwirq); |
573 | " NULL domain, hwirq=%lx\n", hwirq); | ||
574 | WARN_ON(1); | ||
575 | return 0; | 400 | return 0; |
576 | } | 401 | } |
577 | pr_debug("-> using domain @%p\n", domain); | 402 | pr_debug("-> using domain @%p\n", domain); |
@@ -583,10 +408,6 @@ unsigned int irq_create_mapping(struct irq_domain *domain, | |||
583 | return virq; | 408 | return virq; |
584 | } | 409 | } |
585 | 410 | ||
586 | /* Get a virtual interrupt number */ | ||
587 | if (domain->revmap_type == IRQ_DOMAIN_MAP_LEGACY) | ||
588 | return irq_domain_legacy_revmap(domain, hwirq); | ||
589 | |||
590 | /* Allocate a virtual interrupt number */ | 411 | /* Allocate a virtual interrupt number */ |
591 | hint = hwirq % nr_irqs; | 412 | hint = hwirq % nr_irqs; |
592 | if (hint == 0) | 413 | if (hint == 0) |
@@ -639,12 +460,7 @@ int irq_create_strict_mappings(struct irq_domain *domain, unsigned int irq_base, | |||
639 | if (unlikely(ret < 0)) | 460 | if (unlikely(ret < 0)) |
640 | return ret; | 461 | return ret; |
641 | 462 | ||
642 | ret = irq_domain_associate_many(domain, irq_base, hwirq_base, count); | 463 | irq_domain_associate_many(domain, irq_base, hwirq_base, count); |
643 | if (unlikely(ret < 0)) { | ||
644 | irq_free_descs(irq_base, count); | ||
645 | return ret; | ||
646 | } | ||
647 | |||
648 | return 0; | 464 | return 0; |
649 | } | 465 | } |
650 | EXPORT_SYMBOL_GPL(irq_create_strict_mappings); | 466 | EXPORT_SYMBOL_GPL(irq_create_strict_mappings); |
@@ -659,20 +475,8 @@ unsigned int irq_create_of_mapping(struct device_node *controller, | |||
659 | 475 | ||
660 | domain = controller ? irq_find_host(controller) : irq_default_domain; | 476 | domain = controller ? irq_find_host(controller) : irq_default_domain; |
661 | if (!domain) { | 477 | if (!domain) { |
662 | #ifdef CONFIG_MIPS | 478 | pr_warn("no irq domain found for %s !\n", |
663 | /* | 479 | of_node_full_name(controller)); |
664 | * Workaround to avoid breaking interrupt controller drivers | ||
665 | * that don't yet register an irq_domain. This is temporary | ||
666 | * code. ~~~gcl, Feb 24, 2012 | ||
667 | * | ||
668 | * Scheduled for removal in Linux v3.6. That should be enough | ||
669 | * time. | ||
670 | */ | ||
671 | if (intsize > 0) | ||
672 | return intspec[0]; | ||
673 | #endif | ||
674 | pr_warning("no irq domain found for %s !\n", | ||
675 | of_node_full_name(controller)); | ||
676 | return 0; | 480 | return 0; |
677 | } | 481 | } |
678 | 482 | ||
@@ -714,11 +518,7 @@ void irq_dispose_mapping(unsigned int virq) | |||
714 | if (WARN_ON(domain == NULL)) | 518 | if (WARN_ON(domain == NULL)) |
715 | return; | 519 | return; |
716 | 520 | ||
717 | /* Never unmap legacy interrupts */ | 521 | irq_domain_disassociate(domain, virq); |
718 | if (domain->revmap_type == IRQ_DOMAIN_MAP_LEGACY) | ||
719 | return; | ||
720 | |||
721 | irq_domain_disassociate_many(domain, virq, 1); | ||
722 | irq_free_desc(virq); | 522 | irq_free_desc(virq); |
723 | } | 523 | } |
724 | EXPORT_SYMBOL_GPL(irq_dispose_mapping); | 524 | EXPORT_SYMBOL_GPL(irq_dispose_mapping); |
@@ -739,63 +539,51 @@ unsigned int irq_find_mapping(struct irq_domain *domain, | |||
739 | if (domain == NULL) | 539 | if (domain == NULL) |
740 | return 0; | 540 | return 0; |
741 | 541 | ||
742 | switch (domain->revmap_type) { | 542 | if (hwirq < domain->revmap_direct_max_irq) { |
743 | case IRQ_DOMAIN_MAP_LEGACY: | ||
744 | return irq_domain_legacy_revmap(domain, hwirq); | ||
745 | case IRQ_DOMAIN_MAP_LINEAR: | ||
746 | return irq_linear_revmap(domain, hwirq); | ||
747 | case IRQ_DOMAIN_MAP_TREE: | ||
748 | rcu_read_lock(); | ||
749 | data = radix_tree_lookup(&domain->revmap_data.tree, hwirq); | ||
750 | rcu_read_unlock(); | ||
751 | if (data) | ||
752 | return data->irq; | ||
753 | break; | ||
754 | case IRQ_DOMAIN_MAP_NOMAP: | ||
755 | data = irq_get_irq_data(hwirq); | 543 | data = irq_get_irq_data(hwirq); |
756 | if (data && (data->domain == domain) && (data->hwirq == hwirq)) | 544 | if (data && (data->domain == domain) && (data->hwirq == hwirq)) |
757 | return hwirq; | 545 | return hwirq; |
758 | break; | ||
759 | } | 546 | } |
760 | 547 | ||
761 | return 0; | 548 | /* Check if the hwirq is in the linear revmap. */ |
762 | } | 549 | if (hwirq < domain->revmap_size) |
763 | EXPORT_SYMBOL_GPL(irq_find_mapping); | 550 | return domain->linear_revmap[hwirq]; |
764 | 551 | ||
765 | /** | 552 | rcu_read_lock(); |
766 | * irq_linear_revmap() - Find a linux irq from a hw irq number. | 553 | data = radix_tree_lookup(&domain->revmap_tree, hwirq); |
767 | * @domain: domain owning this hardware interrupt | 554 | rcu_read_unlock(); |
768 | * @hwirq: hardware irq number in that domain space | 555 | return data ? data->irq : 0; |
769 | * | ||
770 | * This is a fast path that can be called directly by irq controller code to | ||
771 | * save a handful of instructions. | ||
772 | */ | ||
773 | unsigned int irq_linear_revmap(struct irq_domain *domain, | ||
774 | irq_hw_number_t hwirq) | ||
775 | { | ||
776 | BUG_ON(domain->revmap_type != IRQ_DOMAIN_MAP_LINEAR); | ||
777 | |||
778 | /* Check revmap bounds; complain if exceeded */ | ||
779 | if (WARN_ON(hwirq >= domain->revmap_data.linear.size)) | ||
780 | return 0; | ||
781 | |||
782 | return domain->revmap_data.linear.revmap[hwirq]; | ||
783 | } | 556 | } |
784 | EXPORT_SYMBOL_GPL(irq_linear_revmap); | 557 | EXPORT_SYMBOL_GPL(irq_find_mapping); |
785 | 558 | ||
786 | #ifdef CONFIG_IRQ_DOMAIN_DEBUG | 559 | #ifdef CONFIG_IRQ_DOMAIN_DEBUG |
787 | static int virq_debug_show(struct seq_file *m, void *private) | 560 | static int virq_debug_show(struct seq_file *m, void *private) |
788 | { | 561 | { |
789 | unsigned long flags; | 562 | unsigned long flags; |
790 | struct irq_desc *desc; | 563 | struct irq_desc *desc; |
791 | const char *p; | 564 | struct irq_domain *domain; |
792 | static const char none[] = "none"; | 565 | struct radix_tree_iter iter; |
793 | void *data; | 566 | void *data, **slot; |
794 | int i; | 567 | int i; |
795 | 568 | ||
796 | seq_printf(m, "%-5s %-7s %-15s %-*s %s\n", "irq", "hwirq", | 569 | seq_printf(m, " %-16s %-6s %-10s %-10s %s\n", |
570 | "name", "mapped", "linear-max", "direct-max", "devtree-node"); | ||
571 | mutex_lock(&irq_domain_mutex); | ||
572 | list_for_each_entry(domain, &irq_domain_list, link) { | ||
573 | int count = 0; | ||
574 | radix_tree_for_each_slot(slot, &domain->revmap_tree, &iter, 0) | ||
575 | count++; | ||
576 | seq_printf(m, "%c%-16s %6u %10u %10u %s\n", | ||
577 | domain == irq_default_domain ? '*' : ' ', domain->name, | ||
578 | domain->revmap_size + count, domain->revmap_size, | ||
579 | domain->revmap_direct_max_irq, | ||
580 | domain->of_node ? of_node_full_name(domain->of_node) : ""); | ||
581 | } | ||
582 | mutex_unlock(&irq_domain_mutex); | ||
583 | |||
584 | seq_printf(m, "%-5s %-7s %-15s %-*s %6s %-14s %s\n", "irq", "hwirq", | ||
797 | "chip name", (int)(2 * sizeof(void *) + 2), "chip data", | 585 | "chip name", (int)(2 * sizeof(void *) + 2), "chip data", |
798 | "domain name"); | 586 | "active", "type", "domain"); |
799 | 587 | ||
800 | for (i = 1; i < nr_irqs; i++) { | 588 | for (i = 1; i < nr_irqs; i++) { |
801 | desc = irq_to_desc(i); | 589 | desc = irq_to_desc(i); |
@@ -803,28 +591,28 @@ static int virq_debug_show(struct seq_file *m, void *private) | |||
803 | continue; | 591 | continue; |
804 | 592 | ||
805 | raw_spin_lock_irqsave(&desc->lock, flags); | 593 | raw_spin_lock_irqsave(&desc->lock, flags); |
594 | domain = desc->irq_data.domain; | ||
806 | 595 | ||
807 | if (desc->action && desc->action->handler) { | 596 | if (domain) { |
808 | struct irq_chip *chip; | 597 | struct irq_chip *chip; |
598 | int hwirq = desc->irq_data.hwirq; | ||
599 | bool direct; | ||
809 | 600 | ||
810 | seq_printf(m, "%5d ", i); | 601 | seq_printf(m, "%5d ", i); |
811 | seq_printf(m, "0x%05lx ", desc->irq_data.hwirq); | 602 | seq_printf(m, "0x%05x ", hwirq); |
812 | 603 | ||
813 | chip = irq_desc_get_chip(desc); | 604 | chip = irq_desc_get_chip(desc); |
814 | if (chip && chip->name) | 605 | seq_printf(m, "%-15s ", (chip && chip->name) ? chip->name : "none"); |
815 | p = chip->name; | ||
816 | else | ||
817 | p = none; | ||
818 | seq_printf(m, "%-15s ", p); | ||
819 | 606 | ||
820 | data = irq_desc_get_chip_data(desc); | 607 | data = irq_desc_get_chip_data(desc); |
821 | seq_printf(m, data ? "0x%p " : " %p ", data); | 608 | seq_printf(m, data ? "0x%p " : " %p ", data); |
822 | 609 | ||
823 | if (desc->irq_data.domain) | 610 | seq_printf(m, " %c ", (desc->action && desc->action->handler) ? '*' : ' '); |
824 | p = of_node_full_name(desc->irq_data.domain->of_node); | 611 | direct = (i == hwirq) && (i < domain->revmap_direct_max_irq); |
825 | else | 612 | seq_printf(m, "%6s%-8s ", |
826 | p = none; | 613 | (hwirq < domain->revmap_size) ? "LINEAR" : "RADIX", |
827 | seq_printf(m, "%s\n", p); | 614 | direct ? "(DIRECT)" : ""); |
615 | seq_printf(m, "%s\n", desc->irq_data.domain->name); | ||
828 | } | 616 | } |
829 | 617 | ||
830 | raw_spin_unlock_irqrestore(&desc->lock, flags); | 618 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
@@ -921,18 +709,3 @@ const struct irq_domain_ops irq_domain_simple_ops = { | |||
921 | .xlate = irq_domain_xlate_onetwocell, | 709 | .xlate = irq_domain_xlate_onetwocell, |
922 | }; | 710 | }; |
923 | EXPORT_SYMBOL_GPL(irq_domain_simple_ops); | 711 | EXPORT_SYMBOL_GPL(irq_domain_simple_ops); |
924 | |||
925 | #ifdef CONFIG_OF_IRQ | ||
926 | void irq_domain_generate_simple(const struct of_device_id *match, | ||
927 | u64 phys_base, unsigned int irq_start) | ||
928 | { | ||
929 | struct device_node *node; | ||
930 | pr_debug("looking for phys_base=%llx, irq_start=%i\n", | ||
931 | (unsigned long long) phys_base, (int) irq_start); | ||
932 | node = of_find_matching_node_by_address(NULL, match, phys_base); | ||
933 | if (node) | ||
934 | irq_domain_add_legacy(node, 32, irq_start, 0, | ||
935 | &irq_domain_simple_ops, NULL); | ||
936 | } | ||
937 | EXPORT_SYMBOL_GPL(irq_domain_generate_simple); | ||
938 | #endif | ||
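The irqdomain.c hunks above replace the per-revmap_type switch with a single lookup order in irq_find_mapping(): a direct (no-map) range first, then the linear reverse-map array, then the radix tree. A minimal userspace model of that lookup order follows; the struct and names are demo stand-ins, not the kernel's struct irq_domain, and the radix-tree fallback is only indicated by a comment.

/*
 * Userspace sketch of the consolidated reverse-map lookup: direct
 * range, then linear array, then (in the kernel) the radix tree.
 * Demo fields only, not kernel code.
 */
#include <stdio.h>

struct demo_domain {
	unsigned int revmap_direct_max_irq;	/* hwirq used as virq below this */
	unsigned int revmap_size;		/* entries in linear_revmap[] */
	unsigned int linear_revmap[8];		/* hwirq -> virq, 0 = unmapped */
};

static unsigned int demo_find_mapping(const struct demo_domain *d,
				      unsigned int hwirq)
{
	if (hwirq < d->revmap_direct_max_irq)
		return hwirq;			/* direct range; the kernel also
						 * checks the irq_data really
						 * belongs to this domain */
	if (hwirq < d->revmap_size)
		return d->linear_revmap[hwirq];	/* fast linear reverse map */
	return 0;				/* radix-tree lookup would go here */
}

int main(void)
{
	struct demo_domain d = {
		.revmap_direct_max_irq	= 0,
		.revmap_size		= 4,
		.linear_revmap		= { 0, 17, 18, 19 },
	};

	printf("hwirq 2 maps to virq %u\n", demo_find_mapping(&d, 2));
	return 0;
}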
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 19ed5c425c3b..36f6ee181b0c 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c | |||
@@ -462,6 +462,8 @@ int show_interrupts(struct seq_file *p, void *v) | |||
462 | } else { | 462 | } else { |
463 | seq_printf(p, " %8s", "None"); | 463 | seq_printf(p, " %8s", "None"); |
464 | } | 464 | } |
465 | if (desc->irq_data.domain) | ||
466 | seq_printf(p, " %*d", prec, (int) desc->irq_data.hwirq); | ||
465 | #ifdef CONFIG_GENERIC_IRQ_SHOW_LEVEL | 467 | #ifdef CONFIG_GENERIC_IRQ_SHOW_LEVEL |
466 | seq_printf(p, " %-8s", irqd_is_level_type(&desc->irq_data) ? "Level" : "Edge"); | 468 | seq_printf(p, " %-8s", irqd_is_level_type(&desc->irq_data) ? "Level" : "Edge"); |
467 | #endif | 469 | #endif |
diff --git a/kernel/module.c b/kernel/module.c index cab4bce49c23..206915830d29 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -455,7 +455,7 @@ const struct kernel_symbol *find_symbol(const char *name, | |||
455 | EXPORT_SYMBOL_GPL(find_symbol); | 455 | EXPORT_SYMBOL_GPL(find_symbol); |
456 | 456 | ||
457 | /* Search for module by name: must hold module_mutex. */ | 457 | /* Search for module by name: must hold module_mutex. */ |
458 | static struct module *find_module_all(const char *name, | 458 | static struct module *find_module_all(const char *name, size_t len, |
459 | bool even_unformed) | 459 | bool even_unformed) |
460 | { | 460 | { |
461 | struct module *mod; | 461 | struct module *mod; |
@@ -463,7 +463,7 @@ static struct module *find_module_all(const char *name, | |||
463 | list_for_each_entry(mod, &modules, list) { | 463 | list_for_each_entry(mod, &modules, list) { |
464 | if (!even_unformed && mod->state == MODULE_STATE_UNFORMED) | 464 | if (!even_unformed && mod->state == MODULE_STATE_UNFORMED) |
465 | continue; | 465 | continue; |
466 | if (strcmp(mod->name, name) == 0) | 466 | if (strlen(mod->name) == len && !memcmp(mod->name, name, len)) |
467 | return mod; | 467 | return mod; |
468 | } | 468 | } |
469 | return NULL; | 469 | return NULL; |
@@ -471,7 +471,7 @@ static struct module *find_module_all(const char *name, | |||
471 | 471 | ||
472 | struct module *find_module(const char *name) | 472 | struct module *find_module(const char *name) |
473 | { | 473 | { |
474 | return find_module_all(name, false); | 474 | return find_module_all(name, strlen(name), false); |
475 | } | 475 | } |
476 | EXPORT_SYMBOL_GPL(find_module); | 476 | EXPORT_SYMBOL_GPL(find_module); |
477 | 477 | ||
@@ -482,23 +482,28 @@ static inline void __percpu *mod_percpu(struct module *mod) | |||
482 | return mod->percpu; | 482 | return mod->percpu; |
483 | } | 483 | } |
484 | 484 | ||
485 | static int percpu_modalloc(struct module *mod, | 485 | static int percpu_modalloc(struct module *mod, struct load_info *info) |
486 | unsigned long size, unsigned long align) | ||
487 | { | 486 | { |
487 | Elf_Shdr *pcpusec = &info->sechdrs[info->index.pcpu]; | ||
488 | unsigned long align = pcpusec->sh_addralign; | ||
489 | |||
490 | if (!pcpusec->sh_size) | ||
491 | return 0; | ||
492 | |||
488 | if (align > PAGE_SIZE) { | 493 | if (align > PAGE_SIZE) { |
489 | printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n", | 494 | printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n", |
490 | mod->name, align, PAGE_SIZE); | 495 | mod->name, align, PAGE_SIZE); |
491 | align = PAGE_SIZE; | 496 | align = PAGE_SIZE; |
492 | } | 497 | } |
493 | 498 | ||
494 | mod->percpu = __alloc_reserved_percpu(size, align); | 499 | mod->percpu = __alloc_reserved_percpu(pcpusec->sh_size, align); |
495 | if (!mod->percpu) { | 500 | if (!mod->percpu) { |
496 | printk(KERN_WARNING | 501 | printk(KERN_WARNING |
497 | "%s: Could not allocate %lu bytes percpu data\n", | 502 | "%s: Could not allocate %lu bytes percpu data\n", |
498 | mod->name, size); | 503 | mod->name, (unsigned long)pcpusec->sh_size); |
499 | return -ENOMEM; | 504 | return -ENOMEM; |
500 | } | 505 | } |
501 | mod->percpu_size = size; | 506 | mod->percpu_size = pcpusec->sh_size; |
502 | return 0; | 507 | return 0; |
503 | } | 508 | } |
504 | 509 | ||
@@ -563,10 +568,12 @@ static inline void __percpu *mod_percpu(struct module *mod) | |||
563 | { | 568 | { |
564 | return NULL; | 569 | return NULL; |
565 | } | 570 | } |
566 | static inline int percpu_modalloc(struct module *mod, | 571 | static int percpu_modalloc(struct module *mod, struct load_info *info) |
567 | unsigned long size, unsigned long align) | ||
568 | { | 572 | { |
569 | return -ENOMEM; | 573 | /* UP modules shouldn't have this section: ENOMEM isn't quite right */ |
574 | if (info->sechdrs[info->index.pcpu].sh_size != 0) | ||
575 | return -ENOMEM; | ||
576 | return 0; | ||
570 | } | 577 | } |
571 | static inline void percpu_modfree(struct module *mod) | 578 | static inline void percpu_modfree(struct module *mod) |
572 | { | 579 | { |
@@ -2927,7 +2934,6 @@ static struct module *layout_and_allocate(struct load_info *info, int flags) | |||
2927 | { | 2934 | { |
2928 | /* Module within temporary copy. */ | 2935 | /* Module within temporary copy. */ |
2929 | struct module *mod; | 2936 | struct module *mod; |
2930 | Elf_Shdr *pcpusec; | ||
2931 | int err; | 2937 | int err; |
2932 | 2938 | ||
2933 | mod = setup_load_info(info, flags); | 2939 | mod = setup_load_info(info, flags); |
@@ -2942,17 +2948,10 @@ static struct module *layout_and_allocate(struct load_info *info, int flags) | |||
2942 | err = module_frob_arch_sections(info->hdr, info->sechdrs, | 2948 | err = module_frob_arch_sections(info->hdr, info->sechdrs, |
2943 | info->secstrings, mod); | 2949 | info->secstrings, mod); |
2944 | if (err < 0) | 2950 | if (err < 0) |
2945 | goto out; | 2951 | return ERR_PTR(err); |
2946 | 2952 | ||
2947 | pcpusec = &info->sechdrs[info->index.pcpu]; | 2953 | /* We will do a special allocation for per-cpu sections later. */ |
2948 | if (pcpusec->sh_size) { | 2954 | info->sechdrs[info->index.pcpu].sh_flags &= ~(unsigned long)SHF_ALLOC; |
2949 | /* We have a special allocation for this section. */ | ||
2950 | err = percpu_modalloc(mod, | ||
2951 | pcpusec->sh_size, pcpusec->sh_addralign); | ||
2952 | if (err) | ||
2953 | goto out; | ||
2954 | pcpusec->sh_flags &= ~(unsigned long)SHF_ALLOC; | ||
2955 | } | ||
2956 | 2955 | ||
2957 | /* Determine total sizes, and put offsets in sh_entsize. For now | 2956 | /* Determine total sizes, and put offsets in sh_entsize. For now |
2958 | this is done generically; there doesn't appear to be any | 2957 | this is done generically; there doesn't appear to be any |
@@ -2963,17 +2962,12 @@ static struct module *layout_and_allocate(struct load_info *info, int flags) | |||
2963 | /* Allocate and move to the final place */ | 2962 | /* Allocate and move to the final place */ |
2964 | err = move_module(mod, info); | 2963 | err = move_module(mod, info); |
2965 | if (err) | 2964 | if (err) |
2966 | goto free_percpu; | 2965 | return ERR_PTR(err); |
2967 | 2966 | ||
2968 | /* Module has been copied to its final place now: return it. */ | 2967 | /* Module has been copied to its final place now: return it. */ |
2969 | mod = (void *)info->sechdrs[info->index.mod].sh_addr; | 2968 | mod = (void *)info->sechdrs[info->index.mod].sh_addr; |
2970 | kmemleak_load_module(mod, info); | 2969 | kmemleak_load_module(mod, info); |
2971 | return mod; | 2970 | return mod; |
2972 | |||
2973 | free_percpu: | ||
2974 | percpu_modfree(mod); | ||
2975 | out: | ||
2976 | return ERR_PTR(err); | ||
2977 | } | 2971 | } |
2978 | 2972 | ||
2979 | /* mod is no longer valid after this! */ | 2973 | /* mod is no longer valid after this! */ |
@@ -3014,7 +3008,7 @@ static bool finished_loading(const char *name) | |||
3014 | bool ret; | 3008 | bool ret; |
3015 | 3009 | ||
3016 | mutex_lock(&module_mutex); | 3010 | mutex_lock(&module_mutex); |
3017 | mod = find_module_all(name, true); | 3011 | mod = find_module_all(name, strlen(name), true); |
3018 | ret = !mod || mod->state == MODULE_STATE_LIVE | 3012 | ret = !mod || mod->state == MODULE_STATE_LIVE |
3019 | || mod->state == MODULE_STATE_GOING; | 3013 | || mod->state == MODULE_STATE_GOING; |
3020 | mutex_unlock(&module_mutex); | 3014 | mutex_unlock(&module_mutex); |
@@ -3152,7 +3146,8 @@ static int add_unformed_module(struct module *mod) | |||
3152 | 3146 | ||
3153 | again: | 3147 | again: |
3154 | mutex_lock(&module_mutex); | 3148 | mutex_lock(&module_mutex); |
3155 | if ((old = find_module_all(mod->name, true)) != NULL) { | 3149 | old = find_module_all(mod->name, strlen(mod->name), true); |
3150 | if (old != NULL) { | ||
3156 | if (old->state == MODULE_STATE_COMING | 3151 | if (old->state == MODULE_STATE_COMING |
3157 | || old->state == MODULE_STATE_UNFORMED) { | 3152 | || old->state == MODULE_STATE_UNFORMED) { |
3158 | /* Wait in case it fails to load. */ | 3153 | /* Wait in case it fails to load. */ |
@@ -3198,6 +3193,17 @@ out: | |||
3198 | return err; | 3193 | return err; |
3199 | } | 3194 | } |
3200 | 3195 | ||
3196 | static int unknown_module_param_cb(char *param, char *val, const char *modname) | ||
3197 | { | ||
3198 | /* Check for magic 'dyndbg' arg */ | ||
3199 | int ret = ddebug_dyndbg_module_param_cb(param, val, modname); | ||
3200 | if (ret != 0) { | ||
3201 | printk(KERN_WARNING "%s: unknown parameter '%s' ignored\n", | ||
3202 | modname, param); | ||
3203 | } | ||
3204 | return 0; | ||
3205 | } | ||
3206 | |||
3201 | /* Allocate and load the module: note that size of section 0 is always | 3207 | /* Allocate and load the module: note that size of section 0 is always |
3202 | zero, and we rely on this for optional sections. */ | 3208 | zero, and we rely on this for optional sections. */ |
3203 | static int load_module(struct load_info *info, const char __user *uargs, | 3209 | static int load_module(struct load_info *info, const char __user *uargs, |
@@ -3237,6 +3243,11 @@ static int load_module(struct load_info *info, const char __user *uargs, | |||
3237 | } | 3243 | } |
3238 | #endif | 3244 | #endif |
3239 | 3245 | ||
3246 | /* To avoid stressing percpu allocator, do this once we're unique. */ | ||
3247 | err = percpu_modalloc(mod, info); | ||
3248 | if (err) | ||
3249 | goto unlink_mod; | ||
3250 | |||
3240 | /* Now module is in final location, initialize linked lists, etc. */ | 3251 | /* Now module is in final location, initialize linked lists, etc. */ |
3241 | err = module_unload_init(mod); | 3252 | err = module_unload_init(mod); |
3242 | if (err) | 3253 | if (err) |
@@ -3284,7 +3295,7 @@ static int load_module(struct load_info *info, const char __user *uargs, | |||
3284 | 3295 | ||
3285 | /* Module is ready to execute: parsing args may do that. */ | 3296 | /* Module is ready to execute: parsing args may do that. */ |
3286 | err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, | 3297 | err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, |
3287 | -32768, 32767, &ddebug_dyndbg_module_param_cb); | 3298 | -32768, 32767, unknown_module_param_cb); |
3288 | if (err < 0) | 3299 | if (err < 0) |
3289 | goto bug_cleanup; | 3300 | goto bug_cleanup; |
3290 | 3301 | ||
@@ -3563,10 +3574,8 @@ unsigned long module_kallsyms_lookup_name(const char *name) | |||
3563 | /* Don't lock: we're in enough trouble already. */ | 3574 | /* Don't lock: we're in enough trouble already. */ |
3564 | preempt_disable(); | 3575 | preempt_disable(); |
3565 | if ((colon = strchr(name, ':')) != NULL) { | 3576 | if ((colon = strchr(name, ':')) != NULL) { |
3566 | *colon = '\0'; | 3577 | if ((mod = find_module_all(name, colon - name, false)) != NULL) |
3567 | if ((mod = find_module(name)) != NULL) | ||
3568 | ret = mod_find_symname(mod, colon+1); | 3578 | ret = mod_find_symname(mod, colon+1); |
3569 | *colon = ':'; | ||
3570 | } else { | 3579 | } else { |
3571 | list_for_each_entry_rcu(mod, &modules, list) { | 3580 | list_for_each_entry_rcu(mod, &modules, list) { |
3572 | if (mod->state == MODULE_STATE_UNFORMED) | 3581 | if (mod->state == MODULE_STATE_UNFORMED) |
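The module.c hunks above give find_module_all() an explicit length, so module_kallsyms_lookup_name() can match the "module:symbol" form without temporarily writing a NUL into the caller's string. The sketch below shows the same length-bounded match in plain userspace C; the table of names and the demo helper are hypothetical, not the kernel API.

/*
 * Userspace sketch of a length-bounded module-name match: both the
 * length and the bytes must agree, so "ext4" from "ext4:ext4_mount"
 * can be matched in place.  Demo data, not kernel code.
 */
#include <stdio.h>
#include <string.h>

static const char *module_names[] = { "e1000e", "ext4", "crct10dif" };

static const char *demo_find_module(const char *name, size_t len)
{
	for (size_t i = 0; i < sizeof(module_names) / sizeof(module_names[0]); i++) {
		/* mirrors strlen(mod->name) == len && !memcmp(...) above */
		if (strlen(module_names[i]) == len &&
		    !memcmp(module_names[i], name, len))
			return module_names[i];
	}
	return NULL;
}

int main(void)
{
	const char *query = "ext4:ext4_mount";
	const char *colon = strchr(query, ':');
	/* no *colon = '\0' / restore dance is needed any more */
	const char *mod = demo_find_module(query, (size_t)(colon - query));

	printf("matched module: %s\n", mod ? mod : "(none)");
	return 0;
}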
diff --git a/kernel/mutex.c b/kernel/mutex.c index e581ada5faf4..ff05f4bd86eb 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c | |||
@@ -18,6 +18,7 @@ | |||
18 | * Also see Documentation/mutex-design.txt. | 18 | * Also see Documentation/mutex-design.txt. |
19 | */ | 19 | */ |
20 | #include <linux/mutex.h> | 20 | #include <linux/mutex.h> |
21 | #include <linux/ww_mutex.h> | ||
21 | #include <linux/sched.h> | 22 | #include <linux/sched.h> |
22 | #include <linux/sched/rt.h> | 23 | #include <linux/sched/rt.h> |
23 | #include <linux/export.h> | 24 | #include <linux/export.h> |
diff --git a/kernel/panic.c b/kernel/panic.c index 167ec097ce8b..801864600514 100644 --- a/kernel/panic.c +++ b/kernel/panic.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/notifier.h> | 15 | #include <linux/notifier.h> |
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
17 | #include <linux/random.h> | 17 | #include <linux/random.h> |
18 | #include <linux/ftrace.h> | ||
18 | #include <linux/reboot.h> | 19 | #include <linux/reboot.h> |
19 | #include <linux/delay.h> | 20 | #include <linux/delay.h> |
20 | #include <linux/kexec.h> | 21 | #include <linux/kexec.h> |
@@ -399,8 +400,11 @@ struct slowpath_args { | |||
399 | static void warn_slowpath_common(const char *file, int line, void *caller, | 400 | static void warn_slowpath_common(const char *file, int line, void *caller, |
400 | unsigned taint, struct slowpath_args *args) | 401 | unsigned taint, struct slowpath_args *args) |
401 | { | 402 | { |
402 | printk(KERN_WARNING "------------[ cut here ]------------\n"); | 403 | disable_trace_on_warning(); |
403 | printk(KERN_WARNING "WARNING: at %s:%d %pS()\n", file, line, caller); | 404 | |
405 | pr_warn("------------[ cut here ]------------\n"); | ||
406 | pr_warn("WARNING: CPU: %d PID: %d at %s:%d %pS()\n", | ||
407 | raw_smp_processor_id(), current->pid, file, line, caller); | ||
404 | 408 | ||
405 | if (args) | 409 | if (args) |
406 | vprintk(args->fmt, args->args); | 410 | vprintk(args->fmt, args->args); |
diff --git a/kernel/params.c b/kernel/params.c index 53b958fcd639..440e65d1a544 100644 --- a/kernel/params.c +++ b/kernel/params.c | |||
@@ -787,7 +787,7 @@ static void __init kernel_add_sysfs_param(const char *name, | |||
787 | } | 787 | } |
788 | 788 | ||
789 | /* | 789 | /* |
790 | * param_sysfs_builtin - add contents in /sys/parameters for built-in modules | 790 | * param_sysfs_builtin - add sysfs parameters for built-in modules |
791 | * | 791 | * |
792 | * Add module_parameters to sysfs for "modules" built into the kernel. | 792 | * Add module_parameters to sysfs for "modules" built into the kernel. |
793 | * | 793 | * |
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 42670e9b44e0..c7f31aa272f7 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c | |||
@@ -51,59 +51,28 @@ static int check_clock(const clockid_t which_clock) | |||
51 | return error; | 51 | return error; |
52 | } | 52 | } |
53 | 53 | ||
54 | static inline union cpu_time_count | 54 | static inline unsigned long long |
55 | timespec_to_sample(const clockid_t which_clock, const struct timespec *tp) | 55 | timespec_to_sample(const clockid_t which_clock, const struct timespec *tp) |
56 | { | 56 | { |
57 | union cpu_time_count ret; | 57 | unsigned long long ret; |
58 | ret.sched = 0; /* high half always zero when .cpu used */ | 58 | |
59 | ret = 0; /* high half always zero when .cpu used */ | ||
59 | if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { | 60 | if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { |
60 | ret.sched = (unsigned long long)tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec; | 61 | ret = (unsigned long long)tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec; |
61 | } else { | 62 | } else { |
62 | ret.cpu = timespec_to_cputime(tp); | 63 | ret = cputime_to_expires(timespec_to_cputime(tp)); |
63 | } | 64 | } |
64 | return ret; | 65 | return ret; |
65 | } | 66 | } |
66 | 67 | ||
67 | static void sample_to_timespec(const clockid_t which_clock, | 68 | static void sample_to_timespec(const clockid_t which_clock, |
68 | union cpu_time_count cpu, | 69 | unsigned long long expires, |
69 | struct timespec *tp) | 70 | struct timespec *tp) |
70 | { | 71 | { |
71 | if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) | 72 | if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) |
72 | *tp = ns_to_timespec(cpu.sched); | 73 | *tp = ns_to_timespec(expires); |
73 | else | 74 | else |
74 | cputime_to_timespec(cpu.cpu, tp); | 75 | cputime_to_timespec((__force cputime_t)expires, tp); |
75 | } | ||
76 | |||
77 | static inline int cpu_time_before(const clockid_t which_clock, | ||
78 | union cpu_time_count now, | ||
79 | union cpu_time_count then) | ||
80 | { | ||
81 | if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { | ||
82 | return now.sched < then.sched; | ||
83 | } else { | ||
84 | return now.cpu < then.cpu; | ||
85 | } | ||
86 | } | ||
87 | static inline void cpu_time_add(const clockid_t which_clock, | ||
88 | union cpu_time_count *acc, | ||
89 | union cpu_time_count val) | ||
90 | { | ||
91 | if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { | ||
92 | acc->sched += val.sched; | ||
93 | } else { | ||
94 | acc->cpu += val.cpu; | ||
95 | } | ||
96 | } | ||
97 | static inline union cpu_time_count cpu_time_sub(const clockid_t which_clock, | ||
98 | union cpu_time_count a, | ||
99 | union cpu_time_count b) | ||
100 | { | ||
101 | if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { | ||
102 | a.sched -= b.sched; | ||
103 | } else { | ||
104 | a.cpu -= b.cpu; | ||
105 | } | ||
106 | return a; | ||
107 | } | 76 | } |
108 | 77 | ||
109 | /* | 78 | /* |
@@ -111,47 +80,31 @@ static inline union cpu_time_count cpu_time_sub(const clockid_t which_clock, | |||
111 | * given the current clock sample. | 80 | * given the current clock sample. |
112 | */ | 81 | */ |
113 | static void bump_cpu_timer(struct k_itimer *timer, | 82 | static void bump_cpu_timer(struct k_itimer *timer, |
114 | union cpu_time_count now) | 83 | unsigned long long now) |
115 | { | 84 | { |
116 | int i; | 85 | int i; |
86 | unsigned long long delta, incr; | ||
117 | 87 | ||
118 | if (timer->it.cpu.incr.sched == 0) | 88 | if (timer->it.cpu.incr == 0) |
119 | return; | 89 | return; |
120 | 90 | ||
121 | if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) { | 91 | if (now < timer->it.cpu.expires) |
122 | unsigned long long delta, incr; | 92 | return; |
123 | 93 | ||
124 | if (now.sched < timer->it.cpu.expires.sched) | 94 | incr = timer->it.cpu.incr; |
125 | return; | 95 | delta = now + incr - timer->it.cpu.expires; |
126 | incr = timer->it.cpu.incr.sched; | ||
127 | delta = now.sched + incr - timer->it.cpu.expires.sched; | ||
128 | /* Don't use (incr*2 < delta), incr*2 might overflow. */ | ||
129 | for (i = 0; incr < delta - incr; i++) | ||
130 | incr = incr << 1; | ||
131 | for (; i >= 0; incr >>= 1, i--) { | ||
132 | if (delta < incr) | ||
133 | continue; | ||
134 | timer->it.cpu.expires.sched += incr; | ||
135 | timer->it_overrun += 1 << i; | ||
136 | delta -= incr; | ||
137 | } | ||
138 | } else { | ||
139 | cputime_t delta, incr; | ||
140 | 96 | ||
141 | if (now.cpu < timer->it.cpu.expires.cpu) | 97 | /* Don't use (incr*2 < delta), incr*2 might overflow. */ |
142 | return; | 98 | for (i = 0; incr < delta - incr; i++) |
143 | incr = timer->it.cpu.incr.cpu; | 99 | incr = incr << 1; |
144 | delta = now.cpu + incr - timer->it.cpu.expires.cpu; | 100 | |
145 | /* Don't use (incr*2 < delta), incr*2 might overflow. */ | 101 | for (; i >= 0; incr >>= 1, i--) { |
146 | for (i = 0; incr < delta - incr; i++) | 102 | if (delta < incr) |
147 | incr += incr; | 103 | continue; |
148 | for (; i >= 0; incr = incr >> 1, i--) { | 104 | |
149 | if (delta < incr) | 105 | timer->it.cpu.expires += incr; |
150 | continue; | 106 | timer->it_overrun += 1 << i; |
151 | timer->it.cpu.expires.cpu += incr; | 107 | delta -= incr; |
152 | timer->it_overrun += 1 << i; | ||
153 | delta -= incr; | ||
154 | } | ||
155 | } | 108 | } |
156 | } | 109 | } |
157 | 110 | ||
@@ -170,21 +123,21 @@ static inline int task_cputime_zero(const struct task_cputime *cputime) | |||
170 | return 0; | 123 | return 0; |
171 | } | 124 | } |
172 | 125 | ||
173 | static inline cputime_t prof_ticks(struct task_struct *p) | 126 | static inline unsigned long long prof_ticks(struct task_struct *p) |
174 | { | 127 | { |
175 | cputime_t utime, stime; | 128 | cputime_t utime, stime; |
176 | 129 | ||
177 | task_cputime(p, &utime, &stime); | 130 | task_cputime(p, &utime, &stime); |
178 | 131 | ||
179 | return utime + stime; | 132 | return cputime_to_expires(utime + stime); |
180 | } | 133 | } |
181 | static inline cputime_t virt_ticks(struct task_struct *p) | 134 | static inline unsigned long long virt_ticks(struct task_struct *p) |
182 | { | 135 | { |
183 | cputime_t utime; | 136 | cputime_t utime; |
184 | 137 | ||
185 | task_cputime(p, &utime, NULL); | 138 | task_cputime(p, &utime, NULL); |
186 | 139 | ||
187 | return utime; | 140 | return cputime_to_expires(utime); |
188 | } | 141 | } |
189 | 142 | ||
190 | static int | 143 | static int |
@@ -225,19 +178,19 @@ posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp) | |||
225 | * Sample a per-thread clock for the given task. | 178 | * Sample a per-thread clock for the given task. |
226 | */ | 179 | */ |
227 | static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p, | 180 | static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p, |
228 | union cpu_time_count *cpu) | 181 | unsigned long long *sample) |
229 | { | 182 | { |
230 | switch (CPUCLOCK_WHICH(which_clock)) { | 183 | switch (CPUCLOCK_WHICH(which_clock)) { |
231 | default: | 184 | default: |
232 | return -EINVAL; | 185 | return -EINVAL; |
233 | case CPUCLOCK_PROF: | 186 | case CPUCLOCK_PROF: |
234 | cpu->cpu = prof_ticks(p); | 187 | *sample = prof_ticks(p); |
235 | break; | 188 | break; |
236 | case CPUCLOCK_VIRT: | 189 | case CPUCLOCK_VIRT: |
237 | cpu->cpu = virt_ticks(p); | 190 | *sample = virt_ticks(p); |
238 | break; | 191 | break; |
239 | case CPUCLOCK_SCHED: | 192 | case CPUCLOCK_SCHED: |
240 | cpu->sched = task_sched_runtime(p); | 193 | *sample = task_sched_runtime(p); |
241 | break; | 194 | break; |
242 | } | 195 | } |
243 | return 0; | 196 | return 0; |
@@ -284,7 +237,7 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times) | |||
284 | */ | 237 | */ |
285 | static int cpu_clock_sample_group(const clockid_t which_clock, | 238 | static int cpu_clock_sample_group(const clockid_t which_clock, |
286 | struct task_struct *p, | 239 | struct task_struct *p, |
287 | union cpu_time_count *cpu) | 240 | unsigned long long *sample) |
288 | { | 241 | { |
289 | struct task_cputime cputime; | 242 | struct task_cputime cputime; |
290 | 243 | ||
@@ -293,15 +246,15 @@ static int cpu_clock_sample_group(const clockid_t which_clock, | |||
293 | return -EINVAL; | 246 | return -EINVAL; |
294 | case CPUCLOCK_PROF: | 247 | case CPUCLOCK_PROF: |
295 | thread_group_cputime(p, &cputime); | 248 | thread_group_cputime(p, &cputime); |
296 | cpu->cpu = cputime.utime + cputime.stime; | 249 | *sample = cputime_to_expires(cputime.utime + cputime.stime); |
297 | break; | 250 | break; |
298 | case CPUCLOCK_VIRT: | 251 | case CPUCLOCK_VIRT: |
299 | thread_group_cputime(p, &cputime); | 252 | thread_group_cputime(p, &cputime); |
300 | cpu->cpu = cputime.utime; | 253 | *sample = cputime_to_expires(cputime.utime); |
301 | break; | 254 | break; |
302 | case CPUCLOCK_SCHED: | 255 | case CPUCLOCK_SCHED: |
303 | thread_group_cputime(p, &cputime); | 256 | thread_group_cputime(p, &cputime); |
304 | cpu->sched = cputime.sum_exec_runtime; | 257 | *sample = cputime.sum_exec_runtime; |
305 | break; | 258 | break; |
306 | } | 259 | } |
307 | return 0; | 260 | return 0; |
@@ -312,7 +265,7 @@ static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp) | |||
312 | { | 265 | { |
313 | const pid_t pid = CPUCLOCK_PID(which_clock); | 266 | const pid_t pid = CPUCLOCK_PID(which_clock); |
314 | int error = -EINVAL; | 267 | int error = -EINVAL; |
315 | union cpu_time_count rtn; | 268 | unsigned long long rtn; |
316 | 269 | ||
317 | if (pid == 0) { | 270 | if (pid == 0) { |
318 | /* | 271 | /* |
@@ -446,6 +399,15 @@ static int posix_cpu_timer_del(struct k_itimer *timer) | |||
446 | return ret; | 399 | return ret; |
447 | } | 400 | } |
448 | 401 | ||
402 | static void cleanup_timers_list(struct list_head *head, | ||
403 | unsigned long long curr) | ||
404 | { | ||
405 | struct cpu_timer_list *timer, *next; | ||
406 | |||
407 | list_for_each_entry_safe(timer, next, head, entry) | ||
408 | list_del_init(&timer->entry); | ||
409 | } | ||
410 | |||
449 | /* | 411 | /* |
450 | * Clean out CPU timers still ticking when a thread exited. The task | 412 | * Clean out CPU timers still ticking when a thread exited. The task |
451 | * pointer is cleared, and the expiry time is replaced with the residual | 413 | * pointer is cleared, and the expiry time is replaced with the residual |
@@ -456,37 +418,12 @@ static void cleanup_timers(struct list_head *head, | |||
456 | cputime_t utime, cputime_t stime, | 418 | cputime_t utime, cputime_t stime, |
457 | unsigned long long sum_exec_runtime) | 419 | unsigned long long sum_exec_runtime) |
458 | { | 420 | { |
459 | struct cpu_timer_list *timer, *next; | ||
460 | cputime_t ptime = utime + stime; | ||
461 | |||
462 | list_for_each_entry_safe(timer, next, head, entry) { | ||
463 | list_del_init(&timer->entry); | ||
464 | if (timer->expires.cpu < ptime) { | ||
465 | timer->expires.cpu = 0; | ||
466 | } else { | ||
467 | timer->expires.cpu -= ptime; | ||
468 | } | ||
469 | } | ||
470 | 421 | ||
471 | ++head; | 422 | cputime_t ptime = utime + stime; |
472 | list_for_each_entry_safe(timer, next, head, entry) { | ||
473 | list_del_init(&timer->entry); | ||
474 | if (timer->expires.cpu < utime) { | ||
475 | timer->expires.cpu = 0; | ||
476 | } else { | ||
477 | timer->expires.cpu -= utime; | ||
478 | } | ||
479 | } | ||
480 | 423 | ||
481 | ++head; | 424 | cleanup_timers_list(head, cputime_to_expires(ptime)); |
482 | list_for_each_entry_safe(timer, next, head, entry) { | 425 | cleanup_timers_list(++head, cputime_to_expires(utime)); |
483 | list_del_init(&timer->entry); | 426 | cleanup_timers_list(++head, sum_exec_runtime); |
484 | if (timer->expires.sched < sum_exec_runtime) { | ||
485 | timer->expires.sched = 0; | ||
486 | } else { | ||
487 | timer->expires.sched -= sum_exec_runtime; | ||
488 | } | ||
489 | } | ||
490 | } | 427 | } |
491 | 428 | ||
492 | /* | 429 | /* |
@@ -516,17 +453,21 @@ void posix_cpu_timers_exit_group(struct task_struct *tsk) | |||
516 | tsk->se.sum_exec_runtime + sig->sum_sched_runtime); | 453 | tsk->se.sum_exec_runtime + sig->sum_sched_runtime); |
517 | } | 454 | } |
518 | 455 | ||
519 | static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now) | 456 | static void clear_dead_task(struct k_itimer *itimer, unsigned long long now) |
520 | { | 457 | { |
458 | struct cpu_timer_list *timer = &itimer->it.cpu; | ||
459 | |||
521 | /* | 460 | /* |
522 | * That's all for this thread or process. | 461 | * That's all for this thread or process. |
523 | * We leave our residual in expires to be reported. | 462 | * We leave our residual in expires to be reported. |
524 | */ | 463 | */ |
525 | put_task_struct(timer->it.cpu.task); | 464 | put_task_struct(timer->task); |
526 | timer->it.cpu.task = NULL; | 465 | timer->task = NULL; |
527 | timer->it.cpu.expires = cpu_time_sub(timer->it_clock, | 466 | if (timer->expires < now) { |
528 | timer->it.cpu.expires, | 467 | timer->expires = 0; |
529 | now); | 468 | } else { |
469 | timer->expires -= now; | ||
470 | } | ||
530 | } | 471 | } |
531 | 472 | ||
532 | static inline int expires_gt(cputime_t expires, cputime_t new_exp) | 473 | static inline int expires_gt(cputime_t expires, cputime_t new_exp) |
@@ -558,14 +499,14 @@ static void arm_timer(struct k_itimer *timer) | |||
558 | 499 | ||
559 | listpos = head; | 500 | listpos = head; |
560 | list_for_each_entry(next, head, entry) { | 501 | list_for_each_entry(next, head, entry) { |
561 | if (cpu_time_before(timer->it_clock, nt->expires, next->expires)) | 502 | if (nt->expires < next->expires) |
562 | break; | 503 | break; |
563 | listpos = &next->entry; | 504 | listpos = &next->entry; |
564 | } | 505 | } |
565 | list_add(&nt->entry, listpos); | 506 | list_add(&nt->entry, listpos); |
566 | 507 | ||
567 | if (listpos == head) { | 508 | if (listpos == head) { |
568 | union cpu_time_count *exp = &nt->expires; | 509 | unsigned long long exp = nt->expires; |
569 | 510 | ||
570 | /* | 511 | /* |
571 | * We are the new earliest-expiring POSIX 1.b timer, hence | 512 | * We are the new earliest-expiring POSIX 1.b timer, hence |
@@ -576,17 +517,17 @@ static void arm_timer(struct k_itimer *timer) | |||
576 | 517 | ||
577 | switch (CPUCLOCK_WHICH(timer->it_clock)) { | 518 | switch (CPUCLOCK_WHICH(timer->it_clock)) { |
578 | case CPUCLOCK_PROF: | 519 | case CPUCLOCK_PROF: |
579 | if (expires_gt(cputime_expires->prof_exp, exp->cpu)) | 520 | if (expires_gt(cputime_expires->prof_exp, expires_to_cputime(exp))) |
580 | cputime_expires->prof_exp = exp->cpu; | 521 | cputime_expires->prof_exp = expires_to_cputime(exp); |
581 | break; | 522 | break; |
582 | case CPUCLOCK_VIRT: | 523 | case CPUCLOCK_VIRT: |
583 | if (expires_gt(cputime_expires->virt_exp, exp->cpu)) | 524 | if (expires_gt(cputime_expires->virt_exp, expires_to_cputime(exp))) |
584 | cputime_expires->virt_exp = exp->cpu; | 525 | cputime_expires->virt_exp = expires_to_cputime(exp); |
585 | break; | 526 | break; |
586 | case CPUCLOCK_SCHED: | 527 | case CPUCLOCK_SCHED: |
587 | if (cputime_expires->sched_exp == 0 || | 528 | if (cputime_expires->sched_exp == 0 || |
588 | cputime_expires->sched_exp > exp->sched) | 529 | cputime_expires->sched_exp > exp) |
589 | cputime_expires->sched_exp = exp->sched; | 530 | cputime_expires->sched_exp = exp; |
590 | break; | 531 | break; |
591 | } | 532 | } |
592 | } | 533 | } |
@@ -601,20 +542,20 @@ static void cpu_timer_fire(struct k_itimer *timer) | |||
601 | /* | 542 | /* |
602 | * User don't want any signal. | 543 | * User don't want any signal. |
603 | */ | 544 | */ |
604 | timer->it.cpu.expires.sched = 0; | 545 | timer->it.cpu.expires = 0; |
605 | } else if (unlikely(timer->sigq == NULL)) { | 546 | } else if (unlikely(timer->sigq == NULL)) { |
606 | /* | 547 | /* |
607 | * This a special case for clock_nanosleep, | 548 | * This a special case for clock_nanosleep, |
608 | * not a normal timer from sys_timer_create. | 549 | * not a normal timer from sys_timer_create. |
609 | */ | 550 | */ |
610 | wake_up_process(timer->it_process); | 551 | wake_up_process(timer->it_process); |
611 | timer->it.cpu.expires.sched = 0; | 552 | timer->it.cpu.expires = 0; |
612 | } else if (timer->it.cpu.incr.sched == 0) { | 553 | } else if (timer->it.cpu.incr == 0) { |
613 | /* | 554 | /* |
614 | * One-shot timer. Clear it as soon as it's fired. | 555 | * One-shot timer. Clear it as soon as it's fired. |
615 | */ | 556 | */ |
616 | posix_timer_event(timer, 0); | 557 | posix_timer_event(timer, 0); |
617 | timer->it.cpu.expires.sched = 0; | 558 | timer->it.cpu.expires = 0; |
618 | } else if (posix_timer_event(timer, ++timer->it_requeue_pending)) { | 559 | } else if (posix_timer_event(timer, ++timer->it_requeue_pending)) { |
619 | /* | 560 | /* |
620 | * The signal did not get queued because the signal | 561 | * The signal did not get queued because the signal |
@@ -632,7 +573,7 @@ static void cpu_timer_fire(struct k_itimer *timer) | |||
632 | */ | 573 | */ |
633 | static int cpu_timer_sample_group(const clockid_t which_clock, | 574 | static int cpu_timer_sample_group(const clockid_t which_clock, |
634 | struct task_struct *p, | 575 | struct task_struct *p, |
635 | union cpu_time_count *cpu) | 576 | unsigned long long *sample) |
636 | { | 577 | { |
637 | struct task_cputime cputime; | 578 | struct task_cputime cputime; |
638 | 579 | ||
@@ -641,13 +582,13 @@ static int cpu_timer_sample_group(const clockid_t which_clock, | |||
641 | default: | 582 | default: |
642 | return -EINVAL; | 583 | return -EINVAL; |
643 | case CPUCLOCK_PROF: | 584 | case CPUCLOCK_PROF: |
644 | cpu->cpu = cputime.utime + cputime.stime; | 585 | *sample = cputime_to_expires(cputime.utime + cputime.stime); |
645 | break; | 586 | break; |
646 | case CPUCLOCK_VIRT: | 587 | case CPUCLOCK_VIRT: |
647 | cpu->cpu = cputime.utime; | 588 | *sample = cputime_to_expires(cputime.utime); |
648 | break; | 589 | break; |
649 | case CPUCLOCK_SCHED: | 590 | case CPUCLOCK_SCHED: |
650 | cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p); | 591 | *sample = cputime.sum_exec_runtime + task_delta_exec(p); |
651 | break; | 592 | break; |
652 | } | 593 | } |
653 | return 0; | 594 | return 0; |
@@ -694,7 +635,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, | |||
694 | struct itimerspec *new, struct itimerspec *old) | 635 | struct itimerspec *new, struct itimerspec *old) |
695 | { | 636 | { |
696 | struct task_struct *p = timer->it.cpu.task; | 637 | struct task_struct *p = timer->it.cpu.task; |
697 | union cpu_time_count old_expires, new_expires, old_incr, val; | 638 | unsigned long long old_expires, new_expires, old_incr, val; |
698 | int ret; | 639 | int ret; |
699 | 640 | ||
700 | if (unlikely(p == NULL)) { | 641 | if (unlikely(p == NULL)) { |
@@ -749,7 +690,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, | |||
749 | } | 690 | } |
750 | 691 | ||
751 | if (old) { | 692 | if (old) { |
752 | if (old_expires.sched == 0) { | 693 | if (old_expires == 0) { |
753 | old->it_value.tv_sec = 0; | 694 | old->it_value.tv_sec = 0; |
754 | old->it_value.tv_nsec = 0; | 695 | old->it_value.tv_nsec = 0; |
755 | } else { | 696 | } else { |
@@ -764,11 +705,8 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, | |||
764 | * new setting. | 705 | * new setting. |
765 | */ | 706 | */ |
766 | bump_cpu_timer(timer, val); | 707 | bump_cpu_timer(timer, val); |
767 | if (cpu_time_before(timer->it_clock, val, | 708 | if (val < timer->it.cpu.expires) { |
768 | timer->it.cpu.expires)) { | 709 | old_expires = timer->it.cpu.expires - val; |
769 | old_expires = cpu_time_sub( | ||
770 | timer->it_clock, | ||
771 | timer->it.cpu.expires, val); | ||
772 | sample_to_timespec(timer->it_clock, | 710 | sample_to_timespec(timer->it_clock, |
773 | old_expires, | 711 | old_expires, |
774 | &old->it_value); | 712 | &old->it_value); |
@@ -791,8 +729,8 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, | |||
791 | goto out; | 729 | goto out; |
792 | } | 730 | } |
793 | 731 | ||
794 | if (new_expires.sched != 0 && !(flags & TIMER_ABSTIME)) { | 732 | if (new_expires != 0 && !(flags & TIMER_ABSTIME)) { |
795 | cpu_time_add(timer->it_clock, &new_expires, val); | 733 | new_expires += val; |
796 | } | 734 | } |
797 | 735 | ||
798 | /* | 736 | /* |
@@ -801,8 +739,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, | |||
801 | * arm the timer (we'll just fake it for timer_gettime). | 739 | * arm the timer (we'll just fake it for timer_gettime). |
802 | */ | 740 | */ |
803 | timer->it.cpu.expires = new_expires; | 741 | timer->it.cpu.expires = new_expires; |
804 | if (new_expires.sched != 0 && | 742 | if (new_expires != 0 && val < new_expires) { |
805 | cpu_time_before(timer->it_clock, val, new_expires)) { | ||
806 | arm_timer(timer); | 743 | arm_timer(timer); |
807 | } | 744 | } |
808 | 745 | ||
@@ -826,8 +763,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, | |||
826 | timer->it_overrun_last = 0; | 763 | timer->it_overrun_last = 0; |
827 | timer->it_overrun = -1; | 764 | timer->it_overrun = -1; |
828 | 765 | ||
829 | if (new_expires.sched != 0 && | 766 | if (new_expires != 0 && !(val < new_expires)) { |
830 | !cpu_time_before(timer->it_clock, val, new_expires)) { | ||
831 | /* | 767 | /* |
832 | * The designated time already passed, so we notify | 768 | * The designated time already passed, so we notify |
833 | * immediately, even if the thread never runs to | 769 | * immediately, even if the thread never runs to |
@@ -849,7 +785,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, | |||
849 | 785 | ||
850 | static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) | 786 | static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) |
851 | { | 787 | { |
852 | union cpu_time_count now; | 788 | unsigned long long now; |
853 | struct task_struct *p = timer->it.cpu.task; | 789 | struct task_struct *p = timer->it.cpu.task; |
854 | int clear_dead; | 790 | int clear_dead; |
855 | 791 | ||
@@ -859,7 +795,7 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) | |||
859 | sample_to_timespec(timer->it_clock, | 795 | sample_to_timespec(timer->it_clock, |
860 | timer->it.cpu.incr, &itp->it_interval); | 796 | timer->it.cpu.incr, &itp->it_interval); |
861 | 797 | ||
862 | if (timer->it.cpu.expires.sched == 0) { /* Timer not armed at all. */ | 798 | if (timer->it.cpu.expires == 0) { /* Timer not armed at all. */ |
863 | itp->it_value.tv_sec = itp->it_value.tv_nsec = 0; | 799 | itp->it_value.tv_sec = itp->it_value.tv_nsec = 0; |
864 | return; | 800 | return; |
865 | } | 801 | } |
@@ -891,7 +827,7 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) | |||
891 | */ | 827 | */ |
892 | put_task_struct(p); | 828 | put_task_struct(p); |
893 | timer->it.cpu.task = NULL; | 829 | timer->it.cpu.task = NULL; |
894 | timer->it.cpu.expires.sched = 0; | 830 | timer->it.cpu.expires = 0; |
895 | read_unlock(&tasklist_lock); | 831 | read_unlock(&tasklist_lock); |
896 | goto dead; | 832 | goto dead; |
897 | } else { | 833 | } else { |
@@ -912,10 +848,9 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) | |||
912 | goto dead; | 848 | goto dead; |
913 | } | 849 | } |
914 | 850 | ||
915 | if (cpu_time_before(timer->it_clock, now, timer->it.cpu.expires)) { | 851 | if (now < timer->it.cpu.expires) { |
916 | sample_to_timespec(timer->it_clock, | 852 | sample_to_timespec(timer->it_clock, |
917 | cpu_time_sub(timer->it_clock, | 853 | timer->it.cpu.expires - now, |
918 | timer->it.cpu.expires, now), | ||
919 | &itp->it_value); | 854 | &itp->it_value); |
920 | } else { | 855 | } else { |
921 | /* | 856 | /* |
@@ -927,6 +862,28 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) | |||
927 | } | 862 | } |
928 | } | 863 | } |
929 | 864 | ||
865 | static unsigned long long | ||
866 | check_timers_list(struct list_head *timers, | ||
867 | struct list_head *firing, | ||
868 | unsigned long long curr) | ||
869 | { | ||
870 | int maxfire = 20; | ||
871 | |||
872 | while (!list_empty(timers)) { | ||
873 | struct cpu_timer_list *t; | ||
874 | |||
875 | t = list_first_entry(timers, struct cpu_timer_list, entry); | ||
876 | |||
877 | if (!--maxfire || curr < t->expires) | ||
878 | return t->expires; | ||
879 | |||
880 | t->firing = 1; | ||
881 | list_move_tail(&t->entry, firing); | ||
882 | } | ||
883 | |||
884 | return 0; | ||
885 | } | ||
886 | |||
930 | /* | 887 | /* |
931 | * Check for any per-thread CPU timers that have fired and move them off | 888 | * Check for any per-thread CPU timers that have fired and move them off |
932 | * the tsk->cpu_timers[N] list onto the firing list. Here we update the | 889 | * the tsk->cpu_timers[N] list onto the firing list. Here we update the |
@@ -935,54 +892,20 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) | |||
935 | static void check_thread_timers(struct task_struct *tsk, | 892 | static void check_thread_timers(struct task_struct *tsk, |
936 | struct list_head *firing) | 893 | struct list_head *firing) |
937 | { | 894 | { |
938 | int maxfire; | ||
939 | struct list_head *timers = tsk->cpu_timers; | 895 | struct list_head *timers = tsk->cpu_timers; |
940 | struct signal_struct *const sig = tsk->signal; | 896 | struct signal_struct *const sig = tsk->signal; |
897 | struct task_cputime *tsk_expires = &tsk->cputime_expires; | ||
898 | unsigned long long expires; | ||
941 | unsigned long soft; | 899 | unsigned long soft; |
942 | 900 | ||
943 | maxfire = 20; | 901 | expires = check_timers_list(timers, firing, prof_ticks(tsk)); |
944 | tsk->cputime_expires.prof_exp = 0; | 902 | tsk_expires->prof_exp = expires_to_cputime(expires); |
945 | while (!list_empty(timers)) { | ||
946 | struct cpu_timer_list *t = list_first_entry(timers, | ||
947 | struct cpu_timer_list, | ||
948 | entry); | ||
949 | if (!--maxfire || prof_ticks(tsk) < t->expires.cpu) { | ||
950 | tsk->cputime_expires.prof_exp = t->expires.cpu; | ||
951 | break; | ||
952 | } | ||
953 | t->firing = 1; | ||
954 | list_move_tail(&t->entry, firing); | ||
955 | } | ||
956 | 903 | ||
957 | ++timers; | 904 | expires = check_timers_list(++timers, firing, virt_ticks(tsk)); |
958 | maxfire = 20; | 905 | tsk_expires->virt_exp = expires_to_cputime(expires); |
959 | tsk->cputime_expires.virt_exp = 0; | ||
960 | while (!list_empty(timers)) { | ||
961 | struct cpu_timer_list *t = list_first_entry(timers, | ||
962 | struct cpu_timer_list, | ||
963 | entry); | ||
964 | if (!--maxfire || virt_ticks(tsk) < t->expires.cpu) { | ||
965 | tsk->cputime_expires.virt_exp = t->expires.cpu; | ||
966 | break; | ||
967 | } | ||
968 | t->firing = 1; | ||
969 | list_move_tail(&t->entry, firing); | ||
970 | } | ||
971 | 906 | ||
972 | ++timers; | 907 | tsk_expires->sched_exp = check_timers_list(++timers, firing, |
973 | maxfire = 20; | 908 | tsk->se.sum_exec_runtime); |
974 | tsk->cputime_expires.sched_exp = 0; | ||
975 | while (!list_empty(timers)) { | ||
976 | struct cpu_timer_list *t = list_first_entry(timers, | ||
977 | struct cpu_timer_list, | ||
978 | entry); | ||
979 | if (!--maxfire || tsk->se.sum_exec_runtime < t->expires.sched) { | ||
980 | tsk->cputime_expires.sched_exp = t->expires.sched; | ||
981 | break; | ||
982 | } | ||
983 | t->firing = 1; | ||
984 | list_move_tail(&t->entry, firing); | ||
985 | } | ||
986 | 909 | ||
987 | /* | 910 | /* |
988 | * Check for the special case thread timers. | 911 | * Check for the special case thread timers. |
@@ -1030,7 +953,8 @@ static void stop_process_timers(struct signal_struct *sig) | |||
1030 | static u32 onecputick; | 953 | static u32 onecputick; |
1031 | 954 | ||
1032 | static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, | 955 | static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, |
1033 | cputime_t *expires, cputime_t cur_time, int signo) | 956 | unsigned long long *expires, |
957 | unsigned long long cur_time, int signo) | ||
1034 | { | 958 | { |
1035 | if (!it->expires) | 959 | if (!it->expires) |
1036 | return; | 960 | return; |
@@ -1066,9 +990,8 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, | |||
1066 | static void check_process_timers(struct task_struct *tsk, | 990 | static void check_process_timers(struct task_struct *tsk, |
1067 | struct list_head *firing) | 991 | struct list_head *firing) |
1068 | { | 992 | { |
1069 | int maxfire; | ||
1070 | struct signal_struct *const sig = tsk->signal; | 993 | struct signal_struct *const sig = tsk->signal; |
1071 | cputime_t utime, ptime, virt_expires, prof_expires; | 994 | unsigned long long utime, ptime, virt_expires, prof_expires; |
1072 | unsigned long long sum_sched_runtime, sched_expires; | 995 | unsigned long long sum_sched_runtime, sched_expires; |
1073 | struct list_head *timers = sig->cpu_timers; | 996 | struct list_head *timers = sig->cpu_timers; |
1074 | struct task_cputime cputime; | 997 | struct task_cputime cputime; |
@@ -1078,52 +1001,13 @@ static void check_process_timers(struct task_struct *tsk, | |||
1078 | * Collect the current process totals. | 1001 | * Collect the current process totals. |
1079 | */ | 1002 | */ |
1080 | thread_group_cputimer(tsk, &cputime); | 1003 | thread_group_cputimer(tsk, &cputime); |
1081 | utime = cputime.utime; | 1004 | utime = cputime_to_expires(cputime.utime); |
1082 | ptime = utime + cputime.stime; | 1005 | ptime = utime + cputime_to_expires(cputime.stime); |
1083 | sum_sched_runtime = cputime.sum_exec_runtime; | 1006 | sum_sched_runtime = cputime.sum_exec_runtime; |
1084 | maxfire = 20; | ||
1085 | prof_expires = 0; | ||
1086 | while (!list_empty(timers)) { | ||
1087 | struct cpu_timer_list *tl = list_first_entry(timers, | ||
1088 | struct cpu_timer_list, | ||
1089 | entry); | ||
1090 | if (!--maxfire || ptime < tl->expires.cpu) { | ||
1091 | prof_expires = tl->expires.cpu; | ||
1092 | break; | ||
1093 | } | ||
1094 | tl->firing = 1; | ||
1095 | list_move_tail(&tl->entry, firing); | ||
1096 | } | ||
1097 | 1007 | ||
1098 | ++timers; | 1008 | prof_expires = check_timers_list(timers, firing, ptime); |
1099 | maxfire = 20; | 1009 | virt_expires = check_timers_list(++timers, firing, utime); |
1100 | virt_expires = 0; | 1010 | sched_expires = check_timers_list(++timers, firing, sum_sched_runtime); |
1101 | while (!list_empty(timers)) { | ||
1102 | struct cpu_timer_list *tl = list_first_entry(timers, | ||
1103 | struct cpu_timer_list, | ||
1104 | entry); | ||
1105 | if (!--maxfire || utime < tl->expires.cpu) { | ||
1106 | virt_expires = tl->expires.cpu; | ||
1107 | break; | ||
1108 | } | ||
1109 | tl->firing = 1; | ||
1110 | list_move_tail(&tl->entry, firing); | ||
1111 | } | ||
1112 | |||
1113 | ++timers; | ||
1114 | maxfire = 20; | ||
1115 | sched_expires = 0; | ||
1116 | while (!list_empty(timers)) { | ||
1117 | struct cpu_timer_list *tl = list_first_entry(timers, | ||
1118 | struct cpu_timer_list, | ||
1119 | entry); | ||
1120 | if (!--maxfire || sum_sched_runtime < tl->expires.sched) { | ||
1121 | sched_expires = tl->expires.sched; | ||
1122 | break; | ||
1123 | } | ||
1124 | tl->firing = 1; | ||
1125 | list_move_tail(&tl->entry, firing); | ||
1126 | } | ||
1127 | 1011 | ||
1128 | /* | 1012 | /* |
1129 | * Check for the special case process timers. | 1013 | * Check for the special case process timers. |
@@ -1162,8 +1046,8 @@ static void check_process_timers(struct task_struct *tsk, | |||
1162 | } | 1046 | } |
1163 | } | 1047 | } |
1164 | 1048 | ||
1165 | sig->cputime_expires.prof_exp = prof_expires; | 1049 | sig->cputime_expires.prof_exp = expires_to_cputime(prof_expires); |
1166 | sig->cputime_expires.virt_exp = virt_expires; | 1050 | sig->cputime_expires.virt_exp = expires_to_cputime(virt_expires); |
1167 | sig->cputime_expires.sched_exp = sched_expires; | 1051 | sig->cputime_expires.sched_exp = sched_expires; |
1168 | if (task_cputime_zero(&sig->cputime_expires)) | 1052 | if (task_cputime_zero(&sig->cputime_expires)) |
1169 | stop_process_timers(sig); | 1053 | stop_process_timers(sig); |
@@ -1176,7 +1060,7 @@ static void check_process_timers(struct task_struct *tsk, | |||
1176 | void posix_cpu_timer_schedule(struct k_itimer *timer) | 1060 | void posix_cpu_timer_schedule(struct k_itimer *timer) |
1177 | { | 1061 | { |
1178 | struct task_struct *p = timer->it.cpu.task; | 1062 | struct task_struct *p = timer->it.cpu.task; |
1179 | union cpu_time_count now; | 1063 | unsigned long long now; |
1180 | 1064 | ||
1181 | if (unlikely(p == NULL)) | 1065 | if (unlikely(p == NULL)) |
1182 | /* | 1066 | /* |
@@ -1205,7 +1089,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) | |||
1205 | */ | 1089 | */ |
1206 | put_task_struct(p); | 1090 | put_task_struct(p); |
1207 | timer->it.cpu.task = p = NULL; | 1091 | timer->it.cpu.task = p = NULL; |
1208 | timer->it.cpu.expires.sched = 0; | 1092 | timer->it.cpu.expires = 0; |
1209 | goto out_unlock; | 1093 | goto out_unlock; |
1210 | } else if (unlikely(p->exit_state) && thread_group_empty(p)) { | 1094 | } else if (unlikely(p->exit_state) && thread_group_empty(p)) { |
1211 | /* | 1095 | /* |
@@ -1213,6 +1097,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) | |||
1213 | * not yet reaped. Take this opportunity to | 1097 | * not yet reaped. Take this opportunity to |
1214 | * drop our task ref. | 1098 | * drop our task ref. |
1215 | */ | 1099 | */ |
1100 | cpu_timer_sample_group(timer->it_clock, p, &now); | ||
1216 | clear_dead_task(timer, now); | 1101 | clear_dead_task(timer, now); |
1217 | goto out_unlock; | 1102 | goto out_unlock; |
1218 | } | 1103 | } |
@@ -1387,7 +1272,7 @@ void run_posix_cpu_timers(struct task_struct *tsk) | |||
1387 | void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, | 1272 | void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, |
1388 | cputime_t *newval, cputime_t *oldval) | 1273 | cputime_t *newval, cputime_t *oldval) |
1389 | { | 1274 | { |
1390 | union cpu_time_count now; | 1275 | unsigned long long now; |
1391 | 1276 | ||
1392 | BUG_ON(clock_idx == CPUCLOCK_SCHED); | 1277 | BUG_ON(clock_idx == CPUCLOCK_SCHED); |
1393 | cpu_timer_sample_group(clock_idx, tsk, &now); | 1278 | cpu_timer_sample_group(clock_idx, tsk, &now); |
@@ -1399,17 +1284,17 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, | |||
1399 | * it to be absolute. | 1284 | * it to be absolute. |
1400 | */ | 1285 | */ |
1401 | if (*oldval) { | 1286 | if (*oldval) { |
1402 | if (*oldval <= now.cpu) { | 1287 | if (*oldval <= now) { |
1403 | /* Just about to fire. */ | 1288 | /* Just about to fire. */ |
1404 | *oldval = cputime_one_jiffy; | 1289 | *oldval = cputime_one_jiffy; |
1405 | } else { | 1290 | } else { |
1406 | *oldval -= now.cpu; | 1291 | *oldval -= now; |
1407 | } | 1292 | } |
1408 | } | 1293 | } |
1409 | 1294 | ||
1410 | if (!*newval) | 1295 | if (!*newval) |
1411 | goto out; | 1296 | goto out; |
1412 | *newval += now.cpu; | 1297 | *newval += now; |
1413 | } | 1298 | } |
1414 | 1299 | ||
1415 | /* | 1300 | /* |
@@ -1459,7 +1344,7 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags, | |||
1459 | } | 1344 | } |
1460 | 1345 | ||
1461 | while (!signal_pending(current)) { | 1346 | while (!signal_pending(current)) { |
1462 | if (timer.it.cpu.expires.sched == 0) { | 1347 | if (timer.it.cpu.expires == 0) { |
1463 | /* | 1348 | /* |
1464 | * Our timer fired and was reset, below | 1349 | * Our timer fired and was reset, below |
1465 | * deletion can not fail. | 1350 | * deletion can not fail. |
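The posix-cpu-timers.c hunks above drop union cpu_time_count in favour of plain unsigned long long samples and fold the three near-identical expiry scans into one check_timers_list() helper. Below is a standalone model of that consolidated scan, using a plain array sorted by expiry instead of the kernel's list_head-based cpu_timer_list; the cap constant and demo types are illustrative only.

/*
 * Userspace model of the consolidated expiry scan: walk an
 * expiry-sorted set, mark expired entries as firing up to a small
 * per-pass cap, and return the next pending expiry (0 if none).
 */
#include <stdio.h>

struct demo_timer {
	unsigned long long expires;
	int firing;
};

static unsigned long long demo_check_timers(struct demo_timer *t, int n,
					    unsigned long long curr)
{
	int maxfire = 20;	/* same per-pass cap as the kernel helper */

	for (int i = 0; i < n; i++) {
		if (!--maxfire || curr < t[i].expires)
			return t[i].expires;	/* next expiry still pending */
		t[i].firing = 1;		/* kernel moves it to the firing list */
	}
	return 0;				/* nothing left on this list */
}

int main(void)
{
	struct demo_timer timers[] = { { 100, 0 }, { 200, 0 }, { 500, 0 } };
	unsigned long long next = demo_check_timers(timers, 3, 250);

	printf("next expiry: %llu\n", next);	/* prints 500 */
	return 0;
}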
diff --git a/kernel/power/autosleep.c b/kernel/power/autosleep.c index c6422ffeda9a..9012ecf7b814 100644 --- a/kernel/power/autosleep.c +++ b/kernel/power/autosleep.c | |||
@@ -32,7 +32,8 @@ static void try_to_suspend(struct work_struct *work) | |||
32 | 32 | ||
33 | mutex_lock(&autosleep_lock); | 33 | mutex_lock(&autosleep_lock); |
34 | 34 | ||
35 | if (!pm_save_wakeup_count(initial_count)) { | 35 | if (!pm_save_wakeup_count(initial_count) || |
36 | system_state != SYSTEM_RUNNING) { | ||
36 | mutex_unlock(&autosleep_lock); | 37 | mutex_unlock(&autosleep_lock); |
37 | goto out; | 38 | goto out; |
38 | } | 39 | } |
diff --git a/kernel/printk.c b/kernel/printk.c index 8212c1aef125..69b0890ed7e5 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
@@ -1369,9 +1369,9 @@ static int console_trylock_for_printk(unsigned int cpu) | |||
1369 | } | 1369 | } |
1370 | } | 1370 | } |
1371 | logbuf_cpu = UINT_MAX; | 1371 | logbuf_cpu = UINT_MAX; |
1372 | raw_spin_unlock(&logbuf_lock); | ||
1372 | if (wake) | 1373 | if (wake) |
1373 | up(&console_sem); | 1374 | up(&console_sem); |
1374 | raw_spin_unlock(&logbuf_lock); | ||
1375 | return retval; | 1375 | return retval; |
1376 | } | 1376 | } |
1377 | 1377 | ||
@@ -1921,7 +1921,7 @@ void resume_console(void) | |||
1921 | * called when a new CPU comes online (or fails to come up), and ensures | 1921 | * called when a new CPU comes online (or fails to come up), and ensures |
1922 | * that any such output gets printed. | 1922 | * that any such output gets printed. |
1923 | */ | 1923 | */ |
1924 | static int __cpuinit console_cpu_notify(struct notifier_block *self, | 1924 | static int console_cpu_notify(struct notifier_block *self, |
1925 | unsigned long action, void *hcpu) | 1925 | unsigned long action, void *hcpu) |
1926 | { | 1926 | { |
1927 | switch (action) { | 1927 | switch (action) { |
diff --git a/kernel/profile.c b/kernel/profile.c index 0bf400737660..6631e1ef55ab 100644 --- a/kernel/profile.c +++ b/kernel/profile.c | |||
@@ -331,7 +331,7 @@ out: | |||
331 | put_cpu(); | 331 | put_cpu(); |
332 | } | 332 | } |
333 | 333 | ||
334 | static int __cpuinit profile_cpu_callback(struct notifier_block *info, | 334 | static int profile_cpu_callback(struct notifier_block *info, |
335 | unsigned long action, void *__cpu) | 335 | unsigned long action, void *__cpu) |
336 | { | 336 | { |
337 | int node, cpu = (unsigned long)__cpu; | 337 | int node, cpu = (unsigned long)__cpu; |
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index ba5e6cea181a..4041f5747e73 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
@@ -469,6 +469,7 @@ static int ptrace_detach(struct task_struct *child, unsigned int data) | |||
469 | /* Architecture-specific hardware disable .. */ | 469 | /* Architecture-specific hardware disable .. */ |
470 | ptrace_disable(child); | 470 | ptrace_disable(child); |
471 | clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); | 471 | clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); |
472 | flush_ptrace_hw_breakpoint(child); | ||
472 | 473 | ||
473 | write_lock_irq(&tasklist_lock); | 474 | write_lock_irq(&tasklist_lock); |
474 | /* | 475 | /* |
@@ -1221,19 +1222,3 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, | |||
1221 | return ret; | 1222 | return ret; |
1222 | } | 1223 | } |
1223 | #endif /* CONFIG_COMPAT */ | 1224 | #endif /* CONFIG_COMPAT */ |
1224 | |||
1225 | #ifdef CONFIG_HAVE_HW_BREAKPOINT | ||
1226 | int ptrace_get_breakpoints(struct task_struct *tsk) | ||
1227 | { | ||
1228 | if (atomic_inc_not_zero(&tsk->ptrace_bp_refcnt)) | ||
1229 | return 0; | ||
1230 | |||
1231 | return -1; | ||
1232 | } | ||
1233 | |||
1234 | void ptrace_put_breakpoints(struct task_struct *tsk) | ||
1235 | { | ||
1236 | if (atomic_dec_and_test(&tsk->ptrace_bp_refcnt)) | ||
1237 | flush_ptrace_hw_breakpoint(tsk); | ||
1238 | } | ||
1239 | #endif /* CONFIG_HAVE_HW_BREAKPOINT */ | ||
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index b1fa5510388d..f4871e52c546 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c | |||
@@ -1476,7 +1476,7 @@ rcu_torture_shutdown(void *arg) | |||
1476 | * Execute random CPU-hotplug operations at the interval specified | 1476 | * Execute random CPU-hotplug operations at the interval specified |
1477 | * by the onoff_interval. | 1477 | * by the onoff_interval. |
1478 | */ | 1478 | */ |
1479 | static int __cpuinit | 1479 | static int |
1480 | rcu_torture_onoff(void *arg) | 1480 | rcu_torture_onoff(void *arg) |
1481 | { | 1481 | { |
1482 | int cpu; | 1482 | int cpu; |
@@ -1558,7 +1558,7 @@ rcu_torture_onoff(void *arg) | |||
1558 | return 0; | 1558 | return 0; |
1559 | } | 1559 | } |
1560 | 1560 | ||
1561 | static int __cpuinit | 1561 | static int |
1562 | rcu_torture_onoff_init(void) | 1562 | rcu_torture_onoff_init(void) |
1563 | { | 1563 | { |
1564 | int ret; | 1564 | int ret; |
@@ -1601,7 +1601,7 @@ static void rcu_torture_onoff_cleanup(void) | |||
1601 | * CPU-stall kthread. It waits as specified by stall_cpu_holdoff, then | 1601 | * CPU-stall kthread. It waits as specified by stall_cpu_holdoff, then |
1602 | * induces a CPU stall for the time specified by stall_cpu. | 1602 | * induces a CPU stall for the time specified by stall_cpu. |
1603 | */ | 1603 | */ |
1604 | static int __cpuinit rcu_torture_stall(void *args) | 1604 | static int rcu_torture_stall(void *args) |
1605 | { | 1605 | { |
1606 | unsigned long stop_at; | 1606 | unsigned long stop_at; |
1607 | 1607 | ||
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index e08abb9461ac..068de3a93606 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
@@ -2910,7 +2910,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) | |||
2910 | * can accept some slop in the rsp->completed access due to the fact | 2910 | * can accept some slop in the rsp->completed access due to the fact |
2911 | * that this CPU cannot possibly have any RCU callbacks in flight yet. | 2911 | * that this CPU cannot possibly have any RCU callbacks in flight yet. |
2912 | */ | 2912 | */ |
2913 | static void __cpuinit | 2913 | static void |
2914 | rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) | 2914 | rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) |
2915 | { | 2915 | { |
2916 | unsigned long flags; | 2916 | unsigned long flags; |
@@ -2962,7 +2962,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) | |||
2962 | mutex_unlock(&rsp->onoff_mutex); | 2962 | mutex_unlock(&rsp->onoff_mutex); |
2963 | } | 2963 | } |
2964 | 2964 | ||
2965 | static void __cpuinit rcu_prepare_cpu(int cpu) | 2965 | static void rcu_prepare_cpu(int cpu) |
2966 | { | 2966 | { |
2967 | struct rcu_state *rsp; | 2967 | struct rcu_state *rsp; |
2968 | 2968 | ||
@@ -2974,7 +2974,7 @@ static void __cpuinit rcu_prepare_cpu(int cpu) | |||
2974 | /* | 2974 | /* |
2975 | * Handle CPU online/offline notification events. | 2975 | * Handle CPU online/offline notification events. |
2976 | */ | 2976 | */ |
2977 | static int __cpuinit rcu_cpu_notify(struct notifier_block *self, | 2977 | static int rcu_cpu_notify(struct notifier_block *self, |
2978 | unsigned long action, void *hcpu) | 2978 | unsigned long action, void *hcpu) |
2979 | { | 2979 | { |
2980 | long cpu = (long)hcpu; | 2980 | long cpu = (long)hcpu; |
diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 4a39d364493c..b3832581043c 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h | |||
@@ -521,10 +521,10 @@ static void invoke_rcu_callbacks_kthread(void); | |||
521 | static bool rcu_is_callbacks_kthread(void); | 521 | static bool rcu_is_callbacks_kthread(void); |
522 | #ifdef CONFIG_RCU_BOOST | 522 | #ifdef CONFIG_RCU_BOOST |
523 | static void rcu_preempt_do_callbacks(void); | 523 | static void rcu_preempt_do_callbacks(void); |
524 | static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | 524 | static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, |
525 | struct rcu_node *rnp); | 525 | struct rcu_node *rnp); |
526 | #endif /* #ifdef CONFIG_RCU_BOOST */ | 526 | #endif /* #ifdef CONFIG_RCU_BOOST */ |
527 | static void __cpuinit rcu_prepare_kthreads(int cpu); | 527 | static void rcu_prepare_kthreads(int cpu); |
528 | static void rcu_cleanup_after_idle(int cpu); | 528 | static void rcu_cleanup_after_idle(int cpu); |
529 | static void rcu_prepare_for_idle(int cpu); | 529 | static void rcu_prepare_for_idle(int cpu); |
530 | static void rcu_idle_count_callbacks_posted(void); | 530 | static void rcu_idle_count_callbacks_posted(void); |
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 63098a59216e..769e12e3151b 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h | |||
@@ -1352,7 +1352,7 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) | |||
1352 | * already exist. We only create this kthread for preemptible RCU. | 1352 | * already exist. We only create this kthread for preemptible RCU. |
1353 | * Returns zero if all is well, a negated errno otherwise. | 1353 | * Returns zero if all is well, a negated errno otherwise. |
1354 | */ | 1354 | */ |
1355 | static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | 1355 | static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, |
1356 | struct rcu_node *rnp) | 1356 | struct rcu_node *rnp) |
1357 | { | 1357 | { |
1358 | int rnp_index = rnp - &rsp->node[0]; | 1358 | int rnp_index = rnp - &rsp->node[0]; |
@@ -1507,7 +1507,7 @@ static int __init rcu_spawn_kthreads(void) | |||
1507 | } | 1507 | } |
1508 | early_initcall(rcu_spawn_kthreads); | 1508 | early_initcall(rcu_spawn_kthreads); |
1509 | 1509 | ||
1510 | static void __cpuinit rcu_prepare_kthreads(int cpu) | 1510 | static void rcu_prepare_kthreads(int cpu) |
1511 | { | 1511 | { |
1512 | struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); | 1512 | struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); |
1513 | struct rcu_node *rnp = rdp->mynode; | 1513 | struct rcu_node *rnp = rdp->mynode; |
@@ -1549,7 +1549,7 @@ static int __init rcu_scheduler_really_started(void) | |||
1549 | } | 1549 | } |
1550 | early_initcall(rcu_scheduler_really_started); | 1550 | early_initcall(rcu_scheduler_really_started); |
1551 | 1551 | ||
1552 | static void __cpuinit rcu_prepare_kthreads(int cpu) | 1552 | static void rcu_prepare_kthreads(int cpu) |
1553 | { | 1553 | { |
1554 | } | 1554 | } |
1555 | 1555 | ||
diff --git a/kernel/reboot.c b/kernel/reboot.c new file mode 100644 index 000000000000..269ed9384cc4 --- /dev/null +++ b/kernel/reboot.c | |||
@@ -0,0 +1,419 @@ | |||
1 | /* | ||
2 | * linux/kernel/reboot.c | ||
3 | * | ||
4 | * Copyright (C) 2013 Linus Torvalds | ||
5 | */ | ||
6 | |||
7 | #define pr_fmt(fmt) "reboot: " fmt | ||
8 | |||
9 | #include <linux/ctype.h> | ||
10 | #include <linux/export.h> | ||
11 | #include <linux/kexec.h> | ||
12 | #include <linux/kmod.h> | ||
13 | #include <linux/kmsg_dump.h> | ||
14 | #include <linux/reboot.h> | ||
15 | #include <linux/suspend.h> | ||
16 | #include <linux/syscalls.h> | ||
17 | #include <linux/syscore_ops.h> | ||
18 | #include <linux/uaccess.h> | ||
19 | |||
20 | /* | ||
21 | * this indicates whether you can reboot with ctrl-alt-del: the default is yes | ||
22 | */ | ||
23 | |||
24 | int C_A_D = 1; | ||
25 | struct pid *cad_pid; | ||
26 | EXPORT_SYMBOL(cad_pid); | ||
27 | |||
28 | #if defined(CONFIG_ARM) || defined(CONFIG_UNICORE32) | ||
29 | #define DEFAULT_REBOOT_MODE = REBOOT_HARD | ||
30 | #else | ||
31 | #define DEFAULT_REBOOT_MODE | ||
32 | #endif | ||
33 | enum reboot_mode reboot_mode DEFAULT_REBOOT_MODE; | ||
34 | |||
35 | int reboot_default; | ||
36 | int reboot_cpu; | ||
37 | enum reboot_type reboot_type = BOOT_ACPI; | ||
38 | int reboot_force; | ||
39 | |||
40 | /* | ||
41 | * If set, this is used for preparing the system to power off. | ||
42 | */ | ||
43 | |||
44 | void (*pm_power_off_prepare)(void); | ||
45 | |||
46 | /** | ||
47 | * emergency_restart - reboot the system | ||
48 | * | ||
49 | * Without shutting down any hardware or taking any locks | ||
50 | * reboot the system. This is called when we know we are in | ||
51 | * trouble so this is our best effort to reboot. This is | ||
52 | * safe to call in interrupt context. | ||
53 | */ | ||
54 | void emergency_restart(void) | ||
55 | { | ||
56 | kmsg_dump(KMSG_DUMP_EMERG); | ||
57 | machine_emergency_restart(); | ||
58 | } | ||
59 | EXPORT_SYMBOL_GPL(emergency_restart); | ||
60 | |||
61 | void kernel_restart_prepare(char *cmd) | ||
62 | { | ||
63 | blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); | ||
64 | system_state = SYSTEM_RESTART; | ||
65 | usermodehelper_disable(); | ||
66 | device_shutdown(); | ||
67 | } | ||
68 | |||
69 | /** | ||
70 | * register_reboot_notifier - Register function to be called at reboot time | ||
71 | * @nb: Info about notifier function to be called | ||
72 | * | ||
73 | * Registers a function with the list of functions | ||
74 | * to be called at reboot time. | ||
75 | * | ||
76 | * Currently always returns zero, as blocking_notifier_chain_register() | ||
77 | * always returns zero. | ||
78 | */ | ||
79 | int register_reboot_notifier(struct notifier_block *nb) | ||
80 | { | ||
81 | return blocking_notifier_chain_register(&reboot_notifier_list, nb); | ||
82 | } | ||
83 | EXPORT_SYMBOL(register_reboot_notifier); | ||
84 | |||
85 | /** | ||
86 | * unregister_reboot_notifier - Unregister previously registered reboot notifier | ||
87 | * @nb: Hook to be unregistered | ||
88 | * | ||
89 | * Unregisters a previously registered reboot | ||
90 | * notifier function. | ||
91 | * | ||
92 | * Returns zero on success, or %-ENOENT on failure. | ||
93 | */ | ||
94 | int unregister_reboot_notifier(struct notifier_block *nb) | ||
95 | { | ||
96 | return blocking_notifier_chain_unregister(&reboot_notifier_list, nb); | ||
97 | } | ||
98 | EXPORT_SYMBOL(unregister_reboot_notifier); | ||
99 | |||
100 | static void migrate_to_reboot_cpu(void) | ||
101 | { | ||
102 | /* The boot cpu is always logical cpu 0 */ | ||
103 | int cpu = reboot_cpu; | ||
104 | |||
105 | cpu_hotplug_disable(); | ||
106 | |||
107 | /* Make certain the cpu I'm about to reboot on is online */ | ||
108 | if (!cpu_online(cpu)) | ||
109 | cpu = cpumask_first(cpu_online_mask); | ||
110 | |||
111 | /* Prevent races with other tasks migrating this task */ | ||
112 | current->flags |= PF_NO_SETAFFINITY; | ||
113 | |||
114 | /* Make certain I only run on the appropriate processor */ | ||
115 | set_cpus_allowed_ptr(current, cpumask_of(cpu)); | ||
116 | } | ||
117 | |||
118 | /** | ||
119 | * kernel_restart - reboot the system | ||
120 | * @cmd: pointer to buffer containing command to execute for restart | ||
121 | * or %NULL | ||
122 | * | ||
123 | * Shutdown everything and perform a clean reboot. | ||
124 | * This is not safe to call in interrupt context. | ||
125 | */ | ||
126 | void kernel_restart(char *cmd) | ||
127 | { | ||
128 | kernel_restart_prepare(cmd); | ||
129 | migrate_to_reboot_cpu(); | ||
130 | syscore_shutdown(); | ||
131 | if (!cmd) | ||
132 | pr_emerg("Restarting system\n"); | ||
133 | else | ||
134 | pr_emerg("Restarting system with command '%s'\n", cmd); | ||
135 | kmsg_dump(KMSG_DUMP_RESTART); | ||
136 | machine_restart(cmd); | ||
137 | } | ||
138 | EXPORT_SYMBOL_GPL(kernel_restart); | ||
139 | |||
140 | static void kernel_shutdown_prepare(enum system_states state) | ||
141 | { | ||
142 | blocking_notifier_call_chain(&reboot_notifier_list, | ||
143 | (state == SYSTEM_HALT) ? SYS_HALT : SYS_POWER_OFF, NULL); | ||
144 | system_state = state; | ||
145 | usermodehelper_disable(); | ||
146 | device_shutdown(); | ||
147 | } | ||
148 | /** | ||
149 | * kernel_halt - halt the system | ||
150 | * | ||
151 | * Shutdown everything and perform a clean system halt. | ||
152 | */ | ||
153 | void kernel_halt(void) | ||
154 | { | ||
155 | kernel_shutdown_prepare(SYSTEM_HALT); | ||
156 | migrate_to_reboot_cpu(); | ||
157 | syscore_shutdown(); | ||
158 | pr_emerg("System halted\n"); | ||
159 | kmsg_dump(KMSG_DUMP_HALT); | ||
160 | machine_halt(); | ||
161 | } | ||
162 | EXPORT_SYMBOL_GPL(kernel_halt); | ||
163 | |||
164 | /** | ||
165 | * kernel_power_off - power_off the system | ||
166 | * | ||
167 | * Shutdown everything and perform a clean system power_off. | ||
168 | */ | ||
169 | void kernel_power_off(void) | ||
170 | { | ||
171 | kernel_shutdown_prepare(SYSTEM_POWER_OFF); | ||
172 | if (pm_power_off_prepare) | ||
173 | pm_power_off_prepare(); | ||
174 | migrate_to_reboot_cpu(); | ||
175 | syscore_shutdown(); | ||
176 | pr_emerg("Power down\n"); | ||
177 | kmsg_dump(KMSG_DUMP_POWEROFF); | ||
178 | machine_power_off(); | ||
179 | } | ||
180 | EXPORT_SYMBOL_GPL(kernel_power_off); | ||
181 | |||
182 | static DEFINE_MUTEX(reboot_mutex); | ||
183 | |||
184 | /* | ||
185 | * Reboot system call: for obvious reasons only root may call it, | ||
186 | * and even root needs to set up some magic numbers in the registers | ||
187 | * so that some mistake won't make this reboot the whole machine. | ||
188 | * You can also set the meaning of the ctrl-alt-del-key here. | ||
189 | * | ||
190 | * reboot doesn't sync: do that yourself before calling this. | ||
191 | */ | ||
192 | SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, | ||
193 | void __user *, arg) | ||
194 | { | ||
195 | struct pid_namespace *pid_ns = task_active_pid_ns(current); | ||
196 | char buffer[256]; | ||
197 | int ret = 0; | ||
198 | |||
199 | /* We only trust the superuser with rebooting the system. */ | ||
200 | if (!ns_capable(pid_ns->user_ns, CAP_SYS_BOOT)) | ||
201 | return -EPERM; | ||
202 | |||
203 | /* For safety, we require "magic" arguments. */ | ||
204 | if (magic1 != LINUX_REBOOT_MAGIC1 || | ||
205 | (magic2 != LINUX_REBOOT_MAGIC2 && | ||
206 | magic2 != LINUX_REBOOT_MAGIC2A && | ||
207 | magic2 != LINUX_REBOOT_MAGIC2B && | ||
208 | magic2 != LINUX_REBOOT_MAGIC2C)) | ||
209 | return -EINVAL; | ||
210 | |||
211 | /* | ||
212 | * If pid namespaces are enabled and the current task is in a child | ||
213 | * pid_namespace, the command is handled by reboot_pid_ns() which will | ||
214 | * call do_exit(). | ||
215 | */ | ||
216 | ret = reboot_pid_ns(pid_ns, cmd); | ||
217 | if (ret) | ||
218 | return ret; | ||
219 | |||
220 | /* Instead of trying to make the power_off code look like | ||
221 | * halt when pm_power_off is not set do it the easy way. | ||
222 | */ | ||
223 | if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off) | ||
224 | cmd = LINUX_REBOOT_CMD_HALT; | ||
225 | |||
226 | mutex_lock(&reboot_mutex); | ||
227 | switch (cmd) { | ||
228 | case LINUX_REBOOT_CMD_RESTART: | ||
229 | kernel_restart(NULL); | ||
230 | break; | ||
231 | |||
232 | case LINUX_REBOOT_CMD_CAD_ON: | ||
233 | C_A_D = 1; | ||
234 | break; | ||
235 | |||
236 | case LINUX_REBOOT_CMD_CAD_OFF: | ||
237 | C_A_D = 0; | ||
238 | break; | ||
239 | |||
240 | case LINUX_REBOOT_CMD_HALT: | ||
241 | kernel_halt(); | ||
242 | do_exit(0); | ||
243 | panic("cannot halt"); | ||
244 | |||
245 | case LINUX_REBOOT_CMD_POWER_OFF: | ||
246 | kernel_power_off(); | ||
247 | do_exit(0); | ||
248 | break; | ||
249 | |||
250 | case LINUX_REBOOT_CMD_RESTART2: | ||
251 | ret = strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1); | ||
252 | if (ret < 0) { | ||
253 | ret = -EFAULT; | ||
254 | break; | ||
255 | } | ||
256 | buffer[sizeof(buffer) - 1] = '\0'; | ||
257 | |||
258 | kernel_restart(buffer); | ||
259 | break; | ||
260 | |||
261 | #ifdef CONFIG_KEXEC | ||
262 | case LINUX_REBOOT_CMD_KEXEC: | ||
263 | ret = kernel_kexec(); | ||
264 | break; | ||
265 | #endif | ||
266 | |||
267 | #ifdef CONFIG_HIBERNATION | ||
268 | case LINUX_REBOOT_CMD_SW_SUSPEND: | ||
269 | ret = hibernate(); | ||
270 | break; | ||
271 | #endif | ||
272 | |||
273 | default: | ||
274 | ret = -EINVAL; | ||
275 | break; | ||
276 | } | ||
277 | mutex_unlock(&reboot_mutex); | ||
278 | return ret; | ||
279 | } | ||
280 | |||
281 | static void deferred_cad(struct work_struct *dummy) | ||
282 | { | ||
283 | kernel_restart(NULL); | ||
284 | } | ||
285 | |||
286 | /* | ||
287 | * This function gets called by ctrl-alt-del - ie the keyboard interrupt. | ||
288 | * As it's called within an interrupt, it may NOT sync: the only choice | ||
289 | * is whether to reboot at once, or just ignore the ctrl-alt-del. | ||
290 | */ | ||
291 | void ctrl_alt_del(void) | ||
292 | { | ||
293 | static DECLARE_WORK(cad_work, deferred_cad); | ||
294 | |||
295 | if (C_A_D) | ||
296 | schedule_work(&cad_work); | ||
297 | else | ||
298 | kill_cad_pid(SIGINT, 1); | ||
299 | } | ||
300 | |||
301 | char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff"; | ||
302 | |||
303 | static int __orderly_poweroff(bool force) | ||
304 | { | ||
305 | char **argv; | ||
306 | static char *envp[] = { | ||
307 | "HOME=/", | ||
308 | "PATH=/sbin:/bin:/usr/sbin:/usr/bin", | ||
309 | NULL | ||
310 | }; | ||
311 | int ret; | ||
312 | |||
313 | argv = argv_split(GFP_KERNEL, poweroff_cmd, NULL); | ||
314 | if (argv) { | ||
315 | ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC); | ||
316 | argv_free(argv); | ||
317 | } else { | ||
318 | ret = -ENOMEM; | ||
319 | } | ||
320 | |||
321 | if (ret && force) { | ||
322 | pr_warn("Failed to start orderly shutdown: forcing the issue\n"); | ||
323 | /* | ||
324 | * I guess this should try to kick off some daemon to sync and | ||
325 | * poweroff asap. Or not even bother syncing if we're doing an | ||
326 | * emergency shutdown? | ||
327 | */ | ||
328 | emergency_sync(); | ||
329 | kernel_power_off(); | ||
330 | } | ||
331 | |||
332 | return ret; | ||
333 | } | ||
334 | |||
335 | static bool poweroff_force; | ||
336 | |||
337 | static void poweroff_work_func(struct work_struct *work) | ||
338 | { | ||
339 | __orderly_poweroff(poweroff_force); | ||
340 | } | ||
341 | |||
342 | static DECLARE_WORK(poweroff_work, poweroff_work_func); | ||
343 | |||
344 | /** | ||
345 | * orderly_poweroff - Trigger an orderly system poweroff | ||
346 | * @force: force poweroff if command execution fails | ||
347 | * | ||
348 | * This may be called from any context to trigger a system shutdown. | ||
349 | * If the orderly shutdown fails, it will force an immediate shutdown. | ||
350 | */ | ||
351 | int orderly_poweroff(bool force) | ||
352 | { | ||
353 | if (force) /* do not override the pending "true" */ | ||
354 | poweroff_force = true; | ||
355 | schedule_work(&poweroff_work); | ||
356 | return 0; | ||
357 | } | ||
358 | EXPORT_SYMBOL_GPL(orderly_poweroff); | ||
359 | |||
360 | static int __init reboot_setup(char *str) | ||
361 | { | ||
362 | for (;;) { | ||
363 | /* | ||
364 | * Having anything passed on the command line via | ||
365 | * reboot= will cause us to disable DMI checking | ||
366 | * below. | ||
367 | */ | ||
368 | reboot_default = 0; | ||
369 | |||
370 | switch (*str) { | ||
371 | case 'w': | ||
372 | reboot_mode = REBOOT_WARM; | ||
373 | break; | ||
374 | |||
375 | case 'c': | ||
376 | reboot_mode = REBOOT_COLD; | ||
377 | break; | ||
378 | |||
379 | case 'h': | ||
380 | reboot_mode = REBOOT_HARD; | ||
381 | break; | ||
382 | |||
383 | case 's': | ||
384 | if (isdigit(*(str+1))) | ||
385 | reboot_cpu = simple_strtoul(str+1, NULL, 0); | ||
386 | else if (str[1] == 'm' && str[2] == 'p' && | ||
387 | isdigit(*(str+3))) | ||
388 | reboot_cpu = simple_strtoul(str+3, NULL, 0); | ||
389 | else | ||
390 | reboot_mode = REBOOT_SOFT; | ||
391 | break; | ||
392 | |||
393 | case 'g': | ||
394 | reboot_mode = REBOOT_GPIO; | ||
395 | break; | ||
396 | |||
397 | case 'b': | ||
398 | case 'a': | ||
399 | case 'k': | ||
400 | case 't': | ||
401 | case 'e': | ||
402 | case 'p': | ||
403 | reboot_type = *str; | ||
404 | break; | ||
405 | |||
406 | case 'f': | ||
407 | reboot_force = 1; | ||
408 | break; | ||
409 | } | ||
410 | |||
411 | str = strchr(str, ','); | ||
412 | if (str) | ||
413 | str++; | ||
414 | else | ||
415 | break; | ||
416 | } | ||
417 | return 1; | ||
418 | } | ||
419 | __setup("reboot=", reboot_setup); | ||
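The new kernel/reboot.c above documents register_reboot_notifier()/unregister_reboot_notifier() and orderly_poweroff(); a minimal, hypothetical module showing how a driver might hook that notifier chain could look roughly like the sketch below (the example_* names are invented for illustration, not part of this diff):

#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/reboot.h>

/* Hypothetical callback: runs from the reboot_notifier_list chain that
 * register_reboot_notifier() (documented above) hooks into. */
static int example_reboot_notify(struct notifier_block *nb,
				 unsigned long action, void *data)
{
	pr_info("example: reboot/halt/power-off in progress (action=%lu)\n",
		action);
	return NOTIFY_DONE;
}

static struct notifier_block example_reboot_nb = {
	.notifier_call = example_reboot_notify,
};

static int __init example_init(void)
{
	return register_reboot_notifier(&example_reboot_nb);
}

static void __exit example_exit(void)
{
	unregister_reboot_notifier(&example_reboot_nb);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");

As the code above shows, reboot_setup() keys off the first character of each comma-separated token, so a command line such as reboot=c,f selects REBOOT_COLD and sets reboot_force; orderly_poweroff(true) schedules the userspace poweroff_cmd and falls back to emergency_sync()/kernel_power_off() if that fails.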
diff --git a/kernel/relay.c b/kernel/relay.c index b91488ba2e5a..5001c9887db1 100644 --- a/kernel/relay.c +++ b/kernel/relay.c | |||
@@ -516,7 +516,7 @@ static void setup_callbacks(struct rchan *chan, | |||
516 | * | 516 | * |
517 | * Returns the success/failure of the operation. (%NOTIFY_OK, %NOTIFY_BAD) | 517 | * Returns the success/failure of the operation. (%NOTIFY_OK, %NOTIFY_BAD) |
518 | */ | 518 | */ |
519 | static int __cpuinit relay_hotcpu_callback(struct notifier_block *nb, | 519 | static int relay_hotcpu_callback(struct notifier_block *nb, |
520 | unsigned long action, | 520 | unsigned long action, |
521 | void *hcpu) | 521 | void *hcpu) |
522 | { | 522 | { |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 9b1f2e533b95..b7c32cb7bfeb 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -370,13 +370,6 @@ static struct rq *this_rq_lock(void) | |||
370 | #ifdef CONFIG_SCHED_HRTICK | 370 | #ifdef CONFIG_SCHED_HRTICK |
371 | /* | 371 | /* |
372 | * Use HR-timers to deliver accurate preemption points. | 372 | * Use HR-timers to deliver accurate preemption points. |
373 | * | ||
374 | * Its all a bit involved since we cannot program an hrt while holding the | ||
375 | * rq->lock. So what we do is store a state in in rq->hrtick_* and ask for a | ||
376 | * reschedule event. | ||
377 | * | ||
378 | * When we get rescheduled we reprogram the hrtick_timer outside of the | ||
379 | * rq->lock. | ||
380 | */ | 373 | */ |
381 | 374 | ||
382 | static void hrtick_clear(struct rq *rq) | 375 | static void hrtick_clear(struct rq *rq) |
@@ -404,6 +397,15 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer) | |||
404 | } | 397 | } |
405 | 398 | ||
406 | #ifdef CONFIG_SMP | 399 | #ifdef CONFIG_SMP |
400 | |||
401 | static int __hrtick_restart(struct rq *rq) | ||
402 | { | ||
403 | struct hrtimer *timer = &rq->hrtick_timer; | ||
404 | ktime_t time = hrtimer_get_softexpires(timer); | ||
405 | |||
406 | return __hrtimer_start_range_ns(timer, time, 0, HRTIMER_MODE_ABS_PINNED, 0); | ||
407 | } | ||
408 | |||
407 | /* | 409 | /* |
408 | * called from hardirq (IPI) context | 410 | * called from hardirq (IPI) context |
409 | */ | 411 | */ |
@@ -412,7 +414,7 @@ static void __hrtick_start(void *arg) | |||
412 | struct rq *rq = arg; | 414 | struct rq *rq = arg; |
413 | 415 | ||
414 | raw_spin_lock(&rq->lock); | 416 | raw_spin_lock(&rq->lock); |
415 | hrtimer_restart(&rq->hrtick_timer); | 417 | __hrtick_restart(rq); |
416 | rq->hrtick_csd_pending = 0; | 418 | rq->hrtick_csd_pending = 0; |
417 | raw_spin_unlock(&rq->lock); | 419 | raw_spin_unlock(&rq->lock); |
418 | } | 420 | } |
@@ -430,7 +432,7 @@ void hrtick_start(struct rq *rq, u64 delay) | |||
430 | hrtimer_set_expires(timer, time); | 432 | hrtimer_set_expires(timer, time); |
431 | 433 | ||
432 | if (rq == this_rq()) { | 434 | if (rq == this_rq()) { |
433 | hrtimer_restart(timer); | 435 | __hrtick_restart(rq); |
434 | } else if (!rq->hrtick_csd_pending) { | 436 | } else if (!rq->hrtick_csd_pending) { |
435 | __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd, 0); | 437 | __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd, 0); |
436 | rq->hrtick_csd_pending = 1; | 438 | rq->hrtick_csd_pending = 1; |
@@ -4131,7 +4133,7 @@ void show_state_filter(unsigned long state_filter) | |||
4131 | debug_show_all_locks(); | 4133 | debug_show_all_locks(); |
4132 | } | 4134 | } |
4133 | 4135 | ||
4134 | void __cpuinit init_idle_bootup_task(struct task_struct *idle) | 4136 | void init_idle_bootup_task(struct task_struct *idle) |
4135 | { | 4137 | { |
4136 | idle->sched_class = &idle_sched_class; | 4138 | idle->sched_class = &idle_sched_class; |
4137 | } | 4139 | } |
@@ -4144,7 +4146,7 @@ void __cpuinit init_idle_bootup_task(struct task_struct *idle) | |||
4144 | * NOTE: this function does not set the idle thread's NEED_RESCHED | 4146 | * NOTE: this function does not set the idle thread's NEED_RESCHED |
4145 | * flag, to make booting more robust. | 4147 | * flag, to make booting more robust. |
4146 | */ | 4148 | */ |
4147 | void __cpuinit init_idle(struct task_struct *idle, int cpu) | 4149 | void init_idle(struct task_struct *idle, int cpu) |
4148 | { | 4150 | { |
4149 | struct rq *rq = cpu_rq(cpu); | 4151 | struct rq *rq = cpu_rq(cpu); |
4150 | unsigned long flags; | 4152 | unsigned long flags; |
@@ -4628,7 +4630,7 @@ static void set_rq_offline(struct rq *rq) | |||
4628 | * migration_call - callback that gets triggered when a CPU is added. | 4630 | * migration_call - callback that gets triggered when a CPU is added. |
4629 | * Here we can start up the necessary migration thread for the new CPU. | 4631 | * Here we can start up the necessary migration thread for the new CPU. |
4630 | */ | 4632 | */ |
4631 | static int __cpuinit | 4633 | static int |
4632 | migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | 4634 | migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) |
4633 | { | 4635 | { |
4634 | int cpu = (long)hcpu; | 4636 | int cpu = (long)hcpu; |
@@ -4682,12 +4684,12 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
4682 | * happens before everything else. This has to be lower priority than | 4684 | * happens before everything else. This has to be lower priority than |
4683 | * the notifier in the perf_event subsystem, though. | 4685 | * the notifier in the perf_event subsystem, though. |
4684 | */ | 4686 | */ |
4685 | static struct notifier_block __cpuinitdata migration_notifier = { | 4687 | static struct notifier_block migration_notifier = { |
4686 | .notifier_call = migration_call, | 4688 | .notifier_call = migration_call, |
4687 | .priority = CPU_PRI_MIGRATION, | 4689 | .priority = CPU_PRI_MIGRATION, |
4688 | }; | 4690 | }; |
4689 | 4691 | ||
4690 | static int __cpuinit sched_cpu_active(struct notifier_block *nfb, | 4692 | static int sched_cpu_active(struct notifier_block *nfb, |
4691 | unsigned long action, void *hcpu) | 4693 | unsigned long action, void *hcpu) |
4692 | { | 4694 | { |
4693 | switch (action & ~CPU_TASKS_FROZEN) { | 4695 | switch (action & ~CPU_TASKS_FROZEN) { |
@@ -4700,7 +4702,7 @@ static int __cpuinit sched_cpu_active(struct notifier_block *nfb, | |||
4700 | } | 4702 | } |
4701 | } | 4703 | } |
4702 | 4704 | ||
4703 | static int __cpuinit sched_cpu_inactive(struct notifier_block *nfb, | 4705 | static int sched_cpu_inactive(struct notifier_block *nfb, |
4704 | unsigned long action, void *hcpu) | 4706 | unsigned long action, void *hcpu) |
4705 | { | 4707 | { |
4706 | switch (action & ~CPU_TASKS_FROZEN) { | 4708 | switch (action & ~CPU_TASKS_FROZEN) { |
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index f77f9c527449..bb456f44b7b1 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c | |||
@@ -5506,7 +5506,7 @@ void nohz_balance_enter_idle(int cpu) | |||
5506 | set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)); | 5506 | set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)); |
5507 | } | 5507 | } |
5508 | 5508 | ||
5509 | static int __cpuinit sched_ilb_notifier(struct notifier_block *nfb, | 5509 | static int sched_ilb_notifier(struct notifier_block *nfb, |
5510 | unsigned long action, void *hcpu) | 5510 | unsigned long action, void *hcpu) |
5511 | { | 5511 | { |
5512 | switch (action & ~CPU_TASKS_FROZEN) { | 5512 | switch (action & ~CPU_TASKS_FROZEN) { |
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h index 17d7065c3872..5aef494fc8b4 100644 --- a/kernel/sched/stats.h +++ b/kernel/sched/stats.h | |||
@@ -162,6 +162,39 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next) | |||
162 | */ | 162 | */ |
163 | 163 | ||
164 | /** | 164 | /** |
165 | * cputimer_running - return true if cputimer is running | ||
166 | * | ||
167 | * @tsk: Pointer to target task. | ||
168 | */ | ||
169 | static inline bool cputimer_running(struct task_struct *tsk) | ||
170 | |||
171 | { | ||
172 | struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; | ||
173 | |||
174 | if (!cputimer->running) | ||
175 | return false; | ||
176 | |||
177 | /* | ||
178 | * After we flush the task's sum_exec_runtime to sig->sum_sched_runtime | ||
179 | * in __exit_signal(), we won't account to the signal struct further | ||
180 | * cputime consumed by that task, even though the task can still be | ||
181 | * ticking after __exit_signal(). | ||
182 | * | ||
183 | * In order to keep a consistent behaviour between thread group cputime | ||
184 | * and thread group cputimer accounting, lets also ignore the cputime | ||
185 | * elapsing after __exit_signal() in any thread group timer running. | ||
186 | * | ||
187 | * This makes sure that POSIX CPU clocks and timers are synchronized, so | ||
188 | * that a POSIX CPU timer won't expire while the corresponding POSIX CPU | ||
189 | * clock delta is behind the expiring timer value. | ||
190 | */ | ||
191 | if (unlikely(!tsk->sighand)) | ||
192 | return false; | ||
193 | |||
194 | return true; | ||
195 | } | ||
196 | |||
197 | /** | ||
165 | * account_group_user_time - Maintain utime for a thread group. | 198 | * account_group_user_time - Maintain utime for a thread group. |
166 | * | 199 | * |
167 | * @tsk: Pointer to task structure. | 200 | * @tsk: Pointer to task structure. |
@@ -176,7 +209,7 @@ static inline void account_group_user_time(struct task_struct *tsk, | |||
176 | { | 209 | { |
177 | struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; | 210 | struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; |
178 | 211 | ||
179 | if (!cputimer->running) | 212 | if (!cputimer_running(tsk)) |
180 | return; | 213 | return; |
181 | 214 | ||
182 | raw_spin_lock(&cputimer->lock); | 215 | raw_spin_lock(&cputimer->lock); |
@@ -199,7 +232,7 @@ static inline void account_group_system_time(struct task_struct *tsk, | |||
199 | { | 232 | { |
200 | struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; | 233 | struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; |
201 | 234 | ||
202 | if (!cputimer->running) | 235 | if (!cputimer_running(tsk)) |
203 | return; | 236 | return; |
204 | 237 | ||
205 | raw_spin_lock(&cputimer->lock); | 238 | raw_spin_lock(&cputimer->lock); |
@@ -222,7 +255,7 @@ static inline void account_group_exec_runtime(struct task_struct *tsk, | |||
222 | { | 255 | { |
223 | struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; | 256 | struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; |
224 | 257 | ||
225 | if (!cputimer->running) | 258 | if (!cputimer_running(tsk)) |
226 | return; | 259 | return; |
227 | 260 | ||
228 | raw_spin_lock(&cputimer->lock); | 261 | raw_spin_lock(&cputimer->lock); |
diff --git a/kernel/smp.c b/kernel/smp.c index 4dba0f7b72ad..fe9f773d7114 100644 --- a/kernel/smp.c +++ b/kernel/smp.c | |||
@@ -73,7 +73,7 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
73 | return NOTIFY_OK; | 73 | return NOTIFY_OK; |
74 | } | 74 | } |
75 | 75 | ||
76 | static struct notifier_block __cpuinitdata hotplug_cfd_notifier = { | 76 | static struct notifier_block hotplug_cfd_notifier = { |
77 | .notifier_call = hotplug_cfd, | 77 | .notifier_call = hotplug_cfd, |
78 | }; | 78 | }; |
79 | 79 | ||
diff --git a/kernel/smpboot.c b/kernel/smpboot.c index 02fc5c933673..eb89e1807408 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c | |||
@@ -24,7 +24,7 @@ | |||
24 | */ | 24 | */ |
25 | static DEFINE_PER_CPU(struct task_struct *, idle_threads); | 25 | static DEFINE_PER_CPU(struct task_struct *, idle_threads); |
26 | 26 | ||
27 | struct task_struct * __cpuinit idle_thread_get(unsigned int cpu) | 27 | struct task_struct *idle_thread_get(unsigned int cpu) |
28 | { | 28 | { |
29 | struct task_struct *tsk = per_cpu(idle_threads, cpu); | 29 | struct task_struct *tsk = per_cpu(idle_threads, cpu); |
30 | 30 | ||
diff --git a/kernel/softirq.c b/kernel/softirq.c index ca25e6e704a2..be3d3514c325 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -699,7 +699,7 @@ void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq) | |||
699 | } | 699 | } |
700 | EXPORT_SYMBOL(send_remote_softirq); | 700 | EXPORT_SYMBOL(send_remote_softirq); |
701 | 701 | ||
702 | static int __cpuinit remote_softirq_cpu_notify(struct notifier_block *self, | 702 | static int remote_softirq_cpu_notify(struct notifier_block *self, |
703 | unsigned long action, void *hcpu) | 703 | unsigned long action, void *hcpu) |
704 | { | 704 | { |
705 | /* | 705 | /* |
@@ -728,7 +728,7 @@ static int __cpuinit remote_softirq_cpu_notify(struct notifier_block *self, | |||
728 | return NOTIFY_OK; | 728 | return NOTIFY_OK; |
729 | } | 729 | } |
730 | 730 | ||
731 | static struct notifier_block __cpuinitdata remote_softirq_cpu_notifier = { | 731 | static struct notifier_block remote_softirq_cpu_notifier = { |
732 | .notifier_call = remote_softirq_cpu_notify, | 732 | .notifier_call = remote_softirq_cpu_notify, |
733 | }; | 733 | }; |
734 | 734 | ||
@@ -830,7 +830,7 @@ static void takeover_tasklets(unsigned int cpu) | |||
830 | } | 830 | } |
831 | #endif /* CONFIG_HOTPLUG_CPU */ | 831 | #endif /* CONFIG_HOTPLUG_CPU */ |
832 | 832 | ||
833 | static int __cpuinit cpu_callback(struct notifier_block *nfb, | 833 | static int cpu_callback(struct notifier_block *nfb, |
834 | unsigned long action, | 834 | unsigned long action, |
835 | void *hcpu) | 835 | void *hcpu) |
836 | { | 836 | { |
@@ -845,7 +845,7 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb, | |||
845 | return NOTIFY_OK; | 845 | return NOTIFY_OK; |
846 | } | 846 | } |
847 | 847 | ||
848 | static struct notifier_block __cpuinitdata cpu_nfb = { | 848 | static struct notifier_block cpu_nfb = { |
849 | .notifier_call = cpu_callback | 849 | .notifier_call = cpu_callback |
850 | }; | 850 | }; |
851 | 851 | ||
diff --git a/kernel/sys.c b/kernel/sys.c index 071de900c824..771129b299f8 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -116,20 +116,6 @@ EXPORT_SYMBOL(fs_overflowuid); | |||
116 | EXPORT_SYMBOL(fs_overflowgid); | 116 | EXPORT_SYMBOL(fs_overflowgid); |
117 | 117 | ||
118 | /* | 118 | /* |
119 | * this indicates whether you can reboot with ctrl-alt-del: the default is yes | ||
120 | */ | ||
121 | |||
122 | int C_A_D = 1; | ||
123 | struct pid *cad_pid; | ||
124 | EXPORT_SYMBOL(cad_pid); | ||
125 | |||
126 | /* | ||
127 | * If set, this is used for preparing the system to power off. | ||
128 | */ | ||
129 | |||
130 | void (*pm_power_off_prepare)(void); | ||
131 | |||
132 | /* | ||
133 | * Returns true if current's euid is same as p's uid or euid, | 119 | * Returns true if current's euid is same as p's uid or euid, |
134 | * or has CAP_SYS_NICE to p's user_ns. | 120 | * or has CAP_SYS_NICE to p's user_ns. |
135 | * | 121 | * |
@@ -308,266 +294,6 @@ out_unlock: | |||
308 | return retval; | 294 | return retval; |
309 | } | 295 | } |
310 | 296 | ||
311 | /** | ||
312 | * emergency_restart - reboot the system | ||
313 | * | ||
314 | * Without shutting down any hardware or taking any locks | ||
315 | * reboot the system. This is called when we know we are in | ||
316 | * trouble so this is our best effort to reboot. This is | ||
317 | * safe to call in interrupt context. | ||
318 | */ | ||
319 | void emergency_restart(void) | ||
320 | { | ||
321 | kmsg_dump(KMSG_DUMP_EMERG); | ||
322 | machine_emergency_restart(); | ||
323 | } | ||
324 | EXPORT_SYMBOL_GPL(emergency_restart); | ||
325 | |||
326 | void kernel_restart_prepare(char *cmd) | ||
327 | { | ||
328 | blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); | ||
329 | system_state = SYSTEM_RESTART; | ||
330 | usermodehelper_disable(); | ||
331 | device_shutdown(); | ||
332 | } | ||
333 | |||
334 | /** | ||
335 | * register_reboot_notifier - Register function to be called at reboot time | ||
336 | * @nb: Info about notifier function to be called | ||
337 | * | ||
338 | * Registers a function with the list of functions | ||
339 | * to be called at reboot time. | ||
340 | * | ||
341 | * Currently always returns zero, as blocking_notifier_chain_register() | ||
342 | * always returns zero. | ||
343 | */ | ||
344 | int register_reboot_notifier(struct notifier_block *nb) | ||
345 | { | ||
346 | return blocking_notifier_chain_register(&reboot_notifier_list, nb); | ||
347 | } | ||
348 | EXPORT_SYMBOL(register_reboot_notifier); | ||
349 | |||
350 | /** | ||
351 | * unregister_reboot_notifier - Unregister previously registered reboot notifier | ||
352 | * @nb: Hook to be unregistered | ||
353 | * | ||
354 | * Unregisters a previously registered reboot | ||
355 | * notifier function. | ||
356 | * | ||
357 | * Returns zero on success, or %-ENOENT on failure. | ||
358 | */ | ||
359 | int unregister_reboot_notifier(struct notifier_block *nb) | ||
360 | { | ||
361 | return blocking_notifier_chain_unregister(&reboot_notifier_list, nb); | ||
362 | } | ||
363 | EXPORT_SYMBOL(unregister_reboot_notifier); | ||
364 | |||
365 | /* Add backwards compatibility for stable trees. */ | ||
366 | #ifndef PF_NO_SETAFFINITY | ||
367 | #define PF_NO_SETAFFINITY PF_THREAD_BOUND | ||
368 | #endif | ||
369 | |||
370 | static void migrate_to_reboot_cpu(void) | ||
371 | { | ||
372 | /* The boot cpu is always logical cpu 0 */ | ||
373 | int cpu = 0; | ||
374 | |||
375 | cpu_hotplug_disable(); | ||
376 | |||
377 | /* Make certain the cpu I'm about to reboot on is online */ | ||
378 | if (!cpu_online(cpu)) | ||
379 | cpu = cpumask_first(cpu_online_mask); | ||
380 | |||
381 | /* Prevent races with other tasks migrating this task */ | ||
382 | current->flags |= PF_NO_SETAFFINITY; | ||
383 | |||
384 | /* Make certain I only run on the appropriate processor */ | ||
385 | set_cpus_allowed_ptr(current, cpumask_of(cpu)); | ||
386 | } | ||
387 | |||
388 | /** | ||
389 | * kernel_restart - reboot the system | ||
390 | * @cmd: pointer to buffer containing command to execute for restart | ||
391 | * or %NULL | ||
392 | * | ||
393 | * Shutdown everything and perform a clean reboot. | ||
394 | * This is not safe to call in interrupt context. | ||
395 | */ | ||
396 | void kernel_restart(char *cmd) | ||
397 | { | ||
398 | kernel_restart_prepare(cmd); | ||
399 | migrate_to_reboot_cpu(); | ||
400 | syscore_shutdown(); | ||
401 | if (!cmd) | ||
402 | printk(KERN_EMERG "Restarting system.\n"); | ||
403 | else | ||
404 | printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd); | ||
405 | kmsg_dump(KMSG_DUMP_RESTART); | ||
406 | machine_restart(cmd); | ||
407 | } | ||
408 | EXPORT_SYMBOL_GPL(kernel_restart); | ||
409 | |||
410 | static void kernel_shutdown_prepare(enum system_states state) | ||
411 | { | ||
412 | blocking_notifier_call_chain(&reboot_notifier_list, | ||
413 | (state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL); | ||
414 | system_state = state; | ||
415 | usermodehelper_disable(); | ||
416 | device_shutdown(); | ||
417 | } | ||
418 | /** | ||
419 | * kernel_halt - halt the system | ||
420 | * | ||
421 | * Shutdown everything and perform a clean system halt. | ||
422 | */ | ||
423 | void kernel_halt(void) | ||
424 | { | ||
425 | kernel_shutdown_prepare(SYSTEM_HALT); | ||
426 | migrate_to_reboot_cpu(); | ||
427 | syscore_shutdown(); | ||
428 | printk(KERN_EMERG "System halted.\n"); | ||
429 | kmsg_dump(KMSG_DUMP_HALT); | ||
430 | machine_halt(); | ||
431 | } | ||
432 | |||
433 | EXPORT_SYMBOL_GPL(kernel_halt); | ||
434 | |||
435 | /** | ||
436 | * kernel_power_off - power_off the system | ||
437 | * | ||
438 | * Shutdown everything and perform a clean system power_off. | ||
439 | */ | ||
440 | void kernel_power_off(void) | ||
441 | { | ||
442 | kernel_shutdown_prepare(SYSTEM_POWER_OFF); | ||
443 | if (pm_power_off_prepare) | ||
444 | pm_power_off_prepare(); | ||
445 | migrate_to_reboot_cpu(); | ||
446 | syscore_shutdown(); | ||
447 | printk(KERN_EMERG "Power down.\n"); | ||
448 | kmsg_dump(KMSG_DUMP_POWEROFF); | ||
449 | machine_power_off(); | ||
450 | } | ||
451 | EXPORT_SYMBOL_GPL(kernel_power_off); | ||
452 | |||
453 | static DEFINE_MUTEX(reboot_mutex); | ||
454 | |||
455 | /* | ||
456 | * Reboot system call: for obvious reasons only root may call it, | ||
457 | * and even root needs to set up some magic numbers in the registers | ||
458 | * so that some mistake won't make this reboot the whole machine. | ||
459 | * You can also set the meaning of the ctrl-alt-del-key here. | ||
460 | * | ||
461 | * reboot doesn't sync: do that yourself before calling this. | ||
462 | */ | ||
463 | SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, | ||
464 | void __user *, arg) | ||
465 | { | ||
466 | struct pid_namespace *pid_ns = task_active_pid_ns(current); | ||
467 | char buffer[256]; | ||
468 | int ret = 0; | ||
469 | |||
470 | /* We only trust the superuser with rebooting the system. */ | ||
471 | if (!ns_capable(pid_ns->user_ns, CAP_SYS_BOOT)) | ||
472 | return -EPERM; | ||
473 | |||
474 | /* For safety, we require "magic" arguments. */ | ||
475 | if (magic1 != LINUX_REBOOT_MAGIC1 || | ||
476 | (magic2 != LINUX_REBOOT_MAGIC2 && | ||
477 | magic2 != LINUX_REBOOT_MAGIC2A && | ||
478 | magic2 != LINUX_REBOOT_MAGIC2B && | ||
479 | magic2 != LINUX_REBOOT_MAGIC2C)) | ||
480 | return -EINVAL; | ||
481 | |||
482 | /* | ||
483 | * If pid namespaces are enabled and the current task is in a child | ||
484 | * pid_namespace, the command is handled by reboot_pid_ns() which will | ||
485 | * call do_exit(). | ||
486 | */ | ||
487 | ret = reboot_pid_ns(pid_ns, cmd); | ||
488 | if (ret) | ||
489 | return ret; | ||
490 | |||
491 | /* Instead of trying to make the power_off code look like | ||
492 | * halt when pm_power_off is not set do it the easy way. | ||
493 | */ | ||
494 | if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off) | ||
495 | cmd = LINUX_REBOOT_CMD_HALT; | ||
496 | |||
497 | mutex_lock(&reboot_mutex); | ||
498 | switch (cmd) { | ||
499 | case LINUX_REBOOT_CMD_RESTART: | ||
500 | kernel_restart(NULL); | ||
501 | break; | ||
502 | |||
503 | case LINUX_REBOOT_CMD_CAD_ON: | ||
504 | C_A_D = 1; | ||
505 | break; | ||
506 | |||
507 | case LINUX_REBOOT_CMD_CAD_OFF: | ||
508 | C_A_D = 0; | ||
509 | break; | ||
510 | |||
511 | case LINUX_REBOOT_CMD_HALT: | ||
512 | kernel_halt(); | ||
513 | do_exit(0); | ||
514 | panic("cannot halt.\n"); | ||
515 | |||
516 | case LINUX_REBOOT_CMD_POWER_OFF: | ||
517 | kernel_power_off(); | ||
518 | do_exit(0); | ||
519 | break; | ||
520 | |||
521 | case LINUX_REBOOT_CMD_RESTART2: | ||
522 | if (strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1) < 0) { | ||
523 | ret = -EFAULT; | ||
524 | break; | ||
525 | } | ||
526 | buffer[sizeof(buffer) - 1] = '\0'; | ||
527 | |||
528 | kernel_restart(buffer); | ||
529 | break; | ||
530 | |||
531 | #ifdef CONFIG_KEXEC | ||
532 | case LINUX_REBOOT_CMD_KEXEC: | ||
533 | ret = kernel_kexec(); | ||
534 | break; | ||
535 | #endif | ||
536 | |||
537 | #ifdef CONFIG_HIBERNATION | ||
538 | case LINUX_REBOOT_CMD_SW_SUSPEND: | ||
539 | ret = hibernate(); | ||
540 | break; | ||
541 | #endif | ||
542 | |||
543 | default: | ||
544 | ret = -EINVAL; | ||
545 | break; | ||
546 | } | ||
547 | mutex_unlock(&reboot_mutex); | ||
548 | return ret; | ||
549 | } | ||
550 | |||
551 | static void deferred_cad(struct work_struct *dummy) | ||
552 | { | ||
553 | kernel_restart(NULL); | ||
554 | } | ||
555 | |||
556 | /* | ||
557 | * This function gets called by ctrl-alt-del - ie the keyboard interrupt. | ||
558 | * As it's called within an interrupt, it may NOT sync: the only choice | ||
559 | * is whether to reboot at once, or just ignore the ctrl-alt-del. | ||
560 | */ | ||
561 | void ctrl_alt_del(void) | ||
562 | { | ||
563 | static DECLARE_WORK(cad_work, deferred_cad); | ||
564 | |||
565 | if (C_A_D) | ||
566 | schedule_work(&cad_work); | ||
567 | else | ||
568 | kill_cad_pid(SIGINT, 1); | ||
569 | } | ||
570 | |||
571 | /* | 297 | /* |
572 | * Unprivileged users may change the real gid to the effective gid | 298 | * Unprivileged users may change the real gid to the effective gid |
573 | * or vice versa. (BSD-style) | 299 | * or vice versa. (BSD-style) |
@@ -2292,68 +2018,6 @@ SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep, | |||
2292 | return err ? -EFAULT : 0; | 2018 | return err ? -EFAULT : 0; |
2293 | } | 2019 | } |
2294 | 2020 | ||
2295 | char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff"; | ||
2296 | |||
2297 | static int __orderly_poweroff(bool force) | ||
2298 | { | ||
2299 | char **argv; | ||
2300 | static char *envp[] = { | ||
2301 | "HOME=/", | ||
2302 | "PATH=/sbin:/bin:/usr/sbin:/usr/bin", | ||
2303 | NULL | ||
2304 | }; | ||
2305 | int ret; | ||
2306 | |||
2307 | argv = argv_split(GFP_KERNEL, poweroff_cmd, NULL); | ||
2308 | if (argv) { | ||
2309 | ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC); | ||
2310 | argv_free(argv); | ||
2311 | } else { | ||
2312 | printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n", | ||
2313 | __func__, poweroff_cmd); | ||
2314 | ret = -ENOMEM; | ||
2315 | } | ||
2316 | |||
2317 | if (ret && force) { | ||
2318 | printk(KERN_WARNING "Failed to start orderly shutdown: " | ||
2319 | "forcing the issue\n"); | ||
2320 | /* | ||
2321 | * I guess this should try to kick off some daemon to sync and | ||
2322 | * poweroff asap. Or not even bother syncing if we're doing an | ||
2323 | * emergency shutdown? | ||
2324 | */ | ||
2325 | emergency_sync(); | ||
2326 | kernel_power_off(); | ||
2327 | } | ||
2328 | |||
2329 | return ret; | ||
2330 | } | ||
2331 | |||
2332 | static bool poweroff_force; | ||
2333 | |||
2334 | static void poweroff_work_func(struct work_struct *work) | ||
2335 | { | ||
2336 | __orderly_poweroff(poweroff_force); | ||
2337 | } | ||
2338 | |||
2339 | static DECLARE_WORK(poweroff_work, poweroff_work_func); | ||
2340 | |||
2341 | /** | ||
2342 | * orderly_poweroff - Trigger an orderly system poweroff | ||
2343 | * @force: force poweroff if command execution fails | ||
2344 | * | ||
2345 | * This may be called from any context to trigger a system shutdown. | ||
2346 | * If the orderly shutdown fails, it will force an immediate shutdown. | ||
2347 | */ | ||
2348 | int orderly_poweroff(bool force) | ||
2349 | { | ||
2350 | if (force) /* do not override the pending "true" */ | ||
2351 | poweroff_force = true; | ||
2352 | schedule_work(&poweroff_work); | ||
2353 | return 0; | ||
2354 | } | ||
2355 | EXPORT_SYMBOL_GPL(orderly_poweroff); | ||
2356 | |||
2357 | /** | 2021 | /** |
2358 | * do_sysinfo - fill in sysinfo struct | 2022 | * do_sysinfo - fill in sysinfo struct |
2359 | * @info: pointer to buffer to fill | 2023 | * @info: pointer to buffer to fill |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 4ce13c3cedb9..ac09d98490aa 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -599,6 +599,13 @@ static struct ctl_table kern_table[] = { | |||
599 | .mode = 0644, | 599 | .mode = 0644, |
600 | .proc_handler = proc_dointvec, | 600 | .proc_handler = proc_dointvec, |
601 | }, | 601 | }, |
602 | { | ||
603 | .procname = "traceoff_on_warning", | ||
604 | .data = &__disable_trace_on_warning, | ||
605 | .maxlen = sizeof(__disable_trace_on_warning), | ||
606 | .mode = 0644, | ||
607 | .proc_handler = proc_dointvec, | ||
608 | }, | ||
602 | #endif | 609 | #endif |
603 | #ifdef CONFIG_MODULES | 610 | #ifdef CONFIG_MODULES |
604 | { | 611 | { |
@@ -800,7 +807,7 @@ static struct ctl_table kern_table[] = { | |||
800 | #if defined(CONFIG_LOCKUP_DETECTOR) | 807 | #if defined(CONFIG_LOCKUP_DETECTOR) |
801 | { | 808 | { |
802 | .procname = "watchdog", | 809 | .procname = "watchdog", |
803 | .data = &watchdog_enabled, | 810 | .data = &watchdog_user_enabled, |
804 | .maxlen = sizeof (int), | 811 | .maxlen = sizeof (int), |
805 | .mode = 0644, | 812 | .mode = 0644, |
806 | .proc_handler = proc_dowatchdog, | 813 | .proc_handler = proc_dowatchdog, |
@@ -827,7 +834,7 @@ static struct ctl_table kern_table[] = { | |||
827 | }, | 834 | }, |
828 | { | 835 | { |
829 | .procname = "nmi_watchdog", | 836 | .procname = "nmi_watchdog", |
830 | .data = &watchdog_enabled, | 837 | .data = &watchdog_user_enabled, |
831 | .maxlen = sizeof (int), | 838 | .maxlen = sizeof (int), |
832 | .mode = 0644, | 839 | .mode = 0644, |
833 | .proc_handler = proc_dowatchdog, | 840 | .proc_handler = proc_dowatchdog, |
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index aea4a9ea6fc8..b609213ca9a2 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c | |||
@@ -3,7 +3,6 @@ | |||
3 | #include "../fs/xfs/xfs_sysctl.h" | 3 | #include "../fs/xfs/xfs_sysctl.h" |
4 | #include <linux/sunrpc/debug.h> | 4 | #include <linux/sunrpc/debug.h> |
5 | #include <linux/string.h> | 5 | #include <linux/string.h> |
6 | #include <net/ip_vs.h> | ||
7 | #include <linux/syscalls.h> | 6 | #include <linux/syscalls.h> |
8 | #include <linux/namei.h> | 7 | #include <linux/namei.h> |
9 | #include <linux/mount.h> | 8 | #include <linux/mount.h> |
diff --git a/kernel/time/Makefile b/kernel/time/Makefile index ff7d9d2ab504..9250130646f5 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile | |||
@@ -4,6 +4,8 @@ obj-y += timeconv.o posix-clock.o alarmtimer.o | |||
4 | obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o | 4 | obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o |
5 | obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o | 5 | obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o |
6 | obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += tick-broadcast.o | 6 | obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += tick-broadcast.o |
7 | obj-$(CONFIG_GENERIC_SCHED_CLOCK) += sched_clock.o | ||
7 | obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o | 8 | obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o |
8 | obj-$(CONFIG_TICK_ONESHOT) += tick-sched.o | 9 | obj-$(CONFIG_TICK_ONESHOT) += tick-sched.o |
9 | obj-$(CONFIG_TIMER_STATS) += timer_stats.o | 10 | obj-$(CONFIG_TIMER_STATS) += timer_stats.o |
11 | obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o | ||
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index f11d83b12949..eec50fcef9e4 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c | |||
@@ -199,6 +199,13 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer) | |||
199 | 199 | ||
200 | } | 200 | } |
201 | 201 | ||
202 | ktime_t alarm_expires_remaining(const struct alarm *alarm) | ||
203 | { | ||
204 | struct alarm_base *base = &alarm_bases[alarm->type]; | ||
205 | return ktime_sub(alarm->node.expires, base->gettime()); | ||
206 | } | ||
207 | EXPORT_SYMBOL_GPL(alarm_expires_remaining); | ||
208 | |||
202 | #ifdef CONFIG_RTC_CLASS | 209 | #ifdef CONFIG_RTC_CLASS |
203 | /** | 210 | /** |
204 | * alarmtimer_suspend - Suspend time callback | 211 | * alarmtimer_suspend - Suspend time callback |
@@ -303,9 +310,10 @@ void alarm_init(struct alarm *alarm, enum alarmtimer_type type, | |||
303 | alarm->type = type; | 310 | alarm->type = type; |
304 | alarm->state = ALARMTIMER_STATE_INACTIVE; | 311 | alarm->state = ALARMTIMER_STATE_INACTIVE; |
305 | } | 312 | } |
313 | EXPORT_SYMBOL_GPL(alarm_init); | ||
306 | 314 | ||
307 | /** | 315 | /** |
308 | * alarm_start - Sets an alarm to fire | 316 | * alarm_start - Sets an absolute alarm to fire |
309 | * @alarm: ptr to alarm to set | 317 | * @alarm: ptr to alarm to set |
310 | * @start: time to run the alarm | 318 | * @start: time to run the alarm |
311 | */ | 319 | */ |
@@ -323,6 +331,34 @@ int alarm_start(struct alarm *alarm, ktime_t start) | |||
323 | spin_unlock_irqrestore(&base->lock, flags); | 331 | spin_unlock_irqrestore(&base->lock, flags); |
324 | return ret; | 332 | return ret; |
325 | } | 333 | } |
334 | EXPORT_SYMBOL_GPL(alarm_start); | ||
335 | |||
336 | /** | ||
337 | * alarm_start_relative - Sets a relative alarm to fire | ||
338 | * @alarm: ptr to alarm to set | ||
339 | * @start: time relative to now to run the alarm | ||
340 | */ | ||
341 | int alarm_start_relative(struct alarm *alarm, ktime_t start) | ||
342 | { | ||
343 | struct alarm_base *base = &alarm_bases[alarm->type]; | ||
344 | |||
345 | start = ktime_add(start, base->gettime()); | ||
346 | return alarm_start(alarm, start); | ||
347 | } | ||
348 | EXPORT_SYMBOL_GPL(alarm_start_relative); | ||
349 | |||
350 | void alarm_restart(struct alarm *alarm) | ||
351 | { | ||
352 | struct alarm_base *base = &alarm_bases[alarm->type]; | ||
353 | unsigned long flags; | ||
354 | |||
355 | spin_lock_irqsave(&base->lock, flags); | ||
356 | hrtimer_set_expires(&alarm->timer, alarm->node.expires); | ||
357 | hrtimer_restart(&alarm->timer); | ||
358 | alarmtimer_enqueue(base, alarm); | ||
359 | spin_unlock_irqrestore(&base->lock, flags); | ||
360 | } | ||
361 | EXPORT_SYMBOL_GPL(alarm_restart); | ||
326 | 362 | ||
327 | /** | 363 | /** |
328 | * alarm_try_to_cancel - Tries to cancel an alarm timer | 364 | * alarm_try_to_cancel - Tries to cancel an alarm timer |
@@ -344,6 +380,7 @@ int alarm_try_to_cancel(struct alarm *alarm) | |||
344 | spin_unlock_irqrestore(&base->lock, flags); | 380 | spin_unlock_irqrestore(&base->lock, flags); |
345 | return ret; | 381 | return ret; |
346 | } | 382 | } |
383 | EXPORT_SYMBOL_GPL(alarm_try_to_cancel); | ||
347 | 384 | ||
348 | 385 | ||
349 | /** | 386 | /** |
@@ -361,6 +398,7 @@ int alarm_cancel(struct alarm *alarm) | |||
361 | cpu_relax(); | 398 | cpu_relax(); |
362 | } | 399 | } |
363 | } | 400 | } |
401 | EXPORT_SYMBOL_GPL(alarm_cancel); | ||
364 | 402 | ||
365 | 403 | ||
366 | u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval) | 404 | u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval) |
@@ -393,8 +431,15 @@ u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval) | |||
393 | alarm->node.expires = ktime_add(alarm->node.expires, interval); | 431 | alarm->node.expires = ktime_add(alarm->node.expires, interval); |
394 | return overrun; | 432 | return overrun; |
395 | } | 433 | } |
434 | EXPORT_SYMBOL_GPL(alarm_forward); | ||
396 | 435 | ||
436 | u64 alarm_forward_now(struct alarm *alarm, ktime_t interval) | ||
437 | { | ||
438 | struct alarm_base *base = &alarm_bases[alarm->type]; | ||
397 | 439 | ||
440 | return alarm_forward(alarm, base->gettime(), interval); | ||
441 | } | ||
442 | EXPORT_SYMBOL_GPL(alarm_forward_now); | ||
398 | 443 | ||
399 | 444 | ||
400 | /** | 445 | /** |
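The alarmtimer hunks above export the alarm API and add relative-start and forward-now helpers: alarm_start_relative() simply adds the base clock's current time to the requested delta and calls alarm_start(), and alarm_forward_now() forwards an expired alarm past the base's current time. A minimal usage sketch (not part of the diff; the demo_* names and the 500ms/1s values are made up for illustration):

#include <linux/alarmtimer.h>
#include <linux/ktime.h>

static struct alarm demo_alarm;

static enum alarmtimer_restart demo_alarm_fn(struct alarm *al, ktime_t now)
{
	/* Push the expiry 500ms past the base clock's current time and re-arm. */
	alarm_forward_now(al, ms_to_ktime(500));
	return ALARMTIMER_RESTART;
}

static void demo_arm_alarm(void)
{
	alarm_init(&demo_alarm, ALARM_REALTIME, demo_alarm_fn);
	/* Fire one second from now; internally this becomes an absolute alarm_start(). */
	alarm_start_relative(&demo_alarm, ktime_set(1, 0));
}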
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index c6d6400ee137..38959c866789 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c | |||
@@ -15,20 +15,23 @@ | |||
15 | #include <linux/hrtimer.h> | 15 | #include <linux/hrtimer.h> |
16 | #include <linux/init.h> | 16 | #include <linux/init.h> |
17 | #include <linux/module.h> | 17 | #include <linux/module.h> |
18 | #include <linux/notifier.h> | ||
19 | #include <linux/smp.h> | 18 | #include <linux/smp.h> |
19 | #include <linux/device.h> | ||
20 | 20 | ||
21 | #include "tick-internal.h" | 21 | #include "tick-internal.h" |
22 | 22 | ||
23 | /* The registered clock event devices */ | 23 | /* The registered clock event devices */ |
24 | static LIST_HEAD(clockevent_devices); | 24 | static LIST_HEAD(clockevent_devices); |
25 | static LIST_HEAD(clockevents_released); | 25 | static LIST_HEAD(clockevents_released); |
26 | |||
27 | /* Notification for clock events */ | ||
28 | static RAW_NOTIFIER_HEAD(clockevents_chain); | ||
29 | |||
30 | /* Protection for the above */ | 26 | /* Protection for the above */ |
31 | static DEFINE_RAW_SPINLOCK(clockevents_lock); | 27 | static DEFINE_RAW_SPINLOCK(clockevents_lock); |
28 | /* Protection for unbind operations */ | ||
29 | static DEFINE_MUTEX(clockevents_mutex); | ||
30 | |||
31 | struct ce_unbind { | ||
32 | struct clock_event_device *ce; | ||
33 | int res; | ||
34 | }; | ||
32 | 35 | ||
33 | /** | 36 | /** |
34 | * clockevents_delta2ns - Convert a latch value (device ticks) to nanoseconds | 37 | * clockevents_delta2ns - Convert a latch value (device ticks) to nanoseconds |
@@ -232,47 +235,107 @@ int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, | |||
232 | return (rc && force) ? clockevents_program_min_delta(dev) : rc; | 235 | return (rc && force) ? clockevents_program_min_delta(dev) : rc; |
233 | } | 236 | } |
234 | 237 | ||
235 | /** | 238 | /* |
236 | * clockevents_register_notifier - register a clock events change listener | 239 | * Called after a notify add to make devices available which were |
240 | * released from the notifier call. | ||
237 | */ | 241 | */ |
238 | int clockevents_register_notifier(struct notifier_block *nb) | 242 | static void clockevents_notify_released(void) |
239 | { | 243 | { |
240 | unsigned long flags; | 244 | struct clock_event_device *dev; |
241 | int ret; | ||
242 | 245 | ||
243 | raw_spin_lock_irqsave(&clockevents_lock, flags); | 246 | while (!list_empty(&clockevents_released)) { |
244 | ret = raw_notifier_chain_register(&clockevents_chain, nb); | 247 | dev = list_entry(clockevents_released.next, |
245 | raw_spin_unlock_irqrestore(&clockevents_lock, flags); | 248 | struct clock_event_device, list); |
249 | list_del(&dev->list); | ||
250 | list_add(&dev->list, &clockevent_devices); | ||
251 | tick_check_new_device(dev); | ||
252 | } | ||
253 | } | ||
246 | 254 | ||
247 | return ret; | 255 | /* |
256 | * Try to install a replacement clock event device | ||
257 | */ | ||
258 | static int clockevents_replace(struct clock_event_device *ced) | ||
259 | { | ||
260 | struct clock_event_device *dev, *newdev = NULL; | ||
261 | |||
262 | list_for_each_entry(dev, &clockevent_devices, list) { | ||
263 | if (dev == ced || dev->mode != CLOCK_EVT_MODE_UNUSED) | ||
264 | continue; | ||
265 | |||
266 | if (!tick_check_replacement(newdev, dev)) | ||
267 | continue; | ||
268 | |||
269 | if (!try_module_get(dev->owner)) | ||
270 | continue; | ||
271 | |||
272 | if (newdev) | ||
273 | module_put(newdev->owner); | ||
274 | newdev = dev; | ||
275 | } | ||
276 | if (newdev) { | ||
277 | tick_install_replacement(newdev); | ||
278 | list_del_init(&ced->list); | ||
279 | } | ||
280 | return newdev ? 0 : -EBUSY; | ||
248 | } | 281 | } |
249 | 282 | ||
250 | /* | 283 | /* |
251 | * Notify about a clock event change. Called with clockevents_lock | 284 | * Called with clockevents_mutex and clockevents_lock held |
252 | * held. | ||
253 | */ | 285 | */ |
254 | static void clockevents_do_notify(unsigned long reason, void *dev) | 286 | static int __clockevents_try_unbind(struct clock_event_device *ced, int cpu) |
255 | { | 287 | { |
256 | raw_notifier_call_chain(&clockevents_chain, reason, dev); | 288 | /* Fast track. Device is unused */ |
289 | if (ced->mode == CLOCK_EVT_MODE_UNUSED) { | ||
290 | list_del_init(&ced->list); | ||
291 | return 0; | ||
292 | } | ||
293 | |||
294 | return ced == per_cpu(tick_cpu_device, cpu).evtdev ? -EAGAIN : -EBUSY; | ||
257 | } | 295 | } |
258 | 296 | ||
259 | /* | 297 | /* |
260 | * Called after a notify add to make devices available which were | 298 | * SMP function call to unbind a device |
261 | * released from the notifier call. | ||
262 | */ | 299 | */ |
263 | static void clockevents_notify_released(void) | 300 | static void __clockevents_unbind(void *arg) |
264 | { | 301 | { |
265 | struct clock_event_device *dev; | 302 | struct ce_unbind *cu = arg; |
303 | int res; | ||
304 | |||
305 | raw_spin_lock(&clockevents_lock); | ||
306 | res = __clockevents_try_unbind(cu->ce, smp_processor_id()); | ||
307 | if (res == -EAGAIN) | ||
308 | res = clockevents_replace(cu->ce); | ||
309 | cu->res = res; | ||
310 | raw_spin_unlock(&clockevents_lock); | ||
311 | } | ||
266 | 312 | ||
267 | while (!list_empty(&clockevents_released)) { | 313 | /* |
268 | dev = list_entry(clockevents_released.next, | 314 | * Issues smp function call to unbind a per cpu device. Called with |
269 | struct clock_event_device, list); | 315 | * clockevents_mutex held. |
270 | list_del(&dev->list); | 316 | */ |
271 | list_add(&dev->list, &clockevent_devices); | 317 | static int clockevents_unbind(struct clock_event_device *ced, int cpu) |
272 | clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev); | 318 | { |
273 | } | 319 | struct ce_unbind cu = { .ce = ced, .res = -ENODEV }; |
320 | |||
321 | smp_call_function_single(cpu, __clockevents_unbind, &cu, 1); | ||
322 | return cu.res; | ||
274 | } | 323 | } |
275 | 324 | ||
325 | /* | ||
326 | * Unbind a clockevents device. | ||
327 | */ | ||
328 | int clockevents_unbind_device(struct clock_event_device *ced, int cpu) | ||
329 | { | ||
330 | int ret; | ||
331 | |||
332 | mutex_lock(&clockevents_mutex); | ||
333 | ret = clockevents_unbind(ced, cpu); | ||
334 | mutex_unlock(&clockevents_mutex); | ||
335 | return ret; | ||
336 | } | ||
337 | EXPORT_SYMBOL_GPL(clockevents_unbind_device); | ||
338 | |||
276 | /** | 339 | /** |
277 | * clockevents_register_device - register a clock event device | 340 | * clockevents_register_device - register a clock event device |
278 | * @dev: device to register | 341 | * @dev: device to register |
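The hunk above replaces the notifier chain with direct calls and adds unbind support. The cross-CPU part follows a common kernel pattern: pack the request and a result slot into a small struct and run the worker on the target CPU with smp_call_function_single(), which is what clockevents_unbind() does with struct ce_unbind. A generic sketch of that pattern (illustrative only; the demo_* names are hypothetical):

#include <linux/errno.h>
#include <linux/smp.h>

struct demo_request {
	int arg;
	int res;
};

static void demo_on_cpu(void *data)
{
	struct demo_request *req = data;

	/* Runs in IPI context on the target CPU, interrupts disabled. */
	req->res = req->arg * 2;
}

static int demo_call_on(int cpu, int arg)
{
	struct demo_request req = { .arg = arg, .res = -ENODEV };

	/* wait=1: don't return until the target CPU has run demo_on_cpu(). */
	smp_call_function_single(cpu, demo_on_cpu, &req, 1);
	return req.res;
}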
@@ -290,7 +353,7 @@ void clockevents_register_device(struct clock_event_device *dev) | |||
290 | raw_spin_lock_irqsave(&clockevents_lock, flags); | 353 | raw_spin_lock_irqsave(&clockevents_lock, flags); |
291 | 354 | ||
292 | list_add(&dev->list, &clockevent_devices); | 355 | list_add(&dev->list, &clockevent_devices); |
293 | clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev); | 356 | tick_check_new_device(dev); |
294 | clockevents_notify_released(); | 357 | clockevents_notify_released(); |
295 | 358 | ||
296 | raw_spin_unlock_irqrestore(&clockevents_lock, flags); | 359 | raw_spin_unlock_irqrestore(&clockevents_lock, flags); |
@@ -386,6 +449,7 @@ void clockevents_exchange_device(struct clock_event_device *old, | |||
386 | * released list and do a notify add later. | 449 | * released list and do a notify add later. |
387 | */ | 450 | */ |
388 | if (old) { | 451 | if (old) { |
452 | module_put(old->owner); | ||
389 | clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED); | 453 | clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED); |
390 | list_del(&old->list); | 454 | list_del(&old->list); |
391 | list_add(&old->list, &clockevents_released); | 455 | list_add(&old->list, &clockevents_released); |
@@ -433,10 +497,36 @@ void clockevents_notify(unsigned long reason, void *arg) | |||
433 | int cpu; | 497 | int cpu; |
434 | 498 | ||
435 | raw_spin_lock_irqsave(&clockevents_lock, flags); | 499 | raw_spin_lock_irqsave(&clockevents_lock, flags); |
436 | clockevents_do_notify(reason, arg); | ||
437 | 500 | ||
438 | switch (reason) { | 501 | switch (reason) { |
502 | case CLOCK_EVT_NOTIFY_BROADCAST_ON: | ||
503 | case CLOCK_EVT_NOTIFY_BROADCAST_OFF: | ||
504 | case CLOCK_EVT_NOTIFY_BROADCAST_FORCE: | ||
505 | tick_broadcast_on_off(reason, arg); | ||
506 | break; | ||
507 | |||
508 | case CLOCK_EVT_NOTIFY_BROADCAST_ENTER: | ||
509 | case CLOCK_EVT_NOTIFY_BROADCAST_EXIT: | ||
510 | tick_broadcast_oneshot_control(reason); | ||
511 | break; | ||
512 | |||
513 | case CLOCK_EVT_NOTIFY_CPU_DYING: | ||
514 | tick_handover_do_timer(arg); | ||
515 | break; | ||
516 | |||
517 | case CLOCK_EVT_NOTIFY_SUSPEND: | ||
518 | tick_suspend(); | ||
519 | tick_suspend_broadcast(); | ||
520 | break; | ||
521 | |||
522 | case CLOCK_EVT_NOTIFY_RESUME: | ||
523 | tick_resume(); | ||
524 | break; | ||
525 | |||
439 | case CLOCK_EVT_NOTIFY_CPU_DEAD: | 526 | case CLOCK_EVT_NOTIFY_CPU_DEAD: |
527 | tick_shutdown_broadcast_oneshot(arg); | ||
528 | tick_shutdown_broadcast(arg); | ||
529 | tick_shutdown(arg); | ||
440 | /* | 530 | /* |
441 | * Unregister the clock event devices which were | 531 | * Unregister the clock event devices which were |
442 | * released from the users in the notify chain. | 532 | * released from the users in the notify chain. |
@@ -462,4 +552,123 @@ void clockevents_notify(unsigned long reason, void *arg) | |||
462 | raw_spin_unlock_irqrestore(&clockevents_lock, flags); | 552 | raw_spin_unlock_irqrestore(&clockevents_lock, flags); |
463 | } | 553 | } |
464 | EXPORT_SYMBOL_GPL(clockevents_notify); | 554 | EXPORT_SYMBOL_GPL(clockevents_notify); |
555 | |||
556 | #ifdef CONFIG_SYSFS | ||
557 | struct bus_type clockevents_subsys = { | ||
558 | .name = "clockevents", | ||
559 | .dev_name = "clockevent", | ||
560 | }; | ||
561 | |||
562 | static DEFINE_PER_CPU(struct device, tick_percpu_dev); | ||
563 | static struct tick_device *tick_get_tick_dev(struct device *dev); | ||
564 | |||
565 | static ssize_t sysfs_show_current_tick_dev(struct device *dev, | ||
566 | struct device_attribute *attr, | ||
567 | char *buf) | ||
568 | { | ||
569 | struct tick_device *td; | ||
570 | ssize_t count = 0; | ||
571 | |||
572 | raw_spin_lock_irq(&clockevents_lock); | ||
573 | td = tick_get_tick_dev(dev); | ||
574 | if (td && td->evtdev) | ||
575 | count = snprintf(buf, PAGE_SIZE, "%s\n", td->evtdev->name); | ||
576 | raw_spin_unlock_irq(&clockevents_lock); | ||
577 | return count; | ||
578 | } | ||
579 | static DEVICE_ATTR(current_device, 0444, sysfs_show_current_tick_dev, NULL); | ||
580 | |||
581 | /* We don't support the abomination of removable broadcast devices */ | ||
582 | static ssize_t sysfs_unbind_tick_dev(struct device *dev, | ||
583 | struct device_attribute *attr, | ||
584 | const char *buf, size_t count) | ||
585 | { | ||
586 | char name[CS_NAME_LEN]; | ||
587 | size_t ret = sysfs_get_uname(buf, name, count); | ||
588 | struct clock_event_device *ce; | ||
589 | |||
590 | if (ret < 0) | ||
591 | return ret; | ||
592 | |||
593 | ret = -ENODEV; | ||
594 | mutex_lock(&clockevents_mutex); | ||
595 | raw_spin_lock_irq(&clockevents_lock); | ||
596 | list_for_each_entry(ce, &clockevent_devices, list) { | ||
597 | if (!strcmp(ce->name, name)) { | ||
598 | ret = __clockevents_try_unbind(ce, dev->id); | ||
599 | break; | ||
600 | } | ||
601 | } | ||
602 | raw_spin_unlock_irq(&clockevents_lock); | ||
603 | /* | ||
604 | * We hold clockevents_mutex, so ce can't go away | ||
605 | */ | ||
606 | if (ret == -EAGAIN) | ||
607 | ret = clockevents_unbind(ce, dev->id); | ||
608 | mutex_unlock(&clockevents_mutex); | ||
609 | return ret ? ret : count; | ||
610 | } | ||
611 | static DEVICE_ATTR(unbind_device, 0200, NULL, sysfs_unbind_tick_dev); | ||
612 | |||
613 | #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST | ||
614 | static struct device tick_bc_dev = { | ||
615 | .init_name = "broadcast", | ||
616 | .id = 0, | ||
617 | .bus = &clockevents_subsys, | ||
618 | }; | ||
619 | |||
620 | static struct tick_device *tick_get_tick_dev(struct device *dev) | ||
621 | { | ||
622 | return dev == &tick_bc_dev ? tick_get_broadcast_device() : | ||
623 | &per_cpu(tick_cpu_device, dev->id); | ||
624 | } | ||
625 | |||
626 | static __init int tick_broadcast_init_sysfs(void) | ||
627 | { | ||
628 | int err = device_register(&tick_bc_dev); | ||
629 | |||
630 | if (!err) | ||
631 | err = device_create_file(&tick_bc_dev, &dev_attr_current_device); | ||
632 | return err; | ||
633 | } | ||
634 | #else | ||
635 | static struct tick_device *tick_get_tick_dev(struct device *dev) | ||
636 | { | ||
637 | return &per_cpu(tick_cpu_device, dev->id); | ||
638 | } | ||
639 | static inline int tick_broadcast_init_sysfs(void) { return 0; } | ||
465 | #endif | 640 | #endif |
641 | |||
642 | static int __init tick_init_sysfs(void) | ||
643 | { | ||
644 | int cpu; | ||
645 | |||
646 | for_each_possible_cpu(cpu) { | ||
647 | struct device *dev = &per_cpu(tick_percpu_dev, cpu); | ||
648 | int err; | ||
649 | |||
650 | dev->id = cpu; | ||
651 | dev->bus = &clockevents_subsys; | ||
652 | err = device_register(dev); | ||
653 | if (!err) | ||
654 | err = device_create_file(dev, &dev_attr_current_device); | ||
655 | if (!err) | ||
656 | err = device_create_file(dev, &dev_attr_unbind_device); | ||
657 | if (err) | ||
658 | return err; | ||
659 | } | ||
660 | return tick_broadcast_init_sysfs(); | ||
661 | } | ||
662 | |||
663 | static int __init clockevents_init_sysfs(void) | ||
664 | { | ||
665 | int err = subsys_system_register(&clockevents_subsys, NULL); | ||
666 | |||
667 | if (!err) | ||
668 | err = tick_init_sysfs(); | ||
669 | return err; | ||
670 | } | ||
671 | device_initcall(clockevents_init_sysfs); | ||
672 | #endif /* SYSFS */ | ||
673 | |||
674 | #endif /* GENERIC_CLOCK_EVENTS */ | ||
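Since clockevents_exchange_device() and clockevents_replace() now take and drop module references on dev->owner, a modular clock event driver has to set .owner for the unbind path to pin it correctly. A hedged sketch of what such a device definition might look like (the device is hypothetical; a real driver would also fill in .cpumask, .set_mode and .set_next_event and register it, e.g. via clockevents_config_and_register()):

#include <linux/clockchips.h>
#include <linux/module.h>

static struct clock_event_device demo_clockevent = {
	.name		= "demo-timer",
	.features	= CLOCK_EVT_FEAT_ONESHOT,
	.rating		= 300,
	.owner		= THIS_MODULE,	/* pinned by try_module_get() while bound */
};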
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index c9583382141a..50a8736757f3 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c | |||
@@ -31,6 +31,8 @@ | |||
31 | #include <linux/tick.h> | 31 | #include <linux/tick.h> |
32 | #include <linux/kthread.h> | 32 | #include <linux/kthread.h> |
33 | 33 | ||
34 | #include "tick-internal.h" | ||
35 | |||
34 | void timecounter_init(struct timecounter *tc, | 36 | void timecounter_init(struct timecounter *tc, |
35 | const struct cyclecounter *cc, | 37 | const struct cyclecounter *cc, |
36 | u64 start_tstamp) | 38 | u64 start_tstamp) |
@@ -174,11 +176,12 @@ clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 maxsec) | |||
174 | static struct clocksource *curr_clocksource; | 176 | static struct clocksource *curr_clocksource; |
175 | static LIST_HEAD(clocksource_list); | 177 | static LIST_HEAD(clocksource_list); |
176 | static DEFINE_MUTEX(clocksource_mutex); | 178 | static DEFINE_MUTEX(clocksource_mutex); |
177 | static char override_name[32]; | 179 | static char override_name[CS_NAME_LEN]; |
178 | static int finished_booting; | 180 | static int finished_booting; |
179 | 181 | ||
180 | #ifdef CONFIG_CLOCKSOURCE_WATCHDOG | 182 | #ifdef CONFIG_CLOCKSOURCE_WATCHDOG |
181 | static void clocksource_watchdog_work(struct work_struct *work); | 183 | static void clocksource_watchdog_work(struct work_struct *work); |
184 | static void clocksource_select(void); | ||
182 | 185 | ||
183 | static LIST_HEAD(watchdog_list); | 186 | static LIST_HEAD(watchdog_list); |
184 | static struct clocksource *watchdog; | 187 | static struct clocksource *watchdog; |
@@ -299,13 +302,30 @@ static void clocksource_watchdog(unsigned long data) | |||
299 | if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && | 302 | if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && |
300 | (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) && | 303 | (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) && |
301 | (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) { | 304 | (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) { |
305 | /* Mark it valid for high-res. */ | ||
302 | cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; | 306 | cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; |
307 | |||
308 | /* | ||
309 | * clocksource_done_booting() will sort it if | ||
310 | * finished_booting is not set yet. | ||
311 | */ | ||
312 | if (!finished_booting) | ||
313 | continue; | ||
314 | |||
303 | /* | 315 | /* |
304 | * We just marked the clocksource as highres-capable, | 316 | * If this is not the current clocksource let |
305 | * notify the rest of the system as well so that we | 317 | * the watchdog thread reselect it. Due to the |
306 | * transition into high-res mode: | 318 | * change to high res this clocksource might |
319 | * be preferred now. If it is the current | ||
320 | * clocksource let the tick code know about | ||
321 | * that change. | ||
307 | */ | 322 | */ |
308 | tick_clock_notify(); | 323 | if (cs != curr_clocksource) { |
324 | cs->flags |= CLOCK_SOURCE_RESELECT; | ||
325 | schedule_work(&watchdog_work); | ||
326 | } else { | ||
327 | tick_clock_notify(); | ||
328 | } | ||
309 | } | 329 | } |
310 | } | 330 | } |
311 | 331 | ||
@@ -388,44 +408,39 @@ static void clocksource_enqueue_watchdog(struct clocksource *cs) | |||
388 | 408 | ||
389 | static void clocksource_dequeue_watchdog(struct clocksource *cs) | 409 | static void clocksource_dequeue_watchdog(struct clocksource *cs) |
390 | { | 410 | { |
391 | struct clocksource *tmp; | ||
392 | unsigned long flags; | 411 | unsigned long flags; |
393 | 412 | ||
394 | spin_lock_irqsave(&watchdog_lock, flags); | 413 | spin_lock_irqsave(&watchdog_lock, flags); |
395 | if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) { | 414 | if (cs != watchdog) { |
396 | /* cs is a watched clocksource. */ | 415 | if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) { |
397 | list_del_init(&cs->wd_list); | 416 | /* cs is a watched clocksource. */ |
398 | } else if (cs == watchdog) { | 417 | list_del_init(&cs->wd_list); |
399 | /* Reset watchdog cycles */ | 418 | /* Check if the watchdog timer needs to be stopped. */ |
400 | clocksource_reset_watchdog(); | 419 | clocksource_stop_watchdog(); |
401 | /* Current watchdog is removed. Find an alternative. */ | ||
402 | watchdog = NULL; | ||
403 | list_for_each_entry(tmp, &clocksource_list, list) { | ||
404 | if (tmp == cs || tmp->flags & CLOCK_SOURCE_MUST_VERIFY) | ||
405 | continue; | ||
406 | if (!watchdog || tmp->rating > watchdog->rating) | ||
407 | watchdog = tmp; | ||
408 | } | 420 | } |
409 | } | 421 | } |
410 | cs->flags &= ~CLOCK_SOURCE_WATCHDOG; | ||
411 | /* Check if the watchdog timer needs to be stopped. */ | ||
412 | clocksource_stop_watchdog(); | ||
413 | spin_unlock_irqrestore(&watchdog_lock, flags); | 422 | spin_unlock_irqrestore(&watchdog_lock, flags); |
414 | } | 423 | } |
415 | 424 | ||
416 | static int clocksource_watchdog_kthread(void *data) | 425 | static int __clocksource_watchdog_kthread(void) |
417 | { | 426 | { |
418 | struct clocksource *cs, *tmp; | 427 | struct clocksource *cs, *tmp; |
419 | unsigned long flags; | 428 | unsigned long flags; |
420 | LIST_HEAD(unstable); | 429 | LIST_HEAD(unstable); |
430 | int select = 0; | ||
421 | 431 | ||
422 | mutex_lock(&clocksource_mutex); | ||
423 | spin_lock_irqsave(&watchdog_lock, flags); | 432 | spin_lock_irqsave(&watchdog_lock, flags); |
424 | list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) | 433 | list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) { |
425 | if (cs->flags & CLOCK_SOURCE_UNSTABLE) { | 434 | if (cs->flags & CLOCK_SOURCE_UNSTABLE) { |
426 | list_del_init(&cs->wd_list); | 435 | list_del_init(&cs->wd_list); |
427 | list_add(&cs->wd_list, &unstable); | 436 | list_add(&cs->wd_list, &unstable); |
437 | select = 1; | ||
428 | } | 438 | } |
439 | if (cs->flags & CLOCK_SOURCE_RESELECT) { | ||
440 | cs->flags &= ~CLOCK_SOURCE_RESELECT; | ||
441 | select = 1; | ||
442 | } | ||
443 | } | ||
429 | /* Check if the watchdog timer needs to be stopped. */ | 444 | /* Check if the watchdog timer needs to be stopped. */ |
430 | clocksource_stop_watchdog(); | 445 | clocksource_stop_watchdog(); |
431 | spin_unlock_irqrestore(&watchdog_lock, flags); | 446 | spin_unlock_irqrestore(&watchdog_lock, flags); |
@@ -435,10 +450,23 @@ static int clocksource_watchdog_kthread(void *data) | |||
435 | list_del_init(&cs->wd_list); | 450 | list_del_init(&cs->wd_list); |
436 | __clocksource_change_rating(cs, 0); | 451 | __clocksource_change_rating(cs, 0); |
437 | } | 452 | } |
453 | return select; | ||
454 | } | ||
455 | |||
456 | static int clocksource_watchdog_kthread(void *data) | ||
457 | { | ||
458 | mutex_lock(&clocksource_mutex); | ||
459 | if (__clocksource_watchdog_kthread()) | ||
460 | clocksource_select(); | ||
438 | mutex_unlock(&clocksource_mutex); | 461 | mutex_unlock(&clocksource_mutex); |
439 | return 0; | 462 | return 0; |
440 | } | 463 | } |
441 | 464 | ||
465 | static bool clocksource_is_watchdog(struct clocksource *cs) | ||
466 | { | ||
467 | return cs == watchdog; | ||
468 | } | ||
469 | |||
442 | #else /* CONFIG_CLOCKSOURCE_WATCHDOG */ | 470 | #else /* CONFIG_CLOCKSOURCE_WATCHDOG */ |
443 | 471 | ||
444 | static void clocksource_enqueue_watchdog(struct clocksource *cs) | 472 | static void clocksource_enqueue_watchdog(struct clocksource *cs) |
@@ -449,7 +477,8 @@ static void clocksource_enqueue_watchdog(struct clocksource *cs) | |||
449 | 477 | ||
450 | static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { } | 478 | static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { } |
451 | static inline void clocksource_resume_watchdog(void) { } | 479 | static inline void clocksource_resume_watchdog(void) { } |
452 | static inline int clocksource_watchdog_kthread(void *data) { return 0; } | 480 | static inline int __clocksource_watchdog_kthread(void) { return 0; } |
481 | static bool clocksource_is_watchdog(struct clocksource *cs) { return false; } | ||
453 | 482 | ||
454 | #endif /* CONFIG_CLOCKSOURCE_WATCHDOG */ | 483 | #endif /* CONFIG_CLOCKSOURCE_WATCHDOG */ |
455 | 484 | ||
@@ -553,24 +582,42 @@ static u64 clocksource_max_deferment(struct clocksource *cs) | |||
553 | 582 | ||
554 | #ifndef CONFIG_ARCH_USES_GETTIMEOFFSET | 583 | #ifndef CONFIG_ARCH_USES_GETTIMEOFFSET |
555 | 584 | ||
556 | /** | 585 | static struct clocksource *clocksource_find_best(bool oneshot, bool skipcur) |
557 | * clocksource_select - Select the best clocksource available | ||
558 | * | ||
559 | * Private function. Must hold clocksource_mutex when called. | ||
560 | * | ||
561 | * Select the clocksource with the best rating, or the clocksource, | ||
562 | * which is selected by userspace override. | ||
563 | */ | ||
564 | static void clocksource_select(void) | ||
565 | { | 586 | { |
566 | struct clocksource *best, *cs; | 587 | struct clocksource *cs; |
567 | 588 | ||
568 | if (!finished_booting || list_empty(&clocksource_list)) | 589 | if (!finished_booting || list_empty(&clocksource_list)) |
590 | return NULL; | ||
591 | |||
592 | /* | ||
593 | * We pick the clocksource with the highest rating. If oneshot | ||
594 | * mode is active, we pick the highres valid clocksource with | ||
595 | * the best rating. | ||
596 | */ | ||
597 | list_for_each_entry(cs, &clocksource_list, list) { | ||
598 | if (skipcur && cs == curr_clocksource) | ||
599 | continue; | ||
600 | if (oneshot && !(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES)) | ||
601 | continue; | ||
602 | return cs; | ||
603 | } | ||
604 | return NULL; | ||
605 | } | ||
606 | |||
607 | static void __clocksource_select(bool skipcur) | ||
608 | { | ||
609 | bool oneshot = tick_oneshot_mode_active(); | ||
610 | struct clocksource *best, *cs; | ||
611 | |||
612 | /* Find the best suitable clocksource */ | ||
613 | best = clocksource_find_best(oneshot, skipcur); | ||
614 | if (!best) | ||
569 | return; | 615 | return; |
570 | /* First clocksource on the list has the best rating. */ | 616 | |
571 | best = list_first_entry(&clocksource_list, struct clocksource, list); | ||
572 | /* Check for the override clocksource. */ | 617 | /* Check for the override clocksource. */ |
573 | list_for_each_entry(cs, &clocksource_list, list) { | 618 | list_for_each_entry(cs, &clocksource_list, list) { |
619 | if (skipcur && cs == curr_clocksource) | ||
620 | continue; | ||
574 | if (strcmp(cs->name, override_name) != 0) | 621 | if (strcmp(cs->name, override_name) != 0) |
575 | continue; | 622 | continue; |
576 | /* | 623 | /* |
@@ -578,8 +625,7 @@ static void clocksource_select(void) | |||
578 | * capable clocksource if the tick code is in oneshot | 625 | * capable clocksource if the tick code is in oneshot |
579 | * mode (highres or nohz) | 626 | * mode (highres or nohz) |
580 | */ | 627 | */ |
581 | if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && | 628 | if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && oneshot) { |
582 | tick_oneshot_mode_active()) { | ||
583 | /* Override clocksource cannot be used. */ | 629 | /* Override clocksource cannot be used. */ |
584 | printk(KERN_WARNING "Override clocksource %s is not " | 630 | printk(KERN_WARNING "Override clocksource %s is not " |
585 | "HRT compatible. Cannot switch while in " | 631 | "HRT compatible. Cannot switch while in " |
@@ -590,16 +636,35 @@ static void clocksource_select(void) | |||
590 | best = cs; | 636 | best = cs; |
591 | break; | 637 | break; |
592 | } | 638 | } |
593 | if (curr_clocksource != best) { | 639 | |
594 | printk(KERN_INFO "Switching to clocksource %s\n", best->name); | 640 | if (curr_clocksource != best && !timekeeping_notify(best)) { |
641 | pr_info("Switched to clocksource %s\n", best->name); | ||
595 | curr_clocksource = best; | 642 | curr_clocksource = best; |
596 | timekeeping_notify(curr_clocksource); | ||
597 | } | 643 | } |
598 | } | 644 | } |
599 | 645 | ||
646 | /** | ||
647 | * clocksource_select - Select the best clocksource available | ||
648 | * | ||
649 | * Private function. Must hold clocksource_mutex when called. | ||
650 | * | ||
651 | * Select the clocksource with the best rating, or the clocksource, | ||
652 | * which is selected by userspace override. | ||
653 | */ | ||
654 | static void clocksource_select(void) | ||
655 | { | ||
656 | return __clocksource_select(false); | ||
657 | } | ||
658 | |||
659 | static void clocksource_select_fallback(void) | ||
660 | { | ||
661 | return __clocksource_select(true); | ||
662 | } | ||
663 | |||
600 | #else /* !CONFIG_ARCH_USES_GETTIMEOFFSET */ | 664 | #else /* !CONFIG_ARCH_USES_GETTIMEOFFSET */ |
601 | 665 | ||
602 | static inline void clocksource_select(void) { } | 666 | static inline void clocksource_select(void) { } |
667 | static inline void clocksource_select_fallback(void) { } | ||
603 | 668 | ||
604 | #endif | 669 | #endif |
605 | 670 | ||
@@ -614,16 +679,11 @@ static int __init clocksource_done_booting(void) | |||
614 | { | 679 | { |
615 | mutex_lock(&clocksource_mutex); | 680 | mutex_lock(&clocksource_mutex); |
616 | curr_clocksource = clocksource_default_clock(); | 681 | curr_clocksource = clocksource_default_clock(); |
617 | mutex_unlock(&clocksource_mutex); | ||
618 | |||
619 | finished_booting = 1; | 682 | finished_booting = 1; |
620 | |||
621 | /* | 683 | /* |
622 | * Run the watchdog first to eliminate unstable clock sources | 684 | * Run the watchdog first to eliminate unstable clock sources |
623 | */ | 685 | */ |
624 | clocksource_watchdog_kthread(NULL); | 686 | __clocksource_watchdog_kthread(); |
625 | |||
626 | mutex_lock(&clocksource_mutex); | ||
627 | clocksource_select(); | 687 | clocksource_select(); |
628 | mutex_unlock(&clocksource_mutex); | 688 | mutex_unlock(&clocksource_mutex); |
629 | return 0; | 689 | return 0; |
@@ -756,7 +816,6 @@ static void __clocksource_change_rating(struct clocksource *cs, int rating) | |||
756 | list_del(&cs->list); | 816 | list_del(&cs->list); |
757 | cs->rating = rating; | 817 | cs->rating = rating; |
758 | clocksource_enqueue(cs); | 818 | clocksource_enqueue(cs); |
759 | clocksource_select(); | ||
760 | } | 819 | } |
761 | 820 | ||
762 | /** | 821 | /** |
@@ -768,21 +827,47 @@ void clocksource_change_rating(struct clocksource *cs, int rating) | |||
768 | { | 827 | { |
769 | mutex_lock(&clocksource_mutex); | 828 | mutex_lock(&clocksource_mutex); |
770 | __clocksource_change_rating(cs, rating); | 829 | __clocksource_change_rating(cs, rating); |
830 | clocksource_select(); | ||
771 | mutex_unlock(&clocksource_mutex); | 831 | mutex_unlock(&clocksource_mutex); |
772 | } | 832 | } |
773 | EXPORT_SYMBOL(clocksource_change_rating); | 833 | EXPORT_SYMBOL(clocksource_change_rating); |
774 | 834 | ||
835 | /* | ||
836 | * Unbind clocksource @cs. Called with clocksource_mutex held | ||
837 | */ | ||
838 | static int clocksource_unbind(struct clocksource *cs) | ||
839 | { | ||
840 | /* | ||
841 | * I really can't convince myself to support this on hardware | ||
842 | * designed by lobotomized monkeys. | ||
843 | */ | ||
844 | if (clocksource_is_watchdog(cs)) | ||
845 | return -EBUSY; | ||
846 | |||
847 | if (cs == curr_clocksource) { | ||
848 | /* Select and try to install a replacement clock source */ | ||
849 | clocksource_select_fallback(); | ||
850 | if (curr_clocksource == cs) | ||
851 | return -EBUSY; | ||
852 | } | ||
853 | clocksource_dequeue_watchdog(cs); | ||
854 | list_del_init(&cs->list); | ||
855 | return 0; | ||
856 | } | ||
857 | |||
775 | /** | 858 | /** |
776 | * clocksource_unregister - remove a registered clocksource | 859 | * clocksource_unregister - remove a registered clocksource |
777 | * @cs: clocksource to be unregistered | 860 | * @cs: clocksource to be unregistered |
778 | */ | 861 | */ |
779 | void clocksource_unregister(struct clocksource *cs) | 862 | int clocksource_unregister(struct clocksource *cs) |
780 | { | 863 | { |
864 | int ret = 0; | ||
865 | |||
781 | mutex_lock(&clocksource_mutex); | 866 | mutex_lock(&clocksource_mutex); |
782 | clocksource_dequeue_watchdog(cs); | 867 | if (!list_empty(&cs->list)) |
783 | list_del(&cs->list); | 868 | ret = clocksource_unbind(cs); |
784 | clocksource_select(); | ||
785 | mutex_unlock(&clocksource_mutex); | 869 | mutex_unlock(&clocksource_mutex); |
870 | return ret; | ||
786 | } | 871 | } |
787 | EXPORT_SYMBOL(clocksource_unregister); | 872 | EXPORT_SYMBOL(clocksource_unregister); |
788 | 873 | ||
@@ -808,6 +893,23 @@ sysfs_show_current_clocksources(struct device *dev, | |||
808 | return count; | 893 | return count; |
809 | } | 894 | } |
810 | 895 | ||
896 | size_t sysfs_get_uname(const char *buf, char *dst, size_t cnt) | ||
897 | { | ||
898 | size_t ret = cnt; | ||
899 | |||
900 | /* strings from sysfs write are not 0 terminated! */ | ||
901 | if (!cnt || cnt >= CS_NAME_LEN) | ||
902 | return -EINVAL; | ||
903 | |||
904 | /* strip off \n: */ | ||
905 | if (buf[cnt-1] == '\n') | ||
906 | cnt--; | ||
907 | if (cnt > 0) | ||
908 | memcpy(dst, buf, cnt); | ||
909 | dst[cnt] = 0; | ||
910 | return ret; | ||
911 | } | ||
912 | |||
811 | /** | 913 | /** |
812 | * sysfs_override_clocksource - interface for manually overriding clocksource | 914 | * sysfs_override_clocksource - interface for manually overriding clocksource |
813 | * @dev: unused | 915 | * @dev: unused |
@@ -822,22 +924,13 @@ static ssize_t sysfs_override_clocksource(struct device *dev, | |||
822 | struct device_attribute *attr, | 924 | struct device_attribute *attr, |
823 | const char *buf, size_t count) | 925 | const char *buf, size_t count) |
824 | { | 926 | { |
825 | size_t ret = count; | 927 | size_t ret; |
826 | |||
827 | /* strings from sysfs write are not 0 terminated! */ | ||
828 | if (count >= sizeof(override_name)) | ||
829 | return -EINVAL; | ||
830 | |||
831 | /* strip of \n: */ | ||
832 | if (buf[count-1] == '\n') | ||
833 | count--; | ||
834 | 928 | ||
835 | mutex_lock(&clocksource_mutex); | 929 | mutex_lock(&clocksource_mutex); |
836 | 930 | ||
837 | if (count > 0) | 931 | ret = sysfs_get_uname(buf, override_name, count); |
838 | memcpy(override_name, buf, count); | 932 | if (ret >= 0) |
839 | override_name[count] = 0; | 933 | clocksource_select(); |
840 | clocksource_select(); | ||
841 | 934 | ||
842 | mutex_unlock(&clocksource_mutex); | 935 | mutex_unlock(&clocksource_mutex); |
843 | 936 | ||
@@ -845,6 +938,40 @@ static ssize_t sysfs_override_clocksource(struct device *dev, | |||
845 | } | 938 | } |
846 | 939 | ||
847 | /** | 940 | /** |
941 | * sysfs_unbind_clocksource - interface for manually unbinding clocksource | ||
942 | * @dev: unused | ||
943 | * @attr: unused | ||
944 | * @buf: name of the clocksource to unbind | ||
945 | * @count: length of buffer | ||
946 | * | ||
947 | * Takes input from sysfs interface for manually unbinding a clocksource. | ||
948 | */ | ||
949 | static ssize_t sysfs_unbind_clocksource(struct device *dev, | ||
950 | struct device_attribute *attr, | ||
951 | const char *buf, size_t count) | ||
952 | { | ||
953 | struct clocksource *cs; | ||
954 | char name[CS_NAME_LEN]; | ||
955 | size_t ret; | ||
956 | |||
957 | ret = sysfs_get_uname(buf, name, count); | ||
958 | if (ret < 0) | ||
959 | return ret; | ||
960 | |||
961 | ret = -ENODEV; | ||
962 | mutex_lock(&clocksource_mutex); | ||
963 | list_for_each_entry(cs, &clocksource_list, list) { | ||
964 | if (strcmp(cs->name, name)) | ||
965 | continue; | ||
966 | ret = clocksource_unbind(cs); | ||
967 | break; | ||
968 | } | ||
969 | mutex_unlock(&clocksource_mutex); | ||
970 | |||
971 | return ret ? ret : count; | ||
972 | } | ||
973 | |||
974 | /** | ||
848 | * sysfs_show_available_clocksources - sysfs interface for listing clocksource | 975 | * sysfs_show_available_clocksources - sysfs interface for listing clocksource |
849 | * @dev: unused | 976 | * @dev: unused |
850 | * @attr: unused | 977 | * @attr: unused |
@@ -886,6 +1013,8 @@ sysfs_show_available_clocksources(struct device *dev, | |||
886 | static DEVICE_ATTR(current_clocksource, 0644, sysfs_show_current_clocksources, | 1013 | static DEVICE_ATTR(current_clocksource, 0644, sysfs_show_current_clocksources, |
887 | sysfs_override_clocksource); | 1014 | sysfs_override_clocksource); |
888 | 1015 | ||
1016 | static DEVICE_ATTR(unbind_clocksource, 0200, NULL, sysfs_unbind_clocksource); | ||
1017 | |||
889 | static DEVICE_ATTR(available_clocksource, 0444, | 1018 | static DEVICE_ATTR(available_clocksource, 0444, |
890 | sysfs_show_available_clocksources, NULL); | 1019 | sysfs_show_available_clocksources, NULL); |
891 | 1020 | ||
@@ -910,6 +1039,9 @@ static int __init init_clocksource_sysfs(void) | |||
910 | &device_clocksource, | 1039 | &device_clocksource, |
911 | &dev_attr_current_clocksource); | 1040 | &dev_attr_current_clocksource); |
912 | if (!error) | 1041 | if (!error) |
1042 | error = device_create_file(&device_clocksource, | ||
1043 | &dev_attr_unbind_clocksource); | ||
1044 | if (!error) | ||
913 | error = device_create_file( | 1045 | error = device_create_file( |
914 | &device_clocksource, | 1046 | &device_clocksource, |
915 | &dev_attr_available_clocksource); | 1047 | &dev_attr_available_clocksource); |
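With the clocksource changes above, clocksource_unregister() returns an int and can fail, e.g. -EBUSY when the clocksource is acting as the watchdog or no fallback can be installed, so a removable driver should check the result before touching the hardware. An illustrative sketch only (the demo_* names and the 32-bit MMIO counter are assumptions):

#include <linux/clocksource.h>
#include <linux/io.h>

static void __iomem *demo_base;		/* mapped elsewhere by the driver */

static cycle_t demo_cs_read(struct clocksource *cs)
{
	return readl_relaxed(demo_base);
}

static struct clocksource demo_cs = {
	.name	= "demo",
	.rating	= 200,
	.read	= demo_cs_read,
	.mask	= CLOCKSOURCE_MASK(32),
	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
};

static int demo_remove(void)
{
	int ret = clocksource_unregister(&demo_cs);

	if (ret)
		return ret;	/* e.g. -EBUSY: still the watchdog or no fallback */

	/* only now is it safe to disable and unmap the counter */
	return 0;
}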
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c new file mode 100644 index 000000000000..a326f27d7f09 --- /dev/null +++ b/kernel/time/sched_clock.c | |||
@@ -0,0 +1,212 @@ | |||
1 | /* | ||
2 | * sched_clock.c: support for extending counters to full 64-bit ns counter | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | */ | ||
8 | #include <linux/clocksource.h> | ||
9 | #include <linux/init.h> | ||
10 | #include <linux/jiffies.h> | ||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/moduleparam.h> | ||
13 | #include <linux/sched.h> | ||
14 | #include <linux/syscore_ops.h> | ||
15 | #include <linux/timer.h> | ||
16 | #include <linux/sched_clock.h> | ||
17 | |||
18 | struct clock_data { | ||
19 | u64 epoch_ns; | ||
20 | u32 epoch_cyc; | ||
21 | u32 epoch_cyc_copy; | ||
22 | unsigned long rate; | ||
23 | u32 mult; | ||
24 | u32 shift; | ||
25 | bool suspended; | ||
26 | }; | ||
27 | |||
28 | static void sched_clock_poll(unsigned long wrap_ticks); | ||
29 | static DEFINE_TIMER(sched_clock_timer, sched_clock_poll, 0, 0); | ||
30 | static int irqtime = -1; | ||
31 | |||
32 | core_param(irqtime, irqtime, int, 0400); | ||
33 | |||
34 | static struct clock_data cd = { | ||
35 | .mult = NSEC_PER_SEC / HZ, | ||
36 | }; | ||
37 | |||
38 | static u32 __read_mostly sched_clock_mask = 0xffffffff; | ||
39 | |||
40 | static u32 notrace jiffy_sched_clock_read(void) | ||
41 | { | ||
42 | return (u32)(jiffies - INITIAL_JIFFIES); | ||
43 | } | ||
44 | |||
45 | static u32 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read; | ||
46 | |||
47 | static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift) | ||
48 | { | ||
49 | return (cyc * mult) >> shift; | ||
50 | } | ||
51 | |||
52 | static unsigned long long notrace sched_clock_32(void) | ||
53 | { | ||
54 | u64 epoch_ns; | ||
55 | u32 epoch_cyc; | ||
56 | u32 cyc; | ||
57 | |||
58 | if (cd.suspended) | ||
59 | return cd.epoch_ns; | ||
60 | |||
61 | /* | ||
62 | * Load the epoch_cyc and epoch_ns atomically. We do this by | ||
63 | * ensuring that we always write epoch_cyc, epoch_ns and | ||
64 | * epoch_cyc_copy in strict order, and read them in strict order. | ||
65 | * If epoch_cyc and epoch_cyc_copy are not equal, then we're in | ||
66 | * the middle of an update, and we should repeat the load. | ||
67 | */ | ||
68 | do { | ||
69 | epoch_cyc = cd.epoch_cyc; | ||
70 | smp_rmb(); | ||
71 | epoch_ns = cd.epoch_ns; | ||
72 | smp_rmb(); | ||
73 | } while (epoch_cyc != cd.epoch_cyc_copy); | ||
74 | |||
75 | cyc = read_sched_clock(); | ||
76 | cyc = (cyc - epoch_cyc) & sched_clock_mask; | ||
77 | return epoch_ns + cyc_to_ns(cyc, cd.mult, cd.shift); | ||
78 | } | ||
79 | |||
80 | /* | ||
81 | * Atomically update the sched_clock epoch. | ||
82 | */ | ||
83 | static void notrace update_sched_clock(void) | ||
84 | { | ||
85 | unsigned long flags; | ||
86 | u32 cyc; | ||
87 | u64 ns; | ||
88 | |||
89 | cyc = read_sched_clock(); | ||
90 | ns = cd.epoch_ns + | ||
91 | cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask, | ||
92 | cd.mult, cd.shift); | ||
93 | /* | ||
94 | * Write epoch_cyc and epoch_ns in a way that the update is | ||
95 | * detectable in sched_clock_32(). | ||
96 | */ | ||
97 | raw_local_irq_save(flags); | ||
98 | cd.epoch_cyc_copy = cyc; | ||
99 | smp_wmb(); | ||
100 | cd.epoch_ns = ns; | ||
101 | smp_wmb(); | ||
102 | cd.epoch_cyc = cyc; | ||
103 | raw_local_irq_restore(flags); | ||
104 | } | ||
105 | |||
106 | static void sched_clock_poll(unsigned long wrap_ticks) | ||
107 | { | ||
108 | mod_timer(&sched_clock_timer, round_jiffies(jiffies + wrap_ticks)); | ||
109 | update_sched_clock(); | ||
110 | } | ||
111 | |||
112 | void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate) | ||
113 | { | ||
114 | unsigned long r, w; | ||
115 | u64 res, wrap; | ||
116 | char r_unit; | ||
117 | |||
118 | if (cd.rate > rate) | ||
119 | return; | ||
120 | |||
121 | BUG_ON(bits > 32); | ||
122 | WARN_ON(!irqs_disabled()); | ||
123 | read_sched_clock = read; | ||
124 | sched_clock_mask = (1 << bits) - 1; | ||
125 | cd.rate = rate; | ||
126 | |||
127 | /* calculate the mult/shift to convert counter ticks to ns. */ | ||
128 | clocks_calc_mult_shift(&cd.mult, &cd.shift, rate, NSEC_PER_SEC, 0); | ||
129 | |||
130 | r = rate; | ||
131 | if (r >= 4000000) { | ||
132 | r /= 1000000; | ||
133 | r_unit = 'M'; | ||
134 | } else if (r >= 1000) { | ||
135 | r /= 1000; | ||
136 | r_unit = 'k'; | ||
137 | } else | ||
138 | r_unit = ' '; | ||
139 | |||
140 | /* calculate how many ns until we wrap */ | ||
141 | wrap = cyc_to_ns((1ULL << bits) - 1, cd.mult, cd.shift); | ||
142 | do_div(wrap, NSEC_PER_MSEC); | ||
143 | w = wrap; | ||
144 | |||
145 | /* calculate the ns resolution of this counter */ | ||
146 | res = cyc_to_ns(1ULL, cd.mult, cd.shift); | ||
147 | pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lums\n", | ||
148 | bits, r, r_unit, res, w); | ||
149 | |||
150 | /* | ||
151 | * Start the timer to keep sched_clock() properly updated and | ||
152 | * set the initial epoch. | ||
153 | */ | ||
154 | sched_clock_timer.data = msecs_to_jiffies(w - (w / 10)); | ||
155 | update_sched_clock(); | ||
156 | |||
157 | /* | ||
158 | * Ensure that sched_clock() starts off at 0ns | ||
159 | */ | ||
160 | cd.epoch_ns = 0; | ||
161 | |||
162 | /* Enable IRQ time accounting if we have a fast enough sched_clock */ | ||
163 | if (irqtime > 0 || (irqtime == -1 && rate >= 1000000)) | ||
164 | enable_sched_clock_irqtime(); | ||
165 | |||
166 | pr_debug("Registered %pF as sched_clock source\n", read); | ||
167 | } | ||
168 | |||
169 | unsigned long long __read_mostly (*sched_clock_func)(void) = sched_clock_32; | ||
170 | |||
171 | unsigned long long notrace sched_clock(void) | ||
172 | { | ||
173 | return sched_clock_func(); | ||
174 | } | ||
175 | |||
176 | void __init sched_clock_postinit(void) | ||
177 | { | ||
178 | /* | ||
179 | * If no sched_clock function has been provided at that point, | ||
180 | * make it the final one. | ||
181 | */ | ||
182 | if (read_sched_clock == jiffy_sched_clock_read) | ||
183 | setup_sched_clock(jiffy_sched_clock_read, 32, HZ); | ||
184 | |||
185 | sched_clock_poll(sched_clock_timer.data); | ||
186 | } | ||
187 | |||
188 | static int sched_clock_suspend(void) | ||
189 | { | ||
190 | sched_clock_poll(sched_clock_timer.data); | ||
191 | cd.suspended = true; | ||
192 | return 0; | ||
193 | } | ||
194 | |||
195 | static void sched_clock_resume(void) | ||
196 | { | ||
197 | cd.epoch_cyc = read_sched_clock(); | ||
198 | cd.epoch_cyc_copy = cd.epoch_cyc; | ||
199 | cd.suspended = false; | ||
200 | } | ||
201 | |||
202 | static struct syscore_ops sched_clock_ops = { | ||
203 | .suspend = sched_clock_suspend, | ||
204 | .resume = sched_clock_resume, | ||
205 | }; | ||
206 | |||
207 | static int __init sched_clock_syscore_init(void) | ||
208 | { | ||
209 | register_syscore_ops(&sched_clock_ops); | ||
210 | return 0; | ||
211 | } | ||
212 | device_initcall(sched_clock_syscore_init); | ||
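The new kernel/time/sched_clock.c is the ARM sched_clock extension moved to generic code: platform timer code supplies a raw counter read function plus its width and rate through setup_sched_clock(), and the core extends that to a 64-bit nanosecond clock using the epoch/epoch-copy protocol above. A sketch of how a platform might hook up a free-running 32-bit counter (the register mapping and the 24 MHz rate are hypothetical):

#include <linux/init.h>
#include <linux/io.h>
#include <linux/sched_clock.h>

static void __iomem *demo_counter_base;	/* mapped by the platform's timer init */

static u32 notrace demo_sched_clock_read(void)
{
	return readl_relaxed(demo_counter_base);
}

static void __init demo_sched_clock_init(void)
{
	/* Free-running 32-bit counter, assumed to tick at 24 MHz. */
	setup_sched_clock(demo_sched_clock_read, 32, 24000000);
}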
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 20d6fba70652..218bcb565fed 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/profile.h> | 19 | #include <linux/profile.h> |
20 | #include <linux/sched.h> | 20 | #include <linux/sched.h> |
21 | #include <linux/smp.h> | 21 | #include <linux/smp.h> |
22 | #include <linux/module.h> | ||
22 | 23 | ||
23 | #include "tick-internal.h" | 24 | #include "tick-internal.h" |
24 | 25 | ||
@@ -29,6 +30,7 @@ | |||
29 | 30 | ||
30 | static struct tick_device tick_broadcast_device; | 31 | static struct tick_device tick_broadcast_device; |
31 | static cpumask_var_t tick_broadcast_mask; | 32 | static cpumask_var_t tick_broadcast_mask; |
33 | static cpumask_var_t tick_broadcast_on; | ||
32 | static cpumask_var_t tmpmask; | 34 | static cpumask_var_t tmpmask; |
33 | static DEFINE_RAW_SPINLOCK(tick_broadcast_lock); | 35 | static DEFINE_RAW_SPINLOCK(tick_broadcast_lock); |
34 | static int tick_broadcast_force; | 36 | static int tick_broadcast_force; |
@@ -64,17 +66,34 @@ static void tick_broadcast_start_periodic(struct clock_event_device *bc) | |||
64 | /* | 66 | /* |
65 | * Check, if the device can be utilized as broadcast device: | 67 | * Check, if the device can be utilized as broadcast device: |
66 | */ | 68 | */ |
67 | int tick_check_broadcast_device(struct clock_event_device *dev) | 69 | static bool tick_check_broadcast_device(struct clock_event_device *curdev, |
70 | struct clock_event_device *newdev) | ||
71 | { | ||
72 | if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) || | ||
73 | (newdev->features & CLOCK_EVT_FEAT_C3STOP)) | ||
74 | return false; | ||
75 | |||
76 | if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT && | ||
77 | !(newdev->features & CLOCK_EVT_FEAT_ONESHOT)) | ||
78 | return false; | ||
79 | |||
80 | return !curdev || newdev->rating > curdev->rating; | ||
81 | } | ||
82 | |||
83 | /* | ||
84 | * Conditionally install/replace broadcast device | ||
85 | */ | ||
86 | void tick_install_broadcast_device(struct clock_event_device *dev) | ||
68 | { | 87 | { |
69 | struct clock_event_device *cur = tick_broadcast_device.evtdev; | 88 | struct clock_event_device *cur = tick_broadcast_device.evtdev; |
70 | 89 | ||
71 | if ((dev->features & CLOCK_EVT_FEAT_DUMMY) || | 90 | if (!tick_check_broadcast_device(cur, dev)) |
72 | (tick_broadcast_device.evtdev && | 91 | return; |
73 | tick_broadcast_device.evtdev->rating >= dev->rating) || | 92 | |
74 | (dev->features & CLOCK_EVT_FEAT_C3STOP)) | 93 | if (!try_module_get(dev->owner)) |
75 | return 0; | 94 | return; |
76 | 95 | ||
77 | clockevents_exchange_device(tick_broadcast_device.evtdev, dev); | 96 | clockevents_exchange_device(cur, dev); |
78 | if (cur) | 97 | if (cur) |
79 | cur->event_handler = clockevents_handle_noop; | 98 | cur->event_handler = clockevents_handle_noop; |
80 | tick_broadcast_device.evtdev = dev; | 99 | tick_broadcast_device.evtdev = dev; |
@@ -90,7 +109,6 @@ int tick_check_broadcast_device(struct clock_event_device *dev) | |||
90 | */ | 109 | */ |
91 | if (dev->features & CLOCK_EVT_FEAT_ONESHOT) | 110 | if (dev->features & CLOCK_EVT_FEAT_ONESHOT) |
92 | tick_clock_notify(); | 111 | tick_clock_notify(); |
93 | return 1; | ||
94 | } | 112 | } |
95 | 113 | ||
96 | /* | 114 | /* |
@@ -123,8 +141,9 @@ static void tick_device_setup_broadcast_func(struct clock_event_device *dev) | |||
123 | */ | 141 | */ |
124 | int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) | 142 | int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) |
125 | { | 143 | { |
144 | struct clock_event_device *bc = tick_broadcast_device.evtdev; | ||
126 | unsigned long flags; | 145 | unsigned long flags; |
127 | int ret = 0; | 146 | int ret; |
128 | 147 | ||
129 | raw_spin_lock_irqsave(&tick_broadcast_lock, flags); | 148 | raw_spin_lock_irqsave(&tick_broadcast_lock, flags); |
130 | 149 | ||
@@ -138,20 +157,62 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) | |||
138 | dev->event_handler = tick_handle_periodic; | 157 | dev->event_handler = tick_handle_periodic; |
139 | tick_device_setup_broadcast_func(dev); | 158 | tick_device_setup_broadcast_func(dev); |
140 | cpumask_set_cpu(cpu, tick_broadcast_mask); | 159 | cpumask_set_cpu(cpu, tick_broadcast_mask); |
141 | tick_broadcast_start_periodic(tick_broadcast_device.evtdev); | 160 | if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) |
161 | tick_broadcast_start_periodic(bc); | ||
162 | else | ||
163 | tick_broadcast_setup_oneshot(bc); | ||
142 | ret = 1; | 164 | ret = 1; |
143 | } else { | 165 | } else { |
144 | /* | 166 | /* |
145 | * When the new device is not affected by the stop | 167 | * Clear the broadcast bit for this cpu if the |
146 | * feature and the cpu is marked in the broadcast mask | 168 | * device is not power state affected. |
147 | * then clear the broadcast bit. | ||
148 | */ | 169 | */ |
149 | if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) { | 170 | if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) |
150 | int cpu = smp_processor_id(); | ||
151 | cpumask_clear_cpu(cpu, tick_broadcast_mask); | 171 | cpumask_clear_cpu(cpu, tick_broadcast_mask); |
152 | tick_broadcast_clear_oneshot(cpu); | 172 | else |
153 | } else { | ||
154 | tick_device_setup_broadcast_func(dev); | 173 | tick_device_setup_broadcast_func(dev); |
174 | |||
175 | /* | ||
176 | * Clear the broadcast bit if the CPU is not in | ||
177 | * periodic broadcast on state. | ||
178 | */ | ||
179 | if (!cpumask_test_cpu(cpu, tick_broadcast_on)) | ||
180 | cpumask_clear_cpu(cpu, tick_broadcast_mask); | ||
181 | |||
182 | switch (tick_broadcast_device.mode) { | ||
183 | case TICKDEV_MODE_ONESHOT: | ||
184 | /* | ||
185 | * If the system is in oneshot mode we can | ||
186 | * unconditionally clear the oneshot mask bit, | ||
187 | * because the CPU is running and therefore | ||
188 | * not in an idle state which causes the power | ||
189 | * state affected device to stop. Let the | ||
190 | * caller initialize the device. | ||
191 | */ | ||
192 | tick_broadcast_clear_oneshot(cpu); | ||
193 | ret = 0; | ||
194 | break; | ||
195 | |||
196 | case TICKDEV_MODE_PERIODIC: | ||
197 | /* | ||
198 | * If the system is in periodic mode, check | ||
199 | * whether the broadcast device can be | ||
200 | * switched off now. | ||
201 | */ | ||
202 | if (cpumask_empty(tick_broadcast_mask) && bc) | ||
203 | clockevents_shutdown(bc); | ||
204 | /* | ||
205 | * If we kept the cpu in the broadcast mask, | ||
206 | * tell the caller to leave the per cpu device | ||
207 | * in shutdown state. The periodic interrupt | ||
208 | * is delivered by the broadcast device. | ||
209 | */ | ||
210 | ret = cpumask_test_cpu(cpu, tick_broadcast_mask); | ||
211 | break; | ||
212 | default: | ||
213 | /* Nothing to do */ | ||
214 | ret = 0; | ||
215 | break; | ||
155 | } | 216 | } |
156 | } | 217 | } |
157 | raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); | 218 | raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); |
@@ -281,6 +342,7 @@ static void tick_do_broadcast_on_off(unsigned long *reason) | |||
281 | switch (*reason) { | 342 | switch (*reason) { |
282 | case CLOCK_EVT_NOTIFY_BROADCAST_ON: | 343 | case CLOCK_EVT_NOTIFY_BROADCAST_ON: |
283 | case CLOCK_EVT_NOTIFY_BROADCAST_FORCE: | 344 | case CLOCK_EVT_NOTIFY_BROADCAST_FORCE: |
345 | cpumask_set_cpu(cpu, tick_broadcast_on); | ||
284 | if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) { | 346 | if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) { |
285 | if (tick_broadcast_device.mode == | 347 | if (tick_broadcast_device.mode == |
286 | TICKDEV_MODE_PERIODIC) | 348 | TICKDEV_MODE_PERIODIC) |
@@ -290,8 +352,12 @@ static void tick_do_broadcast_on_off(unsigned long *reason) | |||
290 | tick_broadcast_force = 1; | 352 | tick_broadcast_force = 1; |
291 | break; | 353 | break; |
292 | case CLOCK_EVT_NOTIFY_BROADCAST_OFF: | 354 | case CLOCK_EVT_NOTIFY_BROADCAST_OFF: |
293 | if (!tick_broadcast_force && | 355 | if (tick_broadcast_force) |
294 | cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) { | 356 | break; |
357 | cpumask_clear_cpu(cpu, tick_broadcast_on); | ||
358 | if (!tick_device_is_functional(dev)) | ||
359 | break; | ||
360 | if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) { | ||
295 | if (tick_broadcast_device.mode == | 361 | if (tick_broadcast_device.mode == |
296 | TICKDEV_MODE_PERIODIC) | 362 | TICKDEV_MODE_PERIODIC) |
297 | tick_setup_periodic(dev, 0); | 363 | tick_setup_periodic(dev, 0); |
@@ -349,6 +415,7 @@ void tick_shutdown_broadcast(unsigned int *cpup) | |||
349 | 415 | ||
350 | bc = tick_broadcast_device.evtdev; | 416 | bc = tick_broadcast_device.evtdev; |
351 | cpumask_clear_cpu(cpu, tick_broadcast_mask); | 417 | cpumask_clear_cpu(cpu, tick_broadcast_mask); |
418 | cpumask_clear_cpu(cpu, tick_broadcast_on); | ||
352 | 419 | ||
353 | if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) { | 420 | if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) { |
354 | if (bc && cpumask_empty(tick_broadcast_mask)) | 421 | if (bc && cpumask_empty(tick_broadcast_mask)) |
@@ -475,7 +542,15 @@ void tick_check_oneshot_broadcast(int cpu) | |||
475 | if (cpumask_test_cpu(cpu, tick_broadcast_oneshot_mask)) { | 542 | if (cpumask_test_cpu(cpu, tick_broadcast_oneshot_mask)) { |
476 | struct tick_device *td = &per_cpu(tick_cpu_device, cpu); | 543 | struct tick_device *td = &per_cpu(tick_cpu_device, cpu); |
477 | 544 | ||
478 | clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_ONESHOT); | 545 | /* |
546 | * We might be in the middle of switching over from | ||
547 | * periodic to oneshot. If the CPU has not yet | ||
548 | * switched over, leave the device alone. | ||
549 | */ | ||
550 | if (td->mode == TICKDEV_MODE_ONESHOT) { | ||
551 | clockevents_set_mode(td->evtdev, | ||
552 | CLOCK_EVT_MODE_ONESHOT); | ||
553 | } | ||
479 | } | 554 | } |
480 | } | 555 | } |
481 | 556 | ||
@@ -522,6 +597,13 @@ again: | |||
522 | cpumask_clear(tick_broadcast_force_mask); | 597 | cpumask_clear(tick_broadcast_force_mask); |
523 | 598 | ||
524 | /* | 599 | /* |
600 | * Sanity check. Catch the case where we try to broadcast to | ||
601 | * offline cpus. | ||
602 | */ | ||
603 | if (WARN_ON_ONCE(!cpumask_subset(tmpmask, cpu_online_mask))) | ||
604 | cpumask_and(tmpmask, tmpmask, cpu_online_mask); | ||
605 | |||
606 | /* | ||
525 | * Wakeup the cpus which have an expired event. | 607 | * Wakeup the cpus which have an expired event. |
526 | */ | 608 | */ |
527 | tick_do_broadcast(tmpmask); | 609 | tick_do_broadcast(tmpmask); |
@@ -761,10 +843,12 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup) | |||
761 | raw_spin_lock_irqsave(&tick_broadcast_lock, flags); | 843 | raw_spin_lock_irqsave(&tick_broadcast_lock, flags); |
762 | 844 | ||
763 | /* | 845 | /* |
764 | * Clear the broadcast mask flag for the dead cpu, but do not | 846 | * Clear the broadcast masks for the dead cpu, but do not stop |
765 | * stop the broadcast device! | 847 | * the broadcast device! |
766 | */ | 848 | */ |
767 | cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask); | 849 | cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask); |
850 | cpumask_clear_cpu(cpu, tick_broadcast_pending_mask); | ||
851 | cpumask_clear_cpu(cpu, tick_broadcast_force_mask); | ||
768 | 852 | ||
769 | raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); | 853 | raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); |
770 | } | 854 | } |
@@ -792,6 +876,7 @@ bool tick_broadcast_oneshot_available(void) | |||
792 | void __init tick_broadcast_init(void) | 876 | void __init tick_broadcast_init(void) |
793 | { | 877 | { |
794 | zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT); | 878 | zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT); |
879 | zalloc_cpumask_var(&tick_broadcast_on, GFP_NOWAIT); | ||
795 | zalloc_cpumask_var(&tmpmask, GFP_NOWAIT); | 880 | zalloc_cpumask_var(&tmpmask, GFP_NOWAIT); |
796 | #ifdef CONFIG_TICK_ONESHOT | 881 | #ifdef CONFIG_TICK_ONESHOT |
797 | zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT); | 882 | zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT); |
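The broadcast rework above keeps the on/off state in the new tick_broadcast_on mask and still drives the oneshot path through clockevents_notify(). The usual consumer is a cpuidle driver that hands its CPU over to the broadcast device around a C-state in which the local timer stops; a sketch of that pattern (the idle entry function is hypothetical):

#include <linux/clockchips.h>
#include <linux/smp.h>

static void demo_enter_deep_idle(void)
{
	/* Runs on the idle path with preemption already disabled. */
	int cpu = smp_processor_id();

	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);

	/* ... enter a C-state in which the per-cpu tick device stops ... */

	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
}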
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 5d3fb100bc06..64522ecdfe0e 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/percpu.h> | 18 | #include <linux/percpu.h> |
19 | #include <linux/profile.h> | 19 | #include <linux/profile.h> |
20 | #include <linux/sched.h> | 20 | #include <linux/sched.h> |
21 | #include <linux/module.h> | ||
21 | 22 | ||
22 | #include <asm/irq_regs.h> | 23 | #include <asm/irq_regs.h> |
23 | 24 | ||
@@ -33,7 +34,6 @@ DEFINE_PER_CPU(struct tick_device, tick_cpu_device); | |||
33 | ktime_t tick_next_period; | 34 | ktime_t tick_next_period; |
34 | ktime_t tick_period; | 35 | ktime_t tick_period; |
35 | int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT; | 36 | int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT; |
36 | static DEFINE_RAW_SPINLOCK(tick_device_lock); | ||
37 | 37 | ||
38 | /* | 38 | /* |
39 | * Debugging: see timer_list.c | 39 | * Debugging: see timer_list.c |
@@ -194,7 +194,8 @@ static void tick_setup_device(struct tick_device *td, | |||
194 | * When global broadcasting is active, check if the current | 194 | * When global broadcasting is active, check if the current |
195 | * device is registered as a placeholder for broadcast mode. | 195 | * device is registered as a placeholder for broadcast mode. |
196 | * This allows us to handle this x86 misfeature in a generic | 196 | * This allows us to handle this x86 misfeature in a generic |
197 | * way. | 197 | * way. This function also returns !=0 when we keep the |
198 | * current active broadcast state for this CPU. | ||
198 | */ | 199 | */ |
199 | if (tick_device_uses_broadcast(newdev, cpu)) | 200 | if (tick_device_uses_broadcast(newdev, cpu)) |
200 | return; | 201 | return; |
@@ -205,17 +206,75 @@ static void tick_setup_device(struct tick_device *td, | |||
205 | tick_setup_oneshot(newdev, handler, next_event); | 206 | tick_setup_oneshot(newdev, handler, next_event); |
206 | } | 207 | } |
207 | 208 | ||
209 | void tick_install_replacement(struct clock_event_device *newdev) | ||
210 | { | ||
211 | struct tick_device *td = &__get_cpu_var(tick_cpu_device); | ||
212 | int cpu = smp_processor_id(); | ||
213 | |||
214 | clockevents_exchange_device(td->evtdev, newdev); | ||
215 | tick_setup_device(td, newdev, cpu, cpumask_of(cpu)); | ||
216 | if (newdev->features & CLOCK_EVT_FEAT_ONESHOT) | ||
217 | tick_oneshot_notify(); | ||
218 | } | ||
219 | |||
220 | static bool tick_check_percpu(struct clock_event_device *curdev, | ||
221 | struct clock_event_device *newdev, int cpu) | ||
222 | { | ||
223 | if (!cpumask_test_cpu(cpu, newdev->cpumask)) | ||
224 | return false; | ||
225 | if (cpumask_equal(newdev->cpumask, cpumask_of(cpu))) | ||
226 | return true; | ||
227 | /* Check if irq affinity can be set */ | ||
228 | if (newdev->irq >= 0 && !irq_can_set_affinity(newdev->irq)) | ||
229 | return false; | ||
230 | /* Prefer an existing cpu local device */ | ||
231 | if (curdev && cpumask_equal(curdev->cpumask, cpumask_of(cpu))) | ||
232 | return false; | ||
233 | return true; | ||
234 | } | ||
235 | |||
236 | static bool tick_check_preferred(struct clock_event_device *curdev, | ||
237 | struct clock_event_device *newdev) | ||
238 | { | ||
239 | /* Prefer oneshot capable device */ | ||
240 | if (!(newdev->features & CLOCK_EVT_FEAT_ONESHOT)) { | ||
241 | if (curdev && (curdev->features & CLOCK_EVT_FEAT_ONESHOT)) | ||
242 | return false; | ||
243 | if (tick_oneshot_mode_active()) | ||
244 | return false; | ||
245 | } | ||
246 | |||
247 | /* | ||
248 | * Use the higher rated one, but prefer a CPU local device with a lower | ||
249 | * rating than a non-CPU local device | ||
250 | */ | ||
251 | return !curdev || | ||
252 | newdev->rating > curdev->rating || | ||
253 | !cpumask_equal(curdev->cpumask, newdev->cpumask); | ||
254 | } | ||
255 | |||
256 | /* | ||
257 | * Check whether the new device is a better fit than curdev. curdev | ||
258 | * can be NULL ! | ||
259 | */ | ||
260 | bool tick_check_replacement(struct clock_event_device *curdev, | ||
261 | struct clock_event_device *newdev) | ||
262 | { | ||
263 | if (tick_check_percpu(curdev, newdev, smp_processor_id())) | ||
264 | return false; | ||
265 | |||
266 | return tick_check_preferred(curdev, newdev); | ||
267 | } | ||
268 | |||
208 | /* | 269 | /* |
209 | * Check, if the new registered device should be used. | 270 | * Check, if the new registered device should be used. Called with |
271 | * clockevents_lock held and interrupts disabled. | ||
210 | */ | 272 | */ |
211 | static int tick_check_new_device(struct clock_event_device *newdev) | 273 | void tick_check_new_device(struct clock_event_device *newdev) |
212 | { | 274 | { |
213 | struct clock_event_device *curdev; | 275 | struct clock_event_device *curdev; |
214 | struct tick_device *td; | 276 | struct tick_device *td; |
215 | int cpu, ret = NOTIFY_OK; | 277 | int cpu; |
216 | unsigned long flags; | ||
217 | |||
218 | raw_spin_lock_irqsave(&tick_device_lock, flags); | ||
219 | 278 | ||
220 | cpu = smp_processor_id(); | 279 | cpu = smp_processor_id(); |
221 | if (!cpumask_test_cpu(cpu, newdev->cpumask)) | 280 | if (!cpumask_test_cpu(cpu, newdev->cpumask)) |
@@ -225,40 +284,15 @@ static int tick_check_new_device(struct clock_event_device *newdev) | |||
225 | curdev = td->evtdev; | 284 | curdev = td->evtdev; |
226 | 285 | ||
227 | /* cpu local device ? */ | 286 | /* cpu local device ? */ |
228 | if (!cpumask_equal(newdev->cpumask, cpumask_of(cpu))) { | 287 | if (!tick_check_percpu(curdev, newdev, cpu)) |
229 | 288 | goto out_bc; | |
230 | /* | ||
231 | * If the cpu affinity of the device interrupt can not | ||
232 | * be set, ignore it. | ||
233 | */ | ||
234 | if (!irq_can_set_affinity(newdev->irq)) | ||
235 | goto out_bc; | ||
236 | 289 | ||
237 | /* | 290 | /* Preference decision */ |
238 | * If we have a cpu local device already, do not replace it | 291 | if (!tick_check_preferred(curdev, newdev)) |
239 | * by a non cpu local device | 292 | goto out_bc; |
240 | */ | ||
241 | if (curdev && cpumask_equal(curdev->cpumask, cpumask_of(cpu))) | ||
242 | goto out_bc; | ||
243 | } | ||
244 | 293 | ||
245 | /* | 294 | if (!try_module_get(newdev->owner)) |
246 | * If we have an active device, then check the rating and the oneshot | 295 | return; |
247 | * feature. | ||
248 | */ | ||
249 | if (curdev) { | ||
250 | /* | ||
251 | * Prefer one shot capable devices ! | ||
252 | */ | ||
253 | if ((curdev->features & CLOCK_EVT_FEAT_ONESHOT) && | ||
254 | !(newdev->features & CLOCK_EVT_FEAT_ONESHOT)) | ||
255 | goto out_bc; | ||
256 | /* | ||
257 | * Check the rating | ||
258 | */ | ||
259 | if (curdev->rating >= newdev->rating) | ||
260 | goto out_bc; | ||
261 | } | ||
262 | 296 | ||
263 | /* | 297 | /* |
264 | * Replace the eventually existing device by the new | 298 | * Replace the eventually existing device by the new |
@@ -273,20 +307,13 @@ static int tick_check_new_device(struct clock_event_device *newdev) | |||
273 | tick_setup_device(td, newdev, cpu, cpumask_of(cpu)); | 307 | tick_setup_device(td, newdev, cpu, cpumask_of(cpu)); |
274 | if (newdev->features & CLOCK_EVT_FEAT_ONESHOT) | 308 | if (newdev->features & CLOCK_EVT_FEAT_ONESHOT) |
275 | tick_oneshot_notify(); | 309 | tick_oneshot_notify(); |
276 | 310 | return; | |
277 | raw_spin_unlock_irqrestore(&tick_device_lock, flags); | ||
278 | return NOTIFY_STOP; | ||
279 | 311 | ||
280 | out_bc: | 312 | out_bc: |
281 | /* | 313 | /* |
282 | * Can the new device be used as a broadcast device ? | 314 | * Can the new device be used as a broadcast device ? |
283 | */ | 315 | */ |
284 | if (tick_check_broadcast_device(newdev)) | 316 | tick_install_broadcast_device(newdev); |
285 | ret = NOTIFY_STOP; | ||
286 | |||
287 | raw_spin_unlock_irqrestore(&tick_device_lock, flags); | ||
288 | |||
289 | return ret; | ||
290 | } | 317 | } |
291 | 318 | ||
292 | /* | 319 | /* |
@@ -294,7 +321,7 @@ out_bc: | |||
294 | * | 321 | * |
295 | * Called with interrupts disabled. | 322 | * Called with interrupts disabled. |
296 | */ | 323 | */ |
297 | static void tick_handover_do_timer(int *cpup) | 324 | void tick_handover_do_timer(int *cpup) |
298 | { | 325 | { |
299 | if (*cpup == tick_do_timer_cpu) { | 326 | if (*cpup == tick_do_timer_cpu) { |
300 | int cpu = cpumask_first(cpu_online_mask); | 327 | int cpu = cpumask_first(cpu_online_mask); |
@@ -311,13 +338,11 @@ static void tick_handover_do_timer(int *cpup) | |||
311 | * access the hardware device itself. | 338 | * access the hardware device itself. |
312 | * We just set the mode and remove it from the lists. | 339 | * We just set the mode and remove it from the lists. |
313 | */ | 340 | */ |
314 | static void tick_shutdown(unsigned int *cpup) | 341 | void tick_shutdown(unsigned int *cpup) |
315 | { | 342 | { |
316 | struct tick_device *td = &per_cpu(tick_cpu_device, *cpup); | 343 | struct tick_device *td = &per_cpu(tick_cpu_device, *cpup); |
317 | struct clock_event_device *dev = td->evtdev; | 344 | struct clock_event_device *dev = td->evtdev; |
318 | unsigned long flags; | ||
319 | 345 | ||
320 | raw_spin_lock_irqsave(&tick_device_lock, flags); | ||
321 | td->mode = TICKDEV_MODE_PERIODIC; | 346 | td->mode = TICKDEV_MODE_PERIODIC; |
322 | if (dev) { | 347 | if (dev) { |
323 | /* | 348 | /* |
@@ -329,26 +354,20 @@ static void tick_shutdown(unsigned int *cpup) | |||
329 | dev->event_handler = clockevents_handle_noop; | 354 | dev->event_handler = clockevents_handle_noop; |
330 | td->evtdev = NULL; | 355 | td->evtdev = NULL; |
331 | } | 356 | } |
332 | raw_spin_unlock_irqrestore(&tick_device_lock, flags); | ||
333 | } | 357 | } |
334 | 358 | ||
335 | static void tick_suspend(void) | 359 | void tick_suspend(void) |
336 | { | 360 | { |
337 | struct tick_device *td = &__get_cpu_var(tick_cpu_device); | 361 | struct tick_device *td = &__get_cpu_var(tick_cpu_device); |
338 | unsigned long flags; | ||
339 | 362 | ||
340 | raw_spin_lock_irqsave(&tick_device_lock, flags); | ||
341 | clockevents_shutdown(td->evtdev); | 363 | clockevents_shutdown(td->evtdev); |
342 | raw_spin_unlock_irqrestore(&tick_device_lock, flags); | ||
343 | } | 364 | } |
344 | 365 | ||
345 | static void tick_resume(void) | 366 | void tick_resume(void) |
346 | { | 367 | { |
347 | struct tick_device *td = &__get_cpu_var(tick_cpu_device); | 368 | struct tick_device *td = &__get_cpu_var(tick_cpu_device); |
348 | unsigned long flags; | ||
349 | int broadcast = tick_resume_broadcast(); | 369 | int broadcast = tick_resume_broadcast(); |
350 | 370 | ||
351 | raw_spin_lock_irqsave(&tick_device_lock, flags); | ||
352 | clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_RESUME); | 371 | clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_RESUME); |
353 | 372 | ||
354 | if (!broadcast) { | 373 | if (!broadcast) { |
@@ -357,68 +376,12 @@ static void tick_resume(void) | |||
357 | else | 376 | else |
358 | tick_resume_oneshot(); | 377 | tick_resume_oneshot(); |
359 | } | 378 | } |
360 | raw_spin_unlock_irqrestore(&tick_device_lock, flags); | ||
361 | } | 379 | } |
362 | 380 | ||
363 | /* | ||
364 | * Notification about clock event devices | ||
365 | */ | ||
366 | static int tick_notify(struct notifier_block *nb, unsigned long reason, | ||
367 | void *dev) | ||
368 | { | ||
369 | switch (reason) { | ||
370 | |||
371 | case CLOCK_EVT_NOTIFY_ADD: | ||
372 | return tick_check_new_device(dev); | ||
373 | |||
374 | case CLOCK_EVT_NOTIFY_BROADCAST_ON: | ||
375 | case CLOCK_EVT_NOTIFY_BROADCAST_OFF: | ||
376 | case CLOCK_EVT_NOTIFY_BROADCAST_FORCE: | ||
377 | tick_broadcast_on_off(reason, dev); | ||
378 | break; | ||
379 | |||
380 | case CLOCK_EVT_NOTIFY_BROADCAST_ENTER: | ||
381 | case CLOCK_EVT_NOTIFY_BROADCAST_EXIT: | ||
382 | tick_broadcast_oneshot_control(reason); | ||
383 | break; | ||
384 | |||
385 | case CLOCK_EVT_NOTIFY_CPU_DYING: | ||
386 | tick_handover_do_timer(dev); | ||
387 | break; | ||
388 | |||
389 | case CLOCK_EVT_NOTIFY_CPU_DEAD: | ||
390 | tick_shutdown_broadcast_oneshot(dev); | ||
391 | tick_shutdown_broadcast(dev); | ||
392 | tick_shutdown(dev); | ||
393 | break; | ||
394 | |||
395 | case CLOCK_EVT_NOTIFY_SUSPEND: | ||
396 | tick_suspend(); | ||
397 | tick_suspend_broadcast(); | ||
398 | break; | ||
399 | |||
400 | case CLOCK_EVT_NOTIFY_RESUME: | ||
401 | tick_resume(); | ||
402 | break; | ||
403 | |||
404 | default: | ||
405 | break; | ||
406 | } | ||
407 | |||
408 | return NOTIFY_OK; | ||
409 | } | ||
410 | |||
411 | static struct notifier_block tick_notifier = { | ||
412 | .notifier_call = tick_notify, | ||
413 | }; | ||
414 | |||
415 | /** | 381 | /** |
416 | * tick_init - initialize the tick control | 382 | * tick_init - initialize the tick control |
417 | * | ||
418 | * Register the notifier with the clockevents framework | ||
419 | */ | 383 | */ |
420 | void __init tick_init(void) | 384 | void __init tick_init(void) |
421 | { | 385 | { |
422 | clockevents_register_notifier(&tick_notifier); | ||
423 | tick_broadcast_init(); | 386 | tick_broadcast_init(); |
424 | } | 387 | } |
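The rewrite above splits the open-coded selection logic into tick_check_percpu() and tick_check_preferred(). As a rough illustration of the preference half only, here is a self-contained userspace sketch in C; the struct, the example ratings and main() are simplified stand-ins rather than the kernel's types, and the cpumask handling done by tick_check_percpu() is left out.

#include <stdbool.h>
#include <stdio.h>

#define FEAT_ONESHOT 0x1

/* simplified stand-in for struct clock_event_device */
struct dev {
	const char *name;
	int rating;
	unsigned int features;
};

/* mirrors the oneshot/rating rules of tick_check_preferred() */
static bool prefer_new(const struct dev *cur, const struct dev *new)
{
	/* never trade a oneshot capable device for a periodic-only one */
	if (!(new->features & FEAT_ONESHOT) &&
	    cur && (cur->features & FEAT_ONESHOT))
		return false;

	/* otherwise the higher rated device wins */
	return !cur || new->rating > cur->rating;
}

int main(void)
{
	/* illustrative ratings, roughly in the order x86 uses them */
	struct dev pit   = { "pit",   0,   0 };
	struct dev hpet  = { "hpet",  50,  FEAT_ONESHOT };
	struct dev lapic = { "lapic", 100, FEAT_ONESHOT };

	printf("lapic over hpet: %d\n", prefer_new(&hpet, &lapic));	/* 1 */
	printf("pit over lapic:  %d\n", prefer_new(&lapic, &pit));	/* 0 */
	return 0;
}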
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index f0299eae4602..bc906cad709b 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h | |||
@@ -6,6 +6,8 @@ | |||
6 | 6 | ||
7 | extern seqlock_t jiffies_lock; | 7 | extern seqlock_t jiffies_lock; |
8 | 8 | ||
9 | #define CS_NAME_LEN 32 | ||
10 | |||
9 | #ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD | 11 | #ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD |
10 | 12 | ||
11 | #define TICK_DO_TIMER_NONE -1 | 13 | #define TICK_DO_TIMER_NONE -1 |
@@ -18,9 +20,19 @@ extern int tick_do_timer_cpu __read_mostly; | |||
18 | 20 | ||
19 | extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast); | 21 | extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast); |
20 | extern void tick_handle_periodic(struct clock_event_device *dev); | 22 | extern void tick_handle_periodic(struct clock_event_device *dev); |
23 | extern void tick_check_new_device(struct clock_event_device *dev); | ||
24 | extern void tick_handover_do_timer(int *cpup); | ||
25 | extern void tick_shutdown(unsigned int *cpup); | ||
26 | extern void tick_suspend(void); | ||
27 | extern void tick_resume(void); | ||
28 | extern bool tick_check_replacement(struct clock_event_device *curdev, | ||
29 | struct clock_event_device *newdev); | ||
30 | extern void tick_install_replacement(struct clock_event_device *dev); | ||
21 | 31 | ||
22 | extern void clockevents_shutdown(struct clock_event_device *dev); | 32 | extern void clockevents_shutdown(struct clock_event_device *dev); |
23 | 33 | ||
34 | extern size_t sysfs_get_uname(const char *buf, char *dst, size_t cnt); | ||
35 | |||
24 | /* | 36 | /* |
25 | * NO_HZ / high resolution timer shared code | 37 | * NO_HZ / high resolution timer shared code |
26 | */ | 38 | */ |
@@ -90,7 +102,7 @@ static inline bool tick_broadcast_oneshot_available(void) { return false; } | |||
90 | */ | 102 | */ |
91 | #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST | 103 | #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST |
92 | extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu); | 104 | extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu); |
93 | extern int tick_check_broadcast_device(struct clock_event_device *dev); | 105 | extern void tick_install_broadcast_device(struct clock_event_device *dev); |
94 | extern int tick_is_broadcast_device(struct clock_event_device *dev); | 106 | extern int tick_is_broadcast_device(struct clock_event_device *dev); |
95 | extern void tick_broadcast_on_off(unsigned long reason, int *oncpu); | 107 | extern void tick_broadcast_on_off(unsigned long reason, int *oncpu); |
96 | extern void tick_shutdown_broadcast(unsigned int *cpup); | 108 | extern void tick_shutdown_broadcast(unsigned int *cpup); |
@@ -102,9 +114,8 @@ tick_set_periodic_handler(struct clock_event_device *dev, int broadcast); | |||
102 | 114 | ||
103 | #else /* !BROADCAST */ | 115 | #else /* !BROADCAST */ |
104 | 116 | ||
105 | static inline int tick_check_broadcast_device(struct clock_event_device *dev) | 117 | static inline void tick_install_broadcast_device(struct clock_event_device *dev) |
106 | { | 118 | { |
107 | return 0; | ||
108 | } | 119 | } |
109 | 120 | ||
110 | static inline int tick_is_broadcast_device(struct clock_event_device *dev) | 121 | static inline int tick_is_broadcast_device(struct clock_event_device *dev) |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 0cf1c1453181..e80183f4a6c4 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
@@ -178,6 +178,11 @@ static bool can_stop_full_tick(void) | |||
178 | */ | 178 | */ |
179 | if (!sched_clock_stable) { | 179 | if (!sched_clock_stable) { |
180 | trace_tick_stop(0, "unstable sched clock\n"); | 180 | trace_tick_stop(0, "unstable sched clock\n"); |
181 | /* | ||
182 | * Don't allow the user to think they can get | ||
183 | * full NO_HZ with this machine. | ||
184 | */ | ||
185 | WARN_ONCE(1, "NO_HZ FULL will not work with unstable sched clock"); | ||
181 | return false; | 186 | return false; |
182 | } | 187 | } |
183 | #endif | 188 | #endif |
@@ -293,7 +298,7 @@ static int __init tick_nohz_full_setup(char *str) | |||
293 | } | 298 | } |
294 | __setup("nohz_full=", tick_nohz_full_setup); | 299 | __setup("nohz_full=", tick_nohz_full_setup); |
295 | 300 | ||
296 | static int __cpuinit tick_nohz_cpu_down_callback(struct notifier_block *nfb, | 301 | static int tick_nohz_cpu_down_callback(struct notifier_block *nfb, |
297 | unsigned long action, | 302 | unsigned long action, |
298 | void *hcpu) | 303 | void *hcpu) |
299 | { | 304 | { |
@@ -346,16 +351,6 @@ void __init tick_nohz_init(void) | |||
346 | } | 351 | } |
347 | 352 | ||
348 | cpu_notifier(tick_nohz_cpu_down_callback, 0); | 353 | cpu_notifier(tick_nohz_cpu_down_callback, 0); |
349 | |||
350 | /* Make sure full dynticks CPU are also RCU nocbs */ | ||
351 | for_each_cpu(cpu, nohz_full_mask) { | ||
352 | if (!rcu_is_nocb_cpu(cpu)) { | ||
353 | pr_warning("NO_HZ: CPU %d is not RCU nocb: " | ||
354 | "cleared from nohz_full range", cpu); | ||
355 | cpumask_clear_cpu(cpu, nohz_full_mask); | ||
356 | } | ||
357 | } | ||
358 | |||
359 | cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask); | 354 | cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask); |
360 | pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf); | 355 | pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf); |
361 | } | 356 | } |
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index baeeb5c87cf1..48b9fffabdc2 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
@@ -25,6 +25,11 @@ | |||
25 | 25 | ||
26 | #include "tick-internal.h" | 26 | #include "tick-internal.h" |
27 | #include "ntp_internal.h" | 27 | #include "ntp_internal.h" |
28 | #include "timekeeping_internal.h" | ||
29 | |||
30 | #define TK_CLEAR_NTP (1 << 0) | ||
31 | #define TK_MIRROR (1 << 1) | ||
32 | #define TK_CLOCK_WAS_SET (1 << 2) | ||
28 | 33 | ||
29 | static struct timekeeper timekeeper; | 34 | static struct timekeeper timekeeper; |
30 | static DEFINE_RAW_SPINLOCK(timekeeper_lock); | 35 | static DEFINE_RAW_SPINLOCK(timekeeper_lock); |
@@ -200,9 +205,9 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) | |||
200 | 205 | ||
201 | static RAW_NOTIFIER_HEAD(pvclock_gtod_chain); | 206 | static RAW_NOTIFIER_HEAD(pvclock_gtod_chain); |
202 | 207 | ||
203 | static void update_pvclock_gtod(struct timekeeper *tk) | 208 | static void update_pvclock_gtod(struct timekeeper *tk, bool was_set) |
204 | { | 209 | { |
205 | raw_notifier_call_chain(&pvclock_gtod_chain, 0, tk); | 210 | raw_notifier_call_chain(&pvclock_gtod_chain, was_set, tk); |
206 | } | 211 | } |
207 | 212 | ||
208 | /** | 213 | /** |
@@ -216,7 +221,7 @@ int pvclock_gtod_register_notifier(struct notifier_block *nb) | |||
216 | 221 | ||
217 | raw_spin_lock_irqsave(&timekeeper_lock, flags); | 222 | raw_spin_lock_irqsave(&timekeeper_lock, flags); |
218 | ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb); | 223 | ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb); |
219 | update_pvclock_gtod(tk); | 224 | update_pvclock_gtod(tk, true); |
220 | raw_spin_unlock_irqrestore(&timekeeper_lock, flags); | 225 | raw_spin_unlock_irqrestore(&timekeeper_lock, flags); |
221 | 226 | ||
222 | return ret; | 227 | return ret; |
@@ -241,16 +246,16 @@ int pvclock_gtod_unregister_notifier(struct notifier_block *nb) | |||
241 | EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier); | 246 | EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier); |
242 | 247 | ||
243 | /* must hold timekeeper_lock */ | 248 | /* must hold timekeeper_lock */ |
244 | static void timekeeping_update(struct timekeeper *tk, bool clearntp, bool mirror) | 249 | static void timekeeping_update(struct timekeeper *tk, unsigned int action) |
245 | { | 250 | { |
246 | if (clearntp) { | 251 | if (action & TK_CLEAR_NTP) { |
247 | tk->ntp_error = 0; | 252 | tk->ntp_error = 0; |
248 | ntp_clear(); | 253 | ntp_clear(); |
249 | } | 254 | } |
250 | update_vsyscall(tk); | 255 | update_vsyscall(tk); |
251 | update_pvclock_gtod(tk); | 256 | update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET); |
252 | 257 | ||
253 | if (mirror) | 258 | if (action & TK_MIRROR) |
254 | memcpy(&shadow_timekeeper, &timekeeper, sizeof(timekeeper)); | 259 | memcpy(&shadow_timekeeper, &timekeeper, sizeof(timekeeper)); |
255 | } | 260 | } |
256 | 261 | ||
@@ -508,7 +513,7 @@ int do_settimeofday(const struct timespec *tv) | |||
508 | 513 | ||
509 | tk_set_xtime(tk, tv); | 514 | tk_set_xtime(tk, tv); |
510 | 515 | ||
511 | timekeeping_update(tk, true, true); | 516 | timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET); |
512 | 517 | ||
513 | write_seqcount_end(&timekeeper_seq); | 518 | write_seqcount_end(&timekeeper_seq); |
514 | raw_spin_unlock_irqrestore(&timekeeper_lock, flags); | 519 | raw_spin_unlock_irqrestore(&timekeeper_lock, flags); |
@@ -552,7 +557,7 @@ int timekeeping_inject_offset(struct timespec *ts) | |||
552 | tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *ts)); | 557 | tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *ts)); |
553 | 558 | ||
554 | error: /* even if we error out, we forwarded the time, so call update */ | 559 | error: /* even if we error out, we forwarded the time, so call update */ |
555 | timekeeping_update(tk, true, true); | 560 | timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET); |
556 | 561 | ||
557 | write_seqcount_end(&timekeeper_seq); | 562 | write_seqcount_end(&timekeeper_seq); |
558 | raw_spin_unlock_irqrestore(&timekeeper_lock, flags); | 563 | raw_spin_unlock_irqrestore(&timekeeper_lock, flags); |
@@ -627,13 +632,22 @@ static int change_clocksource(void *data) | |||
627 | write_seqcount_begin(&timekeeper_seq); | 632 | write_seqcount_begin(&timekeeper_seq); |
628 | 633 | ||
629 | timekeeping_forward_now(tk); | 634 | timekeeping_forward_now(tk); |
630 | if (!new->enable || new->enable(new) == 0) { | 635 | /* |
631 | old = tk->clock; | 636 | * If the cs is in module, get a module reference. Succeeds |
632 | tk_setup_internals(tk, new); | 637 | * for built-in code (owner == NULL) as well. |
633 | if (old->disable) | 638 | */ |
634 | old->disable(old); | 639 | if (try_module_get(new->owner)) { |
640 | if (!new->enable || new->enable(new) == 0) { | ||
641 | old = tk->clock; | ||
642 | tk_setup_internals(tk, new); | ||
643 | if (old->disable) | ||
644 | old->disable(old); | ||
645 | module_put(old->owner); | ||
646 | } else { | ||
647 | module_put(new->owner); | ||
648 | } | ||
635 | } | 649 | } |
636 | timekeeping_update(tk, true, true); | 650 | timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET); |
637 | 651 | ||
638 | write_seqcount_end(&timekeeper_seq); | 652 | write_seqcount_end(&timekeeper_seq); |
639 | raw_spin_unlock_irqrestore(&timekeeper_lock, flags); | 653 | raw_spin_unlock_irqrestore(&timekeeper_lock, flags); |
@@ -648,14 +662,15 @@ static int change_clocksource(void *data) | |||
648 | * This function is called from clocksource.c after a new, better clock | 662 | * This function is called from clocksource.c after a new, better clock |
649 | * source has been registered. The caller holds the clocksource_mutex. | 663 | * source has been registered. The caller holds the clocksource_mutex. |
650 | */ | 664 | */ |
651 | void timekeeping_notify(struct clocksource *clock) | 665 | int timekeeping_notify(struct clocksource *clock) |
652 | { | 666 | { |
653 | struct timekeeper *tk = &timekeeper; | 667 | struct timekeeper *tk = &timekeeper; |
654 | 668 | ||
655 | if (tk->clock == clock) | 669 | if (tk->clock == clock) |
656 | return; | 670 | return 0; |
657 | stop_machine(change_clocksource, clock, NULL); | 671 | stop_machine(change_clocksource, clock, NULL); |
658 | tick_clock_notify(); | 672 | tick_clock_notify(); |
673 | return tk->clock == clock ? 0 : -1; | ||
659 | } | 674 | } |
660 | 675 | ||
661 | /** | 676 | /** |
@@ -841,6 +856,7 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk, | |||
841 | tk_xtime_add(tk, delta); | 856 | tk_xtime_add(tk, delta); |
842 | tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *delta)); | 857 | tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *delta)); |
843 | tk_set_sleep_time(tk, timespec_add(tk->total_sleep_time, *delta)); | 858 | tk_set_sleep_time(tk, timespec_add(tk->total_sleep_time, *delta)); |
859 | tk_debug_account_sleep_time(delta); | ||
844 | } | 860 | } |
845 | 861 | ||
846 | /** | 862 | /** |
@@ -872,7 +888,7 @@ void timekeeping_inject_sleeptime(struct timespec *delta) | |||
872 | 888 | ||
873 | __timekeeping_inject_sleeptime(tk, delta); | 889 | __timekeeping_inject_sleeptime(tk, delta); |
874 | 890 | ||
875 | timekeeping_update(tk, true, true); | 891 | timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET); |
876 | 892 | ||
877 | write_seqcount_end(&timekeeper_seq); | 893 | write_seqcount_end(&timekeeper_seq); |
878 | raw_spin_unlock_irqrestore(&timekeeper_lock, flags); | 894 | raw_spin_unlock_irqrestore(&timekeeper_lock, flags); |
@@ -954,7 +970,7 @@ static void timekeeping_resume(void) | |||
954 | tk->cycle_last = clock->cycle_last = cycle_now; | 970 | tk->cycle_last = clock->cycle_last = cycle_now; |
955 | tk->ntp_error = 0; | 971 | tk->ntp_error = 0; |
956 | timekeeping_suspended = 0; | 972 | timekeeping_suspended = 0; |
957 | timekeeping_update(tk, false, true); | 973 | timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); |
958 | write_seqcount_end(&timekeeper_seq); | 974 | write_seqcount_end(&timekeeper_seq); |
959 | raw_spin_unlock_irqrestore(&timekeeper_lock, flags); | 975 | raw_spin_unlock_irqrestore(&timekeeper_lock, flags); |
960 | 976 | ||
@@ -1236,9 +1252,10 @@ out_adjust: | |||
1236 | * It also calls into the NTP code to handle leapsecond processing. | 1252 | * It also calls into the NTP code to handle leapsecond processing. |
1237 | * | 1253 | * |
1238 | */ | 1254 | */ |
1239 | static inline void accumulate_nsecs_to_secs(struct timekeeper *tk) | 1255 | static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk) |
1240 | { | 1256 | { |
1241 | u64 nsecps = (u64)NSEC_PER_SEC << tk->shift; | 1257 | u64 nsecps = (u64)NSEC_PER_SEC << tk->shift; |
1258 | unsigned int action = 0; | ||
1242 | 1259 | ||
1243 | while (tk->xtime_nsec >= nsecps) { | 1260 | while (tk->xtime_nsec >= nsecps) { |
1244 | int leap; | 1261 | int leap; |
@@ -1261,8 +1278,10 @@ static inline void accumulate_nsecs_to_secs(struct timekeeper *tk) | |||
1261 | __timekeeping_set_tai_offset(tk, tk->tai_offset - leap); | 1278 | __timekeeping_set_tai_offset(tk, tk->tai_offset - leap); |
1262 | 1279 | ||
1263 | clock_was_set_delayed(); | 1280 | clock_was_set_delayed(); |
1281 | action = TK_CLOCK_WAS_SET; | ||
1264 | } | 1282 | } |
1265 | } | 1283 | } |
1284 | return action; | ||
1266 | } | 1285 | } |
1267 | 1286 | ||
1268 | /** | 1287 | /** |
@@ -1347,6 +1366,7 @@ static void update_wall_time(void) | |||
1347 | struct timekeeper *tk = &shadow_timekeeper; | 1366 | struct timekeeper *tk = &shadow_timekeeper; |
1348 | cycle_t offset; | 1367 | cycle_t offset; |
1349 | int shift = 0, maxshift; | 1368 | int shift = 0, maxshift; |
1369 | unsigned int action; | ||
1350 | unsigned long flags; | 1370 | unsigned long flags; |
1351 | 1371 | ||
1352 | raw_spin_lock_irqsave(&timekeeper_lock, flags); | 1372 | raw_spin_lock_irqsave(&timekeeper_lock, flags); |
@@ -1399,7 +1419,7 @@ static void update_wall_time(void) | |||
1399 | * Finally, make sure that after the rounding | 1419 | * Finally, make sure that after the rounding |
1400 | * xtime_nsec isn't larger than NSEC_PER_SEC | 1420 | * xtime_nsec isn't larger than NSEC_PER_SEC |
1401 | */ | 1421 | */ |
1402 | accumulate_nsecs_to_secs(tk); | 1422 | action = accumulate_nsecs_to_secs(tk); |
1403 | 1423 | ||
1404 | write_seqcount_begin(&timekeeper_seq); | 1424 | write_seqcount_begin(&timekeeper_seq); |
1405 | /* Update clock->cycle_last with the new value */ | 1425 | /* Update clock->cycle_last with the new value */ |
@@ -1415,7 +1435,7 @@ static void update_wall_time(void) | |||
1415 | * updating. | 1435 | * updating. |
1416 | */ | 1436 | */ |
1417 | memcpy(real_tk, tk, sizeof(*tk)); | 1437 | memcpy(real_tk, tk, sizeof(*tk)); |
1418 | timekeeping_update(real_tk, false, false); | 1438 | timekeeping_update(real_tk, action); |
1419 | write_seqcount_end(&timekeeper_seq); | 1439 | write_seqcount_end(&timekeeper_seq); |
1420 | out: | 1440 | out: |
1421 | raw_spin_unlock_irqrestore(&timekeeper_lock, flags); | 1441 | raw_spin_unlock_irqrestore(&timekeeper_lock, flags); |
@@ -1677,6 +1697,7 @@ int do_adjtimex(struct timex *txc) | |||
1677 | 1697 | ||
1678 | if (tai != orig_tai) { | 1698 | if (tai != orig_tai) { |
1679 | __timekeeping_set_tai_offset(tk, tai); | 1699 | __timekeeping_set_tai_offset(tk, tai); |
1700 | update_pvclock_gtod(tk, true); | ||
1680 | clock_was_set_delayed(); | 1701 | clock_was_set_delayed(); |
1681 | } | 1702 | } |
1682 | write_seqcount_end(&timekeeper_seq); | 1703 | write_seqcount_end(&timekeeper_seq); |
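timekeeping_update() now takes a single action bitmask instead of two booleans. A minimal userspace sketch of the same pattern, with the TK_* values copied from the hunk above and printf() placeholders standing in for the real update work:

#include <stdio.h>

#define TK_CLEAR_NTP		(1 << 0)
#define TK_MIRROR		(1 << 1)
#define TK_CLOCK_WAS_SET	(1 << 2)

static void timekeeping_update(unsigned int action)
{
	if (action & TK_CLEAR_NTP)
		printf("clear NTP state\n");
	printf("notify listeners, clock_was_set=%d\n",
	       !!(action & TK_CLOCK_WAS_SET));
	if (action & TK_MIRROR)
		printf("mirror into shadow timekeeper\n");
}

int main(void)
{
	/* do_settimeofday(): everything at once */
	timekeeping_update(TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
	/* timekeeping_resume(): no NTP reset */
	timekeeping_update(TK_MIRROR | TK_CLOCK_WAS_SET);
	return 0;
}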
diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c new file mode 100644 index 000000000000..802433a4f5eb --- /dev/null +++ b/kernel/time/timekeeping_debug.c | |||
@@ -0,0 +1,72 @@ | |||
1 | /* | ||
2 | * debugfs file to track time spent in suspend | ||
3 | * | ||
4 | * Copyright (c) 2011, Google, Inc. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
14 | * more details. | ||
15 | */ | ||
16 | |||
17 | #include <linux/debugfs.h> | ||
18 | #include <linux/err.h> | ||
19 | #include <linux/init.h> | ||
20 | #include <linux/kernel.h> | ||
21 | #include <linux/seq_file.h> | ||
22 | #include <linux/time.h> | ||
23 | |||
24 | static unsigned int sleep_time_bin[32] = {0}; | ||
25 | |||
26 | static int tk_debug_show_sleep_time(struct seq_file *s, void *data) | ||
27 | { | ||
28 | unsigned int bin; | ||
29 | seq_puts(s, " time (secs) count\n"); | ||
30 | seq_puts(s, "------------------------------\n"); | ||
31 | for (bin = 0; bin < 32; bin++) { | ||
32 | if (sleep_time_bin[bin] == 0) | ||
33 | continue; | ||
34 | seq_printf(s, "%10u - %-10u %4u\n", | ||
35 | bin ? 1 << (bin - 1) : 0, 1 << bin, | ||
36 | sleep_time_bin[bin]); | ||
37 | } | ||
38 | return 0; | ||
39 | } | ||
40 | |||
41 | static int tk_debug_sleep_time_open(struct inode *inode, struct file *file) | ||
42 | { | ||
43 | return single_open(file, tk_debug_show_sleep_time, NULL); | ||
44 | } | ||
45 | |||
46 | static const struct file_operations tk_debug_sleep_time_fops = { | ||
47 | .open = tk_debug_sleep_time_open, | ||
48 | .read = seq_read, | ||
49 | .llseek = seq_lseek, | ||
50 | .release = single_release, | ||
51 | }; | ||
52 | |||
53 | static int __init tk_debug_sleep_time_init(void) | ||
54 | { | ||
55 | struct dentry *d; | ||
56 | |||
57 | d = debugfs_create_file("sleep_time", 0444, NULL, NULL, | ||
58 | &tk_debug_sleep_time_fops); | ||
59 | if (!d) { | ||
60 | pr_err("Failed to create sleep_time debug file\n"); | ||
61 | return -ENOMEM; | ||
62 | } | ||
63 | |||
64 | return 0; | ||
65 | } | ||
66 | late_initcall(tk_debug_sleep_time_init); | ||
67 | |||
68 | void tk_debug_account_sleep_time(struct timespec *t) | ||
69 | { | ||
70 | sleep_time_bin[fls(t->tv_sec)]++; | ||
71 | } | ||
72 | |||
diff --git a/kernel/time/timekeeping_internal.h b/kernel/time/timekeeping_internal.h new file mode 100644 index 000000000000..13323ea08ffa --- /dev/null +++ b/kernel/time/timekeeping_internal.h | |||
@@ -0,0 +1,14 @@ | |||
1 | #ifndef _TIMEKEEPING_INTERNAL_H | ||
2 | #define _TIMEKEEPING_INTERNAL_H | ||
3 | /* | ||
4 | * timekeeping debug functions | ||
5 | */ | ||
6 | #include <linux/time.h> | ||
7 | |||
8 | #ifdef CONFIG_DEBUG_FS | ||
9 | extern void tk_debug_account_sleep_time(struct timespec *t); | ||
10 | #else | ||
11 | #define tk_debug_account_sleep_time(x) | ||
12 | #endif | ||
13 | |||
14 | #endif /* _TIMEKEEPING_INTERNAL_H */ | ||
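When CONFIG_DEBUG_FS is off, the header above compiles tk_debug_account_sleep_time() down to an empty macro so callers need no #ifdefs. The same pattern in a self-contained form; the names and the MY_DEBUG switch are illustrative, and a do { } while (0) body is used here for statement safety where the kernel stub simply expands to nothing:

#include <stdio.h>

/* toggle to mimic CONFIG_DEBUG_FS */
#define MY_DEBUG 1

#if MY_DEBUG
static void debug_account(int v)
{
	printf("accounted %d\n", v);
}
#else
/* compiles to nothing when the feature is off, callers stay unchanged */
#define debug_account(v) do { } while (0)
#endif

int main(void)
{
	debug_account(42);
	return 0;
}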
diff --git a/kernel/timer.c b/kernel/timer.c index 15ffdb3f1948..4296d13db3d1 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -149,9 +149,11 @@ static unsigned long round_jiffies_common(unsigned long j, int cpu, | |||
149 | /* now that we have rounded, subtract the extra skew again */ | 149 | /* now that we have rounded, subtract the extra skew again */ |
150 | j -= cpu * 3; | 150 | j -= cpu * 3; |
151 | 151 | ||
152 | if (j <= jiffies) /* rounding ate our timeout entirely; */ | 152 | /* |
153 | return original; | 153 | * Make sure j is still in the future. Otherwise return the |
154 | return j; | 154 | * unmodified value. |
155 | */ | ||
156 | return time_is_after_jiffies(j) ? j : original; | ||
155 | } | 157 | } |
156 | 158 | ||
157 | /** | 159 | /** |
@@ -1503,11 +1505,11 @@ signed long __sched schedule_timeout_uninterruptible(signed long timeout) | |||
1503 | } | 1505 | } |
1504 | EXPORT_SYMBOL(schedule_timeout_uninterruptible); | 1506 | EXPORT_SYMBOL(schedule_timeout_uninterruptible); |
1505 | 1507 | ||
1506 | static int __cpuinit init_timers_cpu(int cpu) | 1508 | static int init_timers_cpu(int cpu) |
1507 | { | 1509 | { |
1508 | int j; | 1510 | int j; |
1509 | struct tvec_base *base; | 1511 | struct tvec_base *base; |
1510 | static char __cpuinitdata tvec_base_done[NR_CPUS]; | 1512 | static char tvec_base_done[NR_CPUS]; |
1511 | 1513 | ||
1512 | if (!tvec_base_done[cpu]) { | 1514 | if (!tvec_base_done[cpu]) { |
1513 | static char boot_done; | 1515 | static char boot_done; |
@@ -1575,7 +1577,7 @@ static void migrate_timer_list(struct tvec_base *new_base, struct list_head *hea | |||
1575 | } | 1577 | } |
1576 | } | 1578 | } |
1577 | 1579 | ||
1578 | static void __cpuinit migrate_timers(int cpu) | 1580 | static void migrate_timers(int cpu) |
1579 | { | 1581 | { |
1580 | struct tvec_base *old_base; | 1582 | struct tvec_base *old_base; |
1581 | struct tvec_base *new_base; | 1583 | struct tvec_base *new_base; |
@@ -1608,7 +1610,7 @@ static void __cpuinit migrate_timers(int cpu) | |||
1608 | } | 1610 | } |
1609 | #endif /* CONFIG_HOTPLUG_CPU */ | 1611 | #endif /* CONFIG_HOTPLUG_CPU */ |
1610 | 1612 | ||
1611 | static int __cpuinit timer_cpu_notify(struct notifier_block *self, | 1613 | static int timer_cpu_notify(struct notifier_block *self, |
1612 | unsigned long action, void *hcpu) | 1614 | unsigned long action, void *hcpu) |
1613 | { | 1615 | { |
1614 | long cpu = (long)hcpu; | 1616 | long cpu = (long)hcpu; |
@@ -1633,7 +1635,7 @@ static int __cpuinit timer_cpu_notify(struct notifier_block *self, | |||
1633 | return NOTIFY_OK; | 1635 | return NOTIFY_OK; |
1634 | } | 1636 | } |
1635 | 1637 | ||
1636 | static struct notifier_block __cpuinitdata timers_nb = { | 1638 | static struct notifier_block timers_nb = { |
1637 | .notifier_call = timer_cpu_notify, | 1639 | .notifier_call = timer_cpu_notify, |
1638 | }; | 1640 | }; |
1639 | 1641 | ||
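round_jiffies_common() now goes through time_is_after_jiffies() rather than a plain `j <= jiffies`, which keeps the check correct when jiffies wraps. A small userspace sketch of the signed-difference idiom behind time_after(); the 32-bit counter values are chosen only to force a wrap:

#include <stdio.h>

/* wrap-safe "a is later than b", the idiom behind time_after()/time_is_after_jiffies() */
static int time_after_u32(unsigned int a, unsigned int b)
{
	return (int)(b - a) < 0;
}

int main(void)
{
	unsigned int jiffies = 0xfffffff0u;	/* counter about to wrap */
	unsigned int timeout = jiffies + 0x20;	/* wraps to a small value */

	/* naive comparison gets it wrong after the wrap ... */
	printf("naive:     timeout > jiffies -> %d\n", timeout > jiffies);
	/* ... the wrap-safe form still sees the timeout as in the future */
	printf("wrap-safe: time_after(timeout, jiffies) -> %d\n",
	       time_after_u32(timeout, jiffies));
	return 0;
}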
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 6c508ff33c62..67708f46baae 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
@@ -413,6 +413,17 @@ static int __register_ftrace_function(struct ftrace_ops *ops) | |||
413 | return 0; | 413 | return 0; |
414 | } | 414 | } |
415 | 415 | ||
416 | static void ftrace_sync(struct work_struct *work) | ||
417 | { | ||
418 | /* | ||
419 | * This function is just a stub to implement a hard force | ||
420 | * of synchronize_sched(). This requires synchronizing | ||
421 | * tasks even in userspace and idle. | ||
422 | * | ||
423 | * Yes, function tracing is rude. | ||
424 | */ | ||
425 | } | ||
426 | |||
416 | static int __unregister_ftrace_function(struct ftrace_ops *ops) | 427 | static int __unregister_ftrace_function(struct ftrace_ops *ops) |
417 | { | 428 | { |
418 | int ret; | 429 | int ret; |
@@ -440,8 +451,12 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) | |||
440 | * so there'll be no new users. We must ensure | 451 | * so there'll be no new users. We must ensure |
441 | * all current users are done before we free | 452 | * all current users are done before we free |
442 | * the control data. | 453 | * the control data. |
454 | * Note synchronize_sched() is not enough, as we | ||
455 | * use preempt_disable() to do RCU, but the function | ||
456 | * tracer can be called where RCU is not active | ||
457 | * (before user_exit()). | ||
443 | */ | 458 | */ |
444 | synchronize_sched(); | 459 | schedule_on_each_cpu(ftrace_sync); |
445 | control_ops_free(ops); | 460 | control_ops_free(ops); |
446 | } | 461 | } |
447 | } else | 462 | } else |
@@ -456,9 +471,13 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) | |||
456 | /* | 471 | /* |
457 | * Dynamic ops may be freed, we must make sure that all | 472 | * Dynamic ops may be freed, we must make sure that all |
458 | * callers are done before leaving this function. | 473 | * callers are done before leaving this function. |
474 | * | ||
475 | * Again, normal synchronize_sched() is not good enough. | ||
476 | * We need to do a hard force of sched synchronization. | ||
459 | */ | 477 | */ |
460 | if (ops->flags & FTRACE_OPS_FL_DYNAMIC) | 478 | if (ops->flags & FTRACE_OPS_FL_DYNAMIC) |
461 | synchronize_sched(); | 479 | schedule_on_each_cpu(ftrace_sync); |
480 | |||
462 | 481 | ||
463 | return 0; | 482 | return 0; |
464 | } | 483 | } |
@@ -622,12 +641,18 @@ static int function_stat_show(struct seq_file *m, void *v) | |||
622 | if (rec->counter <= 1) | 641 | if (rec->counter <= 1) |
623 | stddev = 0; | 642 | stddev = 0; |
624 | else { | 643 | else { |
625 | stddev = rec->time_squared - rec->counter * avg * avg; | 644 | /* |
645 | * Apply Welford's method: | ||
646 | * s^2 = 1 / (n * (n-1)) * (n * \Sum (x_i)^2 - (\Sum x_i)^2) | ||
647 | */ | ||
648 | stddev = rec->counter * rec->time_squared - | ||
649 | rec->time * rec->time; | ||
650 | |||
626 | /* | 651 | /* |
627 | * Divide only 1000 for ns^2 -> us^2 conversion. | 652 | * Divide only 1000 for ns^2 -> us^2 conversion. |
628 | * trace_print_graph_duration will divide 1000 again. | 653 | * trace_print_graph_duration will divide 1000 again. |
629 | */ | 654 | */ |
630 | do_div(stddev, (rec->counter - 1) * 1000); | 655 | do_div(stddev, rec->counter * (rec->counter - 1) * 1000); |
631 | } | 656 | } |
632 | 657 | ||
633 | trace_seq_init(&s); | 658 | trace_seq_init(&s); |
@@ -3512,8 +3537,12 @@ EXPORT_SYMBOL_GPL(ftrace_set_global_notrace); | |||
3512 | static char ftrace_notrace_buf[FTRACE_FILTER_SIZE] __initdata; | 3537 | static char ftrace_notrace_buf[FTRACE_FILTER_SIZE] __initdata; |
3513 | static char ftrace_filter_buf[FTRACE_FILTER_SIZE] __initdata; | 3538 | static char ftrace_filter_buf[FTRACE_FILTER_SIZE] __initdata; |
3514 | 3539 | ||
3540 | /* Used by function selftest to not test if filter is set */ | ||
3541 | bool ftrace_filter_param __initdata; | ||
3542 | |||
3515 | static int __init set_ftrace_notrace(char *str) | 3543 | static int __init set_ftrace_notrace(char *str) |
3516 | { | 3544 | { |
3545 | ftrace_filter_param = true; | ||
3517 | strlcpy(ftrace_notrace_buf, str, FTRACE_FILTER_SIZE); | 3546 | strlcpy(ftrace_notrace_buf, str, FTRACE_FILTER_SIZE); |
3518 | return 1; | 3547 | return 1; |
3519 | } | 3548 | } |
@@ -3521,6 +3550,7 @@ __setup("ftrace_notrace=", set_ftrace_notrace); | |||
3521 | 3550 | ||
3522 | static int __init set_ftrace_filter(char *str) | 3551 | static int __init set_ftrace_filter(char *str) |
3523 | { | 3552 | { |
3553 | ftrace_filter_param = true; | ||
3524 | strlcpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE); | 3554 | strlcpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE); |
3525 | return 1; | 3555 | return 1; |
3526 | } | 3556 | } |
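The stddev fix swaps `sum(x^2) - n*avg^2`, which suffers from the already-truncated integer mean, for the all-integer form `n*sum(x^2) - (sum(x))^2` divided by `n*(n-1)` (the extra /1000 in the kernel is only the ns^2 to us^2 conversion). A quick numeric check of the corrected formula against a floating-point sample variance; the per-call times are made up (build with -lm):

#include <stdio.h>
#include <math.h>

int main(void)
{
	/* made-up per-call times in ns */
	unsigned long long x[] = { 120, 135, 150, 110, 145 };
	unsigned long long n = sizeof(x) / sizeof(x[0]);
	unsigned long long sum = 0, sum_sq = 0;

	for (unsigned long long i = 0; i < n; i++) {
		sum += x[i];
		sum_sq += x[i] * x[i];
	}

	/* integer form used by the fixed code: (n*sum(x^2) - (sum(x))^2) / (n*(n-1)) */
	unsigned long long var_int = (n * sum_sq - sum * sum) / (n * (n - 1));

	/* reference: textbook sample variance in floating point */
	double mean = (double)sum / n, acc = 0.0;
	for (unsigned long long i = 0; i < n; i++)
		acc += (x[i] - mean) * (x[i] - mean);
	double var_fp = acc / (n - 1);

	printf("integer variance: %llu\n", var_int);	/* 282   */
	printf("float   variance: %.2f\n", var_fp);	/* 282.50 */
	printf("stddev          : %.2f ns\n", sqrt(var_fp));
	return 0;
}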
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index e444ff88f0a4..cc2f66f68dc5 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
@@ -36,11 +36,11 @@ int ring_buffer_print_entry_header(struct trace_seq *s) | |||
36 | { | 36 | { |
37 | int ret; | 37 | int ret; |
38 | 38 | ||
39 | ret = trace_seq_printf(s, "# compressed entry header\n"); | 39 | ret = trace_seq_puts(s, "# compressed entry header\n"); |
40 | ret = trace_seq_printf(s, "\ttype_len : 5 bits\n"); | 40 | ret = trace_seq_puts(s, "\ttype_len : 5 bits\n"); |
41 | ret = trace_seq_printf(s, "\ttime_delta : 27 bits\n"); | 41 | ret = trace_seq_puts(s, "\ttime_delta : 27 bits\n"); |
42 | ret = trace_seq_printf(s, "\tarray : 32 bits\n"); | 42 | ret = trace_seq_puts(s, "\tarray : 32 bits\n"); |
43 | ret = trace_seq_printf(s, "\n"); | 43 | ret = trace_seq_putc(s, '\n'); |
44 | ret = trace_seq_printf(s, "\tpadding : type == %d\n", | 44 | ret = trace_seq_printf(s, "\tpadding : type == %d\n", |
45 | RINGBUF_TYPE_PADDING); | 45 | RINGBUF_TYPE_PADDING); |
46 | ret = trace_seq_printf(s, "\ttime_extend : type == %d\n", | 46 | ret = trace_seq_printf(s, "\ttime_extend : type == %d\n", |
@@ -1066,7 +1066,7 @@ static int rb_check_list(struct ring_buffer_per_cpu *cpu_buffer, | |||
1066 | } | 1066 | } |
1067 | 1067 | ||
1068 | /** | 1068 | /** |
1069 | * check_pages - integrity check of buffer pages | 1069 | * rb_check_pages - integrity check of buffer pages |
1070 | * @cpu_buffer: CPU buffer with pages to test | 1070 | * @cpu_buffer: CPU buffer with pages to test |
1071 | * | 1071 | * |
1072 | * As a safety measure we check to make sure the data pages have not | 1072 | * As a safety measure we check to make sure the data pages have not |
@@ -1258,7 +1258,7 @@ static int rb_cpu_notify(struct notifier_block *self, | |||
1258 | #endif | 1258 | #endif |
1259 | 1259 | ||
1260 | /** | 1260 | /** |
1261 | * ring_buffer_alloc - allocate a new ring_buffer | 1261 | * __ring_buffer_alloc - allocate a new ring_buffer |
1262 | * @size: the size in bytes per cpu that is needed. | 1262 | * @size: the size in bytes per cpu that is needed. |
1263 | * @flags: attributes to set for the ring buffer. | 1263 | * @flags: attributes to set for the ring buffer. |
1264 | * | 1264 | * |
@@ -1607,6 +1607,7 @@ static void update_pages_handler(struct work_struct *work) | |||
1607 | * ring_buffer_resize - resize the ring buffer | 1607 | * ring_buffer_resize - resize the ring buffer |
1608 | * @buffer: the buffer to resize. | 1608 | * @buffer: the buffer to resize. |
1609 | * @size: the new size. | 1609 | * @size: the new size. |
1610 | * @cpu_id: the cpu buffer to resize | ||
1610 | * | 1611 | * |
1611 | * Minimum size is 2 * BUF_PAGE_SIZE. | 1612 | * Minimum size is 2 * BUF_PAGE_SIZE. |
1612 | * | 1613 | * |
@@ -3956,11 +3957,11 @@ EXPORT_SYMBOL_GPL(ring_buffer_consume); | |||
3956 | * expected. | 3957 | * expected. |
3957 | * | 3958 | * |
3958 | * After a sequence of ring_buffer_read_prepare calls, the user is | 3959 | * After a sequence of ring_buffer_read_prepare calls, the user is |
3959 | * expected to make at least one call to ring_buffer_prepare_sync. | 3960 | * expected to make at least one call to ring_buffer_read_prepare_sync. |
3960 | * Afterwards, ring_buffer_read_start is invoked to get things going | 3961 | * Afterwards, ring_buffer_read_start is invoked to get things going |
3961 | * for real. | 3962 | * for real. |
3962 | * | 3963 | * |
3963 | * This overall must be paired with ring_buffer_finish. | 3964 | * This overall must be paired with ring_buffer_read_finish. |
3964 | */ | 3965 | */ |
3965 | struct ring_buffer_iter * | 3966 | struct ring_buffer_iter * |
3966 | ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu) | 3967 | ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu) |
@@ -4009,7 +4010,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync); | |||
4009 | * an intervening ring_buffer_read_prepare_sync must have been | 4010 | * an intervening ring_buffer_read_prepare_sync must have been |
4010 | * performed. | 4011 | * performed. |
4011 | * | 4012 | * |
4012 | * Must be paired with ring_buffer_finish. | 4013 | * Must be paired with ring_buffer_read_finish. |
4013 | */ | 4014 | */ |
4014 | void | 4015 | void |
4015 | ring_buffer_read_start(struct ring_buffer_iter *iter) | 4016 | ring_buffer_read_start(struct ring_buffer_iter *iter) |
@@ -4031,7 +4032,7 @@ ring_buffer_read_start(struct ring_buffer_iter *iter) | |||
4031 | EXPORT_SYMBOL_GPL(ring_buffer_read_start); | 4032 | EXPORT_SYMBOL_GPL(ring_buffer_read_start); |
4032 | 4033 | ||
4033 | /** | 4034 | /** |
4034 | * ring_buffer_finish - finish reading the iterator of the buffer | 4035 | * ring_buffer_read_finish - finish reading the iterator of the buffer |
4035 | * @iter: The iterator retrieved by ring_buffer_start | 4036 | * @iter: The iterator retrieved by ring_buffer_start |
4036 | * | 4037 | * |
4037 | * This re-enables the recording to the buffer, and frees the | 4038 | * This re-enables the recording to the buffer, and frees the |
@@ -4346,6 +4347,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); | |||
4346 | /** | 4347 | /** |
4347 | * ring_buffer_alloc_read_page - allocate a page to read from buffer | 4348 | * ring_buffer_alloc_read_page - allocate a page to read from buffer |
4348 | * @buffer: the buffer to allocate for. | 4349 | * @buffer: the buffer to allocate for. |
4350 | * @cpu: the cpu buffer to allocate. | ||
4349 | * | 4351 | * |
4350 | * This function is used in conjunction with ring_buffer_read_page. | 4352 | * This function is used in conjunction with ring_buffer_read_page. |
4351 | * When reading a full page from the ring buffer, these functions | 4353 | * When reading a full page from the ring buffer, these functions |
@@ -4403,7 +4405,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_free_read_page); | |||
4403 | * to swap with a page in the ring buffer. | 4405 | * to swap with a page in the ring buffer. |
4404 | * | 4406 | * |
4405 | * for example: | 4407 | * for example: |
4406 | * rpage = ring_buffer_alloc_read_page(buffer); | 4408 | * rpage = ring_buffer_alloc_read_page(buffer, cpu); |
4407 | * if (!rpage) | 4409 | * if (!rpage) |
4408 | * return error; | 4410 | * return error; |
4409 | * ret = ring_buffer_read_page(buffer, &rpage, len, cpu, 0); | 4411 | * ret = ring_buffer_read_page(buffer, &rpage, len, cpu, 0); |
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e71a8be4a6ee..3f2477713aca 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
@@ -115,6 +115,9 @@ cpumask_var_t __read_mostly tracing_buffer_mask; | |||
115 | 115 | ||
116 | enum ftrace_dump_mode ftrace_dump_on_oops; | 116 | enum ftrace_dump_mode ftrace_dump_on_oops; |
117 | 117 | ||
118 | /* When set, tracing will stop when a WARN*() is hit */ | ||
119 | int __disable_trace_on_warning; | ||
120 | |||
118 | static int tracing_set_tracer(const char *buf); | 121 | static int tracing_set_tracer(const char *buf); |
119 | 122 | ||
120 | #define MAX_TRACER_SIZE 100 | 123 | #define MAX_TRACER_SIZE 100 |
@@ -149,6 +152,13 @@ static int __init set_ftrace_dump_on_oops(char *str) | |||
149 | } | 152 | } |
150 | __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); | 153 | __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); |
151 | 154 | ||
155 | static int __init stop_trace_on_warning(char *str) | ||
156 | { | ||
157 | __disable_trace_on_warning = 1; | ||
158 | return 1; | ||
159 | } | ||
160 | __setup("traceoff_on_warning=", stop_trace_on_warning); | ||
161 | |||
152 | static int __init boot_alloc_snapshot(char *str) | 162 | static int __init boot_alloc_snapshot(char *str) |
153 | { | 163 | { |
154 | allocate_snapshot = true; | 164 | allocate_snapshot = true; |
@@ -170,6 +180,7 @@ static int __init set_trace_boot_options(char *str) | |||
170 | } | 180 | } |
171 | __setup("trace_options=", set_trace_boot_options); | 181 | __setup("trace_options=", set_trace_boot_options); |
172 | 182 | ||
183 | |||
173 | unsigned long long ns2usecs(cycle_t nsec) | 184 | unsigned long long ns2usecs(cycle_t nsec) |
174 | { | 185 | { |
175 | nsec += 500; | 186 | nsec += 500; |
@@ -193,6 +204,37 @@ static struct trace_array global_trace; | |||
193 | 204 | ||
194 | LIST_HEAD(ftrace_trace_arrays); | 205 | LIST_HEAD(ftrace_trace_arrays); |
195 | 206 | ||
207 | int trace_array_get(struct trace_array *this_tr) | ||
208 | { | ||
209 | struct trace_array *tr; | ||
210 | int ret = -ENODEV; | ||
211 | |||
212 | mutex_lock(&trace_types_lock); | ||
213 | list_for_each_entry(tr, &ftrace_trace_arrays, list) { | ||
214 | if (tr == this_tr) { | ||
215 | tr->ref++; | ||
216 | ret = 0; | ||
217 | break; | ||
218 | } | ||
219 | } | ||
220 | mutex_unlock(&trace_types_lock); | ||
221 | |||
222 | return ret; | ||
223 | } | ||
224 | |||
225 | static void __trace_array_put(struct trace_array *this_tr) | ||
226 | { | ||
227 | WARN_ON(!this_tr->ref); | ||
228 | this_tr->ref--; | ||
229 | } | ||
230 | |||
231 | void trace_array_put(struct trace_array *this_tr) | ||
232 | { | ||
233 | mutex_lock(&trace_types_lock); | ||
234 | __trace_array_put(this_tr); | ||
235 | mutex_unlock(&trace_types_lock); | ||
236 | } | ||
237 | |||
196 | int filter_current_check_discard(struct ring_buffer *buffer, | 238 | int filter_current_check_discard(struct ring_buffer *buffer, |
197 | struct ftrace_event_call *call, void *rec, | 239 | struct ftrace_event_call *call, void *rec, |
198 | struct ring_buffer_event *event) | 240 | struct ring_buffer_event *event) |
@@ -215,9 +257,24 @@ cycle_t ftrace_now(int cpu) | |||
215 | return ts; | 257 | return ts; |
216 | } | 258 | } |
217 | 259 | ||
260 | /** | ||
261 | * tracing_is_enabled - Show if global_trace has been disabled | ||
262 | * | ||
263 | * Shows if the global trace has been enabled or not. It uses the | ||
264 | * mirror flag "buffer_disabled" to be used in fast paths such as for | ||
265 | * the irqsoff tracer. But it may be inaccurate due to races. If you | ||
266 | * need to know the accurate state, use tracing_is_on() which is a little | ||
267 | * slower, but accurate. | ||
268 | */ | ||
218 | int tracing_is_enabled(void) | 269 | int tracing_is_enabled(void) |
219 | { | 270 | { |
220 | return tracing_is_on(); | 271 | /* |
272 | * For quick access (irqsoff uses this in fast path), just | ||
273 | * return the mirror variable of the state of the ring buffer. | ||
274 | * It's a little racy, but we don't really care. | ||
275 | */ | ||
276 | smp_rmb(); | ||
277 | return !global_trace.buffer_disabled; | ||
221 | } | 278 | } |
222 | 279 | ||
223 | /* | 280 | /* |
@@ -240,7 +297,7 @@ static struct tracer *trace_types __read_mostly; | |||
240 | /* | 297 | /* |
241 | * trace_types_lock is used to protect the trace_types list. | 298 | * trace_types_lock is used to protect the trace_types list. |
242 | */ | 299 | */ |
243 | static DEFINE_MUTEX(trace_types_lock); | 300 | DEFINE_MUTEX(trace_types_lock); |
244 | 301 | ||
245 | /* | 302 | /* |
246 | * serialize the access of the ring buffer | 303 | * serialize the access of the ring buffer |
@@ -330,6 +387,23 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | | |||
330 | TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | | 387 | TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | |
331 | TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION; | 388 | TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION; |
332 | 389 | ||
390 | static void tracer_tracing_on(struct trace_array *tr) | ||
391 | { | ||
392 | if (tr->trace_buffer.buffer) | ||
393 | ring_buffer_record_on(tr->trace_buffer.buffer); | ||
394 | /* | ||
395 | * This flag is looked at when buffers haven't been allocated | ||
396 | * yet, or by some tracers (like irqsoff), that just want to | ||
397 | * know if the ring buffer has been disabled, but it can handle | ||
398 | * races of where it gets disabled but we still do a record. | ||
399 | * As the check is in the fast path of the tracers, it is more | ||
400 | * important to be fast than accurate. | ||
401 | */ | ||
402 | tr->buffer_disabled = 0; | ||
403 | /* Make the flag seen by readers */ | ||
404 | smp_wmb(); | ||
405 | } | ||
406 | |||
333 | /** | 407 | /** |
334 | * tracing_on - enable tracing buffers | 408 | * tracing_on - enable tracing buffers |
335 | * | 409 | * |
@@ -338,15 +412,7 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | | |||
338 | */ | 412 | */ |
339 | void tracing_on(void) | 413 | void tracing_on(void) |
340 | { | 414 | { |
341 | if (global_trace.trace_buffer.buffer) | 415 | tracer_tracing_on(&global_trace); |
342 | ring_buffer_record_on(global_trace.trace_buffer.buffer); | ||
343 | /* | ||
344 | * This flag is only looked at when buffers haven't been | ||
345 | * allocated yet. We don't really care about the race | ||
346 | * between setting this flag and actually turning | ||
347 | * on the buffer. | ||
348 | */ | ||
349 | global_trace.buffer_disabled = 0; | ||
350 | } | 416 | } |
351 | EXPORT_SYMBOL_GPL(tracing_on); | 417 | EXPORT_SYMBOL_GPL(tracing_on); |
352 | 418 | ||
@@ -540,6 +606,23 @@ void tracing_snapshot_alloc(void) | |||
540 | EXPORT_SYMBOL_GPL(tracing_snapshot_alloc); | 606 | EXPORT_SYMBOL_GPL(tracing_snapshot_alloc); |
541 | #endif /* CONFIG_TRACER_SNAPSHOT */ | 607 | #endif /* CONFIG_TRACER_SNAPSHOT */ |
542 | 608 | ||
609 | static void tracer_tracing_off(struct trace_array *tr) | ||
610 | { | ||
611 | if (tr->trace_buffer.buffer) | ||
612 | ring_buffer_record_off(tr->trace_buffer.buffer); | ||
613 | /* | ||
614 | * This flag is looked at when buffers haven't been allocated | ||
615 | * yet, or by some tracers (like irqsoff), that just want to | ||
616 | * know if the ring buffer has been disabled, but it can handle | ||
617 | * races of where it gets disabled but we still do a record. | ||
618 | * As the check is in the fast path of the tracers, it is more | ||
619 | * important to be fast than accurate. | ||
620 | */ | ||
621 | tr->buffer_disabled = 1; | ||
622 | /* Make the flag seen by readers */ | ||
623 | smp_wmb(); | ||
624 | } | ||
625 | |||
543 | /** | 626 | /** |
544 | * tracing_off - turn off tracing buffers | 627 | * tracing_off - turn off tracing buffers |
545 | * | 628 | * |
@@ -550,26 +633,35 @@ EXPORT_SYMBOL_GPL(tracing_snapshot_alloc); | |||
550 | */ | 633 | */ |
551 | void tracing_off(void) | 634 | void tracing_off(void) |
552 | { | 635 | { |
553 | if (global_trace.trace_buffer.buffer) | 636 | tracer_tracing_off(&global_trace); |
554 | ring_buffer_record_off(global_trace.trace_buffer.buffer); | ||
555 | /* | ||
556 | * This flag is only looked at when buffers haven't been | ||
557 | * allocated yet. We don't really care about the race | ||
558 | * between setting this flag and actually turning | ||
559 | * on the buffer. | ||
560 | */ | ||
561 | global_trace.buffer_disabled = 1; | ||
562 | } | 637 | } |
563 | EXPORT_SYMBOL_GPL(tracing_off); | 638 | EXPORT_SYMBOL_GPL(tracing_off); |
564 | 639 | ||
640 | void disable_trace_on_warning(void) | ||
641 | { | ||
642 | if (__disable_trace_on_warning) | ||
643 | tracing_off(); | ||
644 | } | ||
645 | |||
646 | /** | ||
647 | * tracer_tracing_is_on - show real state of ring buffer enabled | ||
648 | * @tr : the trace array to know if ring buffer is enabled | ||
649 | * | ||
650 | * Shows real state of the ring buffer if it is enabled or not. | ||
651 | */ | ||
652 | static int tracer_tracing_is_on(struct trace_array *tr) | ||
653 | { | ||
654 | if (tr->trace_buffer.buffer) | ||
655 | return ring_buffer_record_is_on(tr->trace_buffer.buffer); | ||
656 | return !tr->buffer_disabled; | ||
657 | } | ||
658 | |||
565 | /** | 659 | /** |
566 | * tracing_is_on - show state of ring buffers enabled | 660 | * tracing_is_on - show state of ring buffers enabled |
567 | */ | 661 | */ |
568 | int tracing_is_on(void) | 662 | int tracing_is_on(void) |
569 | { | 663 | { |
570 | if (global_trace.trace_buffer.buffer) | 664 | return tracer_tracing_is_on(&global_trace); |
571 | return ring_buffer_record_is_on(global_trace.trace_buffer.buffer); | ||
572 | return !global_trace.buffer_disabled; | ||
573 | } | 665 | } |
574 | EXPORT_SYMBOL_GPL(tracing_is_on); | 666 | EXPORT_SYMBOL_GPL(tracing_is_on); |
575 | 667 | ||
@@ -1543,15 +1635,6 @@ trace_function(struct trace_array *tr, | |||
1543 | __buffer_unlock_commit(buffer, event); | 1635 | __buffer_unlock_commit(buffer, event); |
1544 | } | 1636 | } |
1545 | 1637 | ||
1546 | void | ||
1547 | ftrace(struct trace_array *tr, struct trace_array_cpu *data, | ||
1548 | unsigned long ip, unsigned long parent_ip, unsigned long flags, | ||
1549 | int pc) | ||
1550 | { | ||
1551 | if (likely(!atomic_read(&data->disabled))) | ||
1552 | trace_function(tr, ip, parent_ip, flags, pc); | ||
1553 | } | ||
1554 | |||
1555 | #ifdef CONFIG_STACKTRACE | 1638 | #ifdef CONFIG_STACKTRACE |
1556 | 1639 | ||
1557 | #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long)) | 1640 | #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long)) |
@@ -2768,10 +2851,9 @@ static const struct seq_operations tracer_seq_ops = { | |||
2768 | }; | 2851 | }; |
2769 | 2852 | ||
2770 | static struct trace_iterator * | 2853 | static struct trace_iterator * |
2771 | __tracing_open(struct inode *inode, struct file *file, bool snapshot) | 2854 | __tracing_open(struct trace_array *tr, struct trace_cpu *tc, |
2855 | struct inode *inode, struct file *file, bool snapshot) | ||
2772 | { | 2856 | { |
2773 | struct trace_cpu *tc = inode->i_private; | ||
2774 | struct trace_array *tr = tc->tr; | ||
2775 | struct trace_iterator *iter; | 2857 | struct trace_iterator *iter; |
2776 | int cpu; | 2858 | int cpu; |
2777 | 2859 | ||
@@ -2850,8 +2932,6 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) | |||
2850 | tracing_iter_reset(iter, cpu); | 2932 | tracing_iter_reset(iter, cpu); |
2851 | } | 2933 | } |
2852 | 2934 | ||
2853 | tr->ref++; | ||
2854 | |||
2855 | mutex_unlock(&trace_types_lock); | 2935 | mutex_unlock(&trace_types_lock); |
2856 | 2936 | ||
2857 | return iter; | 2937 | return iter; |
@@ -2874,6 +2954,43 @@ int tracing_open_generic(struct inode *inode, struct file *filp) | |||
2874 | return 0; | 2954 | return 0; |
2875 | } | 2955 | } |
2876 | 2956 | ||
2957 | /* | ||
2958 | * Open and update trace_array ref count. | ||
2959 | * Must have the current trace_array passed to it. | ||
2960 | */ | ||
2961 | static int tracing_open_generic_tr(struct inode *inode, struct file *filp) | ||
2962 | { | ||
2963 | struct trace_array *tr = inode->i_private; | ||
2964 | |||
2965 | if (tracing_disabled) | ||
2966 | return -ENODEV; | ||
2967 | |||
2968 | if (trace_array_get(tr) < 0) | ||
2969 | return -ENODEV; | ||
2970 | |||
2971 | filp->private_data = inode->i_private; | ||
2972 | |||
2973 | return 0; | ||
2974 | |||
2975 | } | ||
2976 | |||
2977 | static int tracing_open_generic_tc(struct inode *inode, struct file *filp) | ||
2978 | { | ||
2979 | struct trace_cpu *tc = inode->i_private; | ||
2980 | struct trace_array *tr = tc->tr; | ||
2981 | |||
2982 | if (tracing_disabled) | ||
2983 | return -ENODEV; | ||
2984 | |||
2985 | if (trace_array_get(tr) < 0) | ||
2986 | return -ENODEV; | ||
2987 | |||
2988 | filp->private_data = inode->i_private; | ||
2989 | |||
2990 | return 0; | ||
2991 | |||
2992 | } | ||
2993 | |||
2877 | static int tracing_release(struct inode *inode, struct file *file) | 2994 | static int tracing_release(struct inode *inode, struct file *file) |
2878 | { | 2995 | { |
2879 | struct seq_file *m = file->private_data; | 2996 | struct seq_file *m = file->private_data; |
@@ -2881,17 +2998,19 @@ static int tracing_release(struct inode *inode, struct file *file) | |||
2881 | struct trace_array *tr; | 2998 | struct trace_array *tr; |
2882 | int cpu; | 2999 | int cpu; |
2883 | 3000 | ||
2884 | if (!(file->f_mode & FMODE_READ)) | 3001 | /* Writes do not use seq_file, need to grab tr from inode */ |
3002 | if (!(file->f_mode & FMODE_READ)) { | ||
3003 | struct trace_cpu *tc = inode->i_private; | ||
3004 | |||
3005 | trace_array_put(tc->tr); | ||
2885 | return 0; | 3006 | return 0; |
3007 | } | ||
2886 | 3008 | ||
2887 | iter = m->private; | 3009 | iter = m->private; |
2888 | tr = iter->tr; | 3010 | tr = iter->tr; |
2889 | 3011 | ||
2890 | mutex_lock(&trace_types_lock); | 3012 | mutex_lock(&trace_types_lock); |
2891 | 3013 | ||
2892 | WARN_ON(!tr->ref); | ||
2893 | tr->ref--; | ||
2894 | |||
2895 | for_each_tracing_cpu(cpu) { | 3014 | for_each_tracing_cpu(cpu) { |
2896 | if (iter->buffer_iter[cpu]) | 3015 | if (iter->buffer_iter[cpu]) |
2897 | ring_buffer_read_finish(iter->buffer_iter[cpu]); | 3016 | ring_buffer_read_finish(iter->buffer_iter[cpu]); |
@@ -2903,6 +3022,9 @@ static int tracing_release(struct inode *inode, struct file *file) | |||
2903 | if (!iter->snapshot) | 3022 | if (!iter->snapshot) |
2904 | /* reenable tracing if it was previously enabled */ | 3023 | /* reenable tracing if it was previously enabled */ |
2905 | tracing_start_tr(tr); | 3024 | tracing_start_tr(tr); |
3025 | |||
3026 | __trace_array_put(tr); | ||
3027 | |||
2906 | mutex_unlock(&trace_types_lock); | 3028 | mutex_unlock(&trace_types_lock); |
2907 | 3029 | ||
2908 | mutex_destroy(&iter->mutex); | 3030 | mutex_destroy(&iter->mutex); |
@@ -2910,20 +3032,49 @@ static int tracing_release(struct inode *inode, struct file *file) | |||
2910 | kfree(iter->trace); | 3032 | kfree(iter->trace); |
2911 | kfree(iter->buffer_iter); | 3033 | kfree(iter->buffer_iter); |
2912 | seq_release_private(inode, file); | 3034 | seq_release_private(inode, file); |
3035 | |||
2913 | return 0; | 3036 | return 0; |
2914 | } | 3037 | } |
2915 | 3038 | ||
3039 | static int tracing_release_generic_tr(struct inode *inode, struct file *file) | ||
3040 | { | ||
3041 | struct trace_array *tr = inode->i_private; | ||
3042 | |||
3043 | trace_array_put(tr); | ||
3044 | return 0; | ||
3045 | } | ||
3046 | |||
3047 | static int tracing_release_generic_tc(struct inode *inode, struct file *file) | ||
3048 | { | ||
3049 | struct trace_cpu *tc = inode->i_private; | ||
3050 | struct trace_array *tr = tc->tr; | ||
3051 | |||
3052 | trace_array_put(tr); | ||
3053 | return 0; | ||
3054 | } | ||
3055 | |||
3056 | static int tracing_single_release_tr(struct inode *inode, struct file *file) | ||
3057 | { | ||
3058 | struct trace_array *tr = inode->i_private; | ||
3059 | |||
3060 | trace_array_put(tr); | ||
3061 | |||
3062 | return single_release(inode, file); | ||
3063 | } | ||
3064 | |||
2916 | static int tracing_open(struct inode *inode, struct file *file) | 3065 | static int tracing_open(struct inode *inode, struct file *file) |
2917 | { | 3066 | { |
3067 | struct trace_cpu *tc = inode->i_private; | ||
3068 | struct trace_array *tr = tc->tr; | ||
2918 | struct trace_iterator *iter; | 3069 | struct trace_iterator *iter; |
2919 | int ret = 0; | 3070 | int ret = 0; |
2920 | 3071 | ||
3072 | if (trace_array_get(tr) < 0) | ||
3073 | return -ENODEV; | ||
3074 | |||
2921 | /* If this file was open for write, then erase contents */ | 3075 | /* If this file was open for write, then erase contents */ |
2922 | if ((file->f_mode & FMODE_WRITE) && | 3076 | if ((file->f_mode & FMODE_WRITE) && |
2923 | (file->f_flags & O_TRUNC)) { | 3077 | (file->f_flags & O_TRUNC)) { |
2924 | struct trace_cpu *tc = inode->i_private; | ||
2925 | struct trace_array *tr = tc->tr; | ||
2926 | |||
2927 | if (tc->cpu == RING_BUFFER_ALL_CPUS) | 3078 | if (tc->cpu == RING_BUFFER_ALL_CPUS) |
2928 | tracing_reset_online_cpus(&tr->trace_buffer); | 3079 | tracing_reset_online_cpus(&tr->trace_buffer); |
2929 | else | 3080 | else |
@@ -2931,12 +3082,16 @@ static int tracing_open(struct inode *inode, struct file *file) | |||
2931 | } | 3082 | } |
2932 | 3083 | ||
2933 | if (file->f_mode & FMODE_READ) { | 3084 | if (file->f_mode & FMODE_READ) { |
2934 | iter = __tracing_open(inode, file, false); | 3085 | iter = __tracing_open(tr, tc, inode, file, false); |
2935 | if (IS_ERR(iter)) | 3086 | if (IS_ERR(iter)) |
2936 | ret = PTR_ERR(iter); | 3087 | ret = PTR_ERR(iter); |
2937 | else if (trace_flags & TRACE_ITER_LATENCY_FMT) | 3088 | else if (trace_flags & TRACE_ITER_LATENCY_FMT) |
2938 | iter->iter_flags |= TRACE_FILE_LAT_FMT; | 3089 | iter->iter_flags |= TRACE_FILE_LAT_FMT; |
2939 | } | 3090 | } |
3091 | |||
3092 | if (ret < 0) | ||
3093 | trace_array_put(tr); | ||
3094 | |||
2940 | return ret; | 3095 | return ret; |
2941 | } | 3096 | } |
2942 | 3097 | ||
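The pattern repeated throughout these hunks replaces the bare tr->ref++ / tr->ref-- bookkeeping with trace_array_get() in every ->open handler and trace_array_put() in the matching ->release, dropping the reference immediately whenever the open itself fails. A minimal sketch of the pairing, assuming only that trace_array_get() returns a negative value once the array is being torn down (the helper names below are hypothetical):

	static int my_open(struct inode *inode, struct file *filp)
	{
		struct trace_array *tr = inode->i_private;
		int ret;

		if (trace_array_get(tr) < 0)
			return -ENODEV;		/* instance is going away */

		ret = my_real_open(inode, filp);	/* hypothetical */
		if (ret < 0)
			trace_array_put(tr);	/* do not leak the reference */
		return ret;
	}

	static int my_release(struct inode *inode, struct file *filp)
	{
		trace_array_put(inode->i_private);	/* pairs with the get in open */
		return my_real_release(inode, filp);	/* hypothetical */
	}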
@@ -3293,17 +3448,27 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf, | |||
3293 | 3448 | ||
3294 | static int tracing_trace_options_open(struct inode *inode, struct file *file) | 3449 | static int tracing_trace_options_open(struct inode *inode, struct file *file) |
3295 | { | 3450 | { |
3451 | struct trace_array *tr = inode->i_private; | ||
3452 | int ret; | ||
3453 | |||
3296 | if (tracing_disabled) | 3454 | if (tracing_disabled) |
3297 | return -ENODEV; | 3455 | return -ENODEV; |
3298 | 3456 | ||
3299 | return single_open(file, tracing_trace_options_show, inode->i_private); | 3457 | if (trace_array_get(tr) < 0) |
3458 | return -ENODEV; | ||
3459 | |||
3460 | ret = single_open(file, tracing_trace_options_show, inode->i_private); | ||
3461 | if (ret < 0) | ||
3462 | trace_array_put(tr); | ||
3463 | |||
3464 | return ret; | ||
3300 | } | 3465 | } |
3301 | 3466 | ||
3302 | static const struct file_operations tracing_iter_fops = { | 3467 | static const struct file_operations tracing_iter_fops = { |
3303 | .open = tracing_trace_options_open, | 3468 | .open = tracing_trace_options_open, |
3304 | .read = seq_read, | 3469 | .read = seq_read, |
3305 | .llseek = seq_lseek, | 3470 | .llseek = seq_lseek, |
3306 | .release = single_release, | 3471 | .release = tracing_single_release_tr, |
3307 | .write = tracing_trace_options_write, | 3472 | .write = tracing_trace_options_write, |
3308 | }; | 3473 | }; |
3309 | 3474 | ||
@@ -3379,14 +3544,14 @@ static const char readme_msg[] = | |||
3379 | "\n snapshot\t\t- Like 'trace' but shows the content of the static snapshot buffer\n" | 3544 | "\n snapshot\t\t- Like 'trace' but shows the content of the static snapshot buffer\n" |
3380 | "\t\t\t Read the contents for more information\n" | 3545 | "\t\t\t Read the contents for more information\n" |
3381 | #endif | 3546 | #endif |
3382 | #ifdef CONFIG_STACKTRACE | 3547 | #ifdef CONFIG_STACK_TRACER |
3383 | " stack_trace\t\t- Shows the max stack trace when active\n" | 3548 | " stack_trace\t\t- Shows the max stack trace when active\n" |
3384 | " stack_max_size\t- Shows current max stack size that was traced\n" | 3549 | " stack_max_size\t- Shows current max stack size that was traced\n" |
3385 | "\t\t\t Write into this file to reset the max size (trigger a new trace)\n" | 3550 | "\t\t\t Write into this file to reset the max size (trigger a new trace)\n" |
3386 | #ifdef CONFIG_DYNAMIC_FTRACE | 3551 | #ifdef CONFIG_DYNAMIC_FTRACE |
3387 | " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace traces\n" | 3552 | " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace traces\n" |
3388 | #endif | 3553 | #endif |
3389 | #endif /* CONFIG_STACKTRACE */ | 3554 | #endif /* CONFIG_STACK_TRACER */ |
3390 | ; | 3555 | ; |
3391 | 3556 | ||
3392 | static ssize_t | 3557 | static ssize_t |
@@ -3791,12 +3956,16 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) | |||
3791 | if (tracing_disabled) | 3956 | if (tracing_disabled) |
3792 | return -ENODEV; | 3957 | return -ENODEV; |
3793 | 3958 | ||
3959 | if (trace_array_get(tr) < 0) | ||
3960 | return -ENODEV; | ||
3961 | |||
3794 | mutex_lock(&trace_types_lock); | 3962 | mutex_lock(&trace_types_lock); |
3795 | 3963 | ||
3796 | /* create a buffer to store the information to pass to userspace */ | 3964 | /* create a buffer to store the information to pass to userspace */ |
3797 | iter = kzalloc(sizeof(*iter), GFP_KERNEL); | 3965 | iter = kzalloc(sizeof(*iter), GFP_KERNEL); |
3798 | if (!iter) { | 3966 | if (!iter) { |
3799 | ret = -ENOMEM; | 3967 | ret = -ENOMEM; |
3968 | __trace_array_put(tr); | ||
3800 | goto out; | 3969 | goto out; |
3801 | } | 3970 | } |
3802 | 3971 | ||
@@ -3843,6 +4012,7 @@ out: | |||
3843 | fail: | 4012 | fail: |
3844 | kfree(iter->trace); | 4013 | kfree(iter->trace); |
3845 | kfree(iter); | 4014 | kfree(iter); |
4015 | __trace_array_put(tr); | ||
3846 | mutex_unlock(&trace_types_lock); | 4016 | mutex_unlock(&trace_types_lock); |
3847 | return ret; | 4017 | return ret; |
3848 | } | 4018 | } |
@@ -3850,6 +4020,8 @@ fail: | |||
3850 | static int tracing_release_pipe(struct inode *inode, struct file *file) | 4020 | static int tracing_release_pipe(struct inode *inode, struct file *file) |
3851 | { | 4021 | { |
3852 | struct trace_iterator *iter = file->private_data; | 4022 | struct trace_iterator *iter = file->private_data; |
4023 | struct trace_cpu *tc = inode->i_private; | ||
4024 | struct trace_array *tr = tc->tr; | ||
3853 | 4025 | ||
3854 | mutex_lock(&trace_types_lock); | 4026 | mutex_lock(&trace_types_lock); |
3855 | 4027 | ||
@@ -3863,6 +4035,8 @@ static int tracing_release_pipe(struct inode *inode, struct file *file) | |||
3863 | kfree(iter->trace); | 4035 | kfree(iter->trace); |
3864 | kfree(iter); | 4036 | kfree(iter); |
3865 | 4037 | ||
4038 | trace_array_put(tr); | ||
4039 | |||
3866 | return 0; | 4040 | return 0; |
3867 | } | 4041 | } |
3868 | 4042 | ||
@@ -3939,7 +4113,7 @@ static int tracing_wait_pipe(struct file *filp) | |||
3939 | * | 4113 | * |
3940 | * iter->pos will be 0 if we haven't read anything. | 4114 | * iter->pos will be 0 if we haven't read anything. |
3941 | */ | 4115 | */ |
3942 | if (!tracing_is_enabled() && iter->pos) | 4116 | if (!tracing_is_on() && iter->pos) |
3943 | break; | 4117 | break; |
3944 | } | 4118 | } |
3945 | 4119 | ||
@@ -4320,6 +4494,8 @@ tracing_free_buffer_release(struct inode *inode, struct file *filp) | |||
4320 | /* resize the ring buffer to 0 */ | 4494 | /* resize the ring buffer to 0 */ |
4321 | tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS); | 4495 | tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS); |
4322 | 4496 | ||
4497 | trace_array_put(tr); | ||
4498 | |||
4323 | return 0; | 4499 | return 0; |
4324 | } | 4500 | } |
4325 | 4501 | ||
@@ -4328,6 +4504,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, | |||
4328 | size_t cnt, loff_t *fpos) | 4504 | size_t cnt, loff_t *fpos) |
4329 | { | 4505 | { |
4330 | unsigned long addr = (unsigned long)ubuf; | 4506 | unsigned long addr = (unsigned long)ubuf; |
4507 | struct trace_array *tr = filp->private_data; | ||
4331 | struct ring_buffer_event *event; | 4508 | struct ring_buffer_event *event; |
4332 | struct ring_buffer *buffer; | 4509 | struct ring_buffer *buffer; |
4333 | struct print_entry *entry; | 4510 | struct print_entry *entry; |
@@ -4387,7 +4564,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, | |||
4387 | 4564 | ||
4388 | local_save_flags(irq_flags); | 4565 | local_save_flags(irq_flags); |
4389 | size = sizeof(*entry) + cnt + 2; /* possible \n added */ | 4566 | size = sizeof(*entry) + cnt + 2; /* possible \n added */ |
4390 | buffer = global_trace.trace_buffer.buffer; | 4567 | buffer = tr->trace_buffer.buffer; |
4391 | event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, | 4568 | event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, |
4392 | irq_flags, preempt_count()); | 4569 | irq_flags, preempt_count()); |
4393 | if (!event) { | 4570 | if (!event) { |
@@ -4495,10 +4672,20 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, | |||
4495 | 4672 | ||
4496 | static int tracing_clock_open(struct inode *inode, struct file *file) | 4673 | static int tracing_clock_open(struct inode *inode, struct file *file) |
4497 | { | 4674 | { |
4675 | struct trace_array *tr = inode->i_private; | ||
4676 | int ret; | ||
4677 | |||
4498 | if (tracing_disabled) | 4678 | if (tracing_disabled) |
4499 | return -ENODEV; | 4679 | return -ENODEV; |
4500 | 4680 | ||
4501 | return single_open(file, tracing_clock_show, inode->i_private); | 4681 | if (trace_array_get(tr)) |
4682 | return -ENODEV; | ||
4683 | |||
4684 | ret = single_open(file, tracing_clock_show, inode->i_private); | ||
4685 | if (ret < 0) | ||
4686 | trace_array_put(tr); | ||
4687 | |||
4688 | return ret; | ||
4502 | } | 4689 | } |
4503 | 4690 | ||
4504 | struct ftrace_buffer_info { | 4691 | struct ftrace_buffer_info { |
@@ -4511,30 +4698,40 @@ struct ftrace_buffer_info { | |||
4511 | static int tracing_snapshot_open(struct inode *inode, struct file *file) | 4698 | static int tracing_snapshot_open(struct inode *inode, struct file *file) |
4512 | { | 4699 | { |
4513 | struct trace_cpu *tc = inode->i_private; | 4700 | struct trace_cpu *tc = inode->i_private; |
4701 | struct trace_array *tr = tc->tr; | ||
4514 | struct trace_iterator *iter; | 4702 | struct trace_iterator *iter; |
4515 | struct seq_file *m; | 4703 | struct seq_file *m; |
4516 | int ret = 0; | 4704 | int ret = 0; |
4517 | 4705 | ||
4706 | if (trace_array_get(tr) < 0) | ||
4707 | return -ENODEV; | ||
4708 | |||
4518 | if (file->f_mode & FMODE_READ) { | 4709 | if (file->f_mode & FMODE_READ) { |
4519 | iter = __tracing_open(inode, file, true); | 4710 | iter = __tracing_open(tr, tc, inode, file, true); |
4520 | if (IS_ERR(iter)) | 4711 | if (IS_ERR(iter)) |
4521 | ret = PTR_ERR(iter); | 4712 | ret = PTR_ERR(iter); |
4522 | } else { | 4713 | } else { |
4523 | /* Writes still need the seq_file to hold the private data */ | 4714 | /* Writes still need the seq_file to hold the private data */ |
4715 | ret = -ENOMEM; | ||
4524 | m = kzalloc(sizeof(*m), GFP_KERNEL); | 4716 | m = kzalloc(sizeof(*m), GFP_KERNEL); |
4525 | if (!m) | 4717 | if (!m) |
4526 | return -ENOMEM; | 4718 | goto out; |
4527 | iter = kzalloc(sizeof(*iter), GFP_KERNEL); | 4719 | iter = kzalloc(sizeof(*iter), GFP_KERNEL); |
4528 | if (!iter) { | 4720 | if (!iter) { |
4529 | kfree(m); | 4721 | kfree(m); |
4530 | return -ENOMEM; | 4722 | goto out; |
4531 | } | 4723 | } |
4532 | iter->tr = tc->tr; | 4724 | ret = 0; |
4725 | |||
4726 | iter->tr = tr; | ||
4533 | iter->trace_buffer = &tc->tr->max_buffer; | 4727 | iter->trace_buffer = &tc->tr->max_buffer; |
4534 | iter->cpu_file = tc->cpu; | 4728 | iter->cpu_file = tc->cpu; |
4535 | m->private = iter; | 4729 | m->private = iter; |
4536 | file->private_data = m; | 4730 | file->private_data = m; |
4537 | } | 4731 | } |
4732 | out: | ||
4733 | if (ret < 0) | ||
4734 | trace_array_put(tr); | ||
4538 | 4735 | ||
4539 | return ret; | 4736 | return ret; |
4540 | } | 4737 | } |
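For the write-only case, tracing_snapshot_open() now presets ret to -ENOMEM and funnels every failure through a single out: label, so the reference taken by trace_array_get() at the top is dropped exactly once on any error path. Stripped of the snapshot-specific wiring, the shape is roughly:

	if (trace_array_get(tr) < 0)
		return -ENODEV;

	ret = -ENOMEM;
	m = kzalloc(sizeof(*m), GFP_KERNEL);
	if (!m)
		goto out;
	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
	if (!iter) {
		kfree(m);
		goto out;
	}
	ret = 0;
	/* ... attach iter and m to file->private_data ... */
out:
	if (ret < 0)
		trace_array_put(tr);
	return ret;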
@@ -4616,9 +4813,12 @@ out: | |||
4616 | static int tracing_snapshot_release(struct inode *inode, struct file *file) | 4813 | static int tracing_snapshot_release(struct inode *inode, struct file *file) |
4617 | { | 4814 | { |
4618 | struct seq_file *m = file->private_data; | 4815 | struct seq_file *m = file->private_data; |
4816 | int ret; | ||
4817 | |||
4818 | ret = tracing_release(inode, file); | ||
4619 | 4819 | ||
4620 | if (file->f_mode & FMODE_READ) | 4820 | if (file->f_mode & FMODE_READ) |
4621 | return tracing_release(inode, file); | 4821 | return ret; |
4622 | 4822 | ||
4623 | /* If write only, the seq_file is just a stub */ | 4823 | /* If write only, the seq_file is just a stub */ |
4624 | if (m) | 4824 | if (m) |
@@ -4684,34 +4884,38 @@ static const struct file_operations tracing_pipe_fops = { | |||
4684 | }; | 4884 | }; |
4685 | 4885 | ||
4686 | static const struct file_operations tracing_entries_fops = { | 4886 | static const struct file_operations tracing_entries_fops = { |
4687 | .open = tracing_open_generic, | 4887 | .open = tracing_open_generic_tc, |
4688 | .read = tracing_entries_read, | 4888 | .read = tracing_entries_read, |
4689 | .write = tracing_entries_write, | 4889 | .write = tracing_entries_write, |
4690 | .llseek = generic_file_llseek, | 4890 | .llseek = generic_file_llseek, |
4891 | .release = tracing_release_generic_tc, | ||
4691 | }; | 4892 | }; |
4692 | 4893 | ||
4693 | static const struct file_operations tracing_total_entries_fops = { | 4894 | static const struct file_operations tracing_total_entries_fops = { |
4694 | .open = tracing_open_generic, | 4895 | .open = tracing_open_generic_tr, |
4695 | .read = tracing_total_entries_read, | 4896 | .read = tracing_total_entries_read, |
4696 | .llseek = generic_file_llseek, | 4897 | .llseek = generic_file_llseek, |
4898 | .release = tracing_release_generic_tr, | ||
4697 | }; | 4899 | }; |
4698 | 4900 | ||
4699 | static const struct file_operations tracing_free_buffer_fops = { | 4901 | static const struct file_operations tracing_free_buffer_fops = { |
4902 | .open = tracing_open_generic_tr, | ||
4700 | .write = tracing_free_buffer_write, | 4903 | .write = tracing_free_buffer_write, |
4701 | .release = tracing_free_buffer_release, | 4904 | .release = tracing_free_buffer_release, |
4702 | }; | 4905 | }; |
4703 | 4906 | ||
4704 | static const struct file_operations tracing_mark_fops = { | 4907 | static const struct file_operations tracing_mark_fops = { |
4705 | .open = tracing_open_generic, | 4908 | .open = tracing_open_generic_tr, |
4706 | .write = tracing_mark_write, | 4909 | .write = tracing_mark_write, |
4707 | .llseek = generic_file_llseek, | 4910 | .llseek = generic_file_llseek, |
4911 | .release = tracing_release_generic_tr, | ||
4708 | }; | 4912 | }; |
4709 | 4913 | ||
4710 | static const struct file_operations trace_clock_fops = { | 4914 | static const struct file_operations trace_clock_fops = { |
4711 | .open = tracing_clock_open, | 4915 | .open = tracing_clock_open, |
4712 | .read = seq_read, | 4916 | .read = seq_read, |
4713 | .llseek = seq_lseek, | 4917 | .llseek = seq_lseek, |
4714 | .release = single_release, | 4918 | .release = tracing_single_release_tr, |
4715 | .write = tracing_clock_write, | 4919 | .write = tracing_clock_write, |
4716 | }; | 4920 | }; |
4717 | 4921 | ||
@@ -4739,18 +4943,22 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp) | |||
4739 | struct trace_cpu *tc = inode->i_private; | 4943 | struct trace_cpu *tc = inode->i_private; |
4740 | struct trace_array *tr = tc->tr; | 4944 | struct trace_array *tr = tc->tr; |
4741 | struct ftrace_buffer_info *info; | 4945 | struct ftrace_buffer_info *info; |
4946 | int ret; | ||
4742 | 4947 | ||
4743 | if (tracing_disabled) | 4948 | if (tracing_disabled) |
4744 | return -ENODEV; | 4949 | return -ENODEV; |
4745 | 4950 | ||
4951 | if (trace_array_get(tr) < 0) | ||
4952 | return -ENODEV; | ||
4953 | |||
4746 | info = kzalloc(sizeof(*info), GFP_KERNEL); | 4954 | info = kzalloc(sizeof(*info), GFP_KERNEL); |
4747 | if (!info) | 4955 | if (!info) { |
4956 | trace_array_put(tr); | ||
4748 | return -ENOMEM; | 4957 | return -ENOMEM; |
4958 | } | ||
4749 | 4959 | ||
4750 | mutex_lock(&trace_types_lock); | 4960 | mutex_lock(&trace_types_lock); |
4751 | 4961 | ||
4752 | tr->ref++; | ||
4753 | |||
4754 | info->iter.tr = tr; | 4962 | info->iter.tr = tr; |
4755 | info->iter.cpu_file = tc->cpu; | 4963 | info->iter.cpu_file = tc->cpu; |
4756 | info->iter.trace = tr->current_trace; | 4964 | info->iter.trace = tr->current_trace; |
@@ -4763,7 +4971,11 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp) | |||
4763 | 4971 | ||
4764 | mutex_unlock(&trace_types_lock); | 4972 | mutex_unlock(&trace_types_lock); |
4765 | 4973 | ||
4766 | return nonseekable_open(inode, filp); | 4974 | ret = nonseekable_open(inode, filp); |
4975 | if (ret < 0) | ||
4976 | trace_array_put(tr); | ||
4977 | |||
4978 | return ret; | ||
4767 | } | 4979 | } |
4768 | 4980 | ||
4769 | static unsigned int | 4981 | static unsigned int |
@@ -4863,8 +5075,7 @@ static int tracing_buffers_release(struct inode *inode, struct file *file) | |||
4863 | 5075 | ||
4864 | mutex_lock(&trace_types_lock); | 5076 | mutex_lock(&trace_types_lock); |
4865 | 5077 | ||
4866 | WARN_ON(!iter->tr->ref); | 5078 | __trace_array_put(iter->tr); |
4867 | iter->tr->ref--; | ||
4868 | 5079 | ||
4869 | if (info->spare) | 5080 | if (info->spare) |
4870 | ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare); | 5081 | ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare); |
@@ -5126,9 +5337,10 @@ tracing_stats_read(struct file *filp, char __user *ubuf, | |||
5126 | } | 5337 | } |
5127 | 5338 | ||
5128 | static const struct file_operations tracing_stats_fops = { | 5339 | static const struct file_operations tracing_stats_fops = { |
5129 | .open = tracing_open_generic, | 5340 | .open = tracing_open_generic_tc, |
5130 | .read = tracing_stats_read, | 5341 | .read = tracing_stats_read, |
5131 | .llseek = generic_file_llseek, | 5342 | .llseek = generic_file_llseek, |
5343 | .release = tracing_release_generic_tc, | ||
5132 | }; | 5344 | }; |
5133 | 5345 | ||
5134 | #ifdef CONFIG_DYNAMIC_FTRACE | 5346 | #ifdef CONFIG_DYNAMIC_FTRACE |
@@ -5612,15 +5824,10 @@ rb_simple_read(struct file *filp, char __user *ubuf, | |||
5612 | size_t cnt, loff_t *ppos) | 5824 | size_t cnt, loff_t *ppos) |
5613 | { | 5825 | { |
5614 | struct trace_array *tr = filp->private_data; | 5826 | struct trace_array *tr = filp->private_data; |
5615 | struct ring_buffer *buffer = tr->trace_buffer.buffer; | ||
5616 | char buf[64]; | 5827 | char buf[64]; |
5617 | int r; | 5828 | int r; |
5618 | 5829 | ||
5619 | if (buffer) | 5830 | r = tracer_tracing_is_on(tr); |
5620 | r = ring_buffer_record_is_on(buffer); | ||
5621 | else | ||
5622 | r = 0; | ||
5623 | |||
5624 | r = sprintf(buf, "%d\n", r); | 5831 | r = sprintf(buf, "%d\n", r); |
5625 | 5832 | ||
5626 | return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); | 5833 | return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); |
@@ -5642,11 +5849,11 @@ rb_simple_write(struct file *filp, const char __user *ubuf, | |||
5642 | if (buffer) { | 5849 | if (buffer) { |
5643 | mutex_lock(&trace_types_lock); | 5850 | mutex_lock(&trace_types_lock); |
5644 | if (val) { | 5851 | if (val) { |
5645 | ring_buffer_record_on(buffer); | 5852 | tracer_tracing_on(tr); |
5646 | if (tr->current_trace->start) | 5853 | if (tr->current_trace->start) |
5647 | tr->current_trace->start(tr); | 5854 | tr->current_trace->start(tr); |
5648 | } else { | 5855 | } else { |
5649 | ring_buffer_record_off(buffer); | 5856 | tracer_tracing_off(tr); |
5650 | if (tr->current_trace->stop) | 5857 | if (tr->current_trace->stop) |
5651 | tr->current_trace->stop(tr); | 5858 | tr->current_trace->stop(tr); |
5652 | } | 5859 | } |
@@ -5659,9 +5866,10 @@ rb_simple_write(struct file *filp, const char __user *ubuf, | |||
5659 | } | 5866 | } |
5660 | 5867 | ||
5661 | static const struct file_operations rb_simple_fops = { | 5868 | static const struct file_operations rb_simple_fops = { |
5662 | .open = tracing_open_generic, | 5869 | .open = tracing_open_generic_tr, |
5663 | .read = rb_simple_read, | 5870 | .read = rb_simple_read, |
5664 | .write = rb_simple_write, | 5871 | .write = rb_simple_write, |
5872 | .release = tracing_release_generic_tr, | ||
5665 | .llseek = default_llseek, | 5873 | .llseek = default_llseek, |
5666 | }; | 5874 | }; |
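With reads going through tracer_tracing_is_on() and writes through tracer_tracing_on()/tracer_tracing_off(), plus the refcounted open and release helpers, the tracing_on control file now acts purely on the trace_array it was created for. In practice something like 'echo 0 > instances/<name>/tracing_on' (relative to the tracing debugfs directory; the path is given only as an illustration) stops recording in that instance without disturbing the top-level buffer.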
5667 | 5875 | ||
@@ -5775,8 +5983,10 @@ static int new_instance_create(const char *name) | |||
5775 | goto out_free_tr; | 5983 | goto out_free_tr; |
5776 | 5984 | ||
5777 | ret = event_trace_add_tracer(tr->dir, tr); | 5985 | ret = event_trace_add_tracer(tr->dir, tr); |
5778 | if (ret) | 5986 | if (ret) { |
5987 | debugfs_remove_recursive(tr->dir); | ||
5779 | goto out_free_tr; | 5988 | goto out_free_tr; |
5989 | } | ||
5780 | 5990 | ||
5781 | init_tracer_debugfs(tr, tr->dir); | 5991 | init_tracer_debugfs(tr, tr->dir); |
5782 | 5992 | ||
@@ -5933,7 +6143,7 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer) | |||
5933 | trace_create_file("buffer_total_size_kb", 0444, d_tracer, | 6143 | trace_create_file("buffer_total_size_kb", 0444, d_tracer, |
5934 | tr, &tracing_total_entries_fops); | 6144 | tr, &tracing_total_entries_fops); |
5935 | 6145 | ||
5936 | trace_create_file("free_buffer", 0644, d_tracer, | 6146 | trace_create_file("free_buffer", 0200, d_tracer, |
5937 | tr, &tracing_free_buffer_fops); | 6147 | tr, &tracing_free_buffer_fops); |
5938 | 6148 | ||
5939 | trace_create_file("trace_marker", 0220, d_tracer, | 6149 | trace_create_file("trace_marker", 0220, d_tracer, |
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 20572ed88c5c..e7d643b8a907 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h | |||
@@ -214,7 +214,6 @@ struct trace_array { | |||
214 | struct dentry *event_dir; | 214 | struct dentry *event_dir; |
215 | struct list_head systems; | 215 | struct list_head systems; |
216 | struct list_head events; | 216 | struct list_head events; |
217 | struct task_struct *waiter; | ||
218 | int ref; | 217 | int ref; |
219 | }; | 218 | }; |
220 | 219 | ||
@@ -224,6 +223,11 @@ enum { | |||
224 | 223 | ||
225 | extern struct list_head ftrace_trace_arrays; | 224 | extern struct list_head ftrace_trace_arrays; |
226 | 225 | ||
226 | extern struct mutex trace_types_lock; | ||
227 | |||
228 | extern int trace_array_get(struct trace_array *tr); | ||
229 | extern void trace_array_put(struct trace_array *tr); | ||
230 | |||
227 | /* | 231 | /* |
228 | * The global tracer (top) should be the first trace array added, | 232 | * The global tracer (top) should be the first trace array added, |
229 | * but we check the flag anyway. | 233 | * but we check the flag anyway. |
@@ -554,11 +558,6 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu); | |||
554 | 558 | ||
555 | void poll_wait_pipe(struct trace_iterator *iter); | 559 | void poll_wait_pipe(struct trace_iterator *iter); |
556 | 560 | ||
557 | void ftrace(struct trace_array *tr, | ||
558 | struct trace_array_cpu *data, | ||
559 | unsigned long ip, | ||
560 | unsigned long parent_ip, | ||
561 | unsigned long flags, int pc); | ||
562 | void tracing_sched_switch_trace(struct trace_array *tr, | 561 | void tracing_sched_switch_trace(struct trace_array *tr, |
563 | struct task_struct *prev, | 562 | struct task_struct *prev, |
564 | struct task_struct *next, | 563 | struct task_struct *next, |
@@ -680,6 +679,15 @@ extern int trace_selftest_startup_sched_switch(struct tracer *trace, | |||
680 | struct trace_array *tr); | 679 | struct trace_array *tr); |
681 | extern int trace_selftest_startup_branch(struct tracer *trace, | 680 | extern int trace_selftest_startup_branch(struct tracer *trace, |
682 | struct trace_array *tr); | 681 | struct trace_array *tr); |
682 | /* | ||
683 | * Tracer data references selftest functions that only exist | ||
684 | * during boot up. These can be __init functions. Thus, when selftests | ||
685 | * are enabled, the tracers need to reference __init functions. | ||
686 | */ | ||
687 | #define __tracer_data __refdata | ||
688 | #else | ||
689 | /* Tracers are seldom changed. Optimize when selftests are disabled. */ | ||
690 | #define __tracer_data __read_mostly | ||
683 | #endif /* CONFIG_FTRACE_STARTUP_TEST */ | 691 | #endif /* CONFIG_FTRACE_STARTUP_TEST */ |
684 | 692 | ||
685 | extern void *head_page(struct trace_array_cpu *data); | 693 | extern void *head_page(struct trace_array_cpu *data); |
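__tracer_data resolves to __refdata when boot-time selftests are compiled in, so a tracer definition may legitimately point at __init selftest code without tripping the section-mismatch checks, and to __read_mostly otherwise. A hypothetical user, purely for illustration, would annotate its tracer like this:

	static struct tracer example_tracer __tracer_data = {
		.name		= "example",
		.init		= example_init,		/* hypothetical callbacks */
		.reset		= example_reset,
	#ifdef CONFIG_FTRACE_STARTUP_TEST
		.selftest	= trace_selftest_startup_example,	/* an __init function */
	#endif
	};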
@@ -774,6 +782,7 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags) | |||
774 | extern struct list_head ftrace_pids; | 782 | extern struct list_head ftrace_pids; |
775 | 783 | ||
776 | #ifdef CONFIG_FUNCTION_TRACER | 784 | #ifdef CONFIG_FUNCTION_TRACER |
785 | extern bool ftrace_filter_param __initdata; | ||
777 | static inline int ftrace_trace_task(struct task_struct *task) | 786 | static inline int ftrace_trace_task(struct task_struct *task) |
778 | { | 787 | { |
779 | if (list_empty(&ftrace_pids)) | 788 | if (list_empty(&ftrace_pids)) |
@@ -899,12 +908,6 @@ static inline void trace_branch_disable(void) | |||
899 | /* set ring buffers to default size if not already done so */ | 908 | /* set ring buffers to default size if not already done so */ |
900 | int tracing_update_buffers(void); | 909 | int tracing_update_buffers(void); |
901 | 910 | ||
902 | /* trace event type bit fields, not numeric */ | ||
903 | enum { | ||
904 | TRACE_EVENT_TYPE_PRINTF = 1, | ||
905 | TRACE_EVENT_TYPE_RAW = 2, | ||
906 | }; | ||
907 | |||
908 | struct ftrace_event_field { | 911 | struct ftrace_event_field { |
909 | struct list_head link; | 912 | struct list_head link; |
910 | const char *name; | 913 | const char *name; |
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 84b1e045faba..80c36bcf66e8 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c | |||
@@ -236,6 +236,10 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, | |||
236 | 236 | ||
237 | BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long)); | 237 | BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long)); |
238 | 238 | ||
239 | if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, | ||
240 | "perf buffer not large enough")) | ||
241 | return NULL; | ||
242 | |||
239 | pc = preempt_count(); | 243 | pc = preempt_count(); |
240 | 244 | ||
241 | *rctxp = perf_swevent_get_recursion_context(); | 245 | *rctxp = perf_swevent_get_recursion_context(); |
@@ -266,6 +270,10 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip, | |||
266 | struct pt_regs regs; | 270 | struct pt_regs regs; |
267 | int rctx; | 271 | int rctx; |
268 | 272 | ||
273 | head = this_cpu_ptr(event_function.perf_events); | ||
274 | if (hlist_empty(head)) | ||
275 | return; | ||
276 | |||
269 | #define ENTRY_SIZE (ALIGN(sizeof(struct ftrace_entry) + sizeof(u32), \ | 277 | #define ENTRY_SIZE (ALIGN(sizeof(struct ftrace_entry) + sizeof(u32), \ |
270 | sizeof(u64)) - sizeof(u32)) | 278 | sizeof(u64)) - sizeof(u32)) |
271 | 279 | ||
@@ -279,8 +287,6 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip, | |||
279 | 287 | ||
280 | entry->ip = ip; | 288 | entry->ip = ip; |
281 | entry->parent_ip = parent_ip; | 289 | entry->parent_ip = parent_ip; |
282 | |||
283 | head = this_cpu_ptr(event_function.perf_events); | ||
284 | perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0, | 290 | perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0, |
285 | 1, ®s, head, NULL); | 291 | 1, ®s, head, NULL); |
286 | 292 | ||
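Both trace_event_perf.c changes are inexpensive early-exit guards: perf_trace_buf_prepare() now refuses, with a one-time warning, any request larger than PERF_MAX_TRACE_SIZE instead of letting an oversized event overrun the per-CPU buffer, and perf_ftrace_function_call() fetches the per-CPU perf_events list before doing any setup, so a CPU with no attached perf event returns immediately rather than preparing a buffer it will never submit.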
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 27963e2bf4bf..898f868833f2 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c | |||
@@ -41,6 +41,23 @@ static LIST_HEAD(ftrace_common_fields); | |||
41 | static struct kmem_cache *field_cachep; | 41 | static struct kmem_cache *field_cachep; |
42 | static struct kmem_cache *file_cachep; | 42 | static struct kmem_cache *file_cachep; |
43 | 43 | ||
44 | #define SYSTEM_FL_FREE_NAME (1 << 31) | ||
45 | |||
46 | static inline int system_refcount(struct event_subsystem *system) | ||
47 | { | ||
48 | return system->ref_count & ~SYSTEM_FL_FREE_NAME; | ||
49 | } | ||
50 | |||
51 | static int system_refcount_inc(struct event_subsystem *system) | ||
52 | { | ||
53 | return (system->ref_count++) & ~SYSTEM_FL_FREE_NAME; | ||
54 | } | ||
55 | |||
56 | static int system_refcount_dec(struct event_subsystem *system) | ||
57 | { | ||
58 | return (--system->ref_count) & ~SYSTEM_FL_FREE_NAME; | ||
59 | } | ||
60 | |||
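Rather than grow event_subsystem with a separate flags field, the patch packs SYSTEM_FL_FREE_NAME into the top bit of ref_count: the flag records that the name string was kstrdup()'d and must be freed, and the three helpers mask it out so the reference counting itself is unchanged. The same trick in isolation, with invented names:

	#define OWNS_NAME	(1 << 31)	/* flag packed into the high bit */

	struct subsys {
		const char *name;
		int ref_count;			/* low bits: count, top bit: flag */
	};

	static inline int subsys_refcount(struct subsys *s)
	{
		return s->ref_count & ~OWNS_NAME;	/* count without the flag */
	}

	static void subsys_free(struct subsys *s)
	{
		if (s->ref_count & OWNS_NAME)
			kfree(s->name);		/* only if we duplicated it */
		kfree(s);
	}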
44 | /* Double loops, do not use break, only goto's work */ | 61 | /* Double loops, do not use break, only goto's work */ |
45 | #define do_for_each_event_file(tr, file) \ | 62 | #define do_for_each_event_file(tr, file) \ |
46 | list_for_each_entry(tr, &ftrace_trace_arrays, list) { \ | 63 | list_for_each_entry(tr, &ftrace_trace_arrays, list) { \ |
@@ -97,7 +114,7 @@ static int __trace_define_field(struct list_head *head, const char *type, | |||
97 | 114 | ||
98 | field = kmem_cache_alloc(field_cachep, GFP_TRACE); | 115 | field = kmem_cache_alloc(field_cachep, GFP_TRACE); |
99 | if (!field) | 116 | if (!field) |
100 | goto err; | 117 | return -ENOMEM; |
101 | 118 | ||
102 | field->name = name; | 119 | field->name = name; |
103 | field->type = type; | 120 | field->type = type; |
@@ -114,11 +131,6 @@ static int __trace_define_field(struct list_head *head, const char *type, | |||
114 | list_add(&field->link, head); | 131 | list_add(&field->link, head); |
115 | 132 | ||
116 | return 0; | 133 | return 0; |
117 | |||
118 | err: | ||
119 | kmem_cache_free(field_cachep, field); | ||
120 | |||
121 | return -ENOMEM; | ||
122 | } | 134 | } |
123 | 135 | ||
124 | int trace_define_field(struct ftrace_event_call *call, const char *type, | 136 | int trace_define_field(struct ftrace_event_call *call, const char *type, |
@@ -279,9 +291,11 @@ static int __ftrace_event_enable_disable(struct ftrace_event_file *file, | |||
279 | } | 291 | } |
280 | call->class->reg(call, TRACE_REG_UNREGISTER, file); | 292 | call->class->reg(call, TRACE_REG_UNREGISTER, file); |
281 | } | 293 | } |
282 | /* If in SOFT_MODE, just set the SOFT_DISABLE_BIT */ | 294 | /* If in SOFT_MODE, just set the SOFT_DISABLE_BIT, else clear it */ |
283 | if (file->flags & FTRACE_EVENT_FL_SOFT_MODE) | 295 | if (file->flags & FTRACE_EVENT_FL_SOFT_MODE) |
284 | set_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags); | 296 | set_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags); |
297 | else | ||
298 | clear_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags); | ||
285 | break; | 299 | break; |
286 | case 1: | 300 | case 1: |
287 | /* | 301 | /* |
@@ -349,8 +363,8 @@ static void __put_system(struct event_subsystem *system) | |||
349 | { | 363 | { |
350 | struct event_filter *filter = system->filter; | 364 | struct event_filter *filter = system->filter; |
351 | 365 | ||
352 | WARN_ON_ONCE(system->ref_count == 0); | 366 | WARN_ON_ONCE(system_refcount(system) == 0); |
353 | if (--system->ref_count) | 367 | if (system_refcount_dec(system)) |
354 | return; | 368 | return; |
355 | 369 | ||
356 | list_del(&system->list); | 370 | list_del(&system->list); |
@@ -359,13 +373,15 @@ static void __put_system(struct event_subsystem *system) | |||
359 | kfree(filter->filter_string); | 373 | kfree(filter->filter_string); |
360 | kfree(filter); | 374 | kfree(filter); |
361 | } | 375 | } |
376 | if (system->ref_count & SYSTEM_FL_FREE_NAME) | ||
377 | kfree(system->name); | ||
362 | kfree(system); | 378 | kfree(system); |
363 | } | 379 | } |
364 | 380 | ||
365 | static void __get_system(struct event_subsystem *system) | 381 | static void __get_system(struct event_subsystem *system) |
366 | { | 382 | { |
367 | WARN_ON_ONCE(system->ref_count == 0); | 383 | WARN_ON_ONCE(system_refcount(system) == 0); |
368 | system->ref_count++; | 384 | system_refcount_inc(system); |
369 | } | 385 | } |
370 | 386 | ||
371 | static void __get_system_dir(struct ftrace_subsystem_dir *dir) | 387 | static void __get_system_dir(struct ftrace_subsystem_dir *dir) |
@@ -379,7 +395,7 @@ static void __put_system_dir(struct ftrace_subsystem_dir *dir) | |||
379 | { | 395 | { |
380 | WARN_ON_ONCE(dir->ref_count == 0); | 396 | WARN_ON_ONCE(dir->ref_count == 0); |
381 | /* If the subsystem is about to be freed, the dir must be too */ | 397 | /* If the subsystem is about to be freed, the dir must be too */ |
382 | WARN_ON_ONCE(dir->subsystem->ref_count == 1 && dir->ref_count != 1); | 398 | WARN_ON_ONCE(system_refcount(dir->subsystem) == 1 && dir->ref_count != 1); |
383 | 399 | ||
384 | __put_system(dir->subsystem); | 400 | __put_system(dir->subsystem); |
385 | if (!--dir->ref_count) | 401 | if (!--dir->ref_count) |
@@ -394,16 +410,45 @@ static void put_system(struct ftrace_subsystem_dir *dir) | |||
394 | } | 410 | } |
395 | 411 | ||
396 | /* | 412 | /* |
413 | * Open and update trace_array ref count. | ||
414 | * Must have the current trace_array passed to it. | ||
415 | */ | ||
416 | static int tracing_open_generic_file(struct inode *inode, struct file *filp) | ||
417 | { | ||
418 | struct ftrace_event_file *file = inode->i_private; | ||
419 | struct trace_array *tr = file->tr; | ||
420 | int ret; | ||
421 | |||
422 | if (trace_array_get(tr) < 0) | ||
423 | return -ENODEV; | ||
424 | |||
425 | ret = tracing_open_generic(inode, filp); | ||
426 | if (ret < 0) | ||
427 | trace_array_put(tr); | ||
428 | return ret; | ||
429 | } | ||
430 | |||
431 | static int tracing_release_generic_file(struct inode *inode, struct file *filp) | ||
432 | { | ||
433 | struct ftrace_event_file *file = inode->i_private; | ||
434 | struct trace_array *tr = file->tr; | ||
435 | |||
436 | trace_array_put(tr); | ||
437 | |||
438 | return 0; | ||
439 | } | ||
440 | |||
441 | /* | ||
397 | * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events. | 442 | * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events. |
398 | */ | 443 | */ |
399 | static int __ftrace_set_clr_event(struct trace_array *tr, const char *match, | 444 | static int |
400 | const char *sub, const char *event, int set) | 445 | __ftrace_set_clr_event_nolock(struct trace_array *tr, const char *match, |
446 | const char *sub, const char *event, int set) | ||
401 | { | 447 | { |
402 | struct ftrace_event_file *file; | 448 | struct ftrace_event_file *file; |
403 | struct ftrace_event_call *call; | 449 | struct ftrace_event_call *call; |
404 | int ret = -EINVAL; | 450 | int ret = -EINVAL; |
405 | 451 | ||
406 | mutex_lock(&event_mutex); | ||
407 | list_for_each_entry(file, &tr->events, list) { | 452 | list_for_each_entry(file, &tr->events, list) { |
408 | 453 | ||
409 | call = file->event_call; | 454 | call = file->event_call; |
@@ -429,6 +474,17 @@ static int __ftrace_set_clr_event(struct trace_array *tr, const char *match, | |||
429 | 474 | ||
430 | ret = 0; | 475 | ret = 0; |
431 | } | 476 | } |
477 | |||
478 | return ret; | ||
479 | } | ||
480 | |||
481 | static int __ftrace_set_clr_event(struct trace_array *tr, const char *match, | ||
482 | const char *sub, const char *event, int set) | ||
483 | { | ||
484 | int ret; | ||
485 | |||
486 | mutex_lock(&event_mutex); | ||
487 | ret = __ftrace_set_clr_event_nolock(tr, match, sub, event, set); | ||
432 | mutex_unlock(&event_mutex); | 488 | mutex_unlock(&event_mutex); |
433 | 489 | ||
434 | return ret; | 490 | return ret; |
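Splitting __ftrace_set_clr_event() into a _nolock body plus a thin locking wrapper is the usual way to let a caller that already holds event_mutex (event_trace_del_tracer(), in the last hunk of this file) reuse the logic without self-deadlocking. Schematically:

	static int do_update_nolock(struct trace_array *tr)
	{
		lockdep_assert_held(&event_mutex);	/* optional check, not in this patch */
		/* ... walk tr->events and apply the change ... */
		return 0;
	}

	static int do_update(struct trace_array *tr)
	{
		int ret;

		mutex_lock(&event_mutex);
		ret = do_update_nolock(tr);
		mutex_unlock(&event_mutex);
		return ret;
	}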
@@ -624,17 +680,17 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt, | |||
624 | loff_t *ppos) | 680 | loff_t *ppos) |
625 | { | 681 | { |
626 | struct ftrace_event_file *file = filp->private_data; | 682 | struct ftrace_event_file *file = filp->private_data; |
627 | char *buf; | 683 | char buf[4] = "0"; |
628 | 684 | ||
629 | if (file->flags & FTRACE_EVENT_FL_ENABLED) { | 685 | if (file->flags & FTRACE_EVENT_FL_ENABLED && |
630 | if (file->flags & FTRACE_EVENT_FL_SOFT_DISABLED) | 686 | !(file->flags & FTRACE_EVENT_FL_SOFT_DISABLED)) |
631 | buf = "0*\n"; | 687 | strcpy(buf, "1"); |
632 | else if (file->flags & FTRACE_EVENT_FL_SOFT_MODE) | 688 | |
633 | buf = "1*\n"; | 689 | if (file->flags & FTRACE_EVENT_FL_SOFT_DISABLED || |
634 | else | 690 | file->flags & FTRACE_EVENT_FL_SOFT_MODE) |
635 | buf = "1\n"; | 691 | strcat(buf, "*"); |
636 | } else | 692 | |
637 | buf = "0\n"; | 693 | strcat(buf, "\n"); |
638 | 694 | ||
639 | return simple_read_from_buffer(ubuf, cnt, ppos, buf, strlen(buf)); | 695 | return simple_read_from_buffer(ubuf, cnt, ppos, buf, strlen(buf)); |
640 | } | 696 | } |
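The rewritten event_enable_read() composes its reply in a four-byte stack buffer instead of choosing between string literals. The possible outputs are "0", "1", "0*" and "1*", each newline-terminated: the leading digit reports whether the event is actually enabled, and the "*" is appended whenever the SOFT_DISABLED or SOFT_MODE flag is set.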
@@ -770,59 +826,33 @@ enum { | |||
770 | static void *f_next(struct seq_file *m, void *v, loff_t *pos) | 826 | static void *f_next(struct seq_file *m, void *v, loff_t *pos) |
771 | { | 827 | { |
772 | struct ftrace_event_call *call = m->private; | 828 | struct ftrace_event_call *call = m->private; |
773 | struct ftrace_event_field *field; | ||
774 | struct list_head *common_head = &ftrace_common_fields; | 829 | struct list_head *common_head = &ftrace_common_fields; |
775 | struct list_head *head = trace_get_fields(call); | 830 | struct list_head *head = trace_get_fields(call); |
831 | struct list_head *node = v; | ||
776 | 832 | ||
777 | (*pos)++; | 833 | (*pos)++; |
778 | 834 | ||
779 | switch ((unsigned long)v) { | 835 | switch ((unsigned long)v) { |
780 | case FORMAT_HEADER: | 836 | case FORMAT_HEADER: |
781 | if (unlikely(list_empty(common_head))) | 837 | node = common_head; |
782 | return NULL; | 838 | break; |
783 | |||
784 | field = list_entry(common_head->prev, | ||
785 | struct ftrace_event_field, link); | ||
786 | return field; | ||
787 | 839 | ||
788 | case FORMAT_FIELD_SEPERATOR: | 840 | case FORMAT_FIELD_SEPERATOR: |
789 | if (unlikely(list_empty(head))) | 841 | node = head; |
790 | return NULL; | 842 | break; |
791 | |||
792 | field = list_entry(head->prev, struct ftrace_event_field, link); | ||
793 | return field; | ||
794 | 843 | ||
795 | case FORMAT_PRINTFMT: | 844 | case FORMAT_PRINTFMT: |
796 | /* all done */ | 845 | /* all done */ |
797 | return NULL; | 846 | return NULL; |
798 | } | 847 | } |
799 | 848 | ||
800 | field = v; | 849 | node = node->prev; |
801 | if (field->link.prev == common_head) | 850 | if (node == common_head) |
802 | return (void *)FORMAT_FIELD_SEPERATOR; | 851 | return (void *)FORMAT_FIELD_SEPERATOR; |
803 | else if (field->link.prev == head) | 852 | else if (node == head) |
804 | return (void *)FORMAT_PRINTFMT; | 853 | return (void *)FORMAT_PRINTFMT; |
805 | 854 | else | |
806 | field = list_entry(field->link.prev, struct ftrace_event_field, link); | 855 | return node; |
807 | |||
808 | return field; | ||
809 | } | ||
810 | |||
811 | static void *f_start(struct seq_file *m, loff_t *pos) | ||
812 | { | ||
813 | loff_t l = 0; | ||
814 | void *p; | ||
815 | |||
816 | /* Start by showing the header */ | ||
817 | if (!*pos) | ||
818 | return (void *)FORMAT_HEADER; | ||
819 | |||
820 | p = (void *)FORMAT_HEADER; | ||
821 | do { | ||
822 | p = f_next(m, p, &l); | ||
823 | } while (p && l < *pos); | ||
824 | |||
825 | return p; | ||
826 | } | 856 | } |
827 | 857 | ||
828 | static int f_show(struct seq_file *m, void *v) | 858 | static int f_show(struct seq_file *m, void *v) |
@@ -848,8 +878,7 @@ static int f_show(struct seq_file *m, void *v) | |||
848 | return 0; | 878 | return 0; |
849 | } | 879 | } |
850 | 880 | ||
851 | field = v; | 881 | field = list_entry(v, struct ftrace_event_field, link); |
852 | |||
853 | /* | 882 | /* |
854 | * Smartly shows the array type(except dynamic array). | 883 | * Smartly shows the array type(except dynamic array). |
855 | * Normal: | 884 | * Normal: |
@@ -876,6 +905,17 @@ static int f_show(struct seq_file *m, void *v) | |||
876 | return 0; | 905 | return 0; |
877 | } | 906 | } |
878 | 907 | ||
908 | static void *f_start(struct seq_file *m, loff_t *pos) | ||
909 | { | ||
910 | void *p = (void *)FORMAT_HEADER; | ||
911 | loff_t l = 0; | ||
912 | |||
913 | while (l < *pos && p) | ||
914 | p = f_next(m, p, &l); | ||
915 | |||
916 | return p; | ||
917 | } | ||
918 | |||
879 | static void f_stop(struct seq_file *m, void *p) | 919 | static void f_stop(struct seq_file *m, void *p) |
880 | { | 920 | { |
881 | } | 921 | } |
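The reworked iterator treats FORMAT_HEADER, FORMAT_FIELD_SEPERATOR and FORMAT_PRINTFMT as small sentinel values cast to pointers and otherwise passes plain list_head pointers between f_next() and f_show(), which now recovers the field with list_entry(). f_start() no longer special-cases position zero: it starts from FORMAT_HEADER and replays f_next() until it reaches *pos, so the traversal logic lives in exactly one place and a restarted read cannot diverge from a sequential one.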
@@ -907,23 +947,14 @@ static ssize_t | |||
907 | event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) | 947 | event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) |
908 | { | 948 | { |
909 | struct ftrace_event_call *call = filp->private_data; | 949 | struct ftrace_event_call *call = filp->private_data; |
910 | struct trace_seq *s; | 950 | char buf[32]; |
911 | int r; | 951 | int len; |
912 | 952 | ||
913 | if (*ppos) | 953 | if (*ppos) |
914 | return 0; | 954 | return 0; |
915 | 955 | ||
916 | s = kmalloc(sizeof(*s), GFP_KERNEL); | 956 | len = sprintf(buf, "%d\n", call->event.type); |
917 | if (!s) | 957 | return simple_read_from_buffer(ubuf, cnt, ppos, buf, len); |
918 | return -ENOMEM; | ||
919 | |||
920 | trace_seq_init(s); | ||
921 | trace_seq_printf(s, "%d\n", call->event.type); | ||
922 | |||
923 | r = simple_read_from_buffer(ubuf, cnt, ppos, | ||
924 | s->buffer, s->len); | ||
925 | kfree(s); | ||
926 | return r; | ||
927 | } | 958 | } |
928 | 959 | ||
929 | static ssize_t | 960 | static ssize_t |
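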
@@ -992,6 +1023,7 @@ static int subsystem_open(struct inode *inode, struct file *filp) | |||
992 | int ret; | 1023 | int ret; |
993 | 1024 | ||
994 | /* Make sure the system still exists */ | 1025 | /* Make sure the system still exists */ |
1026 | mutex_lock(&trace_types_lock); | ||
995 | mutex_lock(&event_mutex); | 1027 | mutex_lock(&event_mutex); |
996 | list_for_each_entry(tr, &ftrace_trace_arrays, list) { | 1028 | list_for_each_entry(tr, &ftrace_trace_arrays, list) { |
997 | list_for_each_entry(dir, &tr->systems, list) { | 1029 | list_for_each_entry(dir, &tr->systems, list) { |
@@ -1007,6 +1039,7 @@ static int subsystem_open(struct inode *inode, struct file *filp) | |||
1007 | } | 1039 | } |
1008 | exit_loop: | 1040 | exit_loop: |
1009 | mutex_unlock(&event_mutex); | 1041 | mutex_unlock(&event_mutex); |
1042 | mutex_unlock(&trace_types_lock); | ||
1010 | 1043 | ||
1011 | if (!system) | 1044 | if (!system) |
1012 | return -ENODEV; | 1045 | return -ENODEV; |
@@ -1014,9 +1047,17 @@ static int subsystem_open(struct inode *inode, struct file *filp) | |||
1014 | /* Some versions of gcc think dir can be uninitialized here */ | 1047 | /* Some versions of gcc think dir can be uninitialized here */ |
1015 | WARN_ON(!dir); | 1048 | WARN_ON(!dir); |
1016 | 1049 | ||
1050 | /* Still need to increment the ref count of the trace array */ | ||
1051 | if (trace_array_get(tr) < 0) { | ||
1052 | put_system(dir); | ||
1053 | return -ENODEV; | ||
1054 | } | ||
1055 | |||
1017 | ret = tracing_open_generic(inode, filp); | 1056 | ret = tracing_open_generic(inode, filp); |
1018 | if (ret < 0) | 1057 | if (ret < 0) { |
1058 | trace_array_put(tr); | ||
1019 | put_system(dir); | 1059 | put_system(dir); |
1060 | } | ||
1020 | 1061 | ||
1021 | return ret; | 1062 | return ret; |
1022 | } | 1063 | } |
@@ -1027,16 +1068,23 @@ static int system_tr_open(struct inode *inode, struct file *filp) | |||
1027 | struct trace_array *tr = inode->i_private; | 1068 | struct trace_array *tr = inode->i_private; |
1028 | int ret; | 1069 | int ret; |
1029 | 1070 | ||
1071 | if (trace_array_get(tr) < 0) | ||
1072 | return -ENODEV; | ||
1073 | |||
1030 | /* Make a temporary dir that has no system but points to tr */ | 1074 | /* Make a temporary dir that has no system but points to tr */ |
1031 | dir = kzalloc(sizeof(*dir), GFP_KERNEL); | 1075 | dir = kzalloc(sizeof(*dir), GFP_KERNEL); |
1032 | if (!dir) | 1076 | if (!dir) { |
1077 | trace_array_put(tr); | ||
1033 | return -ENOMEM; | 1078 | return -ENOMEM; |
1079 | } | ||
1034 | 1080 | ||
1035 | dir->tr = tr; | 1081 | dir->tr = tr; |
1036 | 1082 | ||
1037 | ret = tracing_open_generic(inode, filp); | 1083 | ret = tracing_open_generic(inode, filp); |
1038 | if (ret < 0) | 1084 | if (ret < 0) { |
1085 | trace_array_put(tr); | ||
1039 | kfree(dir); | 1086 | kfree(dir); |
1087 | } | ||
1040 | 1088 | ||
1041 | filp->private_data = dir; | 1089 | filp->private_data = dir; |
1042 | 1090 | ||
@@ -1047,6 +1095,8 @@ static int subsystem_release(struct inode *inode, struct file *file) | |||
1047 | { | 1095 | { |
1048 | struct ftrace_subsystem_dir *dir = file->private_data; | 1096 | struct ftrace_subsystem_dir *dir = file->private_data; |
1049 | 1097 | ||
1098 | trace_array_put(dir->tr); | ||
1099 | |||
1050 | /* | 1100 | /* |
1051 | * If dir->subsystem is NULL, then this is a temporary | 1101 | * If dir->subsystem is NULL, then this is a temporary |
1052 | * descriptor that was made for a trace_array to enable | 1102 | * descriptor that was made for a trace_array to enable |
@@ -1143,6 +1193,7 @@ show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) | |||
1143 | 1193 | ||
1144 | static int ftrace_event_avail_open(struct inode *inode, struct file *file); | 1194 | static int ftrace_event_avail_open(struct inode *inode, struct file *file); |
1145 | static int ftrace_event_set_open(struct inode *inode, struct file *file); | 1195 | static int ftrace_event_set_open(struct inode *inode, struct file *file); |
1196 | static int ftrace_event_release(struct inode *inode, struct file *file); | ||
1146 | 1197 | ||
1147 | static const struct seq_operations show_event_seq_ops = { | 1198 | static const struct seq_operations show_event_seq_ops = { |
1148 | .start = t_start, | 1199 | .start = t_start, |
@@ -1170,13 +1221,14 @@ static const struct file_operations ftrace_set_event_fops = { | |||
1170 | .read = seq_read, | 1221 | .read = seq_read, |
1171 | .write = ftrace_event_write, | 1222 | .write = ftrace_event_write, |
1172 | .llseek = seq_lseek, | 1223 | .llseek = seq_lseek, |
1173 | .release = seq_release, | 1224 | .release = ftrace_event_release, |
1174 | }; | 1225 | }; |
1175 | 1226 | ||
1176 | static const struct file_operations ftrace_enable_fops = { | 1227 | static const struct file_operations ftrace_enable_fops = { |
1177 | .open = tracing_open_generic, | 1228 | .open = tracing_open_generic_file, |
1178 | .read = event_enable_read, | 1229 | .read = event_enable_read, |
1179 | .write = event_enable_write, | 1230 | .write = event_enable_write, |
1231 | .release = tracing_release_generic_file, | ||
1180 | .llseek = default_llseek, | 1232 | .llseek = default_llseek, |
1181 | }; | 1233 | }; |
1182 | 1234 | ||
@@ -1247,6 +1299,15 @@ ftrace_event_open(struct inode *inode, struct file *file, | |||
1247 | return ret; | 1299 | return ret; |
1248 | } | 1300 | } |
1249 | 1301 | ||
1302 | static int ftrace_event_release(struct inode *inode, struct file *file) | ||
1303 | { | ||
1304 | struct trace_array *tr = inode->i_private; | ||
1305 | |||
1306 | trace_array_put(tr); | ||
1307 | |||
1308 | return seq_release(inode, file); | ||
1309 | } | ||
1310 | |||
1250 | static int | 1311 | static int |
1251 | ftrace_event_avail_open(struct inode *inode, struct file *file) | 1312 | ftrace_event_avail_open(struct inode *inode, struct file *file) |
1252 | { | 1313 | { |
@@ -1260,12 +1321,19 @@ ftrace_event_set_open(struct inode *inode, struct file *file) | |||
1260 | { | 1321 | { |
1261 | const struct seq_operations *seq_ops = &show_set_event_seq_ops; | 1322 | const struct seq_operations *seq_ops = &show_set_event_seq_ops; |
1262 | struct trace_array *tr = inode->i_private; | 1323 | struct trace_array *tr = inode->i_private; |
1324 | int ret; | ||
1325 | |||
1326 | if (trace_array_get(tr) < 0) | ||
1327 | return -ENODEV; | ||
1263 | 1328 | ||
1264 | if ((file->f_mode & FMODE_WRITE) && | 1329 | if ((file->f_mode & FMODE_WRITE) && |
1265 | (file->f_flags & O_TRUNC)) | 1330 | (file->f_flags & O_TRUNC)) |
1266 | ftrace_clear_events(tr); | 1331 | ftrace_clear_events(tr); |
1267 | 1332 | ||
1268 | return ftrace_event_open(inode, file, seq_ops); | 1333 | ret = ftrace_event_open(inode, file, seq_ops); |
1334 | if (ret < 0) | ||
1335 | trace_array_put(tr); | ||
1336 | return ret; | ||
1269 | } | 1337 | } |
1270 | 1338 | ||
1271 | static struct event_subsystem * | 1339 | static struct event_subsystem * |
@@ -1279,7 +1347,15 @@ create_new_subsystem(const char *name) | |||
1279 | return NULL; | 1347 | return NULL; |
1280 | 1348 | ||
1281 | system->ref_count = 1; | 1349 | system->ref_count = 1; |
1282 | system->name = name; | 1350 | |
1351 | /* Only allocate if dynamic (kprobes and modules) */ | ||
1352 | if (!core_kernel_data((unsigned long)name)) { | ||
1353 | system->ref_count |= SYSTEM_FL_FREE_NAME; | ||
1354 | system->name = kstrdup(name, GFP_KERNEL); | ||
1355 | if (!system->name) | ||
1356 | goto out_free; | ||
1357 | } else | ||
1358 | system->name = name; | ||
1283 | 1359 | ||
1284 | system->filter = NULL; | 1360 | system->filter = NULL; |
1285 | 1361 | ||
@@ -1292,6 +1368,8 @@ create_new_subsystem(const char *name) | |||
1292 | return system; | 1368 | return system; |
1293 | 1369 | ||
1294 | out_free: | 1370 | out_free: |
1371 | if (system->ref_count & SYSTEM_FL_FREE_NAME) | ||
1372 | kfree(system->name); | ||
1295 | kfree(system); | 1373 | kfree(system); |
1296 | return NULL; | 1374 | return NULL; |
1297 | } | 1375 | } |
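create_new_subsystem() now only duplicates the subsystem name when it is not backed by core kernel data, i.e. when it comes from a module or a dynamically created (kprobe) event and could vanish while the subsystem is still referenced; built-in string literals are still used directly. The SYSTEM_FL_FREE_NAME bit set alongside the kstrdup() is what later tells __put_system(), and the out_free error path, whether kfree(system->name) is needed.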
@@ -1591,6 +1669,7 @@ static void __add_event_to_tracers(struct ftrace_event_call *call, | |||
1591 | int trace_add_event_call(struct ftrace_event_call *call) | 1669 | int trace_add_event_call(struct ftrace_event_call *call) |
1592 | { | 1670 | { |
1593 | int ret; | 1671 | int ret; |
1672 | mutex_lock(&trace_types_lock); | ||
1594 | mutex_lock(&event_mutex); | 1673 | mutex_lock(&event_mutex); |
1595 | 1674 | ||
1596 | ret = __register_event(call, NULL); | 1675 | ret = __register_event(call, NULL); |
@@ -1598,11 +1677,13 @@ int trace_add_event_call(struct ftrace_event_call *call) | |||
1598 | __add_event_to_tracers(call, NULL); | 1677 | __add_event_to_tracers(call, NULL); |
1599 | 1678 | ||
1600 | mutex_unlock(&event_mutex); | 1679 | mutex_unlock(&event_mutex); |
1680 | mutex_unlock(&trace_types_lock); | ||
1601 | return ret; | 1681 | return ret; |
1602 | } | 1682 | } |
1603 | 1683 | ||
1604 | /* | 1684 | /* |
1605 | * Must be called under locking both of event_mutex and trace_event_sem. | 1685 | * Must be called under locking of trace_types_lock, event_mutex and |
1686 | * trace_event_sem. | ||
1606 | */ | 1687 | */ |
1607 | static void __trace_remove_event_call(struct ftrace_event_call *call) | 1688 | static void __trace_remove_event_call(struct ftrace_event_call *call) |
1608 | { | 1689 | { |
@@ -1614,11 +1695,13 @@ static void __trace_remove_event_call(struct ftrace_event_call *call) | |||
1614 | /* Remove an event_call */ | 1695 | /* Remove an event_call */ |
1615 | void trace_remove_event_call(struct ftrace_event_call *call) | 1696 | void trace_remove_event_call(struct ftrace_event_call *call) |
1616 | { | 1697 | { |
1698 | mutex_lock(&trace_types_lock); | ||
1617 | mutex_lock(&event_mutex); | 1699 | mutex_lock(&event_mutex); |
1618 | down_write(&trace_event_sem); | 1700 | down_write(&trace_event_sem); |
1619 | __trace_remove_event_call(call); | 1701 | __trace_remove_event_call(call); |
1620 | up_write(&trace_event_sem); | 1702 | up_write(&trace_event_sem); |
1621 | mutex_unlock(&event_mutex); | 1703 | mutex_unlock(&event_mutex); |
1704 | mutex_unlock(&trace_types_lock); | ||
1622 | } | 1705 | } |
1623 | 1706 | ||
1624 | #define for_each_event(event, start, end) \ | 1707 | #define for_each_event(event, start, end) \ |
@@ -1762,6 +1845,7 @@ static int trace_module_notify(struct notifier_block *self, | |||
1762 | { | 1845 | { |
1763 | struct module *mod = data; | 1846 | struct module *mod = data; |
1764 | 1847 | ||
1848 | mutex_lock(&trace_types_lock); | ||
1765 | mutex_lock(&event_mutex); | 1849 | mutex_lock(&event_mutex); |
1766 | switch (val) { | 1850 | switch (val) { |
1767 | case MODULE_STATE_COMING: | 1851 | case MODULE_STATE_COMING: |
@@ -1772,6 +1856,7 @@ static int trace_module_notify(struct notifier_block *self, | |||
1772 | break; | 1856 | break; |
1773 | } | 1857 | } |
1774 | mutex_unlock(&event_mutex); | 1858 | mutex_unlock(&event_mutex); |
1859 | mutex_unlock(&trace_types_lock); | ||
1775 | 1860 | ||
1776 | return 0; | 1861 | return 0; |
1777 | } | 1862 | } |
@@ -2011,10 +2096,7 @@ event_enable_func(struct ftrace_hash *hash, | |||
2011 | int ret; | 2096 | int ret; |
2012 | 2097 | ||
2013 | /* hash funcs only work with set_ftrace_filter */ | 2098 | /* hash funcs only work with set_ftrace_filter */ |
2014 | if (!enabled) | 2099 | if (!enabled || !param) |
2015 | return -EINVAL; | ||
2016 | |||
2017 | if (!param) | ||
2018 | return -EINVAL; | 2100 | return -EINVAL; |
2019 | 2101 | ||
2020 | system = strsep(&param, ":"); | 2102 | system = strsep(&param, ":"); |
@@ -2329,11 +2411,11 @@ early_event_add_tracer(struct dentry *parent, struct trace_array *tr) | |||
2329 | 2411 | ||
2330 | int event_trace_del_tracer(struct trace_array *tr) | 2412 | int event_trace_del_tracer(struct trace_array *tr) |
2331 | { | 2413 | { |
2332 | /* Disable any running events */ | ||
2333 | __ftrace_set_clr_event(tr, NULL, NULL, NULL, 0); | ||
2334 | |||
2335 | mutex_lock(&event_mutex); | 2414 | mutex_lock(&event_mutex); |
2336 | 2415 | ||
2416 | /* Disable any running events */ | ||
2417 | __ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0); | ||
2418 | |||
2337 | down_write(&trace_event_sem); | 2419 | down_write(&trace_event_sem); |
2338 | __trace_remove_event_dirs(tr); | 2420 | __trace_remove_event_dirs(tr); |
2339 | debugfs_remove_recursive(tr->event_dir); | 2421 | debugfs_remove_recursive(tr->event_dir); |
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index e1b653f7e1ca..0c7b75a8acc8 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c | |||
@@ -44,6 +44,7 @@ enum filter_op_ids | |||
44 | OP_LE, | 44 | OP_LE, |
45 | OP_GT, | 45 | OP_GT, |
46 | OP_GE, | 46 | OP_GE, |
47 | OP_BAND, | ||
47 | OP_NONE, | 48 | OP_NONE, |
48 | OP_OPEN_PAREN, | 49 | OP_OPEN_PAREN, |
49 | }; | 50 | }; |
@@ -54,6 +55,7 @@ struct filter_op { | |||
54 | int precedence; | 55 | int precedence; |
55 | }; | 56 | }; |
56 | 57 | ||
58 | /* Order must be the same as enum filter_op_ids above */ | ||
57 | static struct filter_op filter_ops[] = { | 59 | static struct filter_op filter_ops[] = { |
58 | { OP_OR, "||", 1 }, | 60 | { OP_OR, "||", 1 }, |
59 | { OP_AND, "&&", 2 }, | 61 | { OP_AND, "&&", 2 }, |
@@ -64,6 +66,7 @@ static struct filter_op filter_ops[] = { | |||
64 | { OP_LE, "<=", 5 }, | 66 | { OP_LE, "<=", 5 }, |
65 | { OP_GT, ">", 5 }, | 67 | { OP_GT, ">", 5 }, |
66 | { OP_GE, ">=", 5 }, | 68 | { OP_GE, ">=", 5 }, |
69 | { OP_BAND, "&", 6 }, | ||
67 | { OP_NONE, "OP_NONE", 0 }, | 70 | { OP_NONE, "OP_NONE", 0 }, |
68 | { OP_OPEN_PAREN, "(", 0 }, | 71 | { OP_OPEN_PAREN, "(", 0 }, |
69 | }; | 72 | }; |
@@ -156,6 +159,9 @@ static int filter_pred_##type(struct filter_pred *pred, void *event) \ | |||
156 | case OP_GE: \ | 159 | case OP_GE: \ |
157 | match = (*addr >= val); \ | 160 | match = (*addr >= val); \ |
158 | break; \ | 161 | break; \ |
162 | case OP_BAND: \ | ||
163 | match = (*addr & val); \ | ||
164 | break; \ | ||
159 | default: \ | 165 | default: \ |
160 | break; \ | 166 | break; \ |
161 | } \ | 167 | } \ |
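The two filter hunks above teach the event-filter parser a bitwise-AND operator: '&' is given precedence 6, above the comparison operators, and the generated predicate matches whenever any of the masked bits are set in the field. A sketch of using it from user space; the sched_switch event, its prev_state field and the debugfs mount point are assumptions to adapt for the system at hand:

    /* Illustrative: set an event filter that uses the new '&' operator. */
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
            const char *path =
                    "/sys/kernel/debug/tracing/events/sched/sched_switch/filter";
            const char *filter = "prev_state & 1\n";  /* match when bit 0 is set */
            int fd = open(path, O_WRONLY);

            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            if (write(fd, filter, strlen(filter)) < 0)
                    perror("write");
            close(fd);
            return 0;
    }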
@@ -640,7 +646,7 @@ void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s) | |||
640 | if (filter && filter->filter_string) | 646 | if (filter && filter->filter_string) |
641 | trace_seq_printf(s, "%s\n", filter->filter_string); | 647 | trace_seq_printf(s, "%s\n", filter->filter_string); |
642 | else | 648 | else |
643 | trace_seq_printf(s, "none\n"); | 649 | trace_seq_puts(s, "none\n"); |
644 | mutex_unlock(&event_mutex); | 650 | mutex_unlock(&event_mutex); |
645 | } | 651 | } |
646 | 652 | ||
@@ -654,7 +660,7 @@ void print_subsystem_event_filter(struct event_subsystem *system, | |||
654 | if (filter && filter->filter_string) | 660 | if (filter && filter->filter_string) |
655 | trace_seq_printf(s, "%s\n", filter->filter_string); | 661 | trace_seq_printf(s, "%s\n", filter->filter_string); |
656 | else | 662 | else |
657 | trace_seq_printf(s, DEFAULT_SYS_FILTER_MESSAGE "\n"); | 663 | trace_seq_puts(s, DEFAULT_SYS_FILTER_MESSAGE "\n"); |
658 | mutex_unlock(&event_mutex); | 664 | mutex_unlock(&event_mutex); |
659 | } | 665 | } |
660 | 666 | ||
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index c4d6d7191988..38fe1483c508 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c | |||
@@ -199,7 +199,7 @@ static int func_set_flag(u32 old_flags, u32 bit, int set) | |||
199 | return 0; | 199 | return 0; |
200 | } | 200 | } |
201 | 201 | ||
202 | static struct tracer function_trace __read_mostly = | 202 | static struct tracer function_trace __tracer_data = |
203 | { | 203 | { |
204 | .name = "function", | 204 | .name = "function", |
205 | .init = function_trace_init, | 205 | .init = function_trace_init, |
@@ -290,6 +290,21 @@ ftrace_stacktrace_count(unsigned long ip, unsigned long parent_ip, void **data) | |||
290 | trace_dump_stack(STACK_SKIP); | 290 | trace_dump_stack(STACK_SKIP); |
291 | } | 291 | } |
292 | 292 | ||
293 | static void | ||
294 | ftrace_dump_probe(unsigned long ip, unsigned long parent_ip, void **data) | ||
295 | { | ||
296 | if (update_count(data)) | ||
297 | ftrace_dump(DUMP_ALL); | ||
298 | } | ||
299 | |||
300 | /* Only dump the current CPU buffer. */ | ||
301 | static void | ||
302 | ftrace_cpudump_probe(unsigned long ip, unsigned long parent_ip, void **data) | ||
303 | { | ||
304 | if (update_count(data)) | ||
305 | ftrace_dump(DUMP_ORIG); | ||
306 | } | ||
307 | |||
293 | static int | 308 | static int |
294 | ftrace_probe_print(const char *name, struct seq_file *m, | 309 | ftrace_probe_print(const char *name, struct seq_file *m, |
295 | unsigned long ip, void *data) | 310 | unsigned long ip, void *data) |
@@ -327,6 +342,20 @@ ftrace_stacktrace_print(struct seq_file *m, unsigned long ip, | |||
327 | return ftrace_probe_print("stacktrace", m, ip, data); | 342 | return ftrace_probe_print("stacktrace", m, ip, data); |
328 | } | 343 | } |
329 | 344 | ||
345 | static int | ||
346 | ftrace_dump_print(struct seq_file *m, unsigned long ip, | ||
347 | struct ftrace_probe_ops *ops, void *data) | ||
348 | { | ||
349 | return ftrace_probe_print("dump", m, ip, data); | ||
350 | } | ||
351 | |||
352 | static int | ||
353 | ftrace_cpudump_print(struct seq_file *m, unsigned long ip, | ||
354 | struct ftrace_probe_ops *ops, void *data) | ||
355 | { | ||
356 | return ftrace_probe_print("cpudump", m, ip, data); | ||
357 | } | ||
358 | |||
330 | static struct ftrace_probe_ops traceon_count_probe_ops = { | 359 | static struct ftrace_probe_ops traceon_count_probe_ops = { |
331 | .func = ftrace_traceon_count, | 360 | .func = ftrace_traceon_count, |
332 | .print = ftrace_traceon_print, | 361 | .print = ftrace_traceon_print, |
@@ -342,6 +371,16 @@ static struct ftrace_probe_ops stacktrace_count_probe_ops = { | |||
342 | .print = ftrace_stacktrace_print, | 371 | .print = ftrace_stacktrace_print, |
343 | }; | 372 | }; |
344 | 373 | ||
374 | static struct ftrace_probe_ops dump_probe_ops = { | ||
375 | .func = ftrace_dump_probe, | ||
376 | .print = ftrace_dump_print, | ||
377 | }; | ||
378 | |||
379 | static struct ftrace_probe_ops cpudump_probe_ops = { | ||
380 | .func = ftrace_cpudump_probe, | ||
381 | .print = ftrace_cpudump_print, | ||
382 | }; | ||
383 | |||
345 | static struct ftrace_probe_ops traceon_probe_ops = { | 384 | static struct ftrace_probe_ops traceon_probe_ops = { |
346 | .func = ftrace_traceon, | 385 | .func = ftrace_traceon, |
347 | .print = ftrace_traceon_print, | 386 | .print = ftrace_traceon_print, |
@@ -425,6 +464,32 @@ ftrace_stacktrace_callback(struct ftrace_hash *hash, | |||
425 | param, enable); | 464 | param, enable); |
426 | } | 465 | } |
427 | 466 | ||
467 | static int | ||
468 | ftrace_dump_callback(struct ftrace_hash *hash, | ||
469 | char *glob, char *cmd, char *param, int enable) | ||
470 | { | ||
471 | struct ftrace_probe_ops *ops; | ||
472 | |||
473 | ops = &dump_probe_ops; | ||
474 | |||
475 | /* Only dump once. */ | ||
476 | return ftrace_trace_probe_callback(ops, hash, glob, cmd, | ||
477 | "1", enable); | ||
478 | } | ||
479 | |||
480 | static int | ||
481 | ftrace_cpudump_callback(struct ftrace_hash *hash, | ||
482 | char *glob, char *cmd, char *param, int enable) | ||
483 | { | ||
484 | struct ftrace_probe_ops *ops; | ||
485 | |||
486 | ops = &cpudump_probe_ops; | ||
487 | |||
488 | /* Only dump once. */ | ||
489 | return ftrace_trace_probe_callback(ops, hash, glob, cmd, | ||
490 | "1", enable); | ||
491 | } | ||
492 | |||
428 | static struct ftrace_func_command ftrace_traceon_cmd = { | 493 | static struct ftrace_func_command ftrace_traceon_cmd = { |
429 | .name = "traceon", | 494 | .name = "traceon", |
430 | .func = ftrace_trace_onoff_callback, | 495 | .func = ftrace_trace_onoff_callback, |
@@ -440,6 +505,16 @@ static struct ftrace_func_command ftrace_stacktrace_cmd = { | |||
440 | .func = ftrace_stacktrace_callback, | 505 | .func = ftrace_stacktrace_callback, |
441 | }; | 506 | }; |
442 | 507 | ||
508 | static struct ftrace_func_command ftrace_dump_cmd = { | ||
509 | .name = "dump", | ||
510 | .func = ftrace_dump_callback, | ||
511 | }; | ||
512 | |||
513 | static struct ftrace_func_command ftrace_cpudump_cmd = { | ||
514 | .name = "cpudump", | ||
515 | .func = ftrace_cpudump_callback, | ||
516 | }; | ||
517 | |||
443 | static int __init init_func_cmd_traceon(void) | 518 | static int __init init_func_cmd_traceon(void) |
444 | { | 519 | { |
445 | int ret; | 520 | int ret; |
@@ -450,13 +525,31 @@ static int __init init_func_cmd_traceon(void) | |||
450 | 525 | ||
451 | ret = register_ftrace_command(&ftrace_traceon_cmd); | 526 | ret = register_ftrace_command(&ftrace_traceon_cmd); |
452 | if (ret) | 527 | if (ret) |
453 | unregister_ftrace_command(&ftrace_traceoff_cmd); | 528 | goto out_free_traceoff; |
454 | 529 | ||
455 | ret = register_ftrace_command(&ftrace_stacktrace_cmd); | 530 | ret = register_ftrace_command(&ftrace_stacktrace_cmd); |
456 | if (ret) { | 531 | if (ret) |
457 | unregister_ftrace_command(&ftrace_traceoff_cmd); | 532 | goto out_free_traceon; |
458 | unregister_ftrace_command(&ftrace_traceon_cmd); | 533 | |
459 | } | 534 | ret = register_ftrace_command(&ftrace_dump_cmd); |
535 | if (ret) | ||
536 | goto out_free_stacktrace; | ||
537 | |||
538 | ret = register_ftrace_command(&ftrace_cpudump_cmd); | ||
539 | if (ret) | ||
540 | goto out_free_dump; | ||
541 | |||
542 | return 0; | ||
543 | |||
544 | out_free_dump: | ||
545 | unregister_ftrace_command(&ftrace_dump_cmd); | ||
546 | out_free_stacktrace: | ||
547 | unregister_ftrace_command(&ftrace_stacktrace_cmd); | ||
548 | out_free_traceon: | ||
549 | unregister_ftrace_command(&ftrace_traceon_cmd); | ||
550 | out_free_traceoff: | ||
551 | unregister_ftrace_command(&ftrace_traceoff_cmd); | ||
552 | |||
460 | return ret; | 553 | return ret; |
461 | } | 554 | } |
462 | #else | 555 | #else |
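The trace_functions.c hunks above add two new function triggers, "dump" and "cpudump", which call ftrace_dump() for all buffers or only the current CPU's buffer the first time a probed function is hit (the callbacks force a count of "1"), and rework init_func_cmd_traceon() to unwind with gotos if any registration fails. A sketch of registering a trigger the same way; the command name and callback are made up for illustration:

    /* Sketch, not code from this commit: a custom function trigger. */
    #include <linux/ftrace.h>
    #include <linux/init.h>
    #include <linux/printk.h>

    static int
    my_trigger_callback(struct ftrace_hash *hash, char *glob,
                        char *cmd, char *param, int enable)
    {
            /* parse param and arm a probe here; return 0 on success */
            return 0;
    }

    static struct ftrace_func_command my_trigger_cmd = {
            .name = "mytrigger",
            .func = my_trigger_callback,
    };

    static int __init my_trigger_init(void)
    {
            int ret = register_ftrace_command(&my_trigger_cmd);

            if (ret)
                    pr_warn("mytrigger: register_ftrace_command failed: %d\n", ret);
            return ret;
    }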
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 8388bc99f2ee..b5c09242683d 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c | |||
@@ -446,7 +446,7 @@ print_graph_proc(struct trace_seq *s, pid_t pid) | |||
446 | 446 | ||
447 | /* First spaces to align center */ | 447 | /* First spaces to align center */ |
448 | for (i = 0; i < spaces / 2; i++) { | 448 | for (i = 0; i < spaces / 2; i++) { |
449 | ret = trace_seq_printf(s, " "); | 449 | ret = trace_seq_putc(s, ' '); |
450 | if (!ret) | 450 | if (!ret) |
451 | return TRACE_TYPE_PARTIAL_LINE; | 451 | return TRACE_TYPE_PARTIAL_LINE; |
452 | } | 452 | } |
@@ -457,7 +457,7 @@ print_graph_proc(struct trace_seq *s, pid_t pid) | |||
457 | 457 | ||
458 | /* Last spaces to align center */ | 458 | /* Last spaces to align center */ |
459 | for (i = 0; i < spaces - (spaces / 2); i++) { | 459 | for (i = 0; i < spaces - (spaces / 2); i++) { |
460 | ret = trace_seq_printf(s, " "); | 460 | ret = trace_seq_putc(s, ' '); |
461 | if (!ret) | 461 | if (!ret) |
462 | return TRACE_TYPE_PARTIAL_LINE; | 462 | return TRACE_TYPE_PARTIAL_LINE; |
463 | } | 463 | } |
@@ -503,7 +503,7 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data) | |||
503 | ------------------------------------------ | 503 | ------------------------------------------ |
504 | 504 | ||
505 | */ | 505 | */ |
506 | ret = trace_seq_printf(s, | 506 | ret = trace_seq_puts(s, |
507 | " ------------------------------------------\n"); | 507 | " ------------------------------------------\n"); |
508 | if (!ret) | 508 | if (!ret) |
509 | return TRACE_TYPE_PARTIAL_LINE; | 509 | return TRACE_TYPE_PARTIAL_LINE; |
@@ -516,7 +516,7 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data) | |||
516 | if (ret == TRACE_TYPE_PARTIAL_LINE) | 516 | if (ret == TRACE_TYPE_PARTIAL_LINE) |
517 | return TRACE_TYPE_PARTIAL_LINE; | 517 | return TRACE_TYPE_PARTIAL_LINE; |
518 | 518 | ||
519 | ret = trace_seq_printf(s, " => "); | 519 | ret = trace_seq_puts(s, " => "); |
520 | if (!ret) | 520 | if (!ret) |
521 | return TRACE_TYPE_PARTIAL_LINE; | 521 | return TRACE_TYPE_PARTIAL_LINE; |
522 | 522 | ||
@@ -524,7 +524,7 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data) | |||
524 | if (ret == TRACE_TYPE_PARTIAL_LINE) | 524 | if (ret == TRACE_TYPE_PARTIAL_LINE) |
525 | return TRACE_TYPE_PARTIAL_LINE; | 525 | return TRACE_TYPE_PARTIAL_LINE; |
526 | 526 | ||
527 | ret = trace_seq_printf(s, | 527 | ret = trace_seq_puts(s, |
528 | "\n ------------------------------------------\n\n"); | 528 | "\n ------------------------------------------\n\n"); |
529 | if (!ret) | 529 | if (!ret) |
530 | return TRACE_TYPE_PARTIAL_LINE; | 530 | return TRACE_TYPE_PARTIAL_LINE; |
@@ -645,7 +645,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr, | |||
645 | ret = print_graph_proc(s, pid); | 645 | ret = print_graph_proc(s, pid); |
646 | if (ret == TRACE_TYPE_PARTIAL_LINE) | 646 | if (ret == TRACE_TYPE_PARTIAL_LINE) |
647 | return TRACE_TYPE_PARTIAL_LINE; | 647 | return TRACE_TYPE_PARTIAL_LINE; |
648 | ret = trace_seq_printf(s, " | "); | 648 | ret = trace_seq_puts(s, " | "); |
649 | if (!ret) | 649 | if (!ret) |
650 | return TRACE_TYPE_PARTIAL_LINE; | 650 | return TRACE_TYPE_PARTIAL_LINE; |
651 | } | 651 | } |
@@ -657,9 +657,9 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr, | |||
657 | return ret; | 657 | return ret; |
658 | 658 | ||
659 | if (type == TRACE_GRAPH_ENT) | 659 | if (type == TRACE_GRAPH_ENT) |
660 | ret = trace_seq_printf(s, "==========>"); | 660 | ret = trace_seq_puts(s, "==========>"); |
661 | else | 661 | else |
662 | ret = trace_seq_printf(s, "<=========="); | 662 | ret = trace_seq_puts(s, "<=========="); |
663 | 663 | ||
664 | if (!ret) | 664 | if (!ret) |
665 | return TRACE_TYPE_PARTIAL_LINE; | 665 | return TRACE_TYPE_PARTIAL_LINE; |
@@ -668,7 +668,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr, | |||
668 | if (ret != TRACE_TYPE_HANDLED) | 668 | if (ret != TRACE_TYPE_HANDLED) |
669 | return ret; | 669 | return ret; |
670 | 670 | ||
671 | ret = trace_seq_printf(s, "\n"); | 671 | ret = trace_seq_putc(s, '\n'); |
672 | 672 | ||
673 | if (!ret) | 673 | if (!ret) |
674 | return TRACE_TYPE_PARTIAL_LINE; | 674 | return TRACE_TYPE_PARTIAL_LINE; |
@@ -705,13 +705,13 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s) | |||
705 | len += strlen(nsecs_str); | 705 | len += strlen(nsecs_str); |
706 | } | 706 | } |
707 | 707 | ||
708 | ret = trace_seq_printf(s, " us "); | 708 | ret = trace_seq_puts(s, " us "); |
709 | if (!ret) | 709 | if (!ret) |
710 | return TRACE_TYPE_PARTIAL_LINE; | 710 | return TRACE_TYPE_PARTIAL_LINE; |
711 | 711 | ||
712 | /* Print remaining spaces to fit the row's width */ | 712 | /* Print remaining spaces to fit the row's width */ |
713 | for (i = len; i < 7; i++) { | 713 | for (i = len; i < 7; i++) { |
714 | ret = trace_seq_printf(s, " "); | 714 | ret = trace_seq_putc(s, ' '); |
715 | if (!ret) | 715 | if (!ret) |
716 | return TRACE_TYPE_PARTIAL_LINE; | 716 | return TRACE_TYPE_PARTIAL_LINE; |
717 | } | 717 | } |
@@ -731,13 +731,13 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s, | |||
731 | /* No real data, just filling the column with spaces */ | 731 | /* No real data, just filling the column with spaces */ |
732 | switch (duration) { | 732 | switch (duration) { |
733 | case DURATION_FILL_FULL: | 733 | case DURATION_FILL_FULL: |
734 | ret = trace_seq_printf(s, " | "); | 734 | ret = trace_seq_puts(s, " | "); |
735 | return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; | 735 | return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; |
736 | case DURATION_FILL_START: | 736 | case DURATION_FILL_START: |
737 | ret = trace_seq_printf(s, " "); | 737 | ret = trace_seq_puts(s, " "); |
738 | return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; | 738 | return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; |
739 | case DURATION_FILL_END: | 739 | case DURATION_FILL_END: |
740 | ret = trace_seq_printf(s, " |"); | 740 | ret = trace_seq_puts(s, " |"); |
741 | return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; | 741 | return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; |
742 | } | 742 | } |
743 | 743 | ||
@@ -745,10 +745,10 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s, | |||
745 | if (flags & TRACE_GRAPH_PRINT_OVERHEAD) { | 745 | if (flags & TRACE_GRAPH_PRINT_OVERHEAD) { |
746 | /* Duration exceeded 100 msecs */ | 746 | /* Duration exceeded 100 msecs */ |
747 | if (duration > 100000ULL) | 747 | if (duration > 100000ULL) |
748 | ret = trace_seq_printf(s, "! "); | 748 | ret = trace_seq_puts(s, "! "); |
749 | /* Duration exceeded 10 msecs */ | 749 | /* Duration exceeded 10 msecs */ |
750 | else if (duration > 10000ULL) | 750 | else if (duration > 10000ULL) |
751 | ret = trace_seq_printf(s, "+ "); | 751 | ret = trace_seq_puts(s, "+ "); |
752 | } | 752 | } |
753 | 753 | ||
754 | /* | 754 | /* |
@@ -757,7 +757,7 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s, | |||
757 | * to fill out the space. | 757 | * to fill out the space. |
758 | */ | 758 | */ |
759 | if (ret == -1) | 759 | if (ret == -1) |
760 | ret = trace_seq_printf(s, " "); | 760 | ret = trace_seq_puts(s, " "); |
761 | 761 | ||
762 | /* Catching here any failure happened above */ | 762 | /* Catching here any failure happened above */ |
763 | if (!ret) | 763 | if (!ret) |
@@ -767,7 +767,7 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s, | |||
767 | if (ret != TRACE_TYPE_HANDLED) | 767 | if (ret != TRACE_TYPE_HANDLED) |
768 | return ret; | 768 | return ret; |
769 | 769 | ||
770 | ret = trace_seq_printf(s, "| "); | 770 | ret = trace_seq_puts(s, "| "); |
771 | if (!ret) | 771 | if (!ret) |
772 | return TRACE_TYPE_PARTIAL_LINE; | 772 | return TRACE_TYPE_PARTIAL_LINE; |
773 | 773 | ||
@@ -817,7 +817,7 @@ print_graph_entry_leaf(struct trace_iterator *iter, | |||
817 | 817 | ||
818 | /* Function */ | 818 | /* Function */ |
819 | for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { | 819 | for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { |
820 | ret = trace_seq_printf(s, " "); | 820 | ret = trace_seq_putc(s, ' '); |
821 | if (!ret) | 821 | if (!ret) |
822 | return TRACE_TYPE_PARTIAL_LINE; | 822 | return TRACE_TYPE_PARTIAL_LINE; |
823 | } | 823 | } |
@@ -858,7 +858,7 @@ print_graph_entry_nested(struct trace_iterator *iter, | |||
858 | 858 | ||
859 | /* Function */ | 859 | /* Function */ |
860 | for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { | 860 | for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { |
861 | ret = trace_seq_printf(s, " "); | 861 | ret = trace_seq_putc(s, ' '); |
862 | if (!ret) | 862 | if (!ret) |
863 | return TRACE_TYPE_PARTIAL_LINE; | 863 | return TRACE_TYPE_PARTIAL_LINE; |
864 | } | 864 | } |
@@ -917,7 +917,7 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s, | |||
917 | if (ret == TRACE_TYPE_PARTIAL_LINE) | 917 | if (ret == TRACE_TYPE_PARTIAL_LINE) |
918 | return TRACE_TYPE_PARTIAL_LINE; | 918 | return TRACE_TYPE_PARTIAL_LINE; |
919 | 919 | ||
920 | ret = trace_seq_printf(s, " | "); | 920 | ret = trace_seq_puts(s, " | "); |
921 | if (!ret) | 921 | if (!ret) |
922 | return TRACE_TYPE_PARTIAL_LINE; | 922 | return TRACE_TYPE_PARTIAL_LINE; |
923 | } | 923 | } |
@@ -1117,7 +1117,7 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, | |||
1117 | 1117 | ||
1118 | /* Closing brace */ | 1118 | /* Closing brace */ |
1119 | for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) { | 1119 | for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) { |
1120 | ret = trace_seq_printf(s, " "); | 1120 | ret = trace_seq_putc(s, ' '); |
1121 | if (!ret) | 1121 | if (!ret) |
1122 | return TRACE_TYPE_PARTIAL_LINE; | 1122 | return TRACE_TYPE_PARTIAL_LINE; |
1123 | } | 1123 | } |
@@ -1129,7 +1129,7 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, | |||
1129 | * belongs to, write out the function name. | 1129 | * belongs to, write out the function name. |
1130 | */ | 1130 | */ |
1131 | if (func_match) { | 1131 | if (func_match) { |
1132 | ret = trace_seq_printf(s, "}\n"); | 1132 | ret = trace_seq_puts(s, "}\n"); |
1133 | if (!ret) | 1133 | if (!ret) |
1134 | return TRACE_TYPE_PARTIAL_LINE; | 1134 | return TRACE_TYPE_PARTIAL_LINE; |
1135 | } else { | 1135 | } else { |
@@ -1179,13 +1179,13 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent, | |||
1179 | /* Indentation */ | 1179 | /* Indentation */ |
1180 | if (depth > 0) | 1180 | if (depth > 0) |
1181 | for (i = 0; i < (depth + 1) * TRACE_GRAPH_INDENT; i++) { | 1181 | for (i = 0; i < (depth + 1) * TRACE_GRAPH_INDENT; i++) { |
1182 | ret = trace_seq_printf(s, " "); | 1182 | ret = trace_seq_putc(s, ' '); |
1183 | if (!ret) | 1183 | if (!ret) |
1184 | return TRACE_TYPE_PARTIAL_LINE; | 1184 | return TRACE_TYPE_PARTIAL_LINE; |
1185 | } | 1185 | } |
1186 | 1186 | ||
1187 | /* The comment */ | 1187 | /* The comment */ |
1188 | ret = trace_seq_printf(s, "/* "); | 1188 | ret = trace_seq_puts(s, "/* "); |
1189 | if (!ret) | 1189 | if (!ret) |
1190 | return TRACE_TYPE_PARTIAL_LINE; | 1190 | return TRACE_TYPE_PARTIAL_LINE; |
1191 | 1191 | ||
@@ -1216,7 +1216,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent, | |||
1216 | s->len--; | 1216 | s->len--; |
1217 | } | 1217 | } |
1218 | 1218 | ||
1219 | ret = trace_seq_printf(s, " */\n"); | 1219 | ret = trace_seq_puts(s, " */\n"); |
1220 | if (!ret) | 1220 | if (!ret) |
1221 | return TRACE_TYPE_PARTIAL_LINE; | 1221 | return TRACE_TYPE_PARTIAL_LINE; |
1222 | 1222 | ||
@@ -1448,7 +1448,7 @@ static struct trace_event graph_trace_ret_event = { | |||
1448 | .funcs = &graph_functions | 1448 | .funcs = &graph_functions |
1449 | }; | 1449 | }; |
1450 | 1450 | ||
1451 | static struct tracer graph_trace __read_mostly = { | 1451 | static struct tracer graph_trace __tracer_data = { |
1452 | .name = "function_graph", | 1452 | .name = "function_graph", |
1453 | .open = graph_trace_open, | 1453 | .open = graph_trace_open, |
1454 | .pipe_open = graph_trace_open, | 1454 | .pipe_open = graph_trace_open, |
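The long run of one-line hunks above replaces trace_seq_printf() calls whose format string is a fixed literal (or a single character) with trace_seq_puts() and trace_seq_putc(), avoiding pointless printf-format parsing on the output path. The pattern, sketched with an illustrative helper; in this kernel both functions return non-zero on success and 0 when the seq buffer is full:

    /* Sketch: emit fixed text into a trace_seq without a format string. */
    static int emit_separator(struct trace_seq *s)
    {
            if (!trace_seq_puts(s, " | "))   /* literal string */
                    return 0;                /* buffer full */
            return trace_seq_putc(s, '\n');  /* single character */
    }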
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index b19d065a28cb..2aefbee93a6d 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c | |||
@@ -373,7 +373,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip) | |||
373 | struct trace_array_cpu *data; | 373 | struct trace_array_cpu *data; |
374 | unsigned long flags; | 374 | unsigned long flags; |
375 | 375 | ||
376 | if (likely(!tracer_enabled)) | 376 | if (!tracer_enabled || !tracing_is_enabled()) |
377 | return; | 377 | return; |
378 | 378 | ||
379 | cpu = raw_smp_processor_id(); | 379 | cpu = raw_smp_processor_id(); |
@@ -416,7 +416,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip) | |||
416 | else | 416 | else |
417 | return; | 417 | return; |
418 | 418 | ||
419 | if (!tracer_enabled) | 419 | if (!tracer_enabled || !tracing_is_enabled()) |
420 | return; | 420 | return; |
421 | 421 | ||
422 | data = per_cpu_ptr(tr->trace_buffer.data, cpu); | 422 | data = per_cpu_ptr(tr->trace_buffer.data, cpu); |
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 9f46e98ba8f2..3811487e7a7a 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c | |||
@@ -35,12 +35,17 @@ struct trace_probe { | |||
35 | const char *symbol; /* symbol name */ | 35 | const char *symbol; /* symbol name */ |
36 | struct ftrace_event_class class; | 36 | struct ftrace_event_class class; |
37 | struct ftrace_event_call call; | 37 | struct ftrace_event_call call; |
38 | struct ftrace_event_file * __rcu *files; | 38 | struct list_head files; |
39 | ssize_t size; /* trace entry size */ | 39 | ssize_t size; /* trace entry size */ |
40 | unsigned int nr_args; | 40 | unsigned int nr_args; |
41 | struct probe_arg args[]; | 41 | struct probe_arg args[]; |
42 | }; | 42 | }; |
43 | 43 | ||
44 | struct event_file_link { | ||
45 | struct ftrace_event_file *file; | ||
46 | struct list_head list; | ||
47 | }; | ||
48 | |||
44 | #define SIZEOF_TRACE_PROBE(n) \ | 49 | #define SIZEOF_TRACE_PROBE(n) \ |
45 | (offsetof(struct trace_probe, args) + \ | 50 | (offsetof(struct trace_probe, args) + \ |
46 | (sizeof(struct probe_arg) * (n))) | 51 | (sizeof(struct probe_arg) * (n))) |
@@ -150,6 +155,7 @@ static struct trace_probe *alloc_trace_probe(const char *group, | |||
150 | goto error; | 155 | goto error; |
151 | 156 | ||
152 | INIT_LIST_HEAD(&tp->list); | 157 | INIT_LIST_HEAD(&tp->list); |
158 | INIT_LIST_HEAD(&tp->files); | ||
153 | return tp; | 159 | return tp; |
154 | error: | 160 | error: |
155 | kfree(tp->call.name); | 161 | kfree(tp->call.name); |
@@ -183,25 +189,6 @@ static struct trace_probe *find_trace_probe(const char *event, | |||
183 | return NULL; | 189 | return NULL; |
184 | } | 190 | } |
185 | 191 | ||
186 | static int trace_probe_nr_files(struct trace_probe *tp) | ||
187 | { | ||
188 | struct ftrace_event_file **file; | ||
189 | int ret = 0; | ||
190 | |||
191 | /* | ||
192 | * Since all tp->files updater is protected by probe_enable_lock, | ||
193 | * we don't need to lock an rcu_read_lock. | ||
194 | */ | ||
195 | file = rcu_dereference_raw(tp->files); | ||
196 | if (file) | ||
197 | while (*(file++)) | ||
198 | ret++; | ||
199 | |||
200 | return ret; | ||
201 | } | ||
202 | |||
203 | static DEFINE_MUTEX(probe_enable_lock); | ||
204 | |||
205 | /* | 192 | /* |
206 | * Enable trace_probe | 193 | * Enable trace_probe |
207 | * if the file is NULL, enable "perf" handler, or enable "trace" handler. | 194 | * if the file is NULL, enable "perf" handler, or enable "trace" handler. |
@@ -211,67 +198,42 @@ enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) | |||
211 | { | 198 | { |
212 | int ret = 0; | 199 | int ret = 0; |
213 | 200 | ||
214 | mutex_lock(&probe_enable_lock); | ||
215 | |||
216 | if (file) { | 201 | if (file) { |
217 | struct ftrace_event_file **new, **old; | 202 | struct event_file_link *link; |
218 | int n = trace_probe_nr_files(tp); | 203 | |
219 | 204 | link = kmalloc(sizeof(*link), GFP_KERNEL); | |
220 | old = rcu_dereference_raw(tp->files); | 205 | if (!link) { |
221 | /* 1 is for new one and 1 is for stopper */ | ||
222 | new = kzalloc((n + 2) * sizeof(struct ftrace_event_file *), | ||
223 | GFP_KERNEL); | ||
224 | if (!new) { | ||
225 | ret = -ENOMEM; | 206 | ret = -ENOMEM; |
226 | goto out_unlock; | 207 | goto out; |
227 | } | 208 | } |
228 | memcpy(new, old, n * sizeof(struct ftrace_event_file *)); | ||
229 | new[n] = file; | ||
230 | /* The last one keeps a NULL */ | ||
231 | 209 | ||
232 | rcu_assign_pointer(tp->files, new); | 210 | link->file = file; |
233 | tp->flags |= TP_FLAG_TRACE; | 211 | list_add_tail_rcu(&link->list, &tp->files); |
234 | 212 | ||
235 | if (old) { | 213 | tp->flags |= TP_FLAG_TRACE; |
236 | /* Make sure the probe is done with old files */ | ||
237 | synchronize_sched(); | ||
238 | kfree(old); | ||
239 | } | ||
240 | } else | 214 | } else |
241 | tp->flags |= TP_FLAG_PROFILE; | 215 | tp->flags |= TP_FLAG_PROFILE; |
242 | 216 | ||
243 | if (trace_probe_is_enabled(tp) && trace_probe_is_registered(tp) && | 217 | if (trace_probe_is_registered(tp) && !trace_probe_has_gone(tp)) { |
244 | !trace_probe_has_gone(tp)) { | ||
245 | if (trace_probe_is_return(tp)) | 218 | if (trace_probe_is_return(tp)) |
246 | ret = enable_kretprobe(&tp->rp); | 219 | ret = enable_kretprobe(&tp->rp); |
247 | else | 220 | else |
248 | ret = enable_kprobe(&tp->rp.kp); | 221 | ret = enable_kprobe(&tp->rp.kp); |
249 | } | 222 | } |
250 | 223 | out: | |
251 | out_unlock: | ||
252 | mutex_unlock(&probe_enable_lock); | ||
253 | |||
254 | return ret; | 224 | return ret; |
255 | } | 225 | } |
256 | 226 | ||
257 | static int | 227 | static struct event_file_link * |
258 | trace_probe_file_index(struct trace_probe *tp, struct ftrace_event_file *file) | 228 | find_event_file_link(struct trace_probe *tp, struct ftrace_event_file *file) |
259 | { | 229 | { |
260 | struct ftrace_event_file **files; | 230 | struct event_file_link *link; |
261 | int i; | ||
262 | 231 | ||
263 | /* | 232 | list_for_each_entry(link, &tp->files, list) |
264 | * Since all tp->files updater is protected by probe_enable_lock, | 233 | if (link->file == file) |
265 | * we don't need to lock an rcu_read_lock. | 234 | return link; |
266 | */ | ||
267 | files = rcu_dereference_raw(tp->files); | ||
268 | if (files) { | ||
269 | for (i = 0; files[i]; i++) | ||
270 | if (files[i] == file) | ||
271 | return i; | ||
272 | } | ||
273 | 235 | ||
274 | return -1; | 236 | return NULL; |
275 | } | 237 | } |
276 | 238 | ||
277 | /* | 239 | /* |
@@ -281,43 +243,23 @@ trace_probe_file_index(struct trace_probe *tp, struct ftrace_event_file *file) | |||
281 | static int | 243 | static int |
282 | disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) | 244 | disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) |
283 | { | 245 | { |
246 | struct event_file_link *link = NULL; | ||
247 | int wait = 0; | ||
284 | int ret = 0; | 248 | int ret = 0; |
285 | 249 | ||
286 | mutex_lock(&probe_enable_lock); | ||
287 | |||
288 | if (file) { | 250 | if (file) { |
289 | struct ftrace_event_file **new, **old; | 251 | link = find_event_file_link(tp, file); |
290 | int n = trace_probe_nr_files(tp); | 252 | if (!link) { |
291 | int i, j; | ||
292 | |||
293 | old = rcu_dereference_raw(tp->files); | ||
294 | if (n == 0 || trace_probe_file_index(tp, file) < 0) { | ||
295 | ret = -EINVAL; | 253 | ret = -EINVAL; |
296 | goto out_unlock; | 254 | goto out; |
297 | } | ||
298 | |||
299 | if (n == 1) { /* Remove the last file */ | ||
300 | tp->flags &= ~TP_FLAG_TRACE; | ||
301 | new = NULL; | ||
302 | } else { | ||
303 | new = kzalloc(n * sizeof(struct ftrace_event_file *), | ||
304 | GFP_KERNEL); | ||
305 | if (!new) { | ||
306 | ret = -ENOMEM; | ||
307 | goto out_unlock; | ||
308 | } | ||
309 | |||
310 | /* This copy & check loop copies the NULL stopper too */ | ||
311 | for (i = 0, j = 0; j < n && i < n + 1; i++) | ||
312 | if (old[i] != file) | ||
313 | new[j++] = old[i]; | ||
314 | } | 255 | } |
315 | 256 | ||
316 | rcu_assign_pointer(tp->files, new); | 257 | list_del_rcu(&link->list); |
258 | wait = 1; | ||
259 | if (!list_empty(&tp->files)) | ||
260 | goto out; | ||
317 | 261 | ||
318 | /* Make sure the probe is done with old files */ | 262 | tp->flags &= ~TP_FLAG_TRACE; |
319 | synchronize_sched(); | ||
320 | kfree(old); | ||
321 | } else | 263 | } else |
322 | tp->flags &= ~TP_FLAG_PROFILE; | 264 | tp->flags &= ~TP_FLAG_PROFILE; |
323 | 265 | ||
@@ -326,10 +268,21 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) | |||
326 | disable_kretprobe(&tp->rp); | 268 | disable_kretprobe(&tp->rp); |
327 | else | 269 | else |
328 | disable_kprobe(&tp->rp.kp); | 270 | disable_kprobe(&tp->rp.kp); |
271 | wait = 1; | ||
272 | } | ||
273 | out: | ||
274 | if (wait) { | ||
275 | /* | ||
276 | * Synchronize with kprobe_trace_func/kretprobe_trace_func | ||
277 | * to ensure disabled (all running handlers are finished). | ||
278 | * This is not only for kfree(), but also the caller, | ||
279 | * trace_remove_event_call() supposes it for releasing | ||
280 | * event_call related objects, which will be accessed in | ||
281 | * the kprobe_trace_func/kretprobe_trace_func. | ||
282 | */ | ||
283 | synchronize_sched(); | ||
284 | kfree(link); /* Ignored if link == NULL */ | ||
329 | } | 285 | } |
330 | |||
331 | out_unlock: | ||
332 | mutex_unlock(&probe_enable_lock); | ||
333 | 286 | ||
334 | return ret; | 287 | return ret; |
335 | } | 288 | } |
@@ -885,20 +838,10 @@ __kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs, | |||
885 | static __kprobes void | 838 | static __kprobes void |
886 | kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs) | 839 | kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs) |
887 | { | 840 | { |
888 | /* | 841 | struct event_file_link *link; |
889 | * Note: preempt is already disabled around the kprobe handler. | ||
890 | * However, we still need an smp_read_barrier_depends() corresponding | ||
891 | * to smp_wmb() in rcu_assign_pointer() to access the pointer. | ||
892 | */ | ||
893 | struct ftrace_event_file **file = rcu_dereference_raw(tp->files); | ||
894 | |||
895 | if (unlikely(!file)) | ||
896 | return; | ||
897 | 842 | ||
898 | while (*file) { | 843 | list_for_each_entry_rcu(link, &tp->files, list) |
899 | __kprobe_trace_func(tp, regs, *file); | 844 | __kprobe_trace_func(tp, regs, link->file); |
900 | file++; | ||
901 | } | ||
902 | } | 845 | } |
903 | 846 | ||
904 | /* Kretprobe handler */ | 847 | /* Kretprobe handler */ |
@@ -945,20 +888,10 @@ static __kprobes void | |||
945 | kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri, | 888 | kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri, |
946 | struct pt_regs *regs) | 889 | struct pt_regs *regs) |
947 | { | 890 | { |
948 | /* | 891 | struct event_file_link *link; |
949 | * Note: preempt is already disabled around the kprobe handler. | ||
950 | * However, we still need an smp_read_barrier_depends() corresponding | ||
951 | * to smp_wmb() in rcu_assign_pointer() to access the pointer. | ||
952 | */ | ||
953 | struct ftrace_event_file **file = rcu_dereference_raw(tp->files); | ||
954 | |||
955 | if (unlikely(!file)) | ||
956 | return; | ||
957 | 892 | ||
958 | while (*file) { | 893 | list_for_each_entry_rcu(link, &tp->files, list) |
959 | __kretprobe_trace_func(tp, ri, regs, *file); | 894 | __kretprobe_trace_func(tp, ri, regs, link->file); |
960 | file++; | ||
961 | } | ||
962 | } | 895 | } |
963 | 896 | ||
964 | /* Event entry printers */ | 897 | /* Event entry printers */ |
@@ -1157,13 +1090,14 @@ kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs) | |||
1157 | int size, __size, dsize; | 1090 | int size, __size, dsize; |
1158 | int rctx; | 1091 | int rctx; |
1159 | 1092 | ||
1093 | head = this_cpu_ptr(call->perf_events); | ||
1094 | if (hlist_empty(head)) | ||
1095 | return; | ||
1096 | |||
1160 | dsize = __get_data_size(tp, regs); | 1097 | dsize = __get_data_size(tp, regs); |
1161 | __size = sizeof(*entry) + tp->size + dsize; | 1098 | __size = sizeof(*entry) + tp->size + dsize; |
1162 | size = ALIGN(__size + sizeof(u32), sizeof(u64)); | 1099 | size = ALIGN(__size + sizeof(u32), sizeof(u64)); |
1163 | size -= sizeof(u32); | 1100 | size -= sizeof(u32); |
1164 | if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, | ||
1165 | "profile buffer not large enough")) | ||
1166 | return; | ||
1167 | 1101 | ||
1168 | entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); | 1102 | entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); |
1169 | if (!entry) | 1103 | if (!entry) |
@@ -1172,10 +1106,7 @@ kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs) | |||
1172 | entry->ip = (unsigned long)tp->rp.kp.addr; | 1106 | entry->ip = (unsigned long)tp->rp.kp.addr; |
1173 | memset(&entry[1], 0, dsize); | 1107 | memset(&entry[1], 0, dsize); |
1174 | store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); | 1108 | store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); |
1175 | 1109 | perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL); | |
1176 | head = this_cpu_ptr(call->perf_events); | ||
1177 | perf_trace_buf_submit(entry, size, rctx, | ||
1178 | entry->ip, 1, regs, head, NULL); | ||
1179 | } | 1110 | } |
1180 | 1111 | ||
1181 | /* Kretprobe profile handler */ | 1112 | /* Kretprobe profile handler */ |
@@ -1189,13 +1120,14 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri, | |||
1189 | int size, __size, dsize; | 1120 | int size, __size, dsize; |
1190 | int rctx; | 1121 | int rctx; |
1191 | 1122 | ||
1123 | head = this_cpu_ptr(call->perf_events); | ||
1124 | if (hlist_empty(head)) | ||
1125 | return; | ||
1126 | |||
1192 | dsize = __get_data_size(tp, regs); | 1127 | dsize = __get_data_size(tp, regs); |
1193 | __size = sizeof(*entry) + tp->size + dsize; | 1128 | __size = sizeof(*entry) + tp->size + dsize; |
1194 | size = ALIGN(__size + sizeof(u32), sizeof(u64)); | 1129 | size = ALIGN(__size + sizeof(u32), sizeof(u64)); |
1195 | size -= sizeof(u32); | 1130 | size -= sizeof(u32); |
1196 | if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, | ||
1197 | "profile buffer not large enough")) | ||
1198 | return; | ||
1199 | 1131 | ||
1200 | entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); | 1132 | entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); |
1201 | if (!entry) | 1133 | if (!entry) |
@@ -1204,13 +1136,16 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri, | |||
1204 | entry->func = (unsigned long)tp->rp.kp.addr; | 1136 | entry->func = (unsigned long)tp->rp.kp.addr; |
1205 | entry->ret_ip = (unsigned long)ri->ret_addr; | 1137 | entry->ret_ip = (unsigned long)ri->ret_addr; |
1206 | store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); | 1138 | store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); |
1207 | 1139 | perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL); | |
1208 | head = this_cpu_ptr(call->perf_events); | ||
1209 | perf_trace_buf_submit(entry, size, rctx, | ||
1210 | entry->ret_ip, 1, regs, head, NULL); | ||
1211 | } | 1140 | } |
1212 | #endif /* CONFIG_PERF_EVENTS */ | 1141 | #endif /* CONFIG_PERF_EVENTS */ |
1213 | 1142 | ||
1143 | /* | ||
1144 | * called by perf_trace_init() or __ftrace_set_clr_event() under event_mutex. | ||
1145 | * | ||
1146 | * kprobe_trace_self_tests_init() does enable_trace_probe/disable_trace_probe | ||
1147 | * lockless, but we can't race with this __init function. | ||
1148 | */ | ||
1214 | static __kprobes | 1149 | static __kprobes |
1215 | int kprobe_register(struct ftrace_event_call *event, | 1150 | int kprobe_register(struct ftrace_event_call *event, |
1216 | enum trace_reg type, void *data) | 1151 | enum trace_reg type, void *data) |
@@ -1376,6 +1311,10 @@ find_trace_probe_file(struct trace_probe *tp, struct trace_array *tr) | |||
1376 | return NULL; | 1311 | return NULL; |
1377 | } | 1312 | } |
1378 | 1313 | ||
1314 | /* | ||
1315 | * Nobody but us can call enable_trace_probe/disable_trace_probe at this | ||
1316 | * stage, we can do this lockless. | ||
1317 | */ | ||
1379 | static __init int kprobe_trace_self_tests_init(void) | 1318 | static __init int kprobe_trace_self_tests_init(void) |
1380 | { | 1319 | { |
1381 | int ret, warn = 0; | 1320 | int ret, warn = 0; |
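The trace_kprobe.c hunks above drop the hand-rolled RCU array of ftrace_event_file pointers (and its private probe_enable_lock) in favour of a list of event_file_link nodes hanging off tp->files: writers, already serialized by event_mutex, add and delete with the _rcu list helpers, and the kprobe handlers walk the list with list_for_each_entry_rcu() under their implicit preempt-off section. A condensed sketch of that pattern; the helper names are illustrative:

    /* Sketch of the new tp->files handling (writers assumed to hold event_mutex). */
    static int link_file(struct trace_probe *tp, struct ftrace_event_file *file)
    {
            struct event_file_link *link = kmalloc(sizeof(*link), GFP_KERNEL);

            if (!link)
                    return -ENOMEM;
            link->file = file;
            list_add_tail_rcu(&link->list, &tp->files);
            return 0;
    }

    static void unlink_file(struct trace_probe *tp, struct event_file_link *link)
    {
            list_del_rcu(&link->list);
            synchronize_sched();    /* let running kprobe handlers finish */
            kfree(link);
    }

    static void visit_files(struct trace_probe *tp, struct pt_regs *regs)
    {
            struct event_file_link *link;

            list_for_each_entry_rcu(link, &tp->files, list)
                    __kprobe_trace_func(tp, regs, link->file);
    }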
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index a5e8f4878bfa..b3dcfb2f0fef 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c | |||
@@ -90,7 +90,7 @@ static int mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev) | |||
90 | if (drv) | 90 | if (drv) |
91 | ret += trace_seq_printf(s, " %s\n", drv->name); | 91 | ret += trace_seq_printf(s, " %s\n", drv->name); |
92 | else | 92 | else |
93 | ret += trace_seq_printf(s, " \n"); | 93 | ret += trace_seq_puts(s, " \n"); |
94 | return ret; | 94 | return ret; |
95 | } | 95 | } |
96 | 96 | ||
@@ -107,7 +107,7 @@ static void mmio_pipe_open(struct trace_iterator *iter) | |||
107 | struct header_iter *hiter; | 107 | struct header_iter *hiter; |
108 | struct trace_seq *s = &iter->seq; | 108 | struct trace_seq *s = &iter->seq; |
109 | 109 | ||
110 | trace_seq_printf(s, "VERSION 20070824\n"); | 110 | trace_seq_puts(s, "VERSION 20070824\n"); |
111 | 111 | ||
112 | hiter = kzalloc(sizeof(*hiter), GFP_KERNEL); | 112 | hiter = kzalloc(sizeof(*hiter), GFP_KERNEL); |
113 | if (!hiter) | 113 | if (!hiter) |
@@ -209,7 +209,7 @@ static enum print_line_t mmio_print_rw(struct trace_iterator *iter) | |||
209 | (rw->value >> 0) & 0xff, rw->pc, 0); | 209 | (rw->value >> 0) & 0xff, rw->pc, 0); |
210 | break; | 210 | break; |
211 | default: | 211 | default: |
212 | ret = trace_seq_printf(s, "rw what?\n"); | 212 | ret = trace_seq_puts(s, "rw what?\n"); |
213 | break; | 213 | break; |
214 | } | 214 | } |
215 | if (ret) | 215 | if (ret) |
@@ -245,7 +245,7 @@ static enum print_line_t mmio_print_map(struct trace_iterator *iter) | |||
245 | secs, usec_rem, m->map_id, 0UL, 0); | 245 | secs, usec_rem, m->map_id, 0UL, 0); |
246 | break; | 246 | break; |
247 | default: | 247 | default: |
248 | ret = trace_seq_printf(s, "map what?\n"); | 248 | ret = trace_seq_puts(s, "map what?\n"); |
249 | break; | 249 | break; |
250 | } | 250 | } |
251 | if (ret) | 251 | if (ret) |
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index bb922d9ee51b..34e7cbac0c9c 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c | |||
@@ -78,7 +78,7 @@ enum print_line_t trace_print_printk_msg_only(struct trace_iterator *iter) | |||
78 | 78 | ||
79 | trace_assign_type(field, entry); | 79 | trace_assign_type(field, entry); |
80 | 80 | ||
81 | ret = trace_seq_printf(s, "%s", field->buf); | 81 | ret = trace_seq_puts(s, field->buf); |
82 | if (!ret) | 82 | if (!ret) |
83 | return TRACE_TYPE_PARTIAL_LINE; | 83 | return TRACE_TYPE_PARTIAL_LINE; |
84 | 84 | ||
@@ -558,14 +558,14 @@ seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s, | |||
558 | if (ret) | 558 | if (ret) |
559 | ret = trace_seq_puts(s, "??"); | 559 | ret = trace_seq_puts(s, "??"); |
560 | if (ret) | 560 | if (ret) |
561 | ret = trace_seq_puts(s, "\n"); | 561 | ret = trace_seq_putc(s, '\n'); |
562 | continue; | 562 | continue; |
563 | } | 563 | } |
564 | if (!ret) | 564 | if (!ret) |
565 | break; | 565 | break; |
566 | if (ret) | 566 | if (ret) |
567 | ret = seq_print_user_ip(s, mm, ip, sym_flags); | 567 | ret = seq_print_user_ip(s, mm, ip, sym_flags); |
568 | ret = trace_seq_puts(s, "\n"); | 568 | ret = trace_seq_putc(s, '\n'); |
569 | } | 569 | } |
570 | 570 | ||
571 | if (mm) | 571 | if (mm) |
@@ -579,7 +579,7 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) | |||
579 | int ret; | 579 | int ret; |
580 | 580 | ||
581 | if (!ip) | 581 | if (!ip) |
582 | return trace_seq_printf(s, "0"); | 582 | return trace_seq_putc(s, '0'); |
583 | 583 | ||
584 | if (sym_flags & TRACE_ITER_SYM_OFFSET) | 584 | if (sym_flags & TRACE_ITER_SYM_OFFSET) |
585 | ret = seq_print_sym_offset(s, "%s", ip); | 585 | ret = seq_print_sym_offset(s, "%s", ip); |
@@ -964,14 +964,14 @@ static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags, | |||
964 | goto partial; | 964 | goto partial; |
965 | 965 | ||
966 | if ((flags & TRACE_ITER_PRINT_PARENT) && field->parent_ip) { | 966 | if ((flags & TRACE_ITER_PRINT_PARENT) && field->parent_ip) { |
967 | if (!trace_seq_printf(s, " <-")) | 967 | if (!trace_seq_puts(s, " <-")) |
968 | goto partial; | 968 | goto partial; |
969 | if (!seq_print_ip_sym(s, | 969 | if (!seq_print_ip_sym(s, |
970 | field->parent_ip, | 970 | field->parent_ip, |
971 | flags)) | 971 | flags)) |
972 | goto partial; | 972 | goto partial; |
973 | } | 973 | } |
974 | if (!trace_seq_printf(s, "\n")) | 974 | if (!trace_seq_putc(s, '\n')) |
975 | goto partial; | 975 | goto partial; |
976 | 976 | ||
977 | return TRACE_TYPE_HANDLED; | 977 | return TRACE_TYPE_HANDLED; |
@@ -1210,7 +1210,7 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter, | |||
1210 | 1210 | ||
1211 | if (!seq_print_ip_sym(s, *p, flags)) | 1211 | if (!seq_print_ip_sym(s, *p, flags)) |
1212 | goto partial; | 1212 | goto partial; |
1213 | if (!trace_seq_puts(s, "\n")) | 1213 | if (!trace_seq_putc(s, '\n')) |
1214 | goto partial; | 1214 | goto partial; |
1215 | } | 1215 | } |
1216 | 1216 | ||
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 2901e3b88590..a7329b7902f8 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c | |||
@@ -640,13 +640,20 @@ out: | |||
640 | * Enable ftrace, sleep 1/10 second, and then read the trace | 640 | * Enable ftrace, sleep 1/10 second, and then read the trace |
641 | * buffer to see if all is in order. | 641 | * buffer to see if all is in order. |
642 | */ | 642 | */ |
643 | int | 643 | __init int |
644 | trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) | 644 | trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) |
645 | { | 645 | { |
646 | int save_ftrace_enabled = ftrace_enabled; | 646 | int save_ftrace_enabled = ftrace_enabled; |
647 | unsigned long count; | 647 | unsigned long count; |
648 | int ret; | 648 | int ret; |
649 | 649 | ||
650 | #ifdef CONFIG_DYNAMIC_FTRACE | ||
651 | if (ftrace_filter_param) { | ||
652 | printk(KERN_CONT " ... kernel command line filter set: force PASS ... "); | ||
653 | return 0; | ||
654 | } | ||
655 | #endif | ||
656 | |||
650 | /* make sure msleep has been recorded */ | 657 | /* make sure msleep has been recorded */ |
651 | msleep(1); | 658 | msleep(1); |
652 | 659 | ||
@@ -727,13 +734,20 @@ static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace) | |||
727 | * Pretty much the same as for the function tracer from which the selftest | 734 | * Pretty much the same as for the function tracer from which the selftest |
728 | * has been borrowed. | 735 | * has been borrowed. |
729 | */ | 736 | */ |
730 | int | 737 | __init int |
731 | trace_selftest_startup_function_graph(struct tracer *trace, | 738 | trace_selftest_startup_function_graph(struct tracer *trace, |
732 | struct trace_array *tr) | 739 | struct trace_array *tr) |
733 | { | 740 | { |
734 | int ret; | 741 | int ret; |
735 | unsigned long count; | 742 | unsigned long count; |
736 | 743 | ||
744 | #ifdef CONFIG_DYNAMIC_FTRACE | ||
745 | if (ftrace_filter_param) { | ||
746 | printk(KERN_CONT " ... kernel command line filter set: force PASS ... "); | ||
747 | return 0; | ||
748 | } | ||
749 | #endif | ||
750 | |||
737 | /* | 751 | /* |
738 | * Simulate the init() callback but we attach a watchdog callback | 752 | * Simulate the init() callback but we attach a watchdog callback |
739 | * to detect and recover from possible hangs | 753 | * to detect and recover from possible hangs |
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 8f2ac73c7a5f..8fd03657bc7d 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c | |||
@@ -175,7 +175,7 @@ print_syscall_exit(struct trace_iterator *iter, int flags, | |||
175 | entry = syscall_nr_to_meta(syscall); | 175 | entry = syscall_nr_to_meta(syscall); |
176 | 176 | ||
177 | if (!entry) { | 177 | if (!entry) { |
178 | trace_seq_printf(s, "\n"); | 178 | trace_seq_putc(s, '\n'); |
179 | return TRACE_TYPE_HANDLED; | 179 | return TRACE_TYPE_HANDLED; |
180 | } | 180 | } |
181 | 181 | ||
@@ -306,6 +306,8 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) | |||
306 | struct syscall_metadata *sys_data; | 306 | struct syscall_metadata *sys_data; |
307 | struct ring_buffer_event *event; | 307 | struct ring_buffer_event *event; |
308 | struct ring_buffer *buffer; | 308 | struct ring_buffer *buffer; |
309 | unsigned long irq_flags; | ||
310 | int pc; | ||
309 | int syscall_nr; | 311 | int syscall_nr; |
310 | int size; | 312 | int size; |
311 | 313 | ||
@@ -321,9 +323,12 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) | |||
321 | 323 | ||
322 | size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; | 324 | size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; |
323 | 325 | ||
326 | local_save_flags(irq_flags); | ||
327 | pc = preempt_count(); | ||
328 | |||
324 | buffer = tr->trace_buffer.buffer; | 329 | buffer = tr->trace_buffer.buffer; |
325 | event = trace_buffer_lock_reserve(buffer, | 330 | event = trace_buffer_lock_reserve(buffer, |
326 | sys_data->enter_event->event.type, size, 0, 0); | 331 | sys_data->enter_event->event.type, size, irq_flags, pc); |
327 | if (!event) | 332 | if (!event) |
328 | return; | 333 | return; |
329 | 334 | ||
@@ -333,7 +338,8 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) | |||
333 | 338 | ||
334 | if (!filter_current_check_discard(buffer, sys_data->enter_event, | 339 | if (!filter_current_check_discard(buffer, sys_data->enter_event, |
335 | entry, event)) | 340 | entry, event)) |
336 | trace_current_buffer_unlock_commit(buffer, event, 0, 0); | 341 | trace_current_buffer_unlock_commit(buffer, event, |
342 | irq_flags, pc); | ||
337 | } | 343 | } |
338 | 344 | ||
339 | static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) | 345 | static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) |
@@ -343,6 +349,8 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) | |||
343 | struct syscall_metadata *sys_data; | 349 | struct syscall_metadata *sys_data; |
344 | struct ring_buffer_event *event; | 350 | struct ring_buffer_event *event; |
345 | struct ring_buffer *buffer; | 351 | struct ring_buffer *buffer; |
352 | unsigned long irq_flags; | ||
353 | int pc; | ||
346 | int syscall_nr; | 354 | int syscall_nr; |
347 | 355 | ||
348 | syscall_nr = trace_get_syscall_nr(current, regs); | 356 | syscall_nr = trace_get_syscall_nr(current, regs); |
@@ -355,9 +363,13 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) | |||
355 | if (!sys_data) | 363 | if (!sys_data) |
356 | return; | 364 | return; |
357 | 365 | ||
366 | local_save_flags(irq_flags); | ||
367 | pc = preempt_count(); | ||
368 | |||
358 | buffer = tr->trace_buffer.buffer; | 369 | buffer = tr->trace_buffer.buffer; |
359 | event = trace_buffer_lock_reserve(buffer, | 370 | event = trace_buffer_lock_reserve(buffer, |
360 | sys_data->exit_event->event.type, sizeof(*entry), 0, 0); | 371 | sys_data->exit_event->event.type, sizeof(*entry), |
372 | irq_flags, pc); | ||
361 | if (!event) | 373 | if (!event) |
362 | return; | 374 | return; |
363 | 375 | ||
@@ -367,7 +379,8 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) | |||
367 | 379 | ||
368 | if (!filter_current_check_discard(buffer, sys_data->exit_event, | 380 | if (!filter_current_check_discard(buffer, sys_data->exit_event, |
369 | entry, event)) | 381 | entry, event)) |
370 | trace_current_buffer_unlock_commit(buffer, event, 0, 0); | 382 | trace_current_buffer_unlock_commit(buffer, event, |
383 | irq_flags, pc); | ||
371 | } | 384 | } |
372 | 385 | ||
373 | static int reg_event_syscall_enter(struct ftrace_event_file *file, | 386 | static int reg_event_syscall_enter(struct ftrace_event_file *file, |
@@ -553,15 +566,15 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) | |||
553 | if (!sys_data) | 566 | if (!sys_data) |
554 | return; | 567 | return; |
555 | 568 | ||
569 | head = this_cpu_ptr(sys_data->enter_event->perf_events); | ||
570 | if (hlist_empty(head)) | ||
571 | return; | ||
572 | |||
556 | /* get the size after alignment with the u32 buffer size field */ | 573 | /* get the size after alignment with the u32 buffer size field */ |
557 | size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec); | 574 | size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec); |
558 | size = ALIGN(size + sizeof(u32), sizeof(u64)); | 575 | size = ALIGN(size + sizeof(u32), sizeof(u64)); |
559 | size -= sizeof(u32); | 576 | size -= sizeof(u32); |
560 | 577 | ||
561 | if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, | ||
562 | "perf buffer not large enough")) | ||
563 | return; | ||
564 | |||
565 | rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size, | 578 | rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size, |
566 | sys_data->enter_event->event.type, regs, &rctx); | 579 | sys_data->enter_event->event.type, regs, &rctx); |
567 | if (!rec) | 580 | if (!rec) |
@@ -570,8 +583,6 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) | |||
570 | rec->nr = syscall_nr; | 583 | rec->nr = syscall_nr; |
571 | syscall_get_arguments(current, regs, 0, sys_data->nb_args, | 584 | syscall_get_arguments(current, regs, 0, sys_data->nb_args, |
572 | (unsigned long *)&rec->args); | 585 | (unsigned long *)&rec->args); |
573 | |||
574 | head = this_cpu_ptr(sys_data->enter_event->perf_events); | ||
575 | perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL); | 586 | perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL); |
576 | } | 587 | } |
577 | 588 | ||
@@ -629,18 +640,14 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) | |||
629 | if (!sys_data) | 640 | if (!sys_data) |
630 | return; | 641 | return; |
631 | 642 | ||
643 | head = this_cpu_ptr(sys_data->exit_event->perf_events); | ||
644 | if (hlist_empty(head)) | ||
645 | return; | ||
646 | |||
632 | /* We can probably do that at build time */ | 647 | /* We can probably do that at build time */ |
633 | size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64)); | 648 | size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64)); |
634 | size -= sizeof(u32); | 649 | size -= sizeof(u32); |
635 | 650 | ||
636 | /* | ||
637 | * Impossible, but be paranoid with the future | ||
638 | * How to put this check outside runtime? | ||
639 | */ | ||
640 | if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, | ||
641 | "exit event has grown above perf buffer size")) | ||
642 | return; | ||
643 | |||
644 | rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size, | 651 | rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size, |
645 | sys_data->exit_event->event.type, regs, &rctx); | 652 | sys_data->exit_event->event.type, regs, &rctx); |
646 | if (!rec) | 653 | if (!rec) |
@@ -648,8 +655,6 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) | |||
648 | 655 | ||
649 | rec->nr = syscall_nr; | 656 | rec->nr = syscall_nr; |
650 | rec->ret = syscall_get_return_value(current, regs); | 657 | rec->ret = syscall_get_return_value(current, regs); |
651 | |||
652 | head = this_cpu_ptr(sys_data->exit_event->perf_events); | ||
653 | perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL); | 658 | perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL); |
654 | } | 659 | } |
655 | 660 | ||
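Besides bailing out early when no perf event is attached (the hlist_empty() checks) and dropping the PERF_MAX_TRACE_SIZE warnings, the trace_syscalls.c hunks stop passing 0/0 for irq_flags and preempt count when reserving ring-buffer events, so the irqs-off and preempt-depth columns of syscall events become accurate. A sketch of that reserve/commit sequence; the wrapper function is illustrative:

    /* Sketch: stamp a trace event with the real irq/preempt context. */
    static void reserve_and_commit(struct ring_buffer *buffer, int type, int size)
    {
            struct ring_buffer_event *event;
            unsigned long irq_flags;
            int pc;

            local_save_flags(irq_flags);    /* current IRQ state */
            pc = preempt_count();           /* preempt/softirq/hardirq depth */

            event = trace_buffer_lock_reserve(buffer, type, size, irq_flags, pc);
            if (!event)
                    return;
            /* ... fill ring_buffer_event_data(event) here ... */
            trace_current_buffer_unlock_commit(buffer, event, irq_flags, pc);
    }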
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 32494fb0ee64..a23d2d71188e 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c | |||
@@ -283,8 +283,10 @@ static int create_trace_uprobe(int argc, char **argv) | |||
283 | return -EINVAL; | 283 | return -EINVAL; |
284 | } | 284 | } |
285 | arg = strchr(argv[1], ':'); | 285 | arg = strchr(argv[1], ':'); |
286 | if (!arg) | 286 | if (!arg) { |
287 | ret = -EINVAL; | ||
287 | goto fail_address_parse; | 288 | goto fail_address_parse; |
289 | } | ||
288 | 290 | ||
289 | *arg++ = '\0'; | 291 | *arg++ = '\0'; |
290 | filename = argv[1]; | 292 | filename = argv[1]; |
@@ -816,8 +818,6 @@ static void uprobe_perf_print(struct trace_uprobe *tu, | |||
816 | 818 | ||
817 | size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); | 819 | size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); |
818 | size = ALIGN(size + tu->size + sizeof(u32), sizeof(u64)) - sizeof(u32); | 820 | size = ALIGN(size + tu->size + sizeof(u32), sizeof(u64)) - sizeof(u32); |
819 | if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough")) | ||
820 | return; | ||
821 | 821 | ||
822 | preempt_disable(); | 822 | preempt_disable(); |
823 | head = this_cpu_ptr(call->perf_events); | 823 | head = this_cpu_ptr(call->perf_events); |
diff --git a/kernel/wait.c b/kernel/wait.c index ce0daa320a26..dec68bd4e9d8 100644 --- a/kernel/wait.c +++ b/kernel/wait.c | |||
@@ -333,7 +333,8 @@ int __wait_on_atomic_t(wait_queue_head_t *wq, struct wait_bit_queue *q, | |||
333 | prepare_to_wait(wq, &q->wait, mode); | 333 | prepare_to_wait(wq, &q->wait, mode); |
334 | val = q->key.flags; | 334 | val = q->key.flags; |
335 | if (atomic_read(val) == 0) | 335 | if (atomic_read(val) == 0) |
336 | ret = (*action)(val); | 336 | break; |
337 | ret = (*action)(val); | ||
337 | } while (!ret && atomic_read(val) != 0); | 338 | } while (!ret && atomic_read(val) != 0); |
338 | finish_wait(wq, &q->wait); | 339 | finish_wait(wq, &q->wait); |
339 | return ret; | 340 | return ret; |
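With the corrected loop in __wait_on_atomic_t(), the waiter returns as soon as the counter reads zero and only calls the supplied action (typically a sleep) while it is still non-zero; before, the action was invoked exactly when the count had already dropped to zero. A hypothetical caller, for illustration only:

        /* Illustrative caller, not part of this diff. */
        static int example_wait_action(atomic_t *counter)
        {
                schedule();     /* sleep until wake_up_atomic_t() is called */
                return 0;       /* keep waiting while the count stays non-zero */
        }

        static void example_wait_for_last_user(atomic_t *counter)
        {
                wait_on_atomic_t(counter, example_wait_action,
                                 TASK_UNINTERRUPTIBLE);
        }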
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 05039e348f07..1241d8c91d5e 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
@@ -29,9 +29,9 @@ | |||
29 | #include <linux/kvm_para.h> | 29 | #include <linux/kvm_para.h> |
30 | #include <linux/perf_event.h> | 30 | #include <linux/perf_event.h> |
31 | 31 | ||
32 | int watchdog_enabled = 1; | 32 | int watchdog_user_enabled = 1; |
33 | int __read_mostly watchdog_thresh = 10; | 33 | int __read_mostly watchdog_thresh = 10; |
34 | static int __read_mostly watchdog_disabled; | 34 | static int __read_mostly watchdog_running; |
35 | static u64 __read_mostly sample_period; | 35 | static u64 __read_mostly sample_period; |
36 | 36 | ||
37 | static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); | 37 | static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); |
@@ -63,7 +63,7 @@ static int __init hardlockup_panic_setup(char *str) | |||
63 | else if (!strncmp(str, "nopanic", 7)) | 63 | else if (!strncmp(str, "nopanic", 7)) |
64 | hardlockup_panic = 0; | 64 | hardlockup_panic = 0; |
65 | else if (!strncmp(str, "0", 1)) | 65 | else if (!strncmp(str, "0", 1)) |
66 | watchdog_enabled = 0; | 66 | watchdog_user_enabled = 0; |
67 | return 1; | 67 | return 1; |
68 | } | 68 | } |
69 | __setup("nmi_watchdog=", hardlockup_panic_setup); | 69 | __setup("nmi_watchdog=", hardlockup_panic_setup); |
@@ -82,7 +82,7 @@ __setup("softlockup_panic=", softlockup_panic_setup); | |||
82 | 82 | ||
83 | static int __init nowatchdog_setup(char *str) | 83 | static int __init nowatchdog_setup(char *str) |
84 | { | 84 | { |
85 | watchdog_enabled = 0; | 85 | watchdog_user_enabled = 0; |
86 | return 1; | 86 | return 1; |
87 | } | 87 | } |
88 | __setup("nowatchdog", nowatchdog_setup); | 88 | __setup("nowatchdog", nowatchdog_setup); |
@@ -90,7 +90,7 @@ __setup("nowatchdog", nowatchdog_setup); | |||
90 | /* deprecated */ | 90 | /* deprecated */ |
91 | static int __init nosoftlockup_setup(char *str) | 91 | static int __init nosoftlockup_setup(char *str) |
92 | { | 92 | { |
93 | watchdog_enabled = 0; | 93 | watchdog_user_enabled = 0; |
94 | return 1; | 94 | return 1; |
95 | } | 95 | } |
96 | __setup("nosoftlockup", nosoftlockup_setup); | 96 | __setup("nosoftlockup", nosoftlockup_setup); |
@@ -158,7 +158,7 @@ void touch_all_softlockup_watchdogs(void) | |||
158 | #ifdef CONFIG_HARDLOCKUP_DETECTOR | 158 | #ifdef CONFIG_HARDLOCKUP_DETECTOR |
159 | void touch_nmi_watchdog(void) | 159 | void touch_nmi_watchdog(void) |
160 | { | 160 | { |
161 | if (watchdog_enabled) { | 161 | if (watchdog_user_enabled) { |
162 | unsigned cpu; | 162 | unsigned cpu; |
163 | 163 | ||
164 | for_each_present_cpu(cpu) { | 164 | for_each_present_cpu(cpu) { |
@@ -347,11 +347,6 @@ static void watchdog_enable(unsigned int cpu) | |||
347 | hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | 347 | hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
348 | hrtimer->function = watchdog_timer_fn; | 348 | hrtimer->function = watchdog_timer_fn; |
349 | 349 | ||
350 | if (!watchdog_enabled) { | ||
351 | kthread_park(current); | ||
352 | return; | ||
353 | } | ||
354 | |||
355 | /* Enable the perf event */ | 350 | /* Enable the perf event */ |
356 | watchdog_nmi_enable(cpu); | 351 | watchdog_nmi_enable(cpu); |
357 | 352 | ||
@@ -374,6 +369,11 @@ static void watchdog_disable(unsigned int cpu) | |||
374 | watchdog_nmi_disable(cpu); | 369 | watchdog_nmi_disable(cpu); |
375 | } | 370 | } |
376 | 371 | ||
372 | static void watchdog_cleanup(unsigned int cpu, bool online) | ||
373 | { | ||
374 | watchdog_disable(cpu); | ||
375 | } | ||
376 | |||
377 | static int watchdog_should_run(unsigned int cpu) | 377 | static int watchdog_should_run(unsigned int cpu) |
378 | { | 378 | { |
379 | return __this_cpu_read(hrtimer_interrupts) != | 379 | return __this_cpu_read(hrtimer_interrupts) != |
@@ -475,28 +475,40 @@ static int watchdog_nmi_enable(unsigned int cpu) { return 0; } | |||
475 | static void watchdog_nmi_disable(unsigned int cpu) { return; } | 475 | static void watchdog_nmi_disable(unsigned int cpu) { return; } |
476 | #endif /* CONFIG_HARDLOCKUP_DETECTOR */ | 476 | #endif /* CONFIG_HARDLOCKUP_DETECTOR */ |
477 | 477 | ||
478 | /* prepare/enable/disable routines */ | 478 | static struct smp_hotplug_thread watchdog_threads = { |
479 | /* sysctl functions */ | 479 | .store = &softlockup_watchdog, |
480 | #ifdef CONFIG_SYSCTL | 480 | .thread_should_run = watchdog_should_run, |
481 | static void watchdog_enable_all_cpus(void) | 481 | .thread_fn = watchdog, |
482 | .thread_comm = "watchdog/%u", | ||
483 | .setup = watchdog_enable, | ||
484 | .cleanup = watchdog_cleanup, | ||
485 | .park = watchdog_disable, | ||
486 | .unpark = watchdog_enable, | ||
487 | }; | ||
488 | |||
489 | static int watchdog_enable_all_cpus(void) | ||
482 | { | 490 | { |
483 | unsigned int cpu; | 491 | int err = 0; |
484 | 492 | ||
485 | if (watchdog_disabled) { | 493 | if (!watchdog_running) { |
486 | watchdog_disabled = 0; | 494 | err = smpboot_register_percpu_thread(&watchdog_threads); |
487 | for_each_online_cpu(cpu) | 495 | if (err) |
488 | kthread_unpark(per_cpu(softlockup_watchdog, cpu)); | 496 | pr_err("Failed to create watchdog threads, disabled\n"); |
497 | else | ||
498 | watchdog_running = 1; | ||
489 | } | 499 | } |
500 | |||
501 | return err; | ||
490 | } | 502 | } |
491 | 503 | ||
504 | /* prepare/enable/disable routines */ | ||
505 | /* sysctl functions */ | ||
506 | #ifdef CONFIG_SYSCTL | ||
492 | static void watchdog_disable_all_cpus(void) | 507 | static void watchdog_disable_all_cpus(void) |
493 | { | 508 | { |
494 | unsigned int cpu; | 509 | if (watchdog_running) { |
495 | 510 | watchdog_running = 0; | |
496 | if (!watchdog_disabled) { | 511 | smpboot_unregister_percpu_thread(&watchdog_threads); |
497 | watchdog_disabled = 1; | ||
498 | for_each_online_cpu(cpu) | ||
499 | kthread_park(per_cpu(softlockup_watchdog, cpu)); | ||
500 | } | 512 | } |
501 | } | 513 | } |
502 | 514 | ||
@@ -507,45 +519,48 @@ static void watchdog_disable_all_cpus(void) | |||
507 | int proc_dowatchdog(struct ctl_table *table, int write, | 519 | int proc_dowatchdog(struct ctl_table *table, int write, |
508 | void __user *buffer, size_t *lenp, loff_t *ppos) | 520 | void __user *buffer, size_t *lenp, loff_t *ppos) |
509 | { | 521 | { |
510 | int ret; | 522 | int err, old_thresh, old_enabled; |
511 | 523 | ||
512 | if (watchdog_disabled < 0) | 524 | old_thresh = ACCESS_ONCE(watchdog_thresh); |
513 | return -ENODEV; | 525 | old_enabled = ACCESS_ONCE(watchdog_user_enabled); |
514 | 526 | ||
515 | ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); | 527 | err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); |
516 | if (ret || !write) | 528 | if (err || !write) |
517 | return ret; | 529 | return err; |
518 | 530 | ||
519 | set_sample_period(); | 531 | set_sample_period(); |
520 | /* | 532 | /* |
521 | * Watchdog threads shouldn't be enabled if they are | 533 | * Watchdog threads shouldn't be enabled if they are |
522 | * disabled. The 'watchdog_disabled' variable check in | 534 | * disabled. The 'watchdog_running' variable check in |
523 | * watchdog_*_all_cpus() function takes care of this. | 535 | * watchdog_*_all_cpus() function takes care of this. |
524 | */ | 536 | */ |
525 | if (watchdog_enabled && watchdog_thresh) | 537 | if (watchdog_user_enabled && watchdog_thresh) |
526 | watchdog_enable_all_cpus(); | 538 | err = watchdog_enable_all_cpus(); |
527 | else | 539 | else |
528 | watchdog_disable_all_cpus(); | 540 | watchdog_disable_all_cpus(); |
529 | 541 | ||
530 | return ret; | 542 | /* Restore old values on failure */ |
543 | if (err) { | ||
544 | watchdog_thresh = old_thresh; | ||
545 | watchdog_user_enabled = old_enabled; | ||
546 | } | ||
547 | |||
548 | return err; | ||
531 | } | 549 | } |
532 | #endif /* CONFIG_SYSCTL */ | 550 | #endif /* CONFIG_SYSCTL */ |
533 | 551 | ||
534 | static struct smp_hotplug_thread watchdog_threads = { | ||
535 | .store = &softlockup_watchdog, | ||
536 | .thread_should_run = watchdog_should_run, | ||
537 | .thread_fn = watchdog, | ||
538 | .thread_comm = "watchdog/%u", | ||
539 | .setup = watchdog_enable, | ||
540 | .park = watchdog_disable, | ||
541 | .unpark = watchdog_enable, | ||
542 | }; | ||
543 | |||
544 | void __init lockup_detector_init(void) | 552 | void __init lockup_detector_init(void) |
545 | { | 553 | { |
546 | set_sample_period(); | 554 | set_sample_period(); |
547 | if (smpboot_register_percpu_thread(&watchdog_threads)) { | 555 | |
548 | pr_err("Failed to create watchdog threads, disabled\n"); | 556 | #ifdef CONFIG_NO_HZ_FULL |
549 | watchdog_disabled = -ENODEV; | 557 | if (watchdog_user_enabled) { |
558 | watchdog_user_enabled = 0; | ||
559 | pr_warning("Disabled lockup detectors by default for full dynticks\n"); | ||
560 | pr_warning("You can reactivate it with 'sysctl -w kernel.watchdog=1'\n"); | ||
550 | } | 561 | } |
562 | #endif | ||
563 | |||
564 | if (watchdog_user_enabled) | ||
565 | watchdog_enable_all_cpus(); | ||
551 | } | 566 | } |
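The watchdog.c changes above move thread lifetime from unconditional boot-time registration plus kthread_park()/kthread_unpark() to registering the smpboot per-cpu threads only while the watchdog is enabled. A compact sketch of that on-demand pattern (all names here are placeholders, not the watchdog's own):

        #include <linux/percpu.h>
        #include <linux/smpboot.h>

        static DEFINE_PER_CPU(struct task_struct *, example_task);
        static int example_running;

        static int example_should_run(unsigned int cpu) { return 0; }
        static void example_fn(unsigned int cpu) { }

        static struct smp_hotplug_thread example_threads = {
                .store                  = &example_task,
                .thread_should_run      = example_should_run,
                .thread_fn              = example_fn,
                .thread_comm            = "example/%u",
        };

        static int example_enable(void)
        {
                int err = 0;

                if (!example_running) {
                        err = smpboot_register_percpu_thread(&example_threads);
                        if (!err)
                                example_running = 1;
                }
                return err;     /* caller can restore its old settings on failure */
        }

        static void example_disable(void)
        {
                if (example_running) {
                        example_running = 0;
                        smpboot_unregister_percpu_thread(&example_threads);
                }
        }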
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index f02c4a4a0c3c..0b72e816b8d0 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -4644,7 +4644,7 @@ static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu) | |||
4644 | * Workqueues should be brought up before normal priority CPU notifiers. | 4644 | * Workqueues should be brought up before normal priority CPU notifiers. |
4645 | * This will be registered high priority CPU notifier. | 4645 | * This will be registered high priority CPU notifier. |
4646 | */ | 4646 | */ |
4647 | static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb, | 4647 | static int workqueue_cpu_up_callback(struct notifier_block *nfb, |
4648 | unsigned long action, | 4648 | unsigned long action, |
4649 | void *hcpu) | 4649 | void *hcpu) |
4650 | { | 4650 | { |
@@ -4697,7 +4697,7 @@ static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb, | |||
4697 | * Workqueues should be brought down after normal priority CPU notifiers. | 4697 | * Workqueues should be brought down after normal priority CPU notifiers. |
4698 | * This will be registered as low priority CPU notifier. | 4698 | * This will be registered as low priority CPU notifier. |
4699 | */ | 4699 | */ |
4700 | static int __cpuinit workqueue_cpu_down_callback(struct notifier_block *nfb, | 4700 | static int workqueue_cpu_down_callback(struct notifier_block *nfb, |
4701 | unsigned long action, | 4701 | unsigned long action, |
4702 | void *hcpu) | 4702 | void *hcpu) |
4703 | { | 4703 | { |